Module:string utilities: Difference between revisions

Module:string utilities (view source)

Revision as of 15:59, 24 January 2025

693 bytes added , 24 January 2025

no edit summary

Sware

Bureaucrats, Administrators

48,407

edits

@@ Line 55: / Line 55: @@
 local php_trim
 local replacement_escape
+local trim
 local u
 local ulen
@@ Line 81: / Line 82: @@
 	end
 	return pattern, ulen(str), ustring, callable
+end
+--[==[Returns {nil} if the input value is the empty string, or otherwise the same value.
+If the input is a string and `do_trim` is set, the input value will be trimmed before returning; if the trimmed value is the empty string, returns {nil}.
+If `quote_delimiters` is set, then any outer pair of quotation marks ({' '} or {" "}) surrounding the rest of the input string will be stripped, if present. The string will not be trimmed again, converted to {nil}, or have further quotation marks stripped, as it exists as a way to embed spaces or the empty string in an input. Genuine quotation marks may also be embedded this way (e.g. {"''foo''"} returns {"'foo'"}).]==]
+function export.is_not_empty(str, do_trim, quote_delimiters)
+	if str == "" then
+		return nil
+	elseif not (str and type(str) == "string") then
+		return str
+	elseif do_trim then
+		str = trim(str)
+		if str == "" then
+			return nil
+		end
+	end
+	return quote_delimiters and gsub(str, "^(['\"])(.*)%1$", "%2") or str
 end
@@ Line 105: / Line 125: @@
 	}, charset_chars)
-	--[==[Escapes the magic characters used in a [[mw:Extension:Scribunto/Lua reference manual#Patterns|pattern]] (Lua's version of regular expressions): <code>$%()*+-.?[]^</code>, and converts the null character to <code>%z</code>. For example, {{lua|"^$()%.[]*+-?\0"}} becomes {{lua|"%^%$%(%)%%%.%[%]%*%+%-%?%z"}}. This is necessary when constructing a pattern involving arbitrary text (e.g. from user input).]==]
+	--[==[Escapes the magic characters used in a [[mw:Extension:Scribunto/Lua reference manual#Patterns|pattern]] (Lua's version of regular expressions): {$%()*+-.?[]^}, and converts the null character to {%z}. For example, {"^$()%.[]*+-?\0"} becomes {"%^%$%(%)%%%.%[%]%*%+%-%?%z"}. This is necessary when constructing a pattern involving arbitrary text (e.g. from user input).]==]
 	function export.pattern_escape(str)
 		return (gsub(str, "[%z$%%()*+%-.?[%]^]", chars))
@@ Line 111: / Line 131: @@
 	pattern_escape = export.pattern_escape
-	--[==[Escapes the magic characters used in [[mw:Extension:Scribunto/Lua reference manual#Patterns|pattern]] character sets: <code>%-]^</code>, and converts the null character to <code>%z</code>.]==]
+	--[==[Escapes the magic characters used in [[mw:Extension:Scribunto/Lua reference manual#Patterns|pattern]] character sets: {%-]^}, and converts the null character to {%z}.]==]
 	function export.charset_escape(str)
 		return (gsub(str, "[%z%%%-%]^]", charset_chars))
@@ Line 117: / Line 137: @@
 	charset_escape = export.charset_escape
-	--[==[Escapes only <code>%</code>, which is the only magic character used in replacement [[mw:Extension:Scribunto/Lua reference manual#Patterns|patterns]] with string.gsub and mw.ustring.gsub.]==]
+	--[==[Escapes only {%}, which is the only magic character used in replacement [[mw:Extension:Scribunto/Lua reference manual#Patterns|patterns]] with string.gsub and mw.ustring.gsub.]==]
 	function export.replacement_escape(str)
 		return (gsub(str, "%%", "%%%%"))
@@ Line 505: / Line 525: @@
 end
---[==[Parses `charset`, the interior of a string or ustring library character set, and normalizes it into a string or ustring library pattern (e.g. {{lua|"abcd-g"}} becomes {{lua|"[abcd-g]"}}, and {{lua|"[]"}} becomes {{lua|"[[%]]"}}).
+--[==[Parses `charset`, the interior of a string or ustring library character set, and normalizes it into a string or ustring library pattern (e.g. {"abcd-g"} becomes {"[abcd-g]"}, and {"[]"} becomes {"[[%]]"}).
 The negative (`^`), range (`-`) and literal (`%`) magic characters work as normal, and character classes may be used (e.g. `%d` and `%w`), but opening and closing square brackets are sanitized so that they behave like ordinary characters.]==]
@@ Line 926: / Line 946: @@
 	return match(str, pattern)
 end
+trim = export.trim
 do
@@ Line 1,048: / Line 1,069: @@
 	--[==[Removes any HTML comments from the input text. `stage` can be one of three options:
-	* {{lua|"PRE"}} (default) applies the method used by MediaWiki's preprocessor: all {{code||<nowiki><!-- ... --></nowiki>}} pairs are removed, as well as any text after an unclosed {{code||<nowiki><!--</nowiki>}}. This is generally suitable when parsing raw template or [[mw:Parser extension tags|parser extension tag]] code. (Note, however, that the actual method used by the preprocessor is considerably more complex and differs under certain conditions (e.g. comments inside nowiki tags); if full accuracy is absolutely necessary, use [[Module:template parser]] instead).
+	* {"PRE"} (default) applies the method used by MediaWiki's preprocessor: all {{code|html|<nowiki><!-- ... --></nowiki>}} pairs are removed, as well as any text after an unclosed {{code|html|<nowiki><!--</nowiki>}}. This is generally suitable when parsing raw template or [[mw:Parser extension tags|parser extension tag]] code. (Note, however, that the actual method used by the preprocessor is considerably more complex and differs under certain conditions (e.g. comments inside nowiki tags); if full accuracy is absolutely necessary, use [[Module:template parser]] instead).
-	* {{lua|"POST"}} applies the method used to generate the final page output once all templates have been expanded: it loops over the text, removing any {{code||<nowiki><!-- ... --></nowiki>}} pairs until no more are found (e.g. {{code||<nowiki><!-<!-- ... -->- ... --></nowiki>}} would be fully removed), but any unclosed {{code||<nowiki><!--</nowiki>}} is ignored. This is suitable for handling links embedded in template inputs, where the {{lua|"PRE"}} method will have already been applied by the native parser.
+	* {"POST"} applies the method used to generate the final page output once all templates have been expanded: it loops over the text, removing any {{code|html|<nowiki><!-- ... --></nowiki>}} pairs until no more are found (e.g. {{code|html|<nowiki><!-<!-- ... -->- ... --></nowiki>}} would be fully removed), but any unclosed {{code|html|<nowiki><!--</nowiki>}} is ignored. This is suitable for handling links embedded in template inputs, where the {"PRE"} method will have already been applied by the native parser.
-	* {{lua|"BOTH"}} applies {{lua|"PRE"}} then {{lua|"POST"}}.]==]
+	* {"BOTH"} applies {"PRE"} then {"POST"}.]==]
 	function export.remove_comments(str, stage)
 		if not stage or stage == "PRE" then
@@ Line 1,066: / Line 1,087: @@
 end
---[==[Lua equivalent of PHP's {{code|php|trim($string)}}, which trims {{lua|"\0"}}, {{lua|"\t"}}, {{lua|"\n"}}, {{lua|"\v"}}, {{lua|"\r"}} and {{lua|" "}}. This is useful when dealing with template parameters, since the native parser trims them like this.]==]
+--[==[Lua equivalent of PHP's {{code|php|trim($string)}}, which trims {"\0"}, {"\t"}, {"\n"}, {"\v"}, {"\r"} and {" "}. This is useful when dealing with template parameters, since the native parser trims them like this.]==]
 function export.php_trim(str)
 	-- A frontier pattern with a greedy quantifier is faster than the algorithms used by export.trim, but can be only be used if the character set includes \0, since %z matches the start/end of the string, as well as \0. This is also immune to catastrophic backtracking.
@@ Line 1,073: / Line 1,094: @@
 php_trim = export.php_trim
---[==[Takes a parameter name as either a string or number, and returns the Scribunto-normalized form (i.e. the key that that parameter would have in a {{lua|frame.args}} table). For example, {{lua|"1"}} (a string) is normalized to {{lua|1}} (a number), {{lua|" foo "}} is normalized to {{lua|"foo"}}, and {{lua|1.5}} (a number) is normalized to {{lua|"1.5"}} (a string). Inputs which cannot be normalized (e.g. booleans) return {{lua|nil}}. If the `no_trim` flag is set, string parameters are not trimmed, but strings may still be converted to numbers if they do not contain whitespace; this is necessary when normalizing keys into the form received by PHP during callbacks, before any trimming occurs (e.g. in the table of arguments when calling {{lua|frame:expandTemplates()}}).
+--[==[Takes a parameter name as either a string or number, and returns the Scribunto-normalized form (i.e. the key that that parameter would have in a {frame.args} table). For example, {"1"} (a string) is normalized to {1} (a number), {" foo "} is normalized to {"foo"}, and {1.5} (a number) is normalized to {"1.5"} (a string). Inputs which cannot be normalized (e.g. booleans) return {nil}. If the `no_trim` flag is set, string parameters are not trimmed, but strings may still be converted to numbers if they do not contain whitespace; this is necessary when normalizing keys into the form received by PHP during callbacks, before any trimming occurs (e.g. in the table of arguments when calling {frame:expandTemplates()}).
-Strings are trimmed with {{lua|export.php_trim}}, unless the `no_trim` flag is set. They are then converted to numbers if '''all''' of the following are true:
+Strings are trimmed with {export.php_trim}, unless the `no_trim` flag is set. They are then converted to numbers if '''all''' of the following are true:
-# They are integers; i.e. no decimals or leading zeroes (e.g. {{lua|"2"}}, but not {{lua|"2.0"}} or {{lua|"02"}}).
+# They are integers; i.e. no decimals or leading zeroes (e.g. {"2"}, but not {"2.0"} or {"02"}).
 # They are ≤ 2{{sup|53}} and ≥ -2{{sup|53}}.
-# There is no leading sign unless < 0 (e.g. {{lua|"2"}} or {{lua|"-2"}}, but not {{lua|"+2"}} or {{lua|"-0"}}).
+# There is no leading sign unless < 0 (e.g. {"2"} or {"-2"}, but not {"+2"} or {"-0"}).
 # They contain no leading or trailing whitespace (which may be present when the `no_trim` flag is set).
 Numbers are converted to strings if '''either''':
-# They are not integers (e.g. {{lua|1.5}}).
+# They are not integers (e.g. {1.5}).
 # They are > 2{{sup|53}} or < -2{{sup|53}}.
-When converted to strings, integers ≤ 2{{sup|63}} and ≥ -2{{sup|63}} are formatted as integers (i.e. all digits are given), which is the range of PHP's integer precision, though the actual output may be imprecise since Lua's integer precision is > 2{{sup|53}} to < -2{{sup|53}}. All other numbers use the standard formatting output by {{lua|tostring()}}.]==]
+When converted to strings, integers ≤ 2{{sup|63}} and ≥ -2{{sup|63}} are formatted as integers (i.e. all digits are given), which is the range of PHP's integer precision, though the actual output may be imprecise since Lua's integer precision is > 2{{sup|53}} to < -2{{sup|53}}. All other numbers use the standard formatting output by {tostring()}.]==]
 function export.scribunto_param_key(key, no_trim)
 	local tp = type(key)
@@ Line 1,143: / Line 1,164: @@
 format_fun = export.format_fun
---[==[This function, unlike {{lua|string.format}} and {{lua|mw.ustring.format}}, takes just two parameters—a format string and a table—and replaces all instances of {{lua|{param_name}}} in the format string with the table's entry for {{lua|param_name}}. The opening and closing brace characters can be escaped with <code>{\op}</code> and <code>{\cl}</code>, respectively. A table entry beginning with a slash can be escaped by doubling the initial slash.
+--[==[This function, unlike {string.format} and {mw.ustring.format}, takes just two parameters—a format string and a table—and replaces all instances of { {param_name} } in the format string with the table's entry for {param_name}. The opening and closing brace characters can be escaped with { {\op} } and { {\cl} }, respectively. A table entry beginning with a slash can be escaped by doubling the initial slash.
 ====Examples====
-* {{lua|=string_utilities.format("{foo} fish, {bar} fish, {baz} fish, {quux} fish", {["foo"]="one", ["bar"]="two", ["baz"]="red", ["quux"]="blue"})}}
+* {string_utilities.format("{foo} fish, {bar} fish, {baz} fish, {quux} fish", {["foo"]="one", ["bar"]="two", ["baz"]="red", ["quux"]="blue"}) }
-*: produces: {{lua|"one fish, two fish, red fish, blue fish"}}
+*: produces: {"one fish, two fish, red fish, blue fish"}
-* {{lua|=string_utilities.format("The set {\\op}1, 2, 3{\\cl} contains {\\\\hello} elements.", {["\\hello"]="three"})}}
+* {string_utilities.format("The set {\\op}1, 2, 3{\\cl} contains {\\\\hello} elements.", {["\\hello"]="three"})}
-*: produces: {{lua|"The set {1, 2, 3} contains three elements."}}
+*: produces: {"The set {1, 2, 3} contains three elements."}
 *:* Note that the single and double backslashes should be entered as double and quadruple backslashes when quoted in a literal string.]==]
 function export.format(str, tbl)