48,407
edits
No edit summary |
No edit summary |
||
| Line 55: | Line 55: | ||
local php_trim | local php_trim | ||
local replacement_escape | local replacement_escape | ||
local trim | |||
local u | local u | ||
local ulen | local ulen | ||
| Line 81: | Line 82: | ||
end | end | ||
return pattern, ulen(str), ustring, callable | return pattern, ulen(str), ustring, callable | ||
end | |||
--[==[Returns {nil} if the input value is the empty string, or otherwise the same value. | |||
If the input is a string and `do_trim` is set, the input value will be trimmed before returning; if the trimmed value is the empty string, returns {nil}. | |||
If `quote_delimiters` is set, then any outer pair of quotation marks ({' '} or {" "}) surrounding the rest of the input string will be stripped, if present. The string will not be trimmed again, converted to {nil}, or have further quotation marks stripped, because this option exists as a way to embed spaces or the empty string in an input. Genuine quotation marks may also be embedded this way (e.g. {"''foo''"} returns {"'foo'"}).]==] | |||
function export.is_not_empty(str, do_trim, quote_delimiters) | |||
if str == "" then | |||
return nil | |||
elseif not (str and type(str) == "string") then | |||
return str | |||
elseif do_trim then | |||
str = trim(str) | |||
if str == "" then | |||
return nil | |||
end | |||
end | |||
return quote_delimiters and gsub(str, "^(['\"])(.*)%1$", "%2") or str | |||
end | end | ||
| Line 105: | Line 125: | ||
}, charset_chars) | }, charset_chars) | ||
--[==[Escapes the magic characters used in a [[mw:Extension:Scribunto/Lua reference manual#Patterns|pattern]] (Lua's version of regular expressions): | --[==[Escapes the magic characters used in a [[mw:Extension:Scribunto/Lua reference manual#Patterns|pattern]] (Lua's version of regular expressions): {$%()*+-.?[]^}, and converts the null character to {%z}. For example, {"^$()%.[]*+-?\0"} becomes {"%^%$%(%)%%%.%[%]%*%+%-%?%z"}. This is necessary when constructing a pattern involving arbitrary text (e.g. from user input).]==] | ||
function export.pattern_escape(str) | function export.pattern_escape(str) | ||
return (gsub(str, "[%z$%%()*+%-.?[%]^]", chars)) | return (gsub(str, "[%z$%%()*+%-.?[%]^]", chars)) | ||
| Line 111: | Line 131: | ||
pattern_escape = export.pattern_escape | pattern_escape = export.pattern_escape | ||
--[==[Escapes the magic characters used in [[mw:Extension:Scribunto/Lua reference manual#Patterns|pattern]] character sets: | --[==[Escapes the magic characters used in [[mw:Extension:Scribunto/Lua reference manual#Patterns|pattern]] character sets: {%-]^}, and converts the null character to {%z}.]==] | ||
function export.charset_escape(str) | function export.charset_escape(str) | ||
return (gsub(str, "[%z%%%-%]^]", charset_chars)) | return (gsub(str, "[%z%%%-%]^]", charset_chars)) | ||
| Line 117: | Line 137: | ||
charset_escape = export.charset_escape | charset_escape = export.charset_escape | ||
--[==[Escapes only | --[==[Escapes only {%}, which is the only magic character used in replacement [[mw:Extension:Scribunto/Lua reference manual#Patterns|patterns]] with string.gsub and mw.ustring.gsub.]==] | ||
function export.replacement_escape(str) | function export.replacement_escape(str) | ||
return (gsub(str, "%%", "%%%%")) | return (gsub(str, "%%", "%%%%")) | ||
| Line 505: | Line 525: | ||
end | end | ||
--[==[Parses `charset`, the interior of a string or ustring library character set, and normalizes it into a string or ustring library pattern (e.g. { | --[==[Parses `charset`, the interior of a string or ustring library character set, and normalizes it into a string or ustring library pattern (e.g. {"abcd-g"} becomes {"[abcd-g]"}, and {"[]"} becomes {"[[%]]"}). | ||
The negative (`^`), range (`-`) and literal (`%`) magic characters work as normal, and character classes may be used (e.g. `%d` and `%w`), but opening and closing square brackets are sanitized so that they behave like ordinary characters.]==] | The negative (`^`), range (`-`) and literal (`%`) magic characters work as normal, and character classes may be used (e.g. `%d` and `%w`), but opening and closing square brackets are sanitized so that they behave like ordinary characters.]==] | ||
| Line 926: | Line 946: | ||
return match(str, pattern) | return match(str, pattern) | ||
end | end | ||
trim = export.trim | |||
do | do | ||
| Line 1,048: | Line 1,069: | ||
--[==[Removes any HTML comments from the input text. `stage` can be one of three options: | --[==[Removes any HTML comments from the input text. `stage` can be one of three options: | ||
* { | * {"PRE"} (default) applies the method used by MediaWiki's preprocessor: all {{code|html|<nowiki><!-- ... --></nowiki>}} pairs are removed, as well as any text after an unclosed {{code|html|<nowiki><!--</nowiki>}}. This is generally suitable when parsing raw template or [[mw:Parser extension tags|parser extension tag]] code. (Note, however, that the actual method used by the preprocessor is considerably more complex and differs under certain conditions (e.g. comments inside nowiki tags); if full accuracy is absolutely necessary, use [[Module:template parser]] instead). | ||
* { | * {"POST"} applies the method used to generate the final page output once all templates have been expanded: it loops over the text, removing any {{code|html|<nowiki><!-- ... --></nowiki>}} pairs until no more are found (e.g. {{code|html|<nowiki><!-<!-- ... -->- ... --></nowiki>}} would be fully removed), but any unclosed {{code|html|<nowiki><!--</nowiki>}} is ignored. This is suitable for handling links embedded in template inputs, where the {"PRE"} method will have already been applied by the native parser. | ||
* { | * {"BOTH"} applies {"PRE"} then {"POST"}.]==] | ||
function export.remove_comments(str, stage) | function export.remove_comments(str, stage) | ||
if not stage or stage == "PRE" then | if not stage or stage == "PRE" then | ||
| Line 1,066: | Line 1,087: | ||
end | end | ||
--[==[Lua equivalent of PHP's {{code|php|trim($string)}}, which trims { | --[==[Lua equivalent of PHP's {{code|php|trim($string)}}, which trims {"\0"}, {"\t"}, {"\n"}, {"\v"}, {"\r"} and {" "}. This is useful when dealing with template parameters, since the native parser trims them like this.]==] | ||
function export.php_trim(str) | function export.php_trim(str) | ||
-- A frontier pattern with a greedy quantifier is faster than the algorithms used by export.trim, but can only be used if the character set includes \0, since %z matches the start/end of the string, as well as \0. This is also immune to catastrophic backtracking. | -- A frontier pattern with a greedy quantifier is faster than the algorithms used by export.trim, but can only be used if the character set includes \0, since %z matches the start/end of the string, as well as \0. This is also immune to catastrophic backtracking. | ||
| Line 1,073: | Line 1,094: | ||
php_trim = export.php_trim | php_trim = export.php_trim | ||
--[==[Takes a parameter name as either a string or number, and returns the Scribunto-normalized form (i.e. the key that that parameter would have in a { | --[==[Takes a parameter name as either a string or number, and returns the Scribunto-normalized form (i.e. the key that that parameter would have in a {frame.args} table). For example, {"1"} (a string) is normalized to {1} (a number), {" foo "} is normalized to {"foo"}, and {1.5} (a number) is normalized to {"1.5"} (a string). Inputs which cannot be normalized (e.g. booleans) return {nil}. If the `no_trim` flag is set, string parameters are not trimmed, but strings may still be converted to numbers if they do not contain whitespace; this is necessary when normalizing keys into the form received by PHP during callbacks, before any trimming occurs (e.g. in the table of arguments when calling {frame:expandTemplates()}). | ||
Strings are trimmed with { | Strings are trimmed with {export.php_trim}, unless the `no_trim` flag is set. They are then converted to numbers if '''all''' of the following are true: | ||
# They are integers; i.e. no decimals or leading zeroes (e.g. { | # They are integers; i.e. no decimals or leading zeroes (e.g. {"2"}, but not {"2.0"} or {"02"}). | ||
# They are ≤ 2{{sup|53}} and ≥ -2{{sup|53}}. | # They are ≤ 2{{sup|53}} and ≥ -2{{sup|53}}. | ||
# There is no leading sign unless < 0 (e.g. { | # There is no leading sign unless < 0 (e.g. {"2"} or {"-2"}, but not {"+2"} or {"-0"}). | ||
# They contain no leading or trailing whitespace (which may be present when the `no_trim` flag is set). | # They contain no leading or trailing whitespace (which may be present when the `no_trim` flag is set). | ||
Numbers are converted to strings if '''either''': | Numbers are converted to strings if '''either''': | ||
# They are not integers (e.g. { | # They are not integers (e.g. {1.5}). | ||
# They are > 2{{sup|53}} or < -2{{sup|53}}. | # They are > 2{{sup|53}} or < -2{{sup|53}}. | ||
When converted to strings, integers ≤ 2{{sup|63}} and ≥ -2{{sup|63}} are formatted as integers (i.e. all digits are given), which is the range of PHP's integer precision, though the actual output may be imprecise since Lua numbers can only represent integers exactly from -2{{sup|53}} to 2{{sup|53}}. All other numbers use the standard formatting output by { | When converted to strings, integers ≤ 2{{sup|63}} and ≥ -2{{sup|63}} are formatted as integers (i.e. all digits are given), which is the range of PHP's integer precision, though the actual output may be imprecise since Lua numbers can only represent integers exactly from -2{{sup|53}} to 2{{sup|53}}. All other numbers use the standard formatting output by {tostring()}.]==] | ||
function export.scribunto_param_key(key, no_trim) | function export.scribunto_param_key(key, no_trim) | ||
local tp = type(key) | local tp = type(key) | ||
| Line 1,143: | Line 1,164: | ||
format_fun = export.format_fun | format_fun = export.format_fun | ||
--[==[This function, unlike { | --[==[This function, unlike {string.format} and {mw.ustring.format}, takes just two parameters—a format string and a table—and replaces all instances of { {param_name} } in the format string with the table's entry for {param_name}. The opening and closing brace characters can be escaped with { {\op} } and { {\cl} }, respectively. A table entry beginning with a slash can be escaped by doubling the initial slash. | ||
====Examples==== | ====Examples==== | ||
* { | * {string_utilities.format("{foo} fish, {bar} fish, {baz} fish, {quux} fish", {["foo"]="one", ["bar"]="two", ["baz"]="red", ["quux"]="blue"}) } | ||
*: produces: { | *: produces: {"one fish, two fish, red fish, blue fish"} | ||
* { | * {string_utilities.format("The set {\\op}1, 2, 3{\\cl} contains {\\\\hello} elements.", {["\\hello"]="three"})} | ||
*: produces: { | *: produces: {"The set {1, 2, 3} contains three elements."} | ||
*:* Note that the single and double backslashes should be entered as double and quadruple backslashes when quoted in a literal string.]==] | *:* Note that the single and double backslashes should be entered as double and quadruple backslashes when quoted in a literal string.]==] | ||
function export.format(str, tbl) | function export.format(str, tbl) | ||