45,322
edits
(Created page with "local str = {} -- Cannot include null byte. local UTF8_char = "[\1-\127\194-\244][\128-\191]*" --[[ ulen Counts UTF-8 characters. Faster than mw.ustring.len. Assumes that...") |
No edit summary |
||
Line 1: | Line 1: | ||
--[[ | |||
This module is intended to provide access to basic string functions. | |||
Most of the functions provided here can be invoked with named parameters, | |||
unnamed parameters, or a mixture. If named parameters are used, Mediawiki will | |||
automatically remove any leading or trailing whitespace from the parameter. | |||
Depending on the intended use, it may be advantageous to either preserve or | |||
remove such whitespace. | |||
Global options | |||
ignore_errors: If set to 'true' or 1, any error condition will result in | |||
an empty string being returned rather than an error message. | |||
error_category: If an error occurs, specifies the name of a category to | |||
include with the error message. The default category is | |||
[Category:Errors reported by Module String]. | |||
no_category: If set to 'true' or 1, no category will be added if an error | |||
is generated. | |||
Unit tests for this module are available at Module:String/tests. | |||
]] | |||
local str = {} | |||
--[[ | --[[ | ||
Line 27: | Line 31: | ||
Usage: | Usage: | ||
{{#invoke: | {{#invoke:String|len|target_string|}} | ||
OR | OR | ||
{{#invoke: | {{#invoke:String|len|s=target_string}} | ||
Parameters | Parameters | ||
Line 37: | Line 41: | ||
trailing whitespace from the target string. | trailing whitespace from the target string. | ||
]] | ]] | ||
function str.len(frame) | function str.len( frame ) | ||
local new_args = str._getParameters(frame.args, { 's' }); | local new_args = str._getParameters( frame.args, {'s'} ); | ||
local s = new_args['s'] or ''; | local s = new_args['s'] or ''; | ||
return mw.ustring.len(s) | return mw.ustring.len( s ) | ||
end | end | ||
Line 49: | Line 53: | ||
Usage: | Usage: | ||
{{#invoke: | {{#invoke:String|sub|target_string|start_index|end_index}} | ||
OR | OR | ||
{{#invoke: | {{#invoke:String|sub|s=target_string|i=start_index|j=end_index}} | ||
Parameters | Parameters | ||
Line 57: | Line 61: | ||
i: The first index of the substring to return, defaults to 1. | i: The first index of the substring to return, defaults to 1. | ||
j: The last index of the string to return, defaults to the last character. | j: The last index of the string to return, defaults to the last character. | ||
The first character of the string is assigned an index of 1. If either i or j | The first character of the string is assigned an index of 1. If either i or j | ||
is a negative value, it is interpreted the same as selecting a character by | is a negative value, it is interpreted the same as selecting a character by | ||
Line 66: | Line 70: | ||
reported. | reported. | ||
]] | ]] | ||
function str.sub(frame) | function str.sub( frame ) | ||
local new_args = str._getParameters(frame.args, { 's', 'i', 'j' }); | local new_args = str._getParameters( frame.args, { 's', 'i', 'j' } ); | ||
local s = new_args['s'] or ''; | local s = new_args['s'] or ''; | ||
local i = tonumber(new_args['i']) or 1; | local i = tonumber( new_args['i'] ) or 1; | ||
local j = tonumber(new_args['j']) or -1; | local j = tonumber( new_args['j'] ) or -1; | ||
local len = mw.ustring.len(s); | local len = mw.ustring.len( s ); | ||
-- Convert negatives for range checking | -- Convert negatives for range checking | ||
if i < 0 then | if i < 0 then | ||
Line 81: | Line 85: | ||
j = len + j + 1; | j = len + j + 1; | ||
end | end | ||
if i > len or j > len or i < 1 or j < 1 then | if i > len or j > len or i < 1 or j < 1 then | ||
return str._error('String subset index out of range'); | return str._error( 'String subset index out of range' ); | ||
end | end | ||
if j < i then | if j < i then | ||
return str._error('String subset indices out of order'); | return str._error( 'String subset indices out of order' ); | ||
end | end | ||
return mw.ustring.sub(s, i, j) | return mw.ustring.sub( s, i, j ) | ||
end | end | ||
Line 96: | Line 100: | ||
to maintain these older templates. | to maintain these older templates. | ||
]] | ]] | ||
function str.sublength(frame) | function str.sublength( frame ) | ||
local i = tonumber(frame.args.i) or 0 | local i = tonumber( frame.args.i ) or 0 | ||
local len = tonumber(frame.args.len) | local len = tonumber( frame.args.len ) | ||
return mw.ustring.sub(frame.args.s, i + 1, len and (i + len)) | return mw.ustring.sub( frame.args.s, i + 1, len and ( i + len ) ) | ||
end | end | ||
Line 109: | Line 113: | ||
Usage: | Usage: | ||
{{#invoke: | {{#invoke:String|match|source_string|pattern_string|start_index|match_number|plain_flag|nomatch}} | ||
OR | OR | ||
{{#invoke: | {{#invoke:String|match|s=source_string|pattern=pattern_string|start=start_index | ||
|match=match_number|plain=plain_flag|nomatch=nomatch_output}} | |match=match_number|plain=plain_flag|nomatch=nomatch_output}} | ||
Line 139: | Line 143: | ||
For information on constructing Lua patterns, a form of [regular expression], see: | For information on constructing Lua patterns, a form of [regular expression], see: | ||
* | * https://www.lua.org/manual/5.1/manual.html#5.4.1 | ||
* | * https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Patterns | ||
* | * https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Ustring_patterns | ||
]] | ]] | ||
function str. | -- This sub-routine is exported for use in other modules | ||
function str._match( s, pattern, start, match_index, plain, nomatch ) | |||
if s == '' then | if s == '' then | ||
return str._error('Target string is empty'); | return str._error( 'Target string is empty' ); | ||
end | end | ||
if pattern == '' then | if pattern == '' then | ||
return str._error('Pattern string is empty'); | return str._error( 'Pattern string is empty' ); | ||
end | end | ||
if math.abs(start) < 1 or math.abs(start) > mw.ustring.len(s) then | start = tonumber(start) or 1 | ||
return str._error('Requested start is out of range'); | if math.abs(start) < 1 or math.abs(start) > mw.ustring.len( s ) then | ||
return str._error( 'Requested start is out of range' ); | |||
end | end | ||
if match_index == 0 then | if match_index == 0 then | ||
return str._error('Match index is out of range'); | return str._error( 'Match index is out of range' ); | ||
end | end | ||
if plain_flag then | if plain_flag then | ||
pattern = str. | pattern = str._escapePattern( pattern ); | ||
end | end | ||
local result | local result | ||
if match_index == 1 then | if match_index == 1 then | ||
-- Find first match is simple case | -- Find first match is simple case | ||
result = mw.ustring.match(s, pattern, start) | result = mw.ustring.match( s, pattern, start ) | ||
else | else | ||
if start > 1 then | if start > 1 then | ||
s = mw.ustring.sub(s, start); | s = mw.ustring.sub( s, start ); | ||
end | end | ||
local iterator = mw.ustring.gmatch(s, pattern); | local iterator = mw.ustring.gmatch(s, pattern); | ||
if match_index > 0 then | if match_index > 0 then | ||
Line 196: | Line 194: | ||
count = count + 1; | count = count + 1; | ||
end | end | ||
result = result_table[count + match_index]; | result = result_table[ count + match_index ]; | ||
end | end | ||
end | end | ||
if result == nil then | if result == nil then | ||
if nomatch == nil then | if nomatch == nil then | ||
return str._error('Match not found'); | return str._error( 'Match not found' ); | ||
else | else | ||
return nomatch; | return nomatch; | ||
Line 210: | Line 208: | ||
return result; | return result; | ||
end | end | ||
end | |||
-- This is the entry point for #invoke:String|match | |||
function str.match( frame ) | |||
local new_args = str._getParameters( frame.args, {'s', 'pattern', 'start', 'match', 'plain', 'nomatch'} ); | |||
local s = new_args['s'] or ''; | |||
local start = tonumber( new_args['start'] ) or 1; | |||
local plain_flag = str._getBoolean( new_args['plain'] or false ); | |||
local pattern = new_args['pattern'] or ''; | |||
local match_index = math.floor( tonumber(new_args['match']) or 1 ); | |||
local nomatch = new_args['nomatch']; | |||
return str._match( s, pattern, start, match_index, plain, nomatch ) | |||
end | end | ||
Line 218: | Line 228: | ||
Usage: | Usage: | ||
{{#invoke: | {{#invoke:String|pos|target_string|index_value}} | ||
OR | OR | ||
{{#invoke: | {{#invoke:String|pos|target=target_string|pos=index_value}} | ||
Parameters | Parameters | ||
Line 237: | Line 247: | ||
A requested value of zero, or a value greater than the length of the string returns an error. | A requested value of zero, or a value greater than the length of the string returns an error. | ||
]] | ]] | ||
function str.pos(frame) | function str.pos( frame ) | ||
local new_args = str._getParameters(frame.args, { 'target', 'pos' }); | local new_args = str._getParameters( frame.args, {'target', 'pos'} ); | ||
local target_str = new_args['target'] or ''; | local target_str = new_args['target'] or ''; | ||
local pos = tonumber(new_args['pos']) or 0; | local pos = tonumber( new_args['pos'] ) or 0; | ||
if pos == 0 or math.abs(pos) > mw.ustring.len(target_str) then | if pos == 0 or math.abs(pos) > mw.ustring.len( target_str ) then | ||
return str._error('String index out of range'); | return str._error( 'String index out of range' ); | ||
end | end | ||
return mw.ustring.sub(target_str, pos, pos); | return mw.ustring.sub( target_str, pos, pos ); | ||
end | end | ||
Line 263: | Line 273: | ||
separately. | separately. | ||
]] | ]] | ||
function str.str_find(frame) | function str.str_find( frame ) | ||
local new_args = str._getParameters(frame.args, { 'source', 'target' }); | local new_args = str._getParameters( frame.args, {'source', 'target'} ); | ||
local source_str = new_args['source'] or ''; | local source_str = new_args['source'] or ''; | ||
local target_str = new_args['target'] or ''; | local target_str = new_args['target'] or ''; | ||
if target_str == '' then | if target_str == '' then | ||
return 1; | return 1; | ||
end | end | ||
local start = mw.ustring.find(source_str, target_str, 1, true) | local start = mw.ustring.find( source_str, target_str, 1, true ) | ||
if start == nil then | if start == nil then | ||
start = -1 | start = -1 | ||
end | end | ||
return start | return start | ||
end | end | ||
Line 287: | Line 297: | ||
Usage: | Usage: | ||
{{#invoke: | {{#invoke:String|find|source_str|target_string|start_index|plain_flag}} | ||
OR | OR | ||
{{#invoke: | {{#invoke:String|find|source=source_str|target=target_str|start=start_index|plain=plain_flag}} | ||
Parameters | Parameters | ||
Line 304: | Line 314: | ||
This function returns the first index >= "start" where "target" can be found | This function returns the first index >= "start" where "target" can be found | ||
within "source". Indices are 1-based. If "target" is not found, then this | within "source". Indices are 1-based. If "target" is not found, then this | ||
function returns | function returns 0. If either "source" or "target" is missing or empty, this | ||
function also returns | function also returns 0. | ||
This function should be safe for UTF-8 strings. | This function should be safe for UTF-8 strings. | ||
]] | ]] | ||
function str.find(frame) | function str.find( frame ) | ||
local | local new_args = str._getParameters( frame.args, {'source', 'target', 'start', 'plain' } ); | ||
local source_str = new_args['source'] or ''; | |||
local pattern = new_args['target'] or ''; | |||
local start_pos = tonumber(new_args['start']) or 1; | |||
local plain = new_args['plain'] or true; | |||
if source_str == '' or pattern == '' then | |||
return 0; | |||
end | |||
plain = str._getBoolean( plain ); | |||
local start = mw.ustring.find( source_str, pattern, start_pos, plain ) | |||
if start == nil then | |||
start = 0 | |||
end | |||
return start | |||
end | end | ||
Line 329: | Line 347: | ||
Usage: | Usage: | ||
{{#invoke: | {{#invoke:String|replace|source_str|pattern_string|replace_string|replacement_count|plain_flag}} | ||
OR | OR | ||
{{#invoke: | {{#invoke:String|replace|source=source_string|pattern=pattern_string|replace=replace_string| | ||
count=replacement_count|plain=plain_flag}} | count=replacement_count|plain=plain_flag}} | ||
Line 342: | Line 360: | ||
text and not as a Lua style regular expression, defaults to true | text and not as a Lua style regular expression, defaults to true | ||
]] | ]] | ||
function str.replace(frame) | function str.replace( frame ) | ||
local new_args = str._getParameters(frame.args, { 'source', 'pattern', 'replace', 'count', 'plain' }); | local new_args = str._getParameters( frame.args, {'source', 'pattern', 'replace', 'count', 'plain' } ); | ||
local source_str = new_args['source'] or ''; | local source_str = new_args['source'] or ''; | ||
local pattern = new_args['pattern'] or ''; | local pattern = new_args['pattern'] or ''; | ||
local replace = new_args['replace'] or ''; | local replace = new_args['replace'] or ''; | ||
local count = tonumber(new_args['count']); | local count = tonumber( new_args['count'] ); | ||
local plain = new_args['plain'] or true; | local plain = new_args['plain'] or true; | ||
if source_str == '' or pattern == '' then | if source_str == '' or pattern == '' then | ||
return source_str; | return source_str; | ||
end | end | ||
plain = str._getBoolean(plain); | plain = str._getBoolean( plain ); | ||
if plain then | if plain then | ||
pattern = str. | pattern = str._escapePattern( pattern ); | ||
replace = mw.ustring.gsub(replace, "%%", "%%%%"); --Only need to escape replacement sequences. | replace = mw.ustring.gsub( replace, "%%", "%%%%" ); --Only need to escape replacement sequences. | ||
end | end | ||
local result; | local result; | ||
if count ~= nil then | |||
result = mw.ustring.gsub( source_str, pattern, replace, count ); | |||
else | |||
result = mw.ustring.gsub( source_str, pattern, replace ); | |||
end | |||
return result; | |||
return | |||
end | end | ||
--[[ | --[[ | ||
Line 386: | Line 393: | ||
]] | ]] | ||
function str.rep(frame) | function str.rep( frame ) | ||
local repetitions = tonumber(frame.args[2]) | local repetitions = tonumber( frame.args[2] ) | ||
if not repetitions then | if not repetitions then | ||
return str._error('function rep expects a number as second parameter, received "' .. (frame.args[2] or '') .. '"') | return str._error( 'function rep expects a number as second parameter, received "' .. ( frame.args[2] or '' ) .. '"' ) | ||
end | end | ||
return string.rep(frame.args[1] or '', repetitions) | return string.rep( frame.args[1] or '', repetitions ) | ||
end | end | ||
function str. | --[[ | ||
local | escapePattern | ||
This function escapes special characters from a Lua string pattern. See [1] | |||
for details on how patterns work. | |||
[1] https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#Patterns | |||
Usage: | |||
{{#invoke:String|escapePattern|pattern_string}} | |||
Parameters | |||
pattern_string: The pattern string to escape. | |||
]] | |||
function str.escapePattern( frame ) | |||
local pattern_str = frame.args[1] | |||
if not pattern_str then | |||
return str._error( 'No pattern string specified' ); | |||
end | |||
local result = str._escapePattern( pattern_str ) | |||
return result | |||
end | end | ||
--[[ | --[[ | ||
Line 407: | Line 430: | ||
we sometimes want to either preserve or remove that whitespace depending on the application. | we sometimes want to either preserve or remove that whitespace depending on the application. | ||
]] | ]] | ||
function str._getParameters(frame_args, arg_list) | function str._getParameters( frame_args, arg_list ) | ||
local new_args = {}; | local new_args = {}; | ||
local index = 1; | local index = 1; | ||
local value; | local value; | ||
for | for i,arg in ipairs( arg_list ) do | ||
value = frame_args[arg] | value = frame_args[arg] | ||
if value == nil then | if value == nil then | ||
Line 420: | Line 443: | ||
new_args[arg] = value; | new_args[arg] = value; | ||
end | end | ||
return new_args; | return new_args; | ||
end | end | ||
Line 427: | Line 450: | ||
Helper function to handle error messages. | Helper function to handle error messages. | ||
]] | ]] | ||
function str._error(error_str) | function str._error( error_str ) | ||
local frame = mw.getCurrentFrame(); | local frame = mw.getCurrentFrame(); | ||
local error_category = frame.args.error_category or 'Errors reported by Module String'; | local error_category = frame.args.error_category or 'Errors reported by Module String'; | ||
local ignore_errors = frame.args.ignore_errors or false; | local ignore_errors = frame.args.ignore_errors or false; | ||
local no_category = frame.args.no_category or false; | local no_category = frame.args.no_category or false; | ||
if str._getBoolean(ignore_errors) then | if str._getBoolean(ignore_errors) then | ||
return ''; | return ''; | ||
end | end | ||
local error_str = '<strong class="error">String Module Error: ' .. error_str .. '</strong>'; | local error_str = '<strong class="error">String Module Error: ' .. error_str .. '</strong>'; | ||
if error_category ~= '' and not str._getBoolean(no_category) then | if error_category ~= '' and not str._getBoolean( no_category ) then | ||
error_str = '[[Category:' .. error_category .. ']]' .. error_str; | error_str = '[[Category:' .. error_category .. ']]' .. error_str; | ||
end | end | ||
return error_str; | return error_str; | ||
end | end | ||
Line 448: | Line 471: | ||
Helper Function to interpret boolean strings | Helper Function to interpret boolean strings | ||
]] | ]] | ||
function str._getBoolean(boolean_str) | function str._getBoolean( boolean_str ) | ||
local boolean_value; | local boolean_value; | ||
if type(boolean_str) == 'string' then | if type( boolean_str ) == 'string' then | ||
boolean_str = boolean_str:lower(); | boolean_str = boolean_str:lower(); | ||
if boolean_str == 'false' or boolean_str == 'no' or boolean_str == '0' | if boolean_str == 'false' or boolean_str == 'no' or boolean_str == '0' | ||
Line 459: | Line 482: | ||
boolean_value = true; | boolean_value = true; | ||
end | end | ||
elseif type(boolean_str) == 'boolean' then | elseif type( boolean_str ) == 'boolean' then | ||
boolean_value = boolean_str; | boolean_value = boolean_str; | ||
else | else | ||
error('No boolean value found'); | error( 'No boolean value found' ); | ||
end | end | ||
return boolean_value | return boolean_value | ||
Line 468: | Line 491: | ||
--[[ | --[[ | ||
Helper function that escapes all pattern characters | Helper function that escapes all pattern characters so that they will be treated | ||
as plain text. | as plain text. | ||
]] | ]] | ||
function str. | function str._escapePattern( pattern_str ) | ||
return mw.ustring.gsub( pattern_str, "([%(%)%.%%%+%-%*%?%[%^%$%]])", "%%%1" ); | |||
end | end | ||
return str | return str |