48,403
edits
(Created page with "local str = {} -- Cannot include null byte. local UTF8_char = "[\1-\127\194-\244][\128-\191]*" --[[ ulen Counts UTF-8 characters. Faster than mw.ustring.len. Assumes that...") |
No edit summary |
||
| (3 intermediate revisions by the same user not shown) | |||
| Line 1: | Line 1: | ||
local m_string_utils = require("Module:string utilities") | |||
local str = {} | local str = {} | ||
local decode_uri = m_string_utils.decode_uri | |||
local | local gsub = string.gsub | ||
local pattern_escape = m_string_utils.pattern_escape | |||
local process_params = require("Module:parameters").process | |||
local replacement_escape = m_string_utils.replacement_escape | |||
local reverse = string.reverse | |||
local ufind = m_string_utils.find | |||
local ugmatch = m_string_utils.gmatch | |||
local ugsub = m_string_utils.gsub | |||
local ulen = m_string_utils.len | |||
local ulower = m_string_utils.lower | |||
local umatch = m_string_utils.match | |||
local unpack = unpack | |||
local usub = m_string_utils.sub | |||
--[[ | --[[ | ||
| Line 38: | Line 35: | ||
]] | ]] | ||
function str.len(frame)
	-- Fetch the "s" argument through str._getParameters; when it is absent the
	-- empty string is measured instead.
	local params = str._getParameters(frame.args, {"s"})
	local text = params.s or ""
	return ulen(text)
end
return | |||
--[[
len_visible

This function returns the length of the target string, excluding any text
enclosed in < ... >: every run matching the pattern "<[^<>]+>" is removed
before the remaining characters are counted.

Usage: exactly as len, above.
]]
function str.len_visible(frame)
	local s = str._getParameters(frame.args, {"s"}).s or ""
	-- ugsub returns both the rewritten string and the substitution count; keep
	-- only the string so the count is not handed to ulen as a stray second
	-- argument.
	local visible = ugsub(s, "<[^<>]+>", "")
	return ulen(visible)
end
| Line 67: | Line 73: | ||
]] | ]] | ||
function str.sub(frame) | function str.sub(frame) | ||
local new_args = str._getParameters(frame.args, { | local new_args = str._getParameters(frame.args, {"s", "i", "j"}); | ||
local s = new_args[ | local s = new_args["s"] or ""; | ||
local i = tonumber(new_args[ | local i = tonumber(new_args["i"]) or 1; | ||
local j = tonumber(new_args[ | local j = tonumber(new_args["j"]) or -1; | ||
local len = | local len = ulen(s); | ||
-- Convert negatives for range checking | -- Convert negatives for range checking | ||
| Line 82: | Line 88: | ||
end | end | ||
if i > len or j > len | if i > len or j > len then | ||
return str._error('String subset index out of range'); | return str._error('String subset index out of range'); | ||
end | end | ||
| Line 89: | Line 95: | ||
end | end | ||
return | return usub(s, i, j) | ||
end | end | ||
| Line 99: | Line 105: | ||
local i = tonumber(frame.args.i) or 0 | local i = tonumber(frame.args.i) or 0 | ||
local len = tonumber(frame.args.len) | local len = tonumber(frame.args.len) | ||
return | return usub(frame.args.s, i + 1, len and (i + len)) | ||
end | end | ||
| Line 159: | Line 165: | ||
return str._error('Pattern string is empty'); | return str._error('Pattern string is empty'); | ||
end | end | ||
if math.abs(start) < 1 or math.abs(start) > | if math.abs(start) < 1 or math.abs(start) > ulen(s) then | ||
return str._error('Requested start is out of range'); | return str._error('Requested start is out of range'); | ||
end | end | ||
| Line 166: | Line 172: | ||
end | end | ||
if plain_flag then | if plain_flag then | ||
pattern = | pattern = pattern_escape(pattern); | ||
end | end | ||
| Line 172: | Line 178: | ||
if match_index == 1 then | if match_index == 1 then | ||
-- Find first match is simple case | -- Find first match is simple case | ||
result = | result = umatch(s, pattern, start) | ||
else | else | ||
if start > 1 then | if start > 1 then | ||
s = | s = usub(s, start); | ||
end | end | ||
local iterator = | local iterator = ugmatch(s, pattern); | ||
if match_index > 0 then | if match_index > 0 then | ||
-- Forward search | -- Forward search | ||
| Line 242: | Line 248: | ||
local pos = tonumber(new_args['pos']) or 0; | local pos = tonumber(new_args['pos']) or 0; | ||
if pos == 0 or math.abs(pos) > | if pos == 0 or math.abs(pos) > ulen(target_str) then | ||
return str._error('String index out of range'); | return str._error('String index out of range'); | ||
end | end | ||
return | return usub(target_str, pos, pos); | ||
end | end | ||
| Line 272: | Line 278: | ||
end | end | ||
local start = | local start = ufind(source_str, target_str, 1, true) | ||
if start == nil then | if start == nil then | ||
start = -1 | start = -1 | ||
| Line 310: | Line 316: | ||
]] | ]] | ||
function str.find(frame)
	-- Parameters 1 and 2 are required (and may be empty); parameter 3 is an
	-- optional number and parameter 4 an optional boolean. All results of
	-- ufind are returned to the caller.
	local main_param = {required = true, allow_empty = true}
	local args = process_params(frame.args, {
		[1] = main_param,
		[2] = main_param,
		[3] = {type = "number"},
		[4] = {type = "boolean"},
	})
	-- Unpack with explicit bounds: when parameter 3 is omitted but parameter 4
	-- is given, the table has a hole, and a bare unpack() relies on the length
	-- operator, whose result is unspecified for tables with holes. That could
	-- silently drop the fourth argument.
	return ufind(unpack(args, 1, 4))
end
| Line 356: | Line 359: | ||
if plain then | if plain then | ||
pattern = | pattern = pattern_escape(pattern); | ||
replace = | replace = replacement_escape(replace); | ||
end | end | ||
local result; | local result; | ||
result = | result = ugsub(source_str, pattern, replace, count); | ||
return result; | return result; | ||
| Line 369: | Line 372: | ||
function str.gsub(frame)
	-- Parameters 1-3 are required, may be empty and are not trimmed; parameter
	-- 4 is an optional number.
	local text_param = {required = true, allow_empty = true, no_trim = true}
	local spec = {
		[1] = text_param,
		[2] = text_param,
		[3] = text_param,
		[4] = {type = "number"},
	}
	local args = process_params(frame.args, spec)
	-- Assigning to a single local keeps only the first result of ugsub (the
	-- rewritten string) and discards the substitution count.
	local result = ugsub(unpack(args))
	return result
end
| Line 395: | Line 395: | ||
function str.lower(frame)
	-- Lowercase the first positional argument; a missing argument is treated
	-- as the empty string.
	local text = frame.args[1] or ""
	return ulower(text)
end
-- Short alias for str.lower.
str.lc = str.lower
--[[
format

This function formats a string according to a template. It is a direct
interface onto string.format() in Lua, and works like the C printf() function.

For example:
	{{#invoke:string|format|page_%04d.html|65}}
will produce the result
	page_0065.html

Parameters
	1: The format template. See https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#string.format
	2, 3, ...: Arguments to be inserted into the template.

Note that leading and trailing whitespace is not removed from the arguments.
]]
function str.format(frame)
	local template = frame.args[1]
	-- frame.args isn't really a table and doesn't support the # operator, so
	-- unpack() can't be applied to it directly. Copy the positional arguments
	-- from 2 upwards into a plain sequence, stopping at the first missing one.
	local values = {}
	local count = 0
	local value = frame.args[2]
	while value do
		count = count + 1
		values[count] = value
		value = frame.args[count + 2]
	end
	return template:format(unpack(values))
end
--[[ | --[[ | ||
| Line 465: | Line 497: | ||
end | end | ||
return boolean_value | return boolean_value | ||
end | end | ||
| Line 509: | Line 509: | ||
if plain then | if plain then | ||
pattern = | pattern = pattern_escape(pattern) | ||
end | end | ||
local _, count = | local _, count = ugsub(text, pattern, "") | ||
return count | return count | ||
end | end | ||
| Line 574: | Line 531: | ||
} | } | ||
local args = | local args = process_params(frame.args, params) | ||
text = args[1] | text = args[1] | ||
| Line 589: | Line 546: | ||
local matches = {} | local matches = {} | ||
local i = 0 | local i = 0 | ||
for match in | for match in ugmatch(text, pattern) do | ||
i = i + 1 | i = i + 1 | ||
matches[i] = match | matches[i] = match | ||
| Line 642: | Line 599: | ||
pattern = "(" .. pattern .. ")" | pattern = "(" .. pattern .. ")" | ||
end | end | ||
local find = use_basic_Lua_function and string.find or | local find = use_basic_Lua_function and string.find or ufind | ||
return function() | return function() | ||
i = i + 1 | i = i + 1 | ||
| Line 657: | Line 614: | ||
end | end | ||
function str.URIdecode(frame)
	-- Decode the first argument with decode_uri; the second argument selects
	-- the decoding mode and falls back to "PATH" when absent.
	local args = frame.args
	local text = args[1]
	local mode = args[2] or "PATH"
	return decode_uri(text, mode)
end
function str:__index(k)
	-- Lookup hook for keys missing from the table: ask the second package
	-- loader for "Module:string/<k>", run the loader function it returns, and
	-- store the result under k so later lookups find it directly.
	-- NOTE(review): presumably package.loaders[2] is the wiki module loader in
	-- this environment -- confirm.
	local load_submodule = package.loaders[2]("Module:string/" .. k)
	self[k] = load_submodule()
	return self[k]
end
return str | return setmetatable(str, str) | ||