Module:string: Difference between revisions

1,329 bytes removed, 22 January 2025
no edit summary
(Created page with "local str = {} -- Cannot include null byte. local UTF8_char = "[\1-\127\194-\244][\128-\191]*" --[[ ulen Counts UTF-8 characters. Faster than mw.ustring.len. Assumes that...")
 
No edit summary
 
(3 intermediate revisions by the same user not shown)
Line 1: Line 1:
local m_string_utils = require("Module:string utilities")
local str = {}
local str = {}


-- Cannot include null byte.
local decode_uri = m_string_utils.decode_uri
local UTF8_char = "[\1-\127\194-\244][\128-\191]*"
local gsub = string.gsub
 
local pattern_escape = m_string_utils.pattern_escape
--[[
local process_params = require("Module:parameters").process
ulen
local replacement_escape = m_string_utils.replacement_escape
 
local reverse = string.reverse
Counts UTF-8 characters. Faster than mw.ustring.len.
local ufind = m_string_utils.find
 
local ugmatch = m_string_utils.gmatch
Assumes that the encoding is correct. Unlike mw.ustring.len, does not return nil
local ugsub = m_string_utils.gsub
if encoding is invalid.
local ulen = m_string_utils.len
 
local ulower = m_string_utils.lower
Does not count the bytes 192, 193, and 245-255. They are not used in UTF-8 and
local umatch = m_string_utils.match
will not occur if the string is valid. They are replaced with the replacement
local unpack = unpack
character (U+FFFD) on MediaWiki pages.
local usub = m_string_utils.sub
--]]
function str.ulen(text)
	-- string.gsub returns the rewritten string followed by the number of
	-- matches; only the match count of UTF8_char is wanted here, so pick the
	-- second result and truncate to exactly one return value.
	return (select(2, string.gsub(text, UTF8_char, "")))
end


--[[
--[[
Line 38: Line 35:
]]
]]
function str.len(frame)
function str.len(frame)
local new_args = str._getParameters(frame.args, { 's' });
return ulen(str._getParameters(frame.args, {"s"}).s or "")
local s = new_args['s'] or '';
end
return mw.ustring.len(s)
 
--[[
len_visible
 
This function returns the length of the target string, excluding the text encompassed in < ... >
 
Usage: exactly as len, above.
]]
function str.len_visible(frame)
return ulen(ugsub(str._getParameters(frame.args, {"s"}).s or "", "<[^<>]+>", ""))
end
end


Line 67: Line 73:
]]
]]
function str.sub(frame)
function str.sub(frame)
local new_args = str._getParameters(frame.args, { 's', 'i', 'j' });
local new_args = str._getParameters(frame.args, {"s", "i", "j"});
local s = new_args['s'] or '';
local s = new_args["s"] or "";
local i = tonumber(new_args['i']) or 1;
local i = tonumber(new_args["i"]) or 1;
local j = tonumber(new_args['j']) or -1;
local j = tonumber(new_args["j"]) or -1;
local len = mw.ustring.len(s);
local len = ulen(s);
-- Convert negatives for range checking
-- Convert negatives for range checking
Line 82: Line 88:
end
end
if i > len or j > len or i < 1 or j < 1 then
if i > len or j > len then
return str._error('String subset index out of range');
return str._error('String subset index out of range');
end
end
Line 89: Line 95:
end
end
return mw.ustring.sub(s, i, j)
return usub(s, i, j)
end
end


Line 99: Line 105:
local i = tonumber(frame.args.i) or 0
local i = tonumber(frame.args.i) or 0
local len = tonumber(frame.args.len)
local len = tonumber(frame.args.len)
return mw.ustring.sub(frame.args.s, i + 1, len and (i + len))
return usub(frame.args.s, i + 1, len and (i + len))
end
end


Line 159: Line 165:
return str._error('Pattern string is empty');
return str._error('Pattern string is empty');
end
end
if math.abs(start) < 1 or math.abs(start) > mw.ustring.len(s) then
if math.abs(start) < 1 or math.abs(start) > ulen(s) then
return str._error('Requested start is out of range');
return str._error('Requested start is out of range');
end
end
Line 166: Line 172:
end
end
if plain_flag then
if plain_flag then
pattern = str.pattern_escape(pattern);
pattern = pattern_escape(pattern);
end
end
Line 172: Line 178:
if match_index == 1 then
if match_index == 1 then
-- Find first match is simple case
-- Find first match is simple case
result = mw.ustring.match(s, pattern, start)
result = umatch(s, pattern, start)
else
else
if start > 1 then
if start > 1 then
s = mw.ustring.sub(s, start);
s = usub(s, start);
end
end
local iterator = mw.ustring.gmatch(s, pattern);
local iterator = ugmatch(s, pattern);
if match_index > 0 then
if match_index > 0 then
-- Forward search
-- Forward search
Line 242: Line 248:
local pos = tonumber(new_args['pos']) or 0;
local pos = tonumber(new_args['pos']) or 0;
if pos == 0 or math.abs(pos) > mw.ustring.len(target_str) then
if pos == 0 or math.abs(pos) > ulen(target_str) then
return str._error('String index out of range');
return str._error('String index out of range');
end
end
return mw.ustring.sub(target_str, pos, pos);
return usub(target_str, pos, pos);
end
end


Line 272: Line 278:
end
end
local start = mw.ustring.find(source_str, target_str, 1, true)
local start = ufind(source_str, target_str, 1, true)
if start == nil then
if start == nil then
start = -1
start = -1
Line 310: Line 316:
]]
]]
function str.find(frame)
function str.find(frame)
local params = {
local main_param = {required = true, allow_empty = true}
[1] = { required = true },
return ufind(unpack(process_params(frame.args, {
[2] = { required = true },
[1] = main_param,
[3] = { type = "number" },
[2] = main_param,
[4] = { type = "boolean" },
[3] = {type = "number"},
}
[4] = {type = "boolean"},
})))
local args = require("Module:parameters").process(frame.args, params)
return mw.ustring.find(args[1], args[2], args[3], args[4])
end
end


Line 356: Line 359:
if plain then
if plain then
pattern = str.pattern_escape(pattern);
pattern = pattern_escape(pattern);
replace = mw.ustring.gsub(replace, "%%", "%%%%"); --Only need to escape replacement sequences.
replace = replacement_escape(replace);
end
end
local result;
local result;
result = mw.ustring.gsub(source_str, pattern, replace, count);
result = ugsub(source_str, pattern, replace, count);
return result;
return result;
Line 369: Line 372:


function str.gsub(frame)
function str.gsub(frame)
local params = {
local main_param = {required = true, allow_empty = true, no_trim = true}
[1] = { required = true, allow_empty = true, allow_whitespace = true},
return (ugsub(unpack(process_params(frame.args, {
[2] = { required = true, allow_empty = true, allow_whitespace = true},
[1] = main_param,
[3] = { required = true, allow_empty = true, allow_whitespace = true},
[2] = main_param,
[4] = { type = "number" },
[3] = main_param,
}
[4] = {type = "number"},
}))))
local args = require("Module:parameters").process(frame.args, params)
return (mw.ustring.gsub(args[1], args[2], args[3], args[4]))
end
end


Line 395: Line 395:


function str.lower(frame)
function str.lower(frame)
local text = frame.args[1] or ''
return ulower(frame.args[1] or "")
return mw.ustring.lower(text)
end
end
str.lc = str.lower
--[[
format
This function allows one to format strings according to a template. This is a direct interface onto
str.format() in Lua, and works like the C printf() function.
For example:
{{#invoke:string|format|page_%04d.html|65}}
will produce the result
page_0065.html
Parameters
    1: The format template. See https://www.mediawiki.org/wiki/Extension:Scribunto/Lua_reference_manual#string.format
    2, 3, ...: Arguments to be inserted into the template.


str.lc = str.lower
Note that leading and trailing whitespace is not removed from the arguments.
]]
function str.format(frame)
	-- Positional argument 1 is the format template.
	local fmt = frame.args[1]
	-- frame.args is not a real table and does not support the # operator, so
	-- unpack() cannot be applied to it directly. Copy positional arguments
	-- 2, 3, ... into an ordinary sequence, stopping at the first missing one.
	local values = {}
	local index = 2
	local value = frame.args[index]
	while value do
		values[#values + 1] = value
		index = index + 1
		value = frame.args[index]
	end
	return fmt:format(unpack(values))
end


--[[
--[[
Line 465: Line 497:
end
end
return boolean_value
return boolean_value
end
-- Escapes the Lua pattern magic characters ( ) . % + - * ? [ ^ $ and the
-- closing square bracket by prefixing each with "%", so that the text is
-- matched literally when used as a pattern.
--
-- Accepts either a string/number (direct call from Lua) or a frame object
-- (detected by the presence of an `args` field). For a frame, the text is
-- taken from frame.args[1], falling back to the parent frame's args[1], and
-- only the escaped string is returned. For a direct call, the results of
-- mw.ustring.gsub are returned as-is (escaped string plus match count).
function str.pattern_escape(pattern_str)
	local magic_chars = "([%(%)%.%%%+%-%*%?%[%^%$%]])"
	local arg_type = type(pattern_str)

	-- Direct call: pass every gsub result straight through.
	if arg_type == "string" or arg_type == "number" then
		return mw.ustring.gsub(pattern_str, magic_chars, "%%%1");
	end

	if arg_type ~= "table" then
		error("First argument to pattern_escape should be a string or a number.")
	end
	if not pattern_str.args then
		error("First argument to pattern_escape should be a string, a number, or a frame object.")
	end

	-- Invoked with a frame: prefer its own first argument, else the parent's.
	local frame = pattern_str
	local source = frame.args[1] or frame:getParent().args[1]
	-- Bind to a local so that only the escaped string is returned.
	local escaped = mw.ustring.gsub(source, magic_chars, "%%%1")
	return escaped
end
end


Line 509: Line 509:
if plain then
if plain then
pattern = str.pattern_escape(pattern)
pattern = pattern_escape(pattern)
end
end
local _, count = mw.ustring.gsub(text, pattern, "")
local _, count = ugsub(text, pattern, "")
return count
return count
end
-- Replaces occurrences of a literal (non-pattern) string in `text`.
--
-- Direct call: plain_gsub(text, pattern, replacement), where `pattern` and
-- `replacement` must be strings or numbers; all results of mw.ustring.gsub
-- are returned. If `text` is a table it must be a frame object (it must have
-- an `args` field); the three values are then read from positional
-- parameters 1-3 via Module:parameters and only the resulting string is
-- returned. The pattern is escaped with str.pattern_escape; the replacement
-- is passed to gsub unchanged.
function str.plain_gsub(text, pattern, replacement)
	local invoked = type(text) == "table"

	if invoked then
		if not text.args then
			error("If the first argument to plain_gsub is a table, it should be a frame object.")
		end
		local args = require("Module:parameters").process(text.args, {
			[1] = {},
			[2] = {},
			[3] = { allow_empty = true },
		})
		text, pattern, replacement = args[1], args[2], args[3]
	else
		local pattern_type = type(pattern)
		if pattern_type ~= "string" and pattern_type ~= "number" then
			error("The second argument to plain_gsub should be a string or a number.")
		end
		local replacement_type = type(replacement)
		if replacement_type ~= "string" and replacement_type ~= "number" then
			error("The third argument to plain_gsub should be a string or a number.")
		end
	end

	-- Assignment keeps only the first result of pattern_escape.
	pattern = str.pattern_escape(pattern)

	if invoked then
		-- Parentheses drop the match count so a template sees only the text.
		return (mw.ustring.gsub(text, pattern, replacement))
	end
	return mw.ustring.gsub(text, pattern, replacement)
end
end


Line 574: Line 531:
}
}
local args = require("Module:parameters").process(frame.args, params)
local args = process_params(frame.args, params)
text = args[1]
text = args[1]
Line 589: Line 546:
local matches = {}
local matches = {}
local i = 0
local i = 0
for match in mw.ustring.gmatch(text, pattern) do
for match in ugmatch(text, pattern) do
i = i + 1
i = i + 1
matches[i] = match
matches[i] = match
Line 642: Line 599:
pattern = "(" .. pattern .. ")"
pattern = "(" .. pattern .. ")"
end
end
local find = use_basic_Lua_function and string.find or mw.ustring.find
local find = use_basic_Lua_function and string.find or ufind
return function()
return function()
i = i + 1
i = i + 1
Line 657: Line 614:
end
end


function str.escapebytes(s)
function str.URIdecode(frame)
return (string.gsub(s,
return decode_uri(frame.args[1], frame.args[2] or "PATH")
'.',
function(char)
return ('\\%03d'):format(string.byte(char))
end))
end
end


function str.URIdecode(frame)
function str:__index(k)
return mw.uri.decode(frame.args[1], frame.args[2] or "PATH")
self[k] = package.loaders[2]("Module:string/" .. k)()
return self[k]
end
end


return str
return setmetatable(str, str)