Module:debug: Difference between revisions
Jump to navigation
Jump to search
(Created page with "local export = {} local escape do local escapes = { ["\a"] = "a", ["\b"] = "b", ["\f"] = "f", ["\n"] = "n", ["\r"] = "r", ["\t"] = "t", ["\v"] = "v", ["\\"] = "\\", ["\"...") |
No edit summary |
||
| (One intermediate revision by the same user not shown) | |||
| Line 1: | Line 1: | ||
local export = {} | local export = {} | ||
local escape | local string_utilities_module = "Module:string utilities" | ||
local table_module = "Module:table" | |||
local byte = string.byte | |||
local concat = table.concat | |||
local escape -- defined below | |||
local format = string.format | |||
local gsub = string.gsub | |||
local insert = table.insert | |||
local match = string.match | |||
local sub = string.sub | |||
local toNFC = mw.ustring.toNFC | |||
local function is_array(...) | |||
is_array = require(table_module).isArray | |||
return is_array(...) | |||
end | |||
local function isutf8(...) | |||
isutf8 = require(string_utilities_module).isutf8 | |||
return isutf8(...) | |||
end | |||
local function sorted_pairs(...) | |||
sorted_pairs = require(table_module).sortedPairs | |||
return sorted_pairs(...) | |||
end | |||
local function table_size(...) | |||
table_size = require(table_module).size | |||
return table_size(...) | |||
end | |||
do | do | ||
local escapes = { | local escapes | ||
local function get_escapes() | |||
escapes, get_escapes = { | |||
["\a"] = [[\a]], ["\b"] = [[\b]], ["\t"] = [[\t]], ["\n"] = [[\n]], | |||
["\v"] = [[\v]], ["\f"] = [[\f]], ["\r"] = [[\r]], ["\""] = [[\"]], | |||
["'"] = [[\']], ["\\"] = [[\\]], | |||
}, nil | |||
return escapes | |||
end | |||
local function escape_byte(ch) | |||
return (escapes or get_escapes())[ch] or format("\\%03d", byte(ch)) | |||
end | |||
local function | local function escape_bytes(ch) | ||
return | return (gsub(ch, ".", escape_byte)) | ||
or ( | end | ||
local function escape_char(ch) | |||
local ch_len = #ch | |||
if ch_len == 1 then | |||
return escape_byte(ch) | |||
end | |||
local b = byte(ch) | |||
-- Matching bytes below \128 are all to be escaped, \128 to \191 can't | |||
-- be leading bytes in UTF-8, \192 and \193 could only occur in overlong | |||
-- encodings, so can't occur in UTF-8, U+0080 (\194\128) to U+009F | |||
-- (\194\159) are control characters, U+00A0 (\194\160) is the no-break | |||
-- space, and \245 to \255 could only occur in encodings for codepoints | |||
-- above U+10FFFF, so can't occur in UTF-8. | |||
if b < 194 or b > 244 or (b == 194 and byte(ch, 2) < 161) then | |||
return escape_bytes(ch) | |||
-- 2-byte encodings starting \194 to \223 are all valid, so no need to | |||
-- check them with isutf8(). If there are additional trailing | |||
-- bytes, escape them. | |||
elseif b < 224 then | |||
return ch_len == 2 and ch or (sub(ch, 1, 2) .. escape_bytes(sub(ch, 3))) | |||
end | |||
-- Check 3- and 4-byte encodings with isutf8(), as they might be | |||
-- invalid due to overlong encodings or being above U+10FFFF. As above, | |||
-- escape any additional trailing bytes. | |||
local n = b < 240 and 3 or 4 | |||
if ch_len == n then | |||
return isutf8(ch) and ch or escape_bytes(ch) | |||
elseif ch_len > n then | |||
local init_ch = sub(ch, 1, n) | |||
if isutf8(init_ch) then | |||
return init_ch .. escape_bytes(sub(ch, n + 1)) | |||
end | |||
end | |||
return escape_bytes(ch) | |||
end | end | ||
-- | local function escape_non_NFC(str) | ||
local normalized = toNFC(str) | |||
if normalized == str then | |||
return str | |||
end | |||
-- | local str_len, i, start, offset, output = #str, 1, 1, 0 | ||
escape | while i <= str_len do | ||
return ( | local b = byte(str, i) | ||
if b == byte(normalized, i + offset) then | |||
i = i + 1 | |||
else | |||
if output == nil then | |||
output = {} | |||
end | |||
-- Backtrack to the start of the character. | |||
while b >= 128 and b < 192 do | |||
i = i - 1 | |||
b = byte(str, i) | |||
end | |||
-- Insert any intermediate characters up to this point. | |||
if start ~= i then | |||
insert(output, sub(str, start, i - 1)) | |||
end | |||
-- Get the first character, then find the sequence of characters | |||
-- which differs from the normalized string. | |||
local seq = match(str, "^.[\128-\191]*", i) | |||
-- Find the raw sequence and the normalized sequence by adding | |||
-- a character at a time to the raw sequence, and checking if | |||
-- it matches the current point in the normalized string. | |||
-- This is necessary to ensure that the offset between the two | |||
-- strings is correct, when comparing equivalent sections. | |||
local seq_len, poss_seq, norm_seq = #seq, seq | |||
while true do | |||
if not norm_seq then | |||
norm_seq = match(normalized, "^" .. toNFC(poss_seq), i + offset) | |||
-- Once a matching sequence has been found, check if it's | |||
-- still possible to match the same normalized sequence with | |||
-- a longer raw sequence, as form NFC will have taken the | |||
-- longest sequence when normalizing the input. | |||
elseif toNFC(poss_seq) ~= norm_seq then | |||
break | |||
end | |||
seq, seq_len = poss_seq, #poss_seq | |||
local nxt_ch = match(str, "^.[\128-\191]*", i + seq_len) | |||
if nxt_ch == nil then | |||
break | |||
end | |||
poss_seq = poss_seq .. nxt_ch | |||
end | |||
-- Modify the offset to account for the difference in length | |||
-- between the two sequences. Usually, the NFC form will be | |||
-- shorter, but in rare cases it is longer (e.g. U+0F73 | |||
-- normalizes to U+0F71 + U+0F72). | |||
offset = offset + #norm_seq - seq_len | |||
i = i + seq_len | |||
start = i | |||
-- Escape the non-ASCII portion of the sequence. This ensures | |||
-- that escapes added by escape_char don't end up double-escaped | |||
-- if they would otherwise be modified by form NFC; e.g. "\n" + | |||
-- U+0303 ("\ñ") needs to avoid escaping the "n". | |||
if seq ~= "" then | |||
insert(output, (gsub(seq, "[\128-\255]", escape_byte))) | |||
end | |||
end | |||
end | |||
if output == nil then | |||
return str | |||
end | |||
insert(output, sub(str, start)) | |||
return concat(output) | |||
end | |||
-- Escapes control characters, backslash, double quote, the no-break space, | |||
-- bytes that aren't used in UTF-8, invalid UTF-8 character sequences, and | |||
-- any bytes necessary to ensure that the output is Unicode form NFC, | |||
-- because MediaWiki automatically converts page content to form NFC; e.g. | |||
-- "e" + U+0301 ("é") results in "e\204\129", because otherwise the sequence | |||
-- would be converted to "é" (U+00E9)); this ensures that results can be | |||
-- relied upon to be stable if saved as part of page content. | |||
function export.escape(str) | |||
return escape_non_NFC(gsub(str, "[%c\"'\\\128-\255][\128-\191]*", escape_char)) | |||
end | end | ||
escape = export.escape | |||
end | end | ||
-- Convert a value to a string | -- Convert a value to a string | ||
| Line 37: | Line 184: | ||
local str_table = {} | local str_table = {} | ||
insert(str_table, " {") | |||
for key, val in | for key, val in sorted_pairs(value, tsort) do | ||
insert(str_table, " " .. prefix .. "\t[" .. export.dump(key, prefix .. "\t") .. "] = " .. gsub(export.dump(val, prefix .. "\t"), "^ ", "") .. ",") | |||
end | end | ||
insert(str_table, " " .. prefix .. "}") | |||
return | return concat(str_table, "\n") | ||
else | else | ||
return tostring(value) | return tostring(value) | ||
| Line 60: | Line 207: | ||
-- Remove spaces at beginnings of lines (which are simply to force a <pre></pre> tag). | -- Remove spaces at beginnings of lines (which are simply to force a <pre></pre> tag). | ||
dump = | dump = gsub(dump, "%f[^%z\n] ", "") | ||
return export.highlight(dump) | return export.highlight(dump) | ||
| Line 69: | Line 215: | ||
-- Returns true if table contains a table as one of its values | -- Returns true if table contains a table as one of its values | ||
local function containsTable(t) | local function containsTable(t) | ||
for | for _, value in pairs(t) do | ||
if type(value) == "table" then | if type(value) == "table" then | ||
return true | return true | ||
| Line 79: | Line 225: | ||
local function containsTablesWithSize(t, size) | local function containsTablesWithSize(t, size) | ||
for | for _, value in pairs(t) do | ||
if type(value) == "table" and | if type(value) == "table" and table_size(value) ~= size then | ||
return false | return false | ||
end | end | ||
| Line 105: | Line 251: | ||
local containsTable = containsTable(value) | local containsTable = containsTable(value) | ||
local consecutive = | local consecutive = is_array(value) | ||
if consecutive and not containsTable or containsTable and containsTablesWithSize(value, 3) then | if consecutive and not containsTable or containsTable and containsTablesWithSize(value, 3) then | ||
insert(str_table, "{") | |||
for key, val in | for key, val in sorted_pairs(value, tsort) do | ||
if containsTable then | if containsTable then | ||
insert(str_table, "\n\t" .. prefix) | |||
else | else | ||
insert(str_table, " ") | |||
end | end | ||
if type(key) == "string" then | if type(key) == "string" then | ||
insert(str_table, "[" .. export.modified_dump(key) .. "] = ") | |||
end | end | ||
insert(str_table, type(key) == "number" and type(val) == "number" and format("0x%05X", val) or export.modified_dump(val)) | |||
if not (consecutive and #value == 3) or type(key) == "number" and value[key + 1] then | if not (consecutive and #value == 3) or type(key) == "number" and value[key + 1] then | ||
insert(str_table, ",") | |||
end | end | ||
end | end | ||
if containsTable then | if containsTable then | ||
insert(str_table, "\n" .. prefix) | |||
else | else | ||
insert(str_table, " ") | |||
end | end | ||
insert(str_table, "}") | |||
return | return concat(str_table) | ||
end | end | ||
insert(str_table, " {") | |||
for key, val in | for key, val in sorted_pairs(value, tsort) do | ||
insert(str_table, " " .. prefix .. "\t[" .. export.modified_dump(key, prefix .. "\t") .. "] = " .. gsub(export.modified_dump(val, prefix .. "\t"), "^ ", "") .. ",") | |||
end | end | ||
insert(str_table, " " .. prefix .. "}") | |||
return | return concat(str_table, "\n") | ||
elseif t == "number" and value > 46 then | elseif t == "number" and value > 46 then | ||
return | return format("0x%05X", value) | ||
else | else | ||
return tostring(value) | return tostring(value) | ||
end | end | ||
end | end | ||
export.track = require("Module:debug/track") | |||
| Line 189: | Line 322: | ||
} | } | ||
return function(content) | return function(content) | ||
return mw.getCurrentFrame():extensionTag | return mw.getCurrentFrame():extensionTag("syntaxhighlight", content, options) | ||
end | end | ||
else | else | ||
return mw.getCurrentFrame():extensionTag | return mw.getCurrentFrame():extensionTag("syntaxhighlight", content, { | ||
lang = options and options.lang or "lua", | |||
inline = options and options.inline and true or nil | |||
}) | |||
} | |||
end | end | ||
end | end | ||
| Line 212: | Line 337: | ||
end | end | ||
track("unrecognized arg") | |||
local arg_list = {} | local arg_list = {} | ||
for arg, value in pairs(args) do | for arg, value in pairs(args) do | ||
track("unrecognized arg/" .. arg) | track("unrecognized arg/" .. arg) | ||
insert(arg_list, format("|%s=%s", arg, value)) | |||
end | |||
mw.log(format("Unrecognized parameter%s in {{%s}}: %s.", | |||
arg_list[2] and "s" or "", template_name, concat(arg_list, ", ") | |||
)) | |||
end | |||
do | |||
local placeholder = "_message_" | |||
function export._placeholder_error(frame) | |||
-- A dummy function that throws an error with a placeholder message. | |||
error(placeholder, (frame.args.level or 1) + 6) | |||
end | end | ||
mw. | -- Throw an error via callParserFunction, which generates a real error with traceback, automatic categorization in [[CAT:E]] etc., but the error message is returned as a string. Then, replace the placeholder error message with `message`, which is preprocessed. This is necessary when preprocessing needs to be applied (e.g. when using <pre> tags), since otherwise strip markers and other half-processed text gets displayed instead. | ||
( | function export.formatted_error(message, level) | ||
local frame = mw.getCurrentFrame() | |||
return (frame:callParserFunction("#invoke", {"debug", "_placeholder_error", level = level}) | |||
:gsub(placeholder, frame:preprocess(message))) | |||
end | |||
end | end | ||
return export | return export | ||
Latest revision as of 15:19, 15 November 2025
- The following documentation is located at Module:debug/doc.[edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
local export = {}
local string_utilities_module = "Module:string utilities"
local table_module = "Module:table"
local byte = string.byte
local concat = table.concat
local escape -- defined below
local format = string.format
local gsub = string.gsub
local insert = table.insert
local match = string.match
local sub = string.sub
local toNFC = mw.ustring.toNFC
-- Lazy loader: the first call fetches `isArray` from the table module,
-- rebinds this local to it so later calls go straight there, and forwards
-- the arguments.
local function is_array(...)
	local loaded = require(table_module).isArray
	is_array = loaded
	return loaded(...)
end
-- Lazy loader: the first call fetches `isutf8` from the string utilities
-- module, rebinds this local to it, and forwards the arguments.
local function isutf8(...)
	local loaded = require(string_utilities_module).isutf8
	isutf8 = loaded
	return loaded(...)
end
-- Lazy loader: the first call fetches `sortedPairs` from the table module,
-- rebinds this local to it, and forwards the arguments.
local function sorted_pairs(...)
	local loaded = require(table_module).sortedPairs
	sorted_pairs = loaded
	return loaded(...)
end
-- Lazy loader: the first call fetches `size` from the table module, rebinds
-- this local to it, and forwards the arguments.
local function table_size(...)
	local loaded = require(table_module).size
	table_size = loaded
	return loaded(...)
end
do
-- Lookup from a single character to its named Lua escape sequence. Left nil
-- until get_escapes() runs for the first time.
local escapes
-- Builds the lookup table, stores it in `escapes`, and then clears its own
-- binding, since it only ever needs to run once.
local function get_escapes()
	escapes = {
		["\a"] = [[\a]], ["\b"] = [[\b]], ["\t"] = [[\t]], ["\n"] = [[\n]],
		["\v"] = [[\v]], ["\f"] = [[\f]], ["\r"] = [[\r]], ["\""] = [[\"]],
		["'"] = [[\']], ["\\"] = [[\\]],
	}
	get_escapes = nil
	return escapes
end
-- Escapes a single byte: uses the named escape if one exists (building the
-- lookup table on first use), otherwise a three-digit decimal escape.
local function escape_byte(ch)
	local named = (escapes or get_escapes())[ch]
	if named then
		return named
	end
	return format("\\%03d", byte(ch))
end
-- Escapes every byte of `ch` individually with escape_byte. Only the
-- resulting string is returned (gsub's substitution count is discarded).
local function escape_bytes(ch)
	local escaped = gsub(ch, ".", escape_byte)
	return escaped
end
-- gsub callback for export.escape. `ch` is one matched byte followed by any
-- run of continuation bytes (\128-\191). Returns `ch` with everything that
-- is not a well-formed, printable UTF-8 character escaped.
local function escape_char(ch)
	local len = #ch
	-- A lone byte is always something to escape.
	if len == 1 then
		return escape_byte(ch)
	end
	local lead = byte(ch)
	-- Lead bytes below \128 are ASCII characters that need escaping; \128 to
	-- \191 are continuation bytes, so can't lead; \192 and \193 only occur in
	-- overlong encodings; \245 to \255 only occur in encodings of codepoints
	-- above U+10FFFF. None of these can start valid UTF-8 here.
	if lead < 194 or lead > 244 then
		return escape_bytes(ch)
	end
	-- U+0080 (\194\128) to U+009F (\194\159) are control characters and
	-- U+00A0 (\194\160) is the no-break space: escape those as well.
	if lead == 194 and byte(ch, 2) < 161 then
		return escape_bytes(ch)
	end
	-- Every 2-byte encoding led by \194 to \223 is valid, so isutf8() isn't
	-- needed; only surplus trailing bytes get escaped.
	if lead < 224 then
		if len == 2 then
			return ch
		end
		return sub(ch, 1, 2) .. escape_bytes(sub(ch, 3))
	end
	-- 3- and 4-byte encodings may still be invalid (overlong, or above
	-- U+10FFFF), so they are checked with isutf8().
	local expected = 4
	if lead < 240 then
		expected = 3
	end
	-- Truncated sequence: nothing valid to keep.
	if len < expected then
		return escape_bytes(ch)
	end
	local head = sub(ch, 1, expected)
	if not isutf8(head) then
		return escape_bytes(ch)
	end
	if len == expected then
		return ch
	end
	-- Valid character followed by stray continuation bytes: keep the
	-- character, escape the rest.
	return head .. escape_bytes(sub(ch, expected + 1))
end
-- Escapes the non-ASCII bytes of every part of `str` that differs from its
-- Unicode NFC normalization (as computed by toNFC), and returns the result.
-- If `str` is already equal to its normalized form it is returned unchanged.
--
-- The function walks `str` byte by byte, comparing against `normalized` at
-- position `i + offset`, where `offset` tracks the accumulated difference in
-- byte length between the raw and normalized text consumed so far.
local function escape_non_NFC(str)
local normalized = toNFC(str)
if normalized == str then
return str
end
-- `output` is deliberately left nil here; the table is only created once
-- the first difference is found. `start` marks the beginning of the pending
-- run of unchanged text that has not been copied to `output` yet.
local str_len, i, start, offset, output = #str, 1, 1, 0
while i <= str_len do
local b = byte(str, i)
if b == byte(normalized, i + offset) then
i = i + 1
else
if output == nil then
output = {}
end
-- Backtrack to the start of the character.
-- (Bytes \128 to \191 are UTF-8 continuation bytes.)
while b >= 128 and b < 192 do
i = i - 1
b = byte(str, i)
end
-- Insert any intermediate characters up to this point.
if start ~= i then
insert(output, sub(str, start, i - 1))
end
-- Get the first character, then find the sequence of characters
-- which differs from the normalized string.
local seq = match(str, "^.[\128-\191]*", i)
-- Find the raw sequence and the normalized sequence by adding
-- a character at a time to the raw sequence, and checking if
-- it matches the current point in the normalized string.
-- This is necessary to ensure that the offset between the two
-- strings is correct, when comparing equivalent sections.
-- NOTE(review): the normalized candidate is used directly as a Lua
-- pattern below, so this assumes it never contains pattern magic
-- characters - confirm.
local seq_len, poss_seq, norm_seq = #seq, seq
while true do
if not norm_seq then
norm_seq = match(normalized, "^" .. toNFC(poss_seq), i + offset)
-- Once a matching sequence has been found, check if it's
-- still possible to match the same normalized sequence with
-- a longer raw sequence, as form NFC will have taken the
-- longest sequence when normalizing the input.
elseif toNFC(poss_seq) ~= norm_seq then
break
end
-- Accept the candidate, then try extending it by the next character
-- of the raw string (stopping at the end of the string).
seq, seq_len = poss_seq, #poss_seq
local nxt_ch = match(str, "^.[\128-\191]*", i + seq_len)
if nxt_ch == nil then
break
end
poss_seq = poss_seq .. nxt_ch
end
-- Modify the offset to account for the difference in length
-- between the two sequences. Usually, the NFC form will be
-- shorter, but in rare cases it is longer (e.g. U+0F73
-- normalizes to U+0F71 + U+0F72).
-- NOTE(review): this assumes the loop above always found a match
-- (norm_seq is non-nil) before exiting - confirm.
offset = offset + #norm_seq - seq_len
i = i + seq_len
start = i
-- Escape the non-ASCII portion of the sequence. This ensures
-- that escapes added by escape_char don't end up double-escaped
-- if they would otherwise be modified by form NFC; e.g. "\n" +
-- U+0303 ("\ñ") needs to avoid escaping the "n".
if seq ~= "" then
insert(output, (gsub(seq, "[\128-\255]", escape_byte)))
end
end
end
if output == nil then
return str
end
-- Copy whatever unchanged text remains after the last escaped sequence.
insert(output, sub(str, start))
return concat(output)
end
-- Returns `str` with the following escaped: control characters, backslash,
-- double and single quotes, the no-break space, bytes that are never used in
-- UTF-8, invalid UTF-8 sequences, and any bytes that must be escaped for the
-- output to be in Unicode form NFC. The last point matters because MediaWiki
-- automatically converts page content to form NFC; e.g. "e" + U+0301 results
-- in "e\204\129", because otherwise the sequence would be converted to
-- U+00E9. This keeps results stable if saved as part of page content.
function export.escape(str)
	-- First pass: per-character escaping. Only the string result of gsub is
	-- kept; the substitution count is not needed.
	local escaped = gsub(str, "[%c\"'\\\128-\255][\128-\191]*", escape_char)
	-- Second pass: escape whatever NFC normalization would still change.
	return escape_non_NFC(escaped)
end
escape = export.escape
end
-- Convert a value to a string.
-- Strings are wrapped in double quotes after being passed through `escape`;
-- tables are expanded with one `[key] = value,` entry per line, iterated with
-- `sorted_pairs`; every other type goes through `tostring`.
--   value:  the value to convert.
--   prefix: string prepended to each line of a table dump (defaults to "").
--   tsort:  optional sort argument handed to `sorted_pairs`. It is forwarded
--           to the recursive calls as well, so nested tables are ordered the
--           same way as the outermost one.
-- Lines of a table dump start with a space; highlight_dump strips these
-- again.
function export.dump(value, prefix, tsort)
	local t = type(value)
	prefix = prefix or ""
	if t == "string" then
		return '"' .. escape(value) .. '"'
	elseif t == "table" then
		local inner_prefix = prefix .. "\t"
		local str_table = {}
		insert(str_table, " {")
		for key, val in sorted_pairs(value, tsort) do
			-- The leading space of a nested table dump is removed so that the
			-- opening brace follows "= " directly.
			insert(str_table, " " .. inner_prefix .. "[" .. export.dump(key, inner_prefix, tsort) .. "] = " .. gsub(export.dump(val, inner_prefix, tsort), "^ ", "") .. ",")
		end
		insert(str_table, " " .. prefix .. "}")
		return concat(str_table, "\n")
	else
		return tostring(value)
	end
end
-- Dumps `value` and wraps the result in a syntaxhighlight tag.
-- `value`, `prefix` and `tsort` are passed on to the dump function, which is
-- export.modified_dump when `options.modified` is set and export.dump
-- otherwise.
function export.highlight_dump(value, prefix, tsort, options)
	local dumper
	if options and options.modified then
		dumper = export.modified_dump
	else
		dumper = export.dump
	end
	local text = dumper(value, prefix, tsort)
	-- Strip the single space at the start of the text and after each newline;
	-- those spaces are simply there to force a <pre></pre> tag.
	text = gsub(text, "%f[^%z\n] ", "")
	return export.highlight(text)
end
-- Reports whether at least one value stored in `t` is itself a table.
local function containsTable(t)
	local found = false
	for _, item in pairs(t) do
		if type(item) == "table" then
			found = true
			break
		end
	end
	return found
end
-- Reports whether every table-typed value in `t` has exactly `size` entries
-- according to table_size. Non-table values are ignored, so a table without
-- any table values yields true.
local function containsTablesWithSize(t, size)
	local all_match = true
	for _, item in pairs(t) do
		if type(item) == "table" and table_size(item) ~= size then
			all_match = false
			break
		end
	end
	return all_match
end
--[=[
	Convert a value to a string.
	Like dump above, but if a table has consecutive numbered keys and does not
	have a table as one of its values, it will be placed on a single line.
	A table that does contain tables is also printed in the compact form (one
	entry per line) when every table among its values has size 3.
	Strings are quoted as-is (they are not passed through escape), and numbers
	greater than 46 are printed as hexadecimal with the format "0x%05X".
	`tsort` is handed to sorted_pairs and forwarded to the recursive calls on
	values, so nested tables are ordered the same way as the outermost one.
	Used by [[Module:User:Erutuon/script recognition]].
]=]
function export.modified_dump(value, prefix, tsort)
	local t = type(value)
	prefix = prefix or ""
	if t == "string" then
		return '"' .. value .. '"'
	elseif t == "table" then
		local str_table = {}
		local has_table = containsTable(value)
		local consecutive = is_array(value)
		if consecutive and not has_table or has_table and containsTablesWithSize(value, 3) then
			insert(str_table, "{")
			for key, val in sorted_pairs(value, tsort) do
				-- Entries go on their own line when there are nested tables,
				-- and are separated by spaces otherwise.
				if has_table then
					insert(str_table, "\n\t" .. prefix)
				else
					insert(str_table, " ")
				end
				-- Numeric keys are implicit; only string keys are written out.
				if type(key) == "string" then
					insert(str_table, "[" .. export.modified_dump(key) .. "] = ")
				end
				insert(str_table, type(key) == "number" and type(val) == "number" and format("0x%05X", val) or export.modified_dump(val, nil, tsort))
				-- A consecutive table of length 3 gets no comma after its
				-- final element.
				if not (consecutive and #value == 3) or type(key) == "number" and value[key + 1] then
					insert(str_table, ",")
				end
			end
			if has_table then
				insert(str_table, "\n" .. prefix)
			else
				insert(str_table, " ")
			end
			insert(str_table, "}")
			return concat(str_table)
		end
		-- Fallback: same multi-line layout as export.dump.
		local inner_prefix = prefix .. "\t"
		insert(str_table, " {")
		for key, val in sorted_pairs(value, tsort) do
			insert(str_table, " " .. inner_prefix .. "[" .. export.modified_dump(key, inner_prefix, tsort) .. "] = " .. gsub(export.modified_dump(val, inner_prefix, tsort), "^ ", "") .. ",")
		end
		insert(str_table, " " .. prefix .. "}")
		return concat(str_table, "\n")
	elseif t == "number" and value > 46 then
		return format("0x%05X", value)
	else
		return tostring(value)
	end
end
export.track = require("Module:debug/track")
-- Trigger a script error from a template.
-- The error message is the first positional argument of `frame`; a fixed
-- fallback text is used when that argument is absent.
function export.error(frame)
	local message = frame.args[1]
	if not message then
		message = "(no message specified)"
	end
	error(message)
end
--[[
	Convenience function for generating syntaxhighlight tags.
	Display defaults to block; the language defaults to "lua".
	Options is a table. To display inline text with HTML highlighting:
		{ inline = true, lang = "html" }
	Two calling forms are supported:
		highlight(content, options)  returns the tag for `content`.
		highlight(options)           returns a function that takes `content`
		                             and returns the tag using those options.
	In both forms `inline` is only passed to the tag when it is truthy; a false
	or missing value leaves the attribute out altogether.
]]
function export.highlight(content, options)
	if type(content) == "table" then
		-- Curried form: the first argument is the options table.
		local given = content
		local tag_args = {
			lang = given.lang or "lua",
			-- `or nil` keeps a false value from being handed to extensionTag
			-- as an attribute, matching the direct form below.
			inline = given.inline and true or nil
		}
		return function(text)
			return mw.getCurrentFrame():extensionTag("syntaxhighlight", text, tag_args)
		end
	else
		return mw.getCurrentFrame():extensionTag("syntaxhighlight", content, {
			lang = options and options.lang or "lua",
			inline = options and options.inline and true or nil
		})
	end
end
-- Records tracking entries for unrecognized template arguments and writes a
-- summary of them to the debug log.
--   args:          table of the unrecognized arguments (name -> value).
--   template_name: name of the template; used as the tracking prefix and in
--                  the log message.
-- One general "unrecognized arg" entry is tracked, plus one entry per
-- argument name.
function export.track_unrecognized_args(args, template_name)
	local function track(code)
		export.track(template_name .. "/" .. code)
	end
	track("unrecognized arg")
	local arg_list = {}
	for arg, value in pairs(args) do
		track("unrecognized arg/" .. arg)
		-- tostring() guards the "%s" conversions: in Lua 5.1, format raises
		-- an error when given a value that is not a string or a number.
		insert(arg_list, format("|%s=%s", tostring(arg), tostring(value)))
	end
	-- The message is pluralized when more than one argument was listed.
	mw.log(format("Unrecognized parameter%s in {{%s}}: %s.",
		arg_list[2] and "s" or "", template_name, concat(arg_list, ", ")
	))
end
do
local placeholder = "_message_"
-- A dummy function that throws an error with a placeholder message, for use
-- by export.formatted_error.
-- The `level` frame argument selects the error level; 6 is added to it
-- before it is passed to error().
function export._placeholder_error(frame)
	-- Frame arguments arrive as strings, so convert explicitly. A missing,
	-- empty or non-numeric level falls back to 1 instead of raising an
	-- unrelated arithmetic error.
	local level = tonumber(frame.args.level) or 1
	error(placeholder, level + 6)
end
-- Throw an error via callParserFunction, which generates a real error with
-- traceback, automatic categorization in [[CAT:E]] etc., but the error
-- message is returned as a string. Then, replace the placeholder error
-- message with `message`, which is preprocessed. This is necessary when
-- preprocessing needs to be applied (e.g. when using <pre> tags), since
-- otherwise strip markers and other half-processed text gets displayed
-- instead.
--   message: wikitext of the error message; passed through frame:preprocess.
--   level:   optional error level, forwarded to _placeholder_error.
-- Returns the error output as a string.
function export.formatted_error(message, level)
	local frame = mw.getCurrentFrame()
	local err = frame:callParserFunction("#invoke", {"debug", "_placeholder_error", level = level})
	local replacement = frame:preprocess(message)
	-- The replacement is supplied through a function so that it is inserted
	-- literally: in a replacement *string*, gsub treats "%" as the start of a
	-- capture reference, which would mangle or reject messages containing
	-- percent signs. The parentheses discard gsub's substitution count.
	return (err:gsub(placeholder, function()
		return replacement
	end))
end
end
return export