Module:utilities: Difference between revisions
Jump to navigation
Jump to search
Tag: Undo |
No edit summary Tag: Reverted |
||
Line 5: | Line 5: | ||
local neededhassubpage = data.neededhassubpage | local neededhassubpage = data.neededhassubpage | ||
-- A helper function to escape magic characters in a string | function export.require_when_needed(text) | ||
return setmetatable({}, { | |||
__index = function(t, k) | |||
t = require(text) | |||
return t[k] | |||
end, | |||
__call = function(t, ...) | |||
t = require(text) | |||
return t(...) | |||
end | |||
}) | |||
end | |||
-- A helper function to escape magic characters in a string. | |||
-- Magic characters: ^$()%.[]*+-? | -- Magic characters: ^$()%.[]*+-? | ||
function export.pattern_escape(text) | function export.pattern_escape(text) | ||
Line 11: | Line 24: | ||
text = text.args[1] | text = text.args[1] | ||
end | end | ||
text = mw.ustring.gsub(text, "([%^ | return (text:gsub("([%^$()%%.%[%]*+%-?])", "%%%1")) | ||
return text | end | ||
-- A helper function to resolve HTML entities into plaintext. | |||
-- Iterates over entities in a string, and decodes them into plaintext. We use iteration (instead of decoding the whole string in one go) because it means we can avoid loading the lookup string unnecessarily, as it uses more memory. | |||
function export.get_entities(text) | |||
local entities | |||
return (text:gsub("&[#%w]-;", function(entity) | |||
-- Check if mw.text.decode is able to decode the entity. | |||
if entity:find("^&#") or | |||
entity == "<" or | |||
entity == ">" or | |||
entity == "&" or | |||
entity == """ or | |||
entity == " " | |||
then | |||
return mw.text.decode(entity) | |||
else | |||
-- [[Module:utilities/data/entities]] is a lookup string of every named HTML entity (except the ones listed above), as they aren't covered by mw.text.decode. | |||
-- mw.text.decode can decode lots of named entities if the second parameter is true, but around 600 are still not covered, and it's less efficient than doing it this way anyway. | |||
entities = entities or require("Module:utilities/data/entities") | |||
local pattern = entity .. "(%Z+)" | |||
return entities:match(pattern) | |||
end | |||
end)) | |||
end | |||
-- A helper function to convert plaintext into HTML entities where these match the characters given in set. | |||
-- By default, this resolves any pre-existing entities into plaintext first, to allow mixed input and to avoid accidental double-conversion. This can be turned off with the raw parameter. | |||
function export.make_entities(text, set, raw) | |||
text = not raw and export.get_entities(text) or text | |||
return mw.text.encode(text, set) | |||
end | |||
-- A helper function to strip wiki markup, giving the plaintext of what is displayed on the page. | |||
function export.get_plaintext(text) | |||
local u = mw.ustring.char | |||
text = text | |||
:gsub("%[%[", "\1") | |||
:gsub("%]%]", "\2") | |||
-- Remove strip markers and HTML tags. | |||
text = mw.text.unstrip(text) | |||
:gsub("<[^<>\1\2]+>", "") | |||
-- Parse internal links for the display text, and remove categories. | |||
text = require("Module:links").remove_links(text) | |||
-- Remove files. | |||
for _, falsePositive in ipairs({"File", "Image"}) do | |||
text = text:gsub("\1" .. falsePositive .. ":[^\1\2]+\2", "") | |||
end | |||
-- Parse external links for the display text. | |||
text = text:gsub("%[(https?://[^%[%]]+)%]", | |||
function(capture) | |||
return capture:match("https?://[^%s%]]+%s([^%]]+)") or "" | |||
end) | |||
text = text | |||
:gsub("\1", "[[") | |||
:gsub("\2", "]]") | |||
-- Any remaining square brackets aren't involved in links, but must be escaped to avoid creating new links. | |||
text = text:gsub("[%[%]]", mw.text.nowiki) | |||
-- Strip bold, italics and soft hyphens. | |||
text = text | |||
:gsub("('*)'''(.-'*)'''", "%1%2") | |||
:gsub("('*)''(.-'*)''", "%1%2") | |||
:gsub("", "") | |||
-- Get any HTML entities. | |||
-- Note: don't decode URL percent encoding, as it shouldn't be used in display text and may cause problems if % is used. | |||
text = export.get_entities(text) | |||
return mw.text.trim(text) | |||
end | end | ||
Line 30: | Line 118: | ||
} | } | ||
local args = require("Module:parameters").process(frame.args, params) | local args = require("Module:parameters").process(frame.args, params, nil, "utilities", "plain_gsub") | ||
text = args[1] | text = args[1] | ||
Line 50: | Line 138: | ||
pattern = export.pattern_escape(pattern) | pattern = export.pattern_escape(pattern) | ||
local gsub = require("Module:string utilities").gsub | |||
if invoked then | if invoked then | ||
return (gsub(text, pattern, replacement)) | |||
else | else | ||
return | return gsub(text, pattern, replacement) | ||
end | end | ||
end | end | ||
Line 79: | Line 167: | ||
]] | ]] | ||
function export.format_categories(categories, lang, sort_key, sort_base, force_output, sc) | function export.format_categories(categories, lang, sort_key, sort_base, force_output, sc) | ||
if type(lang) == "table" and not lang.getCode then | if type(lang) == "table" and not lang.getCode then | ||
error("The second argument to format_categories should be a language object.") | error("The second argument to format_categories should be a language object.") | ||
end | end | ||
if force_output or | local title_obj = mw.title.getCurrentTitle() | ||
local PAGENAME = | local allowedNamespaces = { | ||
local SUBPAGENAME = | [0] = true, [100] = true, [114] = true, [118] = true -- (main), Appendix, Citations, Reconstruction | ||
} | |||
if force_output or allowedNamespaces[title_obj.namespace] or title_obj.prefixedText == "Wiktionary:Sandbox" then | |||
local PAGENAME = title_obj.text | |||
local SUBPAGENAME = title_obj.subpageText | |||
if not lang then | if not lang then | ||
Line 93: | Line 184: | ||
end | end | ||
-- Generate a default sort key | -- Generate a default sort key. | ||
sort_base = lang:makeSortKey(sort_base or SUBPAGENAME, sc) | local upper = require("Module:string utilities").upper | ||
if sort_key ~= "-" then | |||
-- Determine the intended title if the page is an unsupported title. | |||
local unsupported; SUBPAGENAME, unsupported = SUBPAGENAME:gsub("^Unsupported titles/", "") | |||
if unsupported > 0 then | |||
for title, page in pairs(mw.loadData("Module:links/data").unsupported_titles) do | |||
if page == SUBPAGENAME then | |||
SUBPAGENAME = title | |||
break | |||
end | |||
end | |||
end | |||
sort_base = (lang:makeSortKey(sort_base or SUBPAGENAME, sc)) | |||
if sort_key and sort_key ~= "" then | |||
-- Gather some statistics regarding sort keys | |||
if upper(sort_key) == sort_base then | |||
table.insert(categories, "Sort key tracking/redundant") | |||
end | |||
else | |||
sort_key = sort_base | |||
end | |||
-- If the sortkey is empty, remove it. | |||
-- Leave the sortkey if it is equal to PAGENAME, because it still | |||
-- might be different from DEFAULTSORT and therefore have an effect; see | |||
-- [[Wiktionary:Grease pit/2020/April#Module:utilities#format categories]]. | |||
if sort_key == "" then | |||
sort_key = nil | |||
end | end | ||
-- If the sort key is "-", bypass the process of generating a sort key altogether. This is desirable when categorising (e.g.) translation requests, as the pages to be categorised are always in English/Translingual. | |||
else | else | ||
sort_key = sort_base | sort_key = upper(sort_base or SUBPAGENAME) | ||
end | end | ||
Line 122: | Line 227: | ||
return "" | return "" | ||
end | end | ||
end | end | ||
Line 196: | Line 256: | ||
} | } | ||
local args = require("Module:parameters").process(frame:getParent().args, params) | local args = require("Module:parameters").process(frame:getParent().args, params, nil, "utilities", "catfix_template") | ||
local lang = require("Module:languages").getByCode(args[1]) or require("Module:languages").err(args[1], 1) | local lang = require("Module:languages").getByCode(args[1]) or require("Module:languages").err(args[1], 1) | ||
Line 242: | Line 302: | ||
} | } | ||
local args = require("Module:parameters").process(frame:getParent().args, params) | local args = require("Module:parameters").process(frame:getParent().args, params, nil, "utilities", "make_id") | ||
local langCode = args[1] | local langCode = args[1] | ||
Line 259: | Line 319: | ||
end | end | ||
local | local id = require("Module:senseid").anchor(lang, str) | ||
if invoked then | if invoked then | ||
Line 270: | Line 326: | ||
return id | return id | ||
end | end | ||
end | |||
-- Given a type (as a string) and an arbitrary number of entities, checks whether all of those entities are language, family, script, writing system or Wikimedia language objects. Useful for error handling in functions that require one of these kinds of object. | |||
-- If noErr is set, the function returns false instead of throwing an error, which allows customised error handling to be done in the calling function. | |||
function export.check_object(typ, noErr, ...) | |||
local function fail(message) | |||
if noErr then | |||
return false | |||
else | |||
error(message, 3) | |||
end | |||
end | |||
local objs = {...} | |||
if #objs == 0 then | |||
return fail("Must provide at least one object to check.") | |||
end | |||
for _, obj in ipairs{...} do | |||
if type(obj) ~= "table" or type(obj.hasType) ~= "function" then | |||
return fail("Function expected a " .. typ .. " object, but received a " .. type(obj) .. " instead.") | |||
elseif not (typ == "object" or obj:hasType(typ)) then | |||
for _, wrong_type in ipairs{"family", "language", "script", "Wikimedia language", "writing system"} do | |||
if obj:hasType(wrong_type) then | |||
return fail("Function expected a " .. typ .. " object, but received a " .. wrong_type .. " object instead.") | |||
end | |||
end | |||
return fail("Function expected a " .. typ .. " object, but received another type of object instead.") | |||
end | |||
end | |||
return true | |||
end | end | ||
return export | return export |
Revision as of 13:59, 29 July 2023
- The following documentation is located at Module:utilities/doc. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
-- Table of public functions; it is returned at the end of the file.
local export = {}
-- Shared data for this module, loaded with mw.loadData.
local data = mw.loadData("Module:utilities/data")
-- NOTE(review): `notneeded` and `neededhassubpage` are not referenced anywhere in the visible part of this module — confirm whether they are still required.
local notneeded = data.notneeded
local neededhassubpage = data.neededhassubpage
-- Returns an empty proxy table standing in for the module named by `text`.
-- The module is only passed to require() when the proxy is first indexed or
-- called; every later access goes through require() again.
function export.require_when_needed(text)
	local proxy_mt = {}

	-- proxy.key -> field `key` of the required module.
	function proxy_mt.__index(_, key)
		return require(text)[key]
	end

	-- proxy(...) -> call the required module's return value with the same arguments.
	function proxy_mt.__call(_, ...)
		return require(text)(...)
	end

	return setmetatable({}, proxy_mt)
end
-- Escapes every Lua pattern magic character in a string by prefixing it with
-- "%", so the result matches the input literally when used as a pattern.
-- Magic characters: ^$()%.[]*+-?
-- If `text` is a table, the string is taken from text.args[1].
-- Returns only the escaped string (the substitution count is dropped).
function export.pattern_escape(text)
	local subject = text
	if type(subject) == "table" then
		subject = subject.args[1]
	end
	local escaped = subject:gsub("([%^$()%%.%[%]*+%-?])", "%%%1")
	return escaped
end
-- A helper function to resolve HTML entities into plaintext.
-- Iterates over entities in a string, and decodes them into plaintext. We use iteration (instead of decoding the whole string in one go) because it means we can avoid loading the lookup string unnecessarily, as it uses more memory.
-- Returns only the decoded string (the substitution count is dropped). Entities that cannot be resolved are left unchanged.
function export.get_entities(text)
	-- The only named entities mw.text.decode resolves when its second parameter is omitted.
	-- These must be written in their escaped form: each candidate produced by the pattern
	-- below starts with "&" and ends with ";", so a bare "<" or "&" could never compare equal.
	local natively_decoded = {
		["&lt;"] = true,
		["&gt;"] = true,
		["&amp;"] = true,
		["&quot;"] = true,
		["&nbsp;"] = true,
	}
	-- Lookup string, loaded lazily on the first entity that needs it.
	local entities
	return (text:gsub("&[#%w]-;", function(entity)
		-- Numeric entities and the five named ones above go straight to mw.text.decode.
		if entity:find("^&#") or natively_decoded[entity] then
			return mw.text.decode(entity)
		end
		-- [[Module:utilities/data/entities]] is a lookup string of every named HTML entity (except the ones listed above), as they aren't covered by mw.text.decode.
		-- mw.text.decode can decode lots of named entities if the second parameter is true, but around 600 are still not covered, and it's less efficient than doing it this way anyway.
		entities = entities or require("Module:utilities/data/entities")
		-- The value is whatever run of non-NUL characters follows the entity in the lookup string.
		-- A nil result (unknown entity) makes gsub keep the original text.
		return entities:match(entity .. "(%Z+)")
	end))
end
-- Encodes the characters of `text` that match `set` as HTML entities, using mw.text.encode.
-- Unless `raw` is set, entities already present in `text` are first decoded with
-- export.get_entities, so mixed input is accepted and nothing is encoded twice.
function export.make_entities(text, set, raw)
	if not raw then
		text = export.get_entities(text)
	end
	return mw.text.encode(text, set)
end
-- A helper function to strip wiki markup, giving the plaintext of what is displayed on the page.
-- Takes a wikitext string and returns it with HTML tags, links, files, bold/italic
-- markup and soft hyphens removed, HTML entities decoded and outer whitespace trimmed.
function export.get_plaintext(text)
	-- Stand in for "[[" and "]]" with the control characters \1 and \2 while single
	-- brackets are processed; they are restored further down.
	text = text
		:gsub("%[%[", "\1")
		:gsub("%]%]", "\2")

	-- Remove strip markers and HTML tags.
	text = mw.text.unstrip(text)
		:gsub("<[^<>\1\2]+>", "")

	-- Parse internal links for the display text, and remove categories.
	text = require("Module:links").remove_links(text)

	-- Remove files.
	for _, namespace in ipairs({"File", "Image"}) do
		text = text:gsub("\1" .. namespace .. ":[^\1\2]+\2", "")
	end

	-- Parse external links for the display text. A link with no display text is removed.
	text = text:gsub("%[(https?://[^%[%]]+)%]",
		function(capture)
			return capture:match("https?://[^%s%]]+%s([^%]]+)") or ""
		end)

	-- Restore the double square brackets.
	text = text
		:gsub("\1", "[[")
		:gsub("\2", "]]")

	-- Any remaining square brackets aren't involved in links, but must be escaped to avoid creating new links.
	text = text:gsub("[%[%]]", mw.text.nowiki)

	-- Strip bold, italics and soft hyphens.
	-- The soft hyphen (U+00AD) is spelled as its UTF-8 bytes: written literally it is
	-- invisible, easily lost in copying, and an empty pattern would strip nothing.
	text = text
		:gsub("('*)'''(.-'*)'''", "%1%2")
		:gsub("('*)''(.-'*)''", "%1%2")
		:gsub("\194\173", "")

	-- Get any HTML entities.
	-- Note: don't decode URL percent encoding, as it shouldn't be used in display text and may cause problems if % is used.
	text = export.get_entities(text)

	return mw.text.trim(text)
end
-- gsub with a literal (non-pattern) search string: `pattern` is escaped with
-- export.pattern_escape before being handed to the gsub of Module:string utilities.
-- Callable in two ways:
--   * from a module: plain_gsub(text, pattern, replacement); returns everything
--     that gsub returns;
--   * via #invoke: the single argument is a frame whose args 1, 2 and 3 supply
--     the text, pattern and replacement; only the resulting string is returned.
-- Raises an error if a table without `args` is passed, or if `pattern` or
-- `replacement` is neither a string nor a number in the module form.
function export.plain_gsub(text, pattern, replacement)
	local function is_string_or_number(value)
		local kind = type(value)
		return kind == "string" or kind == "number"
	end

	local invoked = type(text) == "table"
	if invoked then
		if not text.args then
			error("If the first argument to plain_gsub is a table, it should be a frame object.")
		end
		local params = {
			[1] = {},
			[2] = {},
			[3] = { allow_empty = true },
		}
		local args = require("Module:parameters").process(text.args, params, nil, "utilities", "plain_gsub")
		text, pattern, replacement = args[1], args[2], args[3]
	elseif not is_string_or_number(pattern) then
		error("The second argument to plain_gsub should be a string or a number.")
	elseif not is_string_or_number(replacement) then
		error("The third argument to plain_gsub should be a string or a number.")
	end

	pattern = export.pattern_escape(pattern)
	local gsub = require("Module:string utilities").gsub

	if not invoked then
		return gsub(text, pattern, replacement)
	end
	-- Invoked from wikitext: hand back the string only.
	local result = gsub(text, pattern, replacement)
	return result
end
--[[
Turn a list of category names into category wikitext with a suitable sort key.

-- CATEGORIES is a list of category names (without the "Category:" prefix).
   Note that the tracking category "Sort key tracking/redundant" is appended to
   this list when an explicit SORT_KEY, uppercased, equals the generated one.
-- LANG is a language object; if nil, the object for language code 'und'
   (undetermined) is used.
-- SORT_KEY is placed in each category link and controls how the page sorts
   in that category. Normally this should be nil, in which case a default
   sort key based on the subpage name is used. The special value "-" skips
   language-specific sort key generation and uses the uppercased SORT_BASE
   (or subpage name) instead.
-- SORT_BASE overrides the text the default sort key is generated from.
   Normally this should be nil, and a language-specific sort key is computed
   from the subpage name (e.g. for Russian this converts Cyrillic ё to
   Cyrillic е followed by U+10FFFF, so that ё sorts directly after е rather
   than, as the default sort reportedly does, after я, the last letter of the
   Cyrillic alphabet).
-- FORCE_OUTPUT forces normal output in all namespaces. Normally an empty
   string is returned unless the page is in the main, Appendix:, Citations:
   or Reconstruction: namespace, or is Wiktionary:Sandbox.
-- SC is passed through to LANG:makeSortKey.
]]
function export.format_categories(categories, lang, sort_key, sort_base, force_output, sc)
	if type(lang) == "table" and not lang.getCode then
		error("The second argument to format_categories should be a language object.")
	end

	local title_obj = mw.title.getCurrentTitle()
	local allowedNamespaces = {
		[0] = true,   -- (main)
		[100] = true, -- Appendix
		[114] = true, -- Citations
		[118] = true, -- Reconstruction
	}
	local output_allowed = force_output
		or allowedNamespaces[title_obj.namespace]
		or title_obj.prefixedText == "Wiktionary:Sandbox"
	if not output_allowed then
		return ""
	end

	local PAGENAME = title_obj.text -- currently unused below
	local SUBPAGENAME = title_obj.subpageText

	lang = lang or require("Module:languages").getByCode("und")

	local upper = require("Module:string utilities").upper

	if sort_key == "-" then
		-- Bypass sort key generation altogether. This is desirable when categorising
		-- (e.g.) translation requests, as the pages to be categorised are always in
		-- English/Translingual.
		sort_key = upper(sort_base or SUBPAGENAME)
	else
		-- Determine the intended title if the page is an unsupported title.
		local unsupported
		SUBPAGENAME, unsupported = SUBPAGENAME:gsub("^Unsupported titles/", "")
		if unsupported > 0 then
			local unsupported_titles = mw.loadData("Module:links/data").unsupported_titles
			for title, page in pairs(unsupported_titles) do
				if page == SUBPAGENAME then
					SUBPAGENAME = title
					break
				end
			end
		end

		-- Generate the default sort key.
		sort_base = (lang:makeSortKey(sort_base or SUBPAGENAME, sc))

		if not sort_key or sort_key == "" then
			sort_key = sort_base
		elseif upper(sort_key) == sort_base then
			-- Gather some statistics regarding sort keys.
			table.insert(categories, "Sort key tracking/redundant")
		end

		-- If the sortkey is empty, remove it.
		-- Leave the sortkey if it is equal to PAGENAME, because it still
		-- might be different from DEFAULTSORT and therefore have an effect; see
		-- [[Wiktionary:Grease pit/2020/April#Module:utilities#format categories]].
		if sort_key == "" then
			sort_key = nil
		end
	end

	local out_categories = {}
	for index, cat in ipairs(categories) do
		local sort_suffix = sort_key and "|" .. sort_key or ""
		out_categories[index] = "[[Category:" .. cat .. sort_suffix .. "]]"
	end
	return table.concat(out_categories, "")
end
-- Builds the hidden <span id="catfix"> element for a language.
-- `lang` must be a language object. `sc` is an optional script object; when it
-- is omitted, a script code is looked up in data.catfix_scripts by language
-- code and, if found, resolved through Module:scripts.
-- Returns the span's HTML as a string. Raises an error if `lang` is not a
-- language object or `sc` is given but has no getCode field.
function export.catfix(lang, sc)
	local lang_error = 'The first argument to the function "catfix" should be a language object from Module:languages.'
	-- Validate before calling the method: invoking getCanonicalName on nil or on a
	-- non-language value would otherwise fail with a generic index/call error and
	-- the message above would never be shown.
	if type(lang) ~= "table" or not lang.getCanonicalName then
		error(lang_error)
	end
	local canonicalName = lang:getCanonicalName() or error(lang_error)

	if sc and not sc.getCode then
		error('The second argument to the function "catfix" should be a script object from Module:scripts.')
	end

	-- To add script classes to links on pages created by category boilerplate templates.
	if not sc then
		sc = data.catfix_scripts[lang:getCode()]
		if sc then
			sc = require("Module:scripts").getByCode(sc)
		end
	end

	return "<span id=\"catfix\" style=\"display:none;\" class=\"CATFIX-" .. mw.uri.anchorEncode(canonicalName) .. "\">" ..
		require("Module:script utilities").tag_text(" ", lang, sc, nil) ..
		"</span>"
end
-- Template entry point for export.catfix.
-- Reads the parent frame's arguments: 1 is the language code, and 2 (an alias
-- of "sc") or sc= is an optional script code. Resolves both to objects and
-- returns the result of export.catfix. An invalid script code raises an error;
-- an invalid language code is handed to Module:languages' err function.
function export.catfix_template(frame)
	local params = {
		[1] = {},
		[2] = { alias_of = "sc" },
		["sc"] = {},
	}
	local args = require("Module:parameters").process(frame:getParent().args, params, nil, "utilities", "catfix_template")

	local m_languages = require("Module:languages")
	local lang = m_languages.getByCode(args[1])
	if not lang then
		lang = m_languages.err(args[1], 1)
	end

	local sc_code = args.sc
	local sc = sc_code
	if sc_code then
		sc = require("Module:scripts").getByCode(sc_code)
		if not sc then
			error('The script code "' .. sc_code .. '", provided in the second parameter, is not valid.')
		end
	end

	return export.catfix(lang, sc)
end
-- Not exporting because it is not used yet.
-- Classifies a calendar date relative to the current time.
-- frame.args[1] is the year, frame.args[2] the month (an English month name,
-- or a number from 1 to 12 given either as a number or as a numeric string),
-- and frame.args[3] the day.
-- Returns "past", "future" or "present"; "present" means the date's noon is
-- within half a day of now. An unrecognised month is passed to os.time as nil,
-- which raises an error.
local function getDateTense(frame)
	local month_numbers = {
		January = 1, February = 2, March = 3, April = 4, May = 5, June = 6,
		July = 7, August = 8, September = 9, October = 10, November = 11, December = 12,
	}

	local raw_month = frame.args[2]
	local month = month_numbers[raw_month]
	if not month then
		-- Arguments may arrive as strings, so "3" has to be accepted as well as 3.
		local numeric = tonumber(raw_month)
		if numeric and numeric >= 1 and numeric <= 12 and numeric == math.floor(numeric) then
			month = numeric
		end
	end

	-- os.time defaults the hour to 12 when it is not given, so this is noon on the given day.
	local date = os.time({year = frame.args[1], month = month, day = frame.args[3]})
	local now = os.time()
	local diff = os.difftime(date, now)
	local half_day = 12 * 3600

	if diff < -half_day then
		return "past"
	elseif diff > half_day then
		return "future"
	end
	return "present"
end
-- Builds a sense ID anchor through Module:senseid.
-- Called via #invoke, the first argument is a frame object: the language code
-- and the string are read from the parent frame's arguments 1 and 2, and an
-- opening <li class="senseid"> tag carrying the ID is returned.
-- Called from a module, the first argument is a language object and the ID
-- itself is returned.
-- Raises an error if a table that is neither a frame nor a language object is
-- passed, or if the string is neither a string nor a number.
function export.make_id(lang, str)
	local invoked = false

	if type(lang) == "table" then
		if lang.args then
			invoked = true
			local params = {
				[1] = {},
				[2] = {},
			}
			local args = require("Module:parameters").process(lang:getParent().args, params, nil, "utilities", "make_id")
			local langCode = args[1]
			str = args[2]
			local m_languages = require("Module:languages")
			lang = m_languages.getByCode(langCode) or m_languages.err(langCode, 1)
		elseif not lang.getCanonicalName then
			error("The first argument to make_id should be a language object.")
		end
	end

	local str_type = type(str)
	if str_type ~= "string" and str_type ~= "number" then
		error("The second argument to make_id should be a string or a number.")
	end

	local id = require("Module:senseid").anchor(lang, str)

	if not invoked then
		return id
	end
	return '<li class="senseid" id="' .. id .. '">'
end
-- Given a type (as a string) and an arbitrary number of entities, checks whether all of those entities are language, family, script, writing system or Wikimedia language objects. Useful for error handling in functions that require one of these kinds of object.
-- If typ is "object", any value with a hasType method is accepted.
-- If noErr is set, the function returns false instead of throwing an error, which allows customised error handling to be done in the calling function.
-- Returns true when every entity passes. nil entities count as failures, wherever they appear in the argument list.
function export.check_object(typ, noErr, ...)
	local function fail(message)
		if noErr then
			return false
		end
		-- Level 3 attributes the error to the caller of check_object.
		error(message, 3)
	end

	-- Count with select("#", ...): the length operator and ipairs both stop at nil,
	-- which would let nil arguments slip through unchecked.
	local count = select("#", ...)
	if count == 0 then
		return fail("Must provide at least one object to check.")
	end

	local objs = {...}
	for i = 1, count do
		local obj = objs[i]
		if type(obj) ~= "table" or type(obj.hasType) ~= "function" then
			return fail("Function expected a " .. typ .. " object, but received a " .. type(obj) .. " instead.")
		elseif not (typ == "object" or obj:hasType(typ)) then
			-- Name the kind of object actually received, if it is one of the known kinds.
			for _, wrong_type in ipairs{"family", "language", "script", "Wikimedia language", "writing system"} do
				if obj:hasType(wrong_type) then
					return fail("Function expected a " .. typ .. " object, but received a " .. wrong_type .. " object instead.")
				end
			end
			return fail("Function expected a " .. typ .. " object, but received another type of object instead.")
		end
	end
	return true
end
-- Hand the table of public functions to whatever loads this module.
return export