Module:utilities: Difference between revisions

no edit summary
No edit summary
No edit summary
Line 5: Line 5:
local neededhassubpage = data.neededhassubpage
local neededhassubpage = data.neededhassubpage


-- A helper function to escape magic characters in a string
-- A helper function to escape magic characters in a string.
-- Magic characters: ^$()%.[]*+-?
-- Magic characters: ^$()%.[]*+-?
function export.pattern_escape(text)
function export.pattern_escape(text)
Line 11: Line 11:
text = text.args[1]
text = text.args[1]
end
end
text = mw.ustring.gsub(text, "([%^$()%%.%[%]*+%-?])", "%%%1")
return (text:gsub("([%^$()%%.%[%]*+%-?])", "%%%1"))
end
 
-- A helper function to resolve HTML entities into plaintext.
-- Every "&...;" span in the input is handed to mw.text.decode exactly once, in a single pass, so text produced by decoding one entity is never decoded a second time (e.g. "&amp;lt;" becomes "&lt;", not "<").
-- The decodeNamedEntities flag is only set for named entities outside the five that mw.text.decode recognises by default, which avoids loading the full named-entity table when it isn't needed.
-- Parameters:
--   text: the string to process.
-- Returns the string with entities replaced by mw.text.decode's output.
function export.get_entities(text)
	local decode = mw.text.decode
	-- Named entities that mw.text.decode handles without decodeNamedEntities.
	local basic_entities = {
		["&lt;"] = true,
		["&gt;"] = true,
		["&amp;"] = true,
		["&quot;"] = true,
		["&nbsp;"] = true,
	}
	-- The outer parentheses drop gsub's second return value (the match count).
	return (text:gsub("&[^;]+;", function(entity)
		-- A non-numeric entity that isn't one of the basic five needs the named-entity table.
		if entity:match("^&[^#][^;]+;$") and not basic_entities[entity] then
			return decode(entity, true)
		end
		return decode(entity)
	end))
end
-- Encodes the characters of text that appear in set as HTML entities, via mw.text.encode.
-- Unless raw is set, entities already present in text are first resolved with export.get_entities, so that mixed input is accepted and existing entities are not encoded a second time.
-- Parameters:
--   text: the string to encode.
--   set: the character set handed to mw.text.encode.
--   raw: if truthy, skip the initial entity resolution and encode text as given.
function export.make_entities(text, set, raw)
	if not raw then
		text = export.get_entities(text)
	end
	return mw.text.encode(text, set)
end
-- A helper function to strip wiki markup, giving the plaintext of what is displayed on the page.
-- The steps run in a fixed order: strip markers and HTML tags, internal links, files, external links, leftover square brackets, bold/italic quotes and soft hyphens, and finally HTML entities.
-- Parameters:
--   text: the wikitext to process.
-- Returns the processed text with leading and trailing whitespace trimmed.
function export.get_plaintext(text)
	-- Remove strip markers and HTML tags.
	text = mw.text.unstrip(text)
		:gsub("<[^<>]+>", "")
	
	-- Parse internal links for the display text, and remove categories.
	text = require("Module:links").remove_links(text)
	
	-- Remove files.
	-- Note: the pattern is anchored with "^", so only a file link at the very start of the text is removed.
	for _, falsePositive in ipairs({"File", "Image"}) do
		text = text:gsub("^%[%[" .. falsePositive .. ":.-%]%]", "")
	end
	
	-- Parse external links for the display text.
	-- A bracketed URL with no display text after the first whitespace is removed entirely.
	text = text:gsub("%[(https?://[^%[%]]+)%]",
		function(capture)
			return capture:match("https?://[^%s%]]+%s([^%]]+)") or ""
		end)
	
	-- Any remaining square brackets aren't involved in links, but must be escaped to avoid creating new links.
	text = text:gsub("[%[%]]", mw.text.nowiki)
	
	-- Strip bold, italics and soft hyphens.
	-- "\194\173" is the UTF-8 encoding of U+00AD (soft hyphen), written as an escape so the invisible character can't be lost in editing.
	text = text
		:gsub("('*)'''(.-'*)'''", "%1%2")
		:gsub("('*)''(.-'*)''", "%1%2")
		:gsub("\194\173", "")
	
	-- Get any HTML entities.
	-- Note: don't decode URL percent encoding, as it shouldn't be used in display text and may cause problems if % is used.
	text = export.get_entities(text)
	
	return mw.text.trim(text)
end
end


Line 30: Line 86:
}
}
local args = require("Module:parameters").process(frame.args, params)
local args = require("Module:parameters").process(frame.args, params, nil, "utilities", "plain_gsub")
text = args[1]
text = args[1]
Line 50: Line 106:
pattern = export.pattern_escape(pattern)
pattern = export.pattern_escape(pattern)
local gsub = require("Module:string utilities").gsub
if invoked then
if invoked then
text = mw.ustring.gsub(text, pattern, replacement)
return (gsub(text, pattern, replacement))
return text
else
else
return mw.ustring.gsub(text, pattern, replacement)
return gsub(text, pattern, replacement)
end
end
end
end
Line 79: Line 135:
]]
]]
function export.format_categories(categories, lang, sort_key, sort_base, force_output, sc)
function export.format_categories(categories, lang, sort_key, sort_base, force_output, sc)
local NAMESPACE = mw.title.getCurrentTitle().nsText
if type(lang) == "table" and not lang.getCode then
if type(lang) == "table" and not lang.getCode then
error("The second argument to format_categories should be a language object.")
error("The second argument to format_categories should be a language object.")
end
end
 
if force_output or data.allowedNamespaces[NAMESPACE] then
local title_obj = mw.title.getCurrentTitle()
local PAGENAME = mw.title.getCurrentTitle().text
local allowedNamespaces = {
local SUBPAGENAME = mw.title.getCurrentTitle().subpageText
[0] = true, [100] = true, [114] = true, [118] = true -- (main), Appendix, Citations, Reconstruction
}
 
if force_output or allowedNamespaces[title_obj.namespace] or title_obj.prefixedText == "Wiktionary:Sandbox" then
local PAGENAME = title_obj.text
local SUBPAGENAME = title_obj.subpageText
if not lang then
if not lang then
Line 93: Line 152:
end
end
-- Generate a default sort key
-- Generate a default sort key.
sort_base = lang:makeSortKey(sort_base or SUBPAGENAME, sc)
local upper = require("Module:string utilities").upper
if sort_key ~= "-" then
if sort_key and sort_key ~= "" then
-- Determine the intended title if the page is an unsupported title.
-- Gather some statistics regarding sort keys
local unsupported; SUBPAGENAME, unsupported = SUBPAGENAME:gsub("^Unsupported titles/", "")
if mw.ustring.upper(sort_key) == sort_base then
if unsupported > 0 then
table.insert(categories, "Sort key tracking/redundant")
for title, page in pairs(mw.loadData("Module:links/data").unsupported_titles) do
if page == SUBPAGENAME then
SUBPAGENAME = title
break
end
end
end
sort_base = (lang:makeSortKey(sort_base or SUBPAGENAME, sc))
if sort_key and sort_key ~= "" then
-- Gather some statistics regarding sort keys
if upper(sort_key) == sort_base then
table.insert(categories, "Sort key tracking/redundant")
end
else
sort_key = sort_base
end
-- If the sortkey is empty, remove it.
-- Leave the sortkey if it is equal to PAGENAME, because it still
-- might be different from DEFAULTSORT and therefore have an effect; see
-- [[Wiktionary:Grease pit/2020/April#Module:utilities#format categories]].
if sort_key == "" then
sort_key = nil
end
end
-- If the sort key is "-", bypass the process of generating a sort key altogether. This is desirable when categorising (e.g.) translation requests, as the pages to be categorised are always in English/Translingual.
else
else
sort_key = sort_base
sort_key = upper(sort_base or SUBPAGENAME)
end
-- If the sortkey is empty, remove it.
-- Leave the sortkey if it is equal to PAGENAME, because it still
-- might be different from DEFAULTSORT and therefore have an effect; see
-- [[Wiktionary:Grease pit/2020/April#Module:utilities#format categories]].
if sort_key == "" then
sort_key = nil
end
end
Line 124: Line 197:
end
end


-- Used by {{categorize}}
function export.catfix(lang, sc)
function export.template_categorize(frame)
local NAMESPACE = mw.title.getCurrentTitle().nsText
local format = frame.args["format"]
local args = frame:getParent().args
local langcode = args[1]; if langcode == "" then langcode = nil end
local sort_key = args["sort"]; if sort_key == "" then sort_key = nil end
local categories = {}
if not langcode then
if NAMESPACE == "Template" then return "" end
error("Language code has not been specified. Please pass parameter 1 to the template.")
end
local lang = require("Module:languages").getByCode(langcode)
if not lang then
if not lang then
if NAMESPACE == "Template" then return "" end
return nil
error("The language code \"" .. langcode .. "\" is not valid.")
elseif type(lang) ~= "table" then
end
return nil
local prefix = ""
if format == "pos" then
prefix = lang:getCanonicalName() .. " "
elseif format == "topic" then
prefix = lang:getCode() .. ":"
end
end
local i = 2
local cat = args[i]
while cat do
if cat ~= "" then
table.insert(categories, prefix .. cat)
end
i = i + 1
cat = args[i]
end
return export.format_categories(categories, lang, sort_key)
end
function export.catfix(lang, sc)
local canonicalName = lang:getCanonicalName() or error('The first argument to the function "catfix" should be a language object from Module:languages.')
local canonicalName = lang:getCanonicalName() or error('The first argument to the function "catfix" should be a language object from Module:languages.')
Line 196: Line 229:
}
}
local args = require("Module:parameters").process(frame:getParent().args, params)
local args = require("Module:parameters").process(frame:getParent().args, params, nil, "utilities", "catfix_template")
local lang = require("Module:languages").getByCode(args[1]) or require("Module:languages").err(args[1], 1)
local lang = require("Module:languages").getByCode(args[1]) or require("Module:languages").err(args[1], 1)
Line 242: Line 275:
}
}
local args = require("Module:parameters").process(frame:getParent().args, params)
local args = require("Module:parameters").process(frame:getParent().args, params, nil, "utilities", "make_id")
local langCode = args[1]
local langCode = args[1]
Line 259: Line 292:
end
end
local canonicalName = lang:getCanonicalName()
local id = require("Module:senseid").anchor(lang, str)
str = mw.uri.encode(str, "WIKI")
local id = canonicalName .. "-" .. str
if invoked then
if invoked then
Line 270: Line 299:
return id
return id
end
end
end
-- Given a type (as a string) and an arbitrary number of entities, checks whether all of those entities are language, family, script, writing system or Wikimedia language objects. Useful for error handling in functions that require one of these kinds of object.
-- If noErr is set, the function returns false instead of throwing an error, which allows customised error handling to be done in the calling function.
-- Parameters:
--   typ: the type name passed to each object's hasType method; the special value "object" accepts any object that has a hasType method.
--   noErr: if truthy, failures return false instead of raising an error.
--   ...: the entities to check. nil values are checked like any other value rather than being skipped.
-- Returns true if every entity passes; otherwise false (with noErr) or raises an error.
function export.check_object(typ, noErr, ...)
	local function fail(message)
		if noErr then
			return false
		else
			-- Level 3 is meant to report the error against the function that called check_object, not this helper.
			error(message, 3)
		end
	end
	
	-- select("#", ...) counts every argument, including nils; the length operator and ipairs are unreliable on a table with nil holes.
	local count = select("#", ...)
	if count == 0 then
		return fail("Must provide at least one object to check.")
	end
	
	local objs = {...}
	for i = 1, count do
		local obj = objs[i]
		if type(obj) ~= "table" or type(obj.hasType) ~= "function" then
			return fail("Function expected a " .. typ .. " object, but received a " .. type(obj) .. " instead.")
		elseif not (typ == "object" or obj:hasType(typ)) then
			-- Try to name the type that was actually received, for a more helpful message.
			for _, wrong_type in ipairs{"family", "language", "script", "Wikimedia language", "writing system"} do
				if obj:hasType(wrong_type) then
					return fail("Function expected a " .. typ .. " object, but received a " .. wrong_type .. " object instead.")
				end
			end
			return fail("Function expected a " .. typ .. " object, but received another type of object instead.")
		end
	end
	return true
end
end


return export
return export