|
|
Line 5: |
Line 5: |
| local neededhassubpage = data.neededhassubpage | | local neededhassubpage = data.neededhassubpage |
|
| |
|
-- Builds a stand-in for the module named by `text` without loading it up front.
-- Indexing or calling the stand-in passes `text` to require() at that moment
-- and forwards the index or call to whatever require() returns.
function export.require_when_needed(text)
	local proxy_mt = {}

	-- proxy[key]: look the key up on the required module.
	function proxy_mt.__index(_, key)
		return require(text)[key]
	end

	-- proxy(...): call the required module with the same arguments.
	function proxy_mt.__call(_, ...)
		return require(text)(...)
	end

	return setmetatable({}, proxy_mt)
end
| |
| | |
| -- A helper function to escape magic characters in a string. | |
| -- Magic characters: ^$()%.[]*+-? | | -- Magic characters: ^$()%.[]*+-? |
| function export.pattern_escape(text) | | function export.pattern_escape(text) |
Line 24: |
Line 11: |
| text = text.args[1] | | text = text.args[1] |
| end | | end |
| return (text:gsub("([%^$()%%.%[%]*+%-?])", "%%%1")) | | text = mw.ustring.gsub(text, "([%^$()%%.%[%]*+%-?])", "%%%1") |
| end
| | return text |
| | |
-- A helper function to resolve HTML entities into plaintext.
-- Iterates over entities in a string, and decodes them into plaintext. We use
-- iteration (instead of decoding the whole string in one go) because it means
-- we can avoid loading the lookup string unnecessarily, as it uses more memory.
--
-- text: the string to scan for substrings matching "&[#%w]-;".
-- Returns: the string with each recognised entity replaced. Only the string is
-- returned; the substitution count from gsub is discarded by the parentheses.
function export.get_entities(text)
	-- Lookup string for named entities; loaded at most once per call, and only
	-- if an entity that mw.text.decode cannot handle is actually encountered.
	local entities
	return (text:gsub("&[#%w]-;", function(entity)
		-- Check if mw.text.decode is able to decode the entity: numeric
		-- entities, plus the five named ones it handles by default. These are
		-- compared in their encoded "&name;" form, because that is what the
		-- pattern above captures.
		if entity:find("^&#") or
			entity == "&lt;" or
			entity == "&gt;" or
			entity == "&amp;" or
			entity == "&quot;" or
			entity == "&nbsp;"
		then
			return mw.text.decode(entity)
		else
			-- [[Module:utilities/data/entities]] is a lookup string of every named HTML entity (except the ones listed above), as they aren't covered by mw.text.decode.
			-- mw.text.decode can decode lots of named entities if the second parameter is true, but around 600 are still not covered, and it's less efficient than doing it this way anyway.
			entities = entities or require("Module:utilities/data/entities")
			-- The value is the run of non-NUL characters right after the entity
			-- name (presumably NUL-delimited records; confirm against the data
			-- module). If nothing matches, nil is returned and gsub leaves the
			-- original text in place.
			local pattern = entity .. "(%Z+)"
			return entities:match(pattern)
		end
	end))
end
| |
| | |
-- Encodes the characters of `text` selected by `set` as HTML entities, via
-- mw.text.encode.
-- Unless `raw` is truthy, the text is first passed through export.get_entities
-- so that entities already present are turned back into plaintext; this allows
-- mixed input and avoids converting the same thing twice.
function export.make_entities(text, set, raw)
	if not raw then
		text = export.get_entities(text)
	end
	return mw.text.encode(text, set)
end
| |
| | |
-- A helper function to strip wiki markup, giving the plaintext of what is displayed on the page.
--
-- text: a string of wikitext.
-- Returns: the processed string, passed through mw.text.trim.
function export.get_plaintext(text)
	-- Temporarily swap "[[" and "]]" for the control characters \1 and \2 so
	-- that the single-bracket patterns below cannot match double brackets.
	text = text
		:gsub("%[%[", "\1")
		:gsub("%]%]", "\2")

	-- Remove strip markers and HTML tags.
	text = mw.text.unstrip(text)
		:gsub("<[^<>\1\2]+>", "")

	-- Parse internal links for the display text, and remove categories.
	text = require("Module:links").remove_links(text)

	-- Remove files.
	for _, falsePositive in ipairs({"File", "Image"}) do
		text = text:gsub("\1" .. falsePositive .. ":[^\1\2]+\2", "")
	end

	-- Parse external links for the display text. A link with no label after
	-- the URL is removed entirely.
	text = text:gsub("%[(https?://[^%[%]]+)%]",
		function(capture)
			return capture:match("https?://[^%s%]]+%s([^%]]+)") or ""
		end)

	-- Restore any double brackets that survived the steps above.
	text = text
		:gsub("\1", "[[")
		:gsub("\2", "]]")

	-- Any remaining square brackets aren't involved in links, but must be escaped to avoid creating new links.
	text = text:gsub("[%[%]]", mw.text.nowiki)

	-- Strip bold, italics and soft hyphens.
	-- The soft hyphen (U+00AD) is written as its UTF-8 bytes so the pattern
	-- stays visible and cannot be lost as an invisible character.
	text = text
		:gsub("('*)'''(.-'*)'''", "%1%2")
		:gsub("('*)''(.-'*)''", "%1%2")
		:gsub("\194\173", "")

	-- Get any HTML entities.
	-- Note: don't decode URL percent encoding, as it shouldn't be used in display text and may cause problems if % is used.
	text = export.get_entities(text)

	return mw.text.trim(text)
end
|
| |
|
Line 118: |
Line 30: |
| } | | } |
| | | |
| local args = require("Module:parameters").process(frame.args, params, nil, "utilities", "plain_gsub") | | local args = require("Module:parameters").process(frame.args, params) |
| | | |
| text = args[1] | | text = args[1] |
Line 138: |
Line 50: |
| pattern = export.pattern_escape(pattern) | | pattern = export.pattern_escape(pattern) |
| | | |
| local gsub = require("Module:string utilities").gsub
| |
| if invoked then | | if invoked then |
| return (gsub(text, pattern, replacement)) | | text = mw.ustring.gsub(text, pattern, replacement) |
| | return text |
| else | | else |
| return gsub(text, pattern, replacement) | | return mw.ustring.gsub(text, pattern, replacement) |
| end | | end |
| end | | end |
Line 167: |
Line 79: |
| ]] | | ]] |
| function export.format_categories(categories, lang, sort_key, sort_base, force_output, sc) | | function export.format_categories(categories, lang, sort_key, sort_base, force_output, sc) |
| | local NAMESPACE = mw.title.getCurrentTitle().nsText |
| | |
| if type(lang) == "table" and not lang.getCode then | | if type(lang) == "table" and not lang.getCode then |
| error("The second argument to format_categories should be a language object.") | | error("The second argument to format_categories should be a language object.") |
| end | | end |
| | | |
| local title_obj = mw.title.getCurrentTitle()
| | if force_output or data.allowedNamespaces[NAMESPACE] then |
| local allowedNamespaces = {
| | local PAGENAME = mw.title.getCurrentTitle().text |
| [0] = true, [100] = true, [114] = true, [118] = true -- (main), Appendix, Citations, Reconstruction
| | local SUBPAGENAME = mw.title.getCurrentTitle().subpageText |
| } | |
| | |
| if force_output or allowedNamespaces[title_obj.namespace] or title_obj.prefixedText == "Wiktionary:Sandbox" then | |
| local PAGENAME = title_obj.text | |
| local SUBPAGENAME = title_obj.subpageText | |
| | | |
| if not lang then | | if not lang then |
Line 184: |
Line 93: |
| end | | end |
| | | |
| -- Generate a default sort key. | | -- Generate a default sort key |
| local upper = require("Module:string utilities").upper | | sort_base = lang:makeSortKey(sort_base or SUBPAGENAME, sc) |
| if sort_key ~= "-" then | | |
| -- Determine the intended title if the page is an unsupported title. | | if sort_key and sort_key ~= "" then |
| local unsupported; SUBPAGENAME, unsupported = SUBPAGENAME:gsub("^Unsupported titles/", "")
| | -- Gather some statistics regarding sort keys |
| if unsupported > 0 then
| | if mw.ustring.upper(sort_key) == sort_base then |
| for title, page in pairs(mw.loadData("Module:links/data").unsupported_titles) do | | table.insert(categories, "Sort key tracking/redundant") |
| if page == SUBPAGENAME then
| |
| SUBPAGENAME = title
| |
| break
| |
| end
| |
| end
| |
| end | | end |
| sort_base = (lang:makeSortKey(sort_base or SUBPAGENAME, sc))
| |
| if sort_key and sort_key ~= "" then
| |
| -- Gather some statistics regarding sort keys
| |
| if upper(sort_key) == sort_base then
| |
| table.insert(categories, "Sort key tracking/redundant")
| |
| end
| |
| else
| |
| sort_key = sort_base
| |
| end
| |
| -- If the sortkey is empty, remove it.
| |
| -- Leave the sortkey if it is equal to PAGENAME, because it still
| |
| -- might be different from DEFAULTSORT and therefore have an effect; see
| |
| -- [[Wiktionary:Grease pit/2020/April#Module:utilities#format categories]].
| |
| if sort_key == "" then
| |
| sort_key = nil
| |
| end
| |
| -- If the sort key is "-", bypass the process of generating a sort key altogether. This is desirable when categorising (e.g.) translation requests, as the pages to be categorised are always in English/Translingual.
| |
| else | | else |
| sort_key = upper(sort_base or SUBPAGENAME) | | sort_key = sort_base |
| | end |
| | |
| | -- If the sortkey is empty, remove it. |
| | -- Leave the sortkey if it is equal to PAGENAME, because it still |
| | -- might be different from DEFAULTSORT and therefore have an effect; see |
| | -- [[Wiktionary:Grease pit/2020/April#Module:utilities#format categories]]. |
| | if sort_key == "" then |
| | sort_key = nil |
| end | | end |
| | | |
Line 227: |
Line 122: |
| return "" | | return "" |
| end | | end |
| | end |
| | |
| | -- Used by {{categorize}} |
| | function export.template_categorize(frame) |
| | local NAMESPACE = mw.title.getCurrentTitle().nsText |
| | local format = frame.args["format"] |
| | local args = frame:getParent().args |
| | |
| | local langcode = args[1]; if langcode == "" then langcode = nil end |
| | local sort_key = args["sort"]; if sort_key == "" then sort_key = nil end |
| | local categories = {} |
| | |
| | if not langcode then |
| | if NAMESPACE == "Template" then return "" end |
| | error("Language code has not been specified. Please pass parameter 1 to the template.") |
| | end |
| | |
| | local lang = require("Module:languages").getByCode(langcode) |
| | |
| | if not lang then |
| | if NAMESPACE == "Template" then return "" end |
| | error("The language code \"" .. langcode .. "\" is not valid.") |
| | end |
| | |
| | local prefix = "" |
| | |
| | if format == "pos" then |
| | prefix = lang:getCanonicalName() .. " " |
| | elseif format == "topic" then |
| | prefix = lang:getCode() .. ":" |
| | end |
| | |
| | local i = 2 |
| | local cat = args[i] |
| | |
| | while cat do |
| | if cat ~= "" then |
| | table.insert(categories, prefix .. cat) |
| | end |
| | |
| | i = i + 1 |
| | cat = args[i] |
| | end |
| | |
| | return export.format_categories(categories, lang, sort_key) |
| end | | end |
|
| |
|
Line 256: |
Line 196: |
| } | | } |
| | | |
| local args = require("Module:parameters").process(frame:getParent().args, params, nil, "utilities", "catfix_template") | | local args = require("Module:parameters").process(frame:getParent().args, params) |
| | | |
| local lang = require("Module:languages").getByCode(args[1]) or require("Module:languages").err(args[1], 1) | | local lang = require("Module:languages").getByCode(args[1]) or require("Module:languages").err(args[1], 1) |
Line 302: |
Line 242: |
| } | | } |
| | | |
| local args = require("Module:parameters").process(frame:getParent().args, params, nil, "utilities", "make_id") | | local args = require("Module:parameters").process(frame:getParent().args, params) |
| | | |
| local langCode = args[1] | | local langCode = args[1] |
Line 319: |
Line 259: |
| end | | end |
| | | |
| local id = require("Module:senseid").anchor(lang, str) | | local canonicalName = lang:getCanonicalName() |
| | |
| | str = mw.uri.encode(str, "WIKI") |
| | |
| | local id = canonicalName .. "-" .. str |
| | | |
| if invoked then | | if invoked then |
Line 326: |
Line 270: |
| return id | | return id |
| end | | end |
| end
| |
|
| |
-- Given a type (as a string) and an arbitrary number of entities, checks whether all of those entities are language, family, script, writing system or Wikimedia language objects. Useful for error handling in functions that require one of these kinds of object.
-- If noErr is set, the function returns false instead of throwing an error, which allows customised error handling to be done in the calling function.
--
-- typ: the type name handed to each object's hasType method; the special value
--      "object" accepts anything that has a hasType method.
-- noErr: if truthy, failures return false instead of raising an error.
-- ...: the values to check. nil values are counted and checked like any other
--      value, so a nil anywhere in the list is a failure.
-- Returns: true if every value passes; false on failure when noErr is set.
function export.check_object(typ, noErr, ...)
	local function fail(message)
		if noErr then
			return false
		else
			-- Level 3 is meant to attribute the error to the code that called
			-- check_object rather than to this helper.
			error(message, 3)
		end
	end

	-- select("#", ...) counts nil arguments too, which the length operator and
	-- ipairs cannot do reliably on a table containing nils.
	local count = select("#", ...)
	if count == 0 then
		return fail("Must provide at least one object to check.")
	end

	local objs = {...}
	for i = 1, count do
		local obj = objs[i]
		if type(obj) ~= "table" or type(obj.hasType) ~= "function" then
			return fail("Function expected a " .. typ .. " object, but received a " .. type(obj) .. " instead.")
		elseif not (typ == "object" or obj:hasType(typ)) then
			-- Work out which kind of object was received, for the message.
			for _, wrong_type in ipairs{"family", "language", "script", "Wikimedia language", "writing system"} do
				if obj:hasType(wrong_type) then
					return fail("Function expected a " .. typ .. " object, but received a " .. wrong_type .. " object instead.")
				end
			end
			return fail("Function expected a " .. typ .. " object, but received another type of object instead.")
		end
	end
	return true
end
|
| |
|
| return export | | return export |