Module:utilities: Difference between revisions

From Linguifex
Jump to navigation Jump to search
No edit summary
Tag: Reverted
(Undo revision 320503 by Sware (talk))
Tag: Undo
 
Line 5: Line 5:
local neededhassubpage = data.neededhassubpage
local neededhassubpage = data.neededhassubpage


function export.require_when_needed(text)
-- A helper function to escape magic characters in a string
return setmetatable({}, {
__index = function(t, k)
t = require(text)
return t[k]
end,
__call = function(t, ...)
t = require(text)
return t(...)
end
})
end
 
-- A helper function to escape magic characters in a string.
-- Magic characters: ^$()%.[]*+-?
-- Magic characters: ^$()%.[]*+-?
function export.pattern_escape(text)
function export.pattern_escape(text)
Line 24: Line 11:
text = text.args[1]
text = text.args[1]
end
end
return (text:gsub("([%^$()%%.%[%]*+%-?])", "%%%1"))
text = mw.ustring.gsub(text, "([%^$()%%.%[%]*+%-?])", "%%%1")
end
return text
 
-- A helper function to resolve HTML entities into plaintext.
-- Iterates over entities in a string, and decodes them into plaintext. We use iteration (instead of decoding the whole string in one go) because it means we can avoid loading the lookup string unnecessarily, as it uses more memory.
function export.get_entities(text)
local entities
return (text:gsub("&[#%w]-;", function(entity)
-- Check if mw.text.decode is able to decode the entity.
if entity:find("^&#") or
entity == "<" or
entity == ">" or
entity == "&" or
entity == """ or
entity == " "
then
return mw.text.decode(entity)
else
-- [[Module:utilities/data/entities]] is a lookup string of every named HTML entity (except the ones listed above), as they aren't covered by mw.text.decode.
-- mw.text.decode can decode lots of named entities if the second parameter is true, but around 600 are still not covered, and it's less efficient than doing it this way anyway.
entities = entities or require("Module:utilities/data/entities")
local pattern = entity .. "(%Z+)"
return entities:match(pattern)
end
end))
end
 
-- A helper function to convert plaintext into HTML entities where these match the characters given in set.
-- By default, this resolves any pre-existing entities into plaintext first, to allow mixed input and to avoid accidental double-conversion. This can be turned off with the raw parameter.
function export.make_entities(text, set, raw)
text = not raw and export.get_entities(text) or text
return mw.text.encode(text, set)
end
 
-- A helper function to strip wiki markup, giving the plaintext of what is displayed on the page.
function export.get_plaintext(text)
local u = mw.ustring.char
text = text
:gsub("%[%[", "\1")
:gsub("%]%]", "\2")
-- Remove strip markers and HTML tags.
text = mw.text.unstrip(text)
:gsub("<[^<>\1\2]+>", "")
-- Parse internal links for the display text, and remove categories.
text = require("Module:links").remove_links(text)
-- Remove files.
for _, falsePositive in ipairs({"File", "Image"}) do
text = text:gsub("\1" .. falsePositive .. ":[^\1\2]+\2", "")
end
 
-- Parse external links for the display text.
text = text:gsub("%[(https?://[^%[%]]+)%]",
function(capture)
return capture:match("https?://[^%s%]]+%s([^%]]+)") or ""
end)
text = text
:gsub("\1", "[[")
:gsub("\2", "]]")
-- Any remaining square brackets aren't involved in links, but must be escaped to avoid creating new links.
text = text:gsub("[%[%]]", mw.text.nowiki)
-- Strip bold, italics and soft hyphens.
text = text
:gsub("('*)'''(.-'*)'''", "%1%2")
:gsub("('*)''(.-'*)''", "%1%2")
:gsub("­", "")
-- Get any HTML entities.
-- Note: don't decode URL percent encoding, as it shouldn't be used in display text and may cause problems if % is used.
text = export.get_entities(text)
return mw.text.trim(text)
end
end


Line 118: Line 30:
}
}
local args = require("Module:parameters").process(frame.args, params, nil, "utilities", "plain_gsub")
local args = require("Module:parameters").process(frame.args, params)
text = args[1]
text = args[1]
Line 138: Line 50:
pattern = export.pattern_escape(pattern)
pattern = export.pattern_escape(pattern)
local gsub = require("Module:string utilities").gsub
if invoked then
if invoked then
return (gsub(text, pattern, replacement))
text = mw.ustring.gsub(text, pattern, replacement)
return text
else
else
return gsub(text, pattern, replacement)
return mw.ustring.gsub(text, pattern, replacement)
end
end
end
end
Line 167: Line 79:
]]
]]
function export.format_categories(categories, lang, sort_key, sort_base, force_output, sc)
function export.format_categories(categories, lang, sort_key, sort_base, force_output, sc)
local NAMESPACE = mw.title.getCurrentTitle().nsText
if type(lang) == "table" and not lang.getCode then
if type(lang) == "table" and not lang.getCode then
error("The second argument to format_categories should be a language object.")
error("The second argument to format_categories should be a language object.")
end
end
 
local title_obj = mw.title.getCurrentTitle()
if force_output or data.allowedNamespaces[NAMESPACE] then
local allowedNamespaces = {
local PAGENAME = mw.title.getCurrentTitle().text
[0] = true, [100] = true, [114] = true, [118] = true -- (main), Appendix, Citations, Reconstruction
local SUBPAGENAME = mw.title.getCurrentTitle().subpageText
}
 
if force_output or allowedNamespaces[title_obj.namespace] or title_obj.prefixedText == "Wiktionary:Sandbox" then
local PAGENAME = title_obj.text
local SUBPAGENAME = title_obj.subpageText
if not lang then
if not lang then
Line 184: Line 93:
end
end
-- Generate a default sort key.
-- Generate a default sort key
local upper = require("Module:string utilities").upper
sort_base = lang:makeSortKey(sort_base or SUBPAGENAME, sc)
if sort_key ~= "-" then
-- Determine the intended title if the page is an unsupported title.
if sort_key and sort_key ~= "" then
local unsupported; SUBPAGENAME, unsupported = SUBPAGENAME:gsub("^Unsupported titles/", "")
-- Gather some statistics regarding sort keys
if unsupported > 0 then
if mw.ustring.upper(sort_key) == sort_base then
for title, page in pairs(mw.loadData("Module:links/data").unsupported_titles) do
table.insert(categories, "Sort key tracking/redundant")
if page == SUBPAGENAME then
SUBPAGENAME = title
break
end
end
end
end
sort_base = (lang:makeSortKey(sort_base or SUBPAGENAME, sc))
if sort_key and sort_key ~= "" then
-- Gather some statistics regarding sort keys
if upper(sort_key) == sort_base then
table.insert(categories, "Sort key tracking/redundant")
end
else
sort_key = sort_base
end
-- If the sortkey is empty, remove it.
-- Leave the sortkey if it is equal to PAGENAME, because it still
-- might be different from DEFAULTSORT and therefore have an effect; see
-- [[Wiktionary:Grease pit/2020/April#Module:utilities#format categories]].
if sort_key == "" then
sort_key = nil
end
-- If the sort key is "-", bypass the process of generating a sort key altogether. This is desirable when categorising (e.g.) translation requests, as the pages to be categorised are always in English/Translingual.
else
else
sort_key = upper(sort_base or SUBPAGENAME)
sort_key = sort_base
end
-- If the sortkey is empty, remove it.
-- Leave the sortkey if it is equal to PAGENAME, because it still
-- might be different from DEFAULTSORT and therefore have an effect; see
-- [[Wiktionary:Grease pit/2020/April#Module:utilities#format categories]].
if sort_key == "" then
sort_key = nil
end
end
Line 227: Line 122:
return ""
return ""
end
end
end
-- Used by {{categorize}}
function export.template_categorize(frame)
local NAMESPACE = mw.title.getCurrentTitle().nsText
local format = frame.args["format"]
local args = frame:getParent().args
local langcode = args[1]; if langcode == "" then langcode = nil end
local sort_key = args["sort"]; if sort_key == "" then sort_key = nil end
local categories = {}
if not langcode then
if NAMESPACE == "Template" then return "" end
error("Language code has not been specified. Please pass parameter 1 to the template.")
end
local lang = require("Module:languages").getByCode(langcode)
if not lang then
if NAMESPACE == "Template" then return "" end
error("The language code \"" .. langcode .. "\" is not valid.")
end
local prefix = ""
if format == "pos" then
prefix = lang:getCanonicalName() .. " "
elseif format == "topic" then
prefix = lang:getCode() .. ":"
end
local i = 2
local cat = args[i]
while cat do
if cat ~= "" then
table.insert(categories, prefix .. cat)
end
i = i + 1
cat = args[i]
end
return export.format_categories(categories, lang, sort_key)
end
end


Line 256: Line 196:
}
}
local args = require("Module:parameters").process(frame:getParent().args, params, nil, "utilities", "catfix_template")
local args = require("Module:parameters").process(frame:getParent().args, params)
local lang = require("Module:languages").getByCode(args[1]) or require("Module:languages").err(args[1], 1)
local lang = require("Module:languages").getByCode(args[1]) or require("Module:languages").err(args[1], 1)
Line 302: Line 242:
}
}
local args = require("Module:parameters").process(frame:getParent().args, params, nil, "utilities", "make_id")
local args = require("Module:parameters").process(frame:getParent().args, params)
local langCode = args[1]
local langCode = args[1]
Line 319: Line 259:
end
end
local id = require("Module:senseid").anchor(lang, str)
local canonicalName = lang:getCanonicalName()
str = mw.uri.encode(str, "WIKI")
local id = canonicalName .. "-" .. str
if invoked then
if invoked then
Line 326: Line 270:
return id
return id
end
end
end
-- Given a type (as a string) and an arbitrary number of entities, checks whether all of those entities are language, family, script, writing system or Wikimedia language objects. Useful for error handling in functions that require one of these kinds of object.
-- If noErr is set, the function returns false instead of throwing an error, which allows customised error handling to be done in the calling function.
function export.check_object(typ, noErr, ...)
local function fail(message)
if noErr then
return false
else
error(message, 3)
end
end
local objs = {...}
if #objs == 0 then
return fail("Must provide at least one object to check.")
end
for _, obj in ipairs{...} do
if type(obj) ~= "table" or type(obj.hasType) ~= "function" then
return fail("Function expected a " .. typ .. " object, but received a " .. type(obj) .. " instead.")
elseif not (typ == "object" or obj:hasType(typ)) then
for _, wrong_type in ipairs{"family", "language", "script", "Wikimedia language", "writing system"} do
if obj:hasType(wrong_type) then
return fail("Function expected a " .. typ .. " object, but received a " .. wrong_type .. " object instead.")
end
end
return fail("Function expected a " .. typ .. " object, but received another type of object instead.")
end
end
return true
end
end


return export
return export

Latest revision as of 20:03, 31 July 2023



-- Table collecting this module's public functions; it is returned at the end
-- of the file.
local export = {}

-- Shared data table loaded with mw.loadData. Functions below read
-- data.allowedNamespaces and data.catfix_scripts from it.
local data = mw.loadData("Module:utilities/data")
-- NOTE(review): neither of the following two locals is referenced by any
-- function visible in this revision — confirm whether they are still needed.
local notneeded = data.notneeded
local neededhassubpage = data.neededhassubpage

-- Escapes the Lua pattern magic characters in a string by prefixing each of
-- them with "%", so the result can be used as a pattern that matches the
-- input literally.
-- Magic characters: ^$()%.[]*+-?
-- If TEXT is a table (e.g. a frame object), the string is taken from
-- text.args[1] instead.
function export.pattern_escape(text)
	local str = text
	if type(str) == "table" then
		str = str.args[1]
	end
	-- The outer parentheses drop gsub's second result (the substitution
	-- count), so exactly one value is returned.
	return (mw.ustring.gsub(str, "([%^$()%%.%[%]*+%-?])", "%%%1"))
end

-- Like mw.ustring.gsub, except that PATTERN is matched literally: it is passed
-- through export.pattern_escape before the substitution is performed.
--
-- Called from a module: plain_gsub(text, pattern, replacement). PATTERN and
-- REPLACEMENT must each be a string or a number. Returns everything that
-- mw.ustring.gsub returns.
--
-- Called with a table as the first argument: the table must be a frame object
-- (it must have an `args` field); text, pattern and replacement are then read
-- from parameters 1, 2 and 3, and only the resulting string is returned.
function export.plain_gsub(text, pattern, replacement)
	local from_frame = type(text) == "table"

	if from_frame then
		if not text.args then
			error("If the first argument to plain_gsub is a table, it should be a frame object.")
		end

		local spec = {
			[1] = {},
			[2] = {},
			[3] = { allow_empty = true },
		}
		local args = require("Module:parameters").process(text.args, spec)

		text, pattern, replacement = args[1], args[2], args[3]
	else
		local pattern_type = type(pattern)
		if pattern_type ~= "string" and pattern_type ~= "number" then
			error("The second argument to plain_gsub should be a string or a number.")
		end

		local replacement_type = type(replacement)
		if replacement_type ~= "string" and replacement_type ~= "number" then
			error("The third argument to plain_gsub should be a string or a number.")
		end
	end

	local literal = export.pattern_escape(pattern)

	if from_frame then
		-- Keep only the first result so that the substitution count is not
		-- returned as well.
		local result = mw.ustring.gsub(text, literal, replacement)
		return result
	end

	return mw.ustring.gsub(text, literal, replacement)
end

--[[
Format the categories with the appropriate sort key. CATEGORIES is a list of
categories. Returns the concatenated category links as a string, or the empty
string when output is suppressed for the current namespace.
	-- CATEGORIES is modified in place: when an explicit SORT_KEY turns out to
	   be identical (after uppercasing) to the default sort key, the tracking
	   category "Sort key tracking/redundant" is appended to it.
	-- LANG is an object encapsulating a language; if nil, the object for
	   language code 'und' (undetermined) will be used.
	-- SORT_KEY is placed in the category invocation, and indicates how the
	   page will sort in the respective category. Normally this should be nil,
	   and a default sort key based on the subpage name (the part after the
	   colon) will be used.
	-- SORT_BASE lets you override the default sort key used when SORT_KEY is
	   nil. Normally, this should be nil, and a language-specific default sort
	   key is computed from the subpage name (e.g. for Russian this converts
	   Cyrillic ё to a string consisting of Cyrillic е followed by U+10FFFF,
	   so that effectively ё sorts after е instead of the default Wikimedia
	   sort, which (I think) is based on Unicode sort order and puts ё after я,
	   the last letter of the Cyrillic alphabet).
	-- FORCE_OUTPUT forces normal output in all namespaces. Normally, nothing
	   is output if the namespace of the current page is not listed in
	   data.allowedNamespaces (per the original documentation: the main,
	   Appendix:, Reconstruction: and Citations: namespaces).
	-- SC is passed unchanged as the second argument to lang:makeSortKey
	   (presumably a script object; confirm against Module:languages).
]]
function export.format_categories(categories, lang, sort_key, sort_base, force_output, sc)
	if type(lang) == "table" and not lang.getCode then
		error("The second argument to format_categories should be a language object.")
	end

	-- Look the title up once instead of once per field.
	local title = mw.title.getCurrentTitle()

	if not (force_output or data.allowedNamespaces[title.nsText]) then
		return ""
	end

	if not lang then
		lang = require("Module:languages").getByCode("und")
	elseif type(lang) ~= "table" then
		-- A string, number, etc. would otherwise fail below with an opaque
		-- "attempt to call method 'makeSortKey'" error.
		error("The second argument to format_categories should be a language object.")
	end

	-- Generate a default sort key
	sort_base = lang:makeSortKey(sort_base or title.subpageText, sc)

	if sort_key and sort_key ~= "" then
		-- Gather some statistics regarding sort keys
		if mw.ustring.upper(sort_key) == sort_base then
			table.insert(categories, "Sort key tracking/redundant")
		end
	else
		sort_key = sort_base
	end

	-- If the sortkey is empty, remove it.
	-- Leave the sortkey if it is equal to PAGENAME, because it still
	-- might be different from DEFAULTSORT and therefore have an effect; see
	-- [[Wiktionary:Grease pit/2020/April#Module:utilities#format categories]].
	if sort_key == "" then
		sort_key = nil
	end

	-- The "|sortkey" suffix is the same for every category, so build it once.
	local suffix = sort_key and ("|" .. sort_key) or ""

	local out_categories = {}
	for key, cat in ipairs(categories) do
		out_categories[key] = "[[Category:" .. cat .. suffix .. "]]"
	end

	return table.concat(out_categories, "")
end

-- Used by {{categorize}}
-- Reads the "format" argument from the invoking frame and the language code
-- (parameter 1), the "sort" key and the category names (parameters 2, 3, ...
-- up to the first missing one) from the parent frame, then delegates to
-- export.format_categories.
-- With format "pos" each category is prefixed with the language's canonical
-- name and a space; with format "topic" it is prefixed with the language code
-- and a colon; otherwise no prefix is added.
-- A missing or invalid language code raises an error, except in the Template
-- namespace, where the empty string is returned instead.
function export.template_categorize(frame)
	local in_template_namespace = mw.title.getCurrentTitle().nsText == "Template"
	local format = frame.args["format"]
	local parent_args = frame:getParent().args

	-- Empty parameters are treated the same as absent ones.
	local langcode = parent_args[1]
	if langcode == "" then
		langcode = nil
	end

	local sort_key = parent_args["sort"]
	if sort_key == "" then
		sort_key = nil
	end

	if not langcode then
		if in_template_namespace then
			return ""
		end
		error("Language code has not been specified. Please pass parameter 1 to the template.")
	end

	local lang = require("Module:languages").getByCode(langcode)

	if not lang then
		if in_template_namespace then
			return ""
		end
		error("The language code \"" .. langcode .. "\" is not valid.")
	end

	local prefix
	if format == "pos" then
		prefix = lang:getCanonicalName() .. " "
	elseif format == "topic" then
		prefix = lang:getCode() .. ":"
	else
		prefix = ""
	end

	-- Collect the non-empty category names, stopping at the first gap in the
	-- numbered parameters.
	local categories = {}
	local index = 2
	while true do
		local cat = parent_args[index]
		if not cat then
			break
		end
		if cat ~= "" then
			table.insert(categories, prefix .. cat)
		end
		index = index + 1
	end

	return export.format_categories(categories, lang, sort_key)
end

-- Builds the hidden "catfix" marker span for a language.
-- LANG must be a language object (it needs getCanonicalName and getCode).
-- SC is an optional script object; when omitted, the script code registered
-- for the language in data.catfix_scripts (if any) is resolved through
-- Module:scripts.
-- Returns a <span id="catfix"> element whose class is "CATFIX-" followed by
-- the anchor-encoded canonical language name, and whose content is a
-- non-breaking space tagged by Module:script utilities.
-- Raises an error if LANG is not a language object or SC is not a script
-- object.
function export.catfix(lang, sc)
	local lang_error = 'The first argument to the function "catfix" should be a language object from Module:languages.'

	-- Validate before calling the method: invoking lang:getCanonicalName() on
	-- nil or on a non-language value would raise an unrelated Lua error, and
	-- the intended message would never be shown.
	if type(lang) ~= "table" or not lang.getCanonicalName then
		error(lang_error)
	end

	local canonicalName = lang:getCanonicalName() or error(lang_error)

	-- The type check keeps non-table values such as numbers from being indexed.
	if sc and (type(sc) ~= "table" or not sc.getCode) then
		error('The second argument to the function "catfix" should be a script object from Module:scripts.')
	end

	-- To add script classes to links on pages created by category boilerplate templates.
	if not sc then
		sc = data.catfix_scripts[lang:getCode()]
		if sc then
			sc = require("Module:scripts").getByCode(sc)
		end
	end

	return "<span id=\"catfix\" style=\"display:none;\" class=\"CATFIX-" .. mw.uri.anchorEncode(canonicalName) .. "\">" ..
		require("Module:script utilities").tag_text("&nbsp;", lang, sc, nil) ..
		"</span>"
end

-- Template entry point for export.catfix.
-- Reads the language code from parameter 1 and an optional script code from
-- parameter "sc" (parameter 2 is an alias of "sc") of the parent frame,
-- resolves both into objects and returns the result of export.catfix.
-- An unknown language code is reported through Module:languages' err
-- function; an unknown script code raises an error.
function export.catfix_template(frame)
	local args = require("Module:parameters").process(frame:getParent().args, {
		[1] = {},
		[2] = { alias_of = "sc" },
		["sc"] = {},
	})

	local m_languages = require("Module:languages")
	local lang = m_languages.getByCode(args[1])
	if not lang then
		lang = m_languages.err(args[1], 1)
	end

	local sc = args.sc
	if sc then
		local code = sc
		sc = require("Module:scripts").getByCode(code)
		if not sc then
			error('The script code "' .. code .. '", provided in the second parameter, is not valid.')
		end
	end

	return export.catfix(lang, sc)
end

-- Not exporting because it is not used yet.
-- Classifies a calendar date relative to the current time.
-- frame.args[1] is the year, frame.args[2] the month (an English month name,
-- or a number from 1 to 12 given either as a number or as a numeric string)
-- and frame.args[3] the day of the month.
-- Returns "past", "present" or "future". Raises an error if the month is not
-- recognised.
local function getDateTense(frame)
	local month_names = {
		["January"] = 1, ["February"] = 2, ["March"] = 3, ["April"] = 4,
		["May"] = 5, ["June"] = 6, ["July"] = 7, ["August"] = 8,
		["September"] = 9, ["October"] = 10, ["November"] = 11, ["December"] = 12,
	}

	local raw_month = frame.args[2]
	local month = month_names[raw_month]
	if not month then
		-- Accept numeric strings such as "3" too, since template arguments
		-- arrive as strings.
		local number = tonumber(raw_month)
		if number and number >= 1 and number <= 12 and number == math.floor(number) then
			month = number
		else
			error("Unrecognised month: " .. tostring(raw_month))
		end
	end

	-- No hour is given, so os.time places the date at 12:00 (noon) of that day.
	local date = os.time({ year = frame.args[1], month = month, day = frame.args[3] })
	local now = os.time()
	local diff = os.difftime(date, now)
	local half_day = 12 * 3600

	-- Any moment within half a day of that noon belongs to the same calendar
	-- day and counts as "present".
	if diff < -half_day then
		return "past"
	elseif diff > half_day then
		return "future"
	else
		return "present"
	end
end

-- Builds the sense ID for a language and a string.
--	If called with invoke, the first argument is a frame object: the language
--	code and the string are read from parameters 1 and 2 of the parent frame,
--	and the opening tag '<li class="senseid" id="...">' is returned.
--	If called by a module, the first argument is a language object and the
--	bare ID string is returned.
-- The ID is the language's canonical name, a hyphen, and STR encoded with
-- mw.uri.encode in "WIKI" mode.
-- Raises an error if the first argument is neither a frame nor a language
-- object, or if STR is not a string or a number.
function export.make_id(lang, str)
	local invoked = false

	if type(lang) == "table" and lang.args then
		invoked = true

		local frame = lang

		local params = {
			[1] = {},
			[2] = {},
		}

		local args = require("Module:parameters").process(frame:getParent().args, params)

		local langCode = args[1]
		str = args[2]

		local m_languages = require("Module:languages")

		lang = m_languages.getByCode(langCode) or m_languages.err(langCode, 1)
	elseif type(lang) ~= "table" or not lang.getCanonicalName then
		-- Also rejects nil, strings and other non-table values, which would
		-- otherwise fail at lang:getCanonicalName() with an unrelated Lua
		-- error.
		error("The first argument to make_id should be a language object.")
	end

	if not ( type(str) == "string" or type(str) == "number" ) then
		error("The second argument to make_id should be a string or a number.")
	end

	local canonicalName = lang:getCanonicalName()

	str = mw.uri.encode(str, "WIKI")

	local id = canonicalName .. "-" .. str

	if invoked then
		return '<li class="senseid" id="' .. id .. '">'
	else
		return id
	end
end

return export