-- Module:letter def
--
-- From Linguifex
-- Revision as of 10:47, 8 May 2026 by Sware (talk | contribs)
-- (diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
-- Jump to navigation Jump to search


-- Author: Benwing

local export = {}

local force_cat = false

local require_when_needed = require("Module:utilities/require when needed")

local ConvertNumeric_module = "Module:ConvertNumeric"
local headword_module = "Module:headword"
local headword_utilities_module = "Module:headword utilities"
local languages_module = "Module:languages"
local links_module = "Module:links"
local parameters_module = "Module:parameters"
local scripts_module = "Module:scripts"
local string_utilities_module = "Module:string utilities"
local table_module = "Module:table"
local utilities_module = "Module:utilities"

local m_links = require(links_module)
local full_link = m_links.full_link
local m_string_utilities = require(string_utilities_module)
local glossary_link = require_when_needed(headword_utilities_module, "glossary_link")
local lang_getByCode = require_when_needed(languages_module, "getByCode")
local format_categories = require_when_needed(utilities_module, "format_categories")
local serial_comma_join = require_when_needed(table_module, "serialCommaJoin")

local uupper = m_string_utilities.upper
local ulower = m_string_utilities.lower
local ufind = m_string_utilities.find
local insert = table.insert
local concat = table.concat

-- "If not empty": normalize a raw argument value. A falsy value (nil or false) is handed back unchanged; otherwise
-- the value is trimmed with mw.text.trim, and nil is returned when nothing is left after trimming.
local function ine(val)
	if val then
		local trimmed = mw.text.trim(val)
		if trimmed ~= "" then
			return trimmed
		end
		return nil
	end
	return val
end

-- Spell out `num` as an ordinal by delegating to spell_number() in [[Module:ConvertNumeric]].
-- spell_number() takes a long list of positional parameters (13 numbered params, per the original author's note on
-- this function); only the ones up to and including `ordinal` are passed here.
local function ordinal_to_word(num)
	local spell_number = require(ConvertNumeric_module).spell_number
	-- Positional arguments, in order:
	--   num
	--   numerator   = nil
	--   denominator = nil
	--   capitalize  = nil
	--   use_and     = true; mimics default behavior of {{ordinal to word}}, which includes supposedly British "and"
	--                 before the final number
	--   hyphenate   = nil
	--   ordinal     = true
	return spell_number(num, nil, nil, nil, true, nil, true)
end

-- Prefix `term` with a colon unless it is one of the special values "-" or "+" or already begins with a colon.
local function add_initial_colon_to_term(term)
	if term == "-" or term == "+" or term:find("^:") then
		return term
	end
	-- The colon makes sure we link to the specified term even if it has a diacritic that would normally be stripped
	-- off.
	return ":" .. term
end

-- Turn a raw parameter value into a list of term objects.
-- Returns nil when `value` is missing. A value containing neither "," nor "<" becomes a single-element list
-- {{term = value}}; anything else is handed to parse_term_with_modifiers() in [[Module:headword utilities]], split on
-- commas, with the inline modifiers tr, ts, t and sc enabled. `paramname` is passed through to that parser.
local function parse_param(value, paramname)
	if not value then
		return nil
	end
	if not value:find("[,<]") then
		-- Plain value: no splitting or inline modifiers to process.
		return {{ term = value }}
	end
	local parsed = require(headword_utilities_module).parse_term_with_modifiers {
		val = value,
		paramname = paramname,
		splitchar = ",",
		include_mods = {"tr", "ts", "t", "sc"},
	}
	return parsed
end

-- Parse a character parameter into a list of term objects via parse_param(). Unless `no_prefix_colon` is set, each
-- term is passed through add_initial_colon_to_term(). Returns nil when `value` is missing.
local function parse_char(value, paramname, no_prefix_colon)
	if not value then
		return nil
	end
	local parsed = parse_param(value, paramname)
	if not no_prefix_colon then
		for _, obj in ipairs(parsed) do
			obj.term = add_initial_colon_to_term(obj.term)
		end
	end
	return parsed
end

-- Join a list of items for display. With two or more items the list goes through serialCommaJoin() from
-- [[Module:table]] using `conj` as the conjunction; otherwise the first item (possibly nil) is returned as-is.
local function join_items(items, conj)
	if items[2] then
		return serial_comma_join(items, {conj = conj})
	end
	return items[1]
end


--[==[
Implementation of {{tl|Latn-def}}, {{tl|Cyrl-def}} and the like. Supports the following invocation parameter:
; {{para|sc}}
: Specify the script code. The template parameter {{para|sc}}, when given, takes precedence over it; if both are
  omitted, the script is autodetected from the pagename and/or character specified in 3=. If neither method is
  possible, an error is thrown.

Template parameters common to all definition types:
; {{para|1}}
: Language code (required).
; {{para|2}}
: Definition type (required): one of `letter`, `digraph`, `trigraph`, `tetragraph`, `pentagraph`, `multigraph`,
  `numsym`, `ordinal` (treated as `numsym`), `name`, `diacritic` or `syllable`. The multigraph types are processed
  like `letter`.
; {{para|sc}}, {{para|nocap}}, {{para|dot}}, {{para|nodot}}, {{para|addl}}, {{para|pagename}}
: Script override, lowercase initial article, custom final punctuation, suppression of the final period, additional
  trailing text and pagename override, respectively.
The remaining parameters depend on the definition type; see the `merge_params` calls in the function body.

Returns the definition text wrapped in a `use-with-mention` span, followed by any formatted categories.
]==]
function export.show(frame)
	local list_param = {list = true, disallow_holes = true}
	local boolean_param = {type = "boolean"}
	-- Output is accumulated as a list of fragments and concatenated once at the end.
	local output = {}
	local function ins(txt)
		insert(output, txt)
	end
	local iargs = require(parameters_module).process(frame.args, {
		sc = {type = "script"},
	})
	local parent_args = frame:getParent().args
	-- The definition type is read from the raw arguments because it determines which parameters are allowed, and so
	-- must be known before the arguments are processed.
	local deftype = ine(parent_args[2])
	if deftype == "ordinal" then
		deftype = "numsym"
	end
	-- `canontype` collapses all the multigraph types into "letter"; `deftype` keeps the specific type for display.
	local canontype = deftype
	if canontype == "digraph" or canontype == "trigraph" or canontype == "tetragraph" or canontype == "pentagraph" or
		canontype == "multigraph" then
		canontype = "letter"
	end
	-- FIXME: convert 'ordinal' to 'numsym'
	local deftypes = {"letter", "digraph", "trigraph", "tetragraph", "pentagraph", "multigraph", "numsym", "ordinal", "name", "diacritic", "syllable"}
	local params = {
		[1] = {type = "language", required = true, template_default = "und"},
		[2] = {set = deftypes, required = true},
		sc = {type = "script"},
		nocap = boolean_param,
		dot = true,
		nodot = boolean_param,
		addl = true,
		pagename = true,
	}
	-- Add the type-specific parameter specs to the common ones.
	local function merge_params(extra_params)
		for k, v in pairs(extra_params) do
			params[k] = v
		end
	end
	if canontype == "letter" or canontype == "numsym" then
		merge_params {
			[3] = true,
			[4] = list_param,
			linklang = boolean_param, -- only used for prec/foll
			alphabet = true,
			alphvar = true,
			prec = true,
			foll = true,
			last = boolean_param,
		}
	elseif canontype == "name" then
		merge_params {
			[3] = {required = true},
			[4] = true,
			linklang = boolean_param,
			alphabet = true,
			alphvar = true,
			lit = true,
			eq = true,
		}
	elseif canontype == "diacritic" then
		merge_params {
			[3] = list_param,
			name = true,
			alphabet = true,
			alphvar = true,
			nopairs = boolean_param,
			moreexamples = boolean_param,
		}
	elseif canontype == "syllable" then
		merge_params {
			[3] = {required = true},
			[4] = {required = true},
			[5] = {required = true},
		}
	else
		-- no need to throw an error here, as it will be thrown by [[Module:parameters]]
	end
	local args = require(parameters_module).process(parent_args, params)
	local lang = args[1]
	-- The template's sc= wins over the invocation's sc=.
	local sc = args.sc or iargs.sc

	if not sc then
		if canontype == "letter" or canontype == "numsym" then
			sc = lang:findBestScript(args.pagename or mw.loadData("Module:headword/data").pagename)
		elseif canontype == "diacritic" or canontype == "name" then
			-- 3= is a list for diacritics and a single string for names; use the first letter either way.
			local test_char = args[3]
			if type(test_char) == "table" then
				test_char = test_char[1]
			end
			if not test_char then
				error("No letter is specified in 3= from which the script can be derived; you must specify the script explicitly in sc=")
			end
			sc = lang:findBestScript(test_char)
		else
			sc = require(scripts_module).getByCode("Latn") -- not actually used
		end
	end
	local sccode = sc:getCode()
	local scname = sc:getCanonicalName()
	local sccatname = sc:getCategoryName()
	local linked_script = ("[[wikt:Appendix:%s|%s]]"):format(sccatname, sccatname)
	local categories = {}
	ins("<span class='use-with-mention'>")

	-- Parse `char` (the raw value of parameter `paramname`) into one or more terms, link each of them, and join the
	-- results with "or". If `notr` is set, transliteration is suppressed for terms that don't specify one.
	local function link_to_lang_or_mul(char, paramname, notr)
		local termobjs = parse_char(char, paramname)
		for i, termobj in ipairs(termobjs) do
			-- Either link a character using the language in 1= or using 'mul' (Translingual). We do this to avoid
			-- yellow links from trying to link to a nonexistent character. Basically, if linklang=1, we always link
			-- using the language in 1=; otherwise we try to see if the character is in the language's standard_chars,
			-- and if not, link to Translingual. If the standardChars for the language is missing or the character can't
			-- be looked up (e.g. it's a digraph or trigraph), assume it's in the language and link using the language.
			local lang_for_linking
			-- parse_char() has prefixed the term with ":" (see add_initial_colon_to_term()); strip it again so that
			-- the checks below see the bare character. Without this, every term looks like it has more than one
			-- character and the standard_chars lookup is never reached.
			local bare_char = termobj.term:gsub("^:", "")
			-- Count Unicode characters, not bytes, so that a single non-ASCII character isn't mistaken for a
			-- multigraph. mw.ustring.len() returns nil for invalid UTF-8; treat that as "can't be looked up".
			local nchars = mw.ustring.len(bare_char)
			if args.linklang then
				lang_for_linking = lang
			elseif not nchars or nchars > 1 then
				-- If the character is a digraph or trigraph, we can't check it against standard_chars, which only lists
				-- single Unicode chars.
				lang_for_linking = lang
			else
				local standard_chars = lang:getStandardCharacters(sc)
				-- Plain (non-pattern) search, so that characters that are magic in Lua patterns are matched literally.
				if type(standard_chars) ~= "string" or mw.ustring.find(standard_chars, bare_char, 1, true) then
					-- No standard_chars, or character in standard_chars; link using lang.
					lang_for_linking = lang
				else
					lang_for_linking = lang_getByCode("mul", true)
				end
			end
			termobj.lang = lang_for_linking
			termobj.sc = sc
			if notr and not termobj.tr then
				termobj.tr = "-"
			end
			termobjs[i] = full_link(termobj, "term")
		end
		return join_items(termobjs, "or")
	end

	if canontype == "letter" or canontype == "numsym" then
		-- "A letter of ..." when no position is known; "The first/last letter of ..." otherwise.
		local indef = not args[3] and not args.last
		local article = indef and "A" or "The"
		if args.nocap then
			article = article:lower()
		end
		ins(article)
		if args[3] then
			ins(" ")
			-- NOTE(review): 3= is declared above as a plain (`true`) parameter, so args[3] is presumably always a
			-- string and the ordinal_to_word() branch is never taken; confirm whether numeric values of 3= are meant
			-- to be spelled out.
			ins(type(args[3]) == "number" and ordinal_to_word(args[3]) or args[3])
		end
		if args.last then
			if args[3] then
				ins(" and")
			end
			ins(" last")
		end
		-- If we're Translingual, don't say we're a letter of the "Translingual alphabet" because there is no such
		-- thing; instead, say we're a letter of the given script, and omit the coda that says "written in the Foo
		-- script" because it's redundant.
		local is_mul = lang:getFullCode() == "mul"
		local lang_for_linking = is_mul and lang_getByCode("en", true) or lang
		ins(" ")
		if canontype == "numsym" then
			ins("[[wikt:numeral|numeral]] [[wikt:symbol|symbol]]")
		elseif args[3] and deftype ~= "letter" then
			ins("[[wikt:letter|letter]] (a [[wikt:" .. deftype .. "|" .. deftype .. "]])")
		else
			ins("[[wikt:" .. deftype .. "|" .. deftype .. "]]")
		end
		ins(" of ")
		if args.alphabet then
			ins(args.alphabet)
		elseif is_mul then
			ins("the ")
			if sccode:find("Lat") and (args.pagename or mw.loadData("Module:headword/data").pagename):match("^[a-zA-Z]$") then
				-- Latn, Latf, Latg, pjt-Latn; if in ASCII a-z or A-Z, display as "basic modern Latin alphabet",
				-- otherwise as "Latin script" as all other scripts display for mul.
				ins(("[[wikt:%s|%s]]"):format(sccatname, "basic modern Latin alphabet"))
			else
				ins(linked_script)
			end
		else
			ins("the ")
			ins(lang:getCanonicalName())
			ins(" [[wikt:alphabet|alphabet]]")
		end
		if args.alphvar then
			ins(" (" .. args.alphvar .. ")")
		end
		if args[4][1] then
			ins(", called ")
			local formatted_names = {}
			for i, name in ipairs(args[4]) do
				-- The list in 4= occupies parameters 4, 5, 6, ...; hence i + 3 as the parameter name.
				local nameobjs = parse_param(name, i + 3)
				for _, nameobj in ipairs(nameobjs) do
					nameobj.lang = lang_for_linking
					insert(formatted_names, full_link(nameobj, "term"))
				end
			end
			ins(mw.text.listToText(formatted_names, nil, " or "))
			if not is_mul then
				ins(" and ")
			end
		elseif not is_mul then
			ins(", ")
		end
		if not is_mul then
			ins(("written in the %s"):format(linked_script))
		end
		if args.prec then
			ins("; preceded by ")
			ins(link_to_lang_or_mul(args.prec, "prec"))
		end
		if args.foll then
			if args.prec then
				ins(" and ")
			else
				ins("; ")
			end
			ins("followed by ")
			ins(link_to_lang_or_mul(args.foll, "foll"))
		end
		if canontype == "numsym" then
			-- FIXME: Rethink the name of this category.
			insert(categories, ("%s ordinal numbers"):format(lang:getFullName()))
		end

	elseif canontype == "name" then
		ins(args.nocap and "the" or "The")
		ins((" name of the %s letter "):format(linked_script))
		ins(link_to_lang_or_mul(args[3], 3, "notr"))
		if args[4] then
			ins("/")
			ins(link_to_lang_or_mul(args[4], 4, "notr"))
		end
		if args.alphabet then
			ins(", in " .. args.alphabet)
			if args.alphvar then
				ins(" (" .. args.alphvar .. ")")
			end
		elseif args.alphvar then
			ins(", in " .. args.alphvar)
		end
		if args.lit then
			ins(", literally “")
			ins(args.lit)
			ins("”")
		end
		if args.eq then
			ins(", called ")
			ins(full_link({lang = lang_getByCode("en", true), term = args.eq}, "term"))
			ins(" in English")
		end
		insert(categories, ("%s:%s letter names"):format(lang:getFullCode(), scname))

	elseif canontype == "diacritic" then
		ins(args.nocap and "a" or "A")
		ins((" [[wikt:diacritical mark|diacritical mark]] of the %s"):format(linked_script))
		if args.alphabet then
			ins(" in " .. args.alphabet)
			if args.alphvar then
				ins(" (" .. args.alphvar .. ")")
			end
		elseif args.alphvar then
			ins(" in " .. args.alphvar)
		end
		if args.name then
			ins(", called ")
			local formatted_names = {}
			local nameobjs = parse_param(args.name, "name")
			for _, nameobj in ipairs(nameobjs) do
				nameobj.lang = lang
				insert(formatted_names, full_link(nameobj, "term"))
			end
			ins(mw.text.listToText(formatted_names, nil, " or "))
		end
		ins(" in ")
		ins(lang:getCanonicalName())
		if args[3][1] then
			ins(", and found on ")
			local formatted_letters = {}
			local function format_letter(letter)
				return ("<span class='mention'>%s</span>"):format(full_link {lang = lang, term = letter, sc = sc})
			end
			if args.nopairs then
				-- Each letter stands on its own.
				for _, letter in ipairs(args[3]) do
					insert(formatted_letters, format_letter(letter))
				end
			elseif #args[3] % 2 == 1 then
				error(("Saw %s letters but need an even number when nopairs= is not given"):format(#args[3]))
			else
				-- Letters come in consecutive pairs, displayed as "X/Y".
				for i = 1, #args[3], 2 do
					insert(formatted_letters, ("%s/%s"):format(format_letter(args[3][i]), format_letter(args[3][i + 1])))
				end
			end
			ins(mw.text.listToText(formatted_letters))
			if args.moreexamples then
				ins(", among others")
			end
		end

	elseif canontype == "syllable" then
		-- 3= is the hiragana form (or "-" to omit it), 4= the katakana form, 5= the shared transliteration.
		ins(args.nocap and "the " or "The ")
		if args[3] ~= "-" then
			ins("[[wikt:Appendix:Hiragana script|hiragana]] syllable ")
			ins(full_link({lang = lang, term = args[3], tr = args[5]}, "term"))
			ins(" or the ")
		end
		ins("[[wikt:Appendix:Katakana script|katakana]] syllable ")
		ins(full_link({lang = lang, term = args[4], tr = args[5]}, "term"))
		ins(" in [[wikt:Hepburn|Hepburn]] romanization")

	else
		error(("Internal error: Unhandled canontype %s"):format(mw.dumpObject(canontype)))
	end
	-- Additional text: appended directly if it begins with punctuation, with a space if it begins with an underscore
	-- (which is dropped), and after a comma otherwise.
	local addl = args.addl
	if addl then
		if addl:find("^[;:.]") then
			ins(addl)
		elseif addl:find("^_") then
			ins(" " .. addl:sub(2))
		else
			ins(", " .. addl)
		end
	end
	-- Final punctuation: dot= overrides the default period; nodot= suppresses it.
	if args.dot then
		ins(args.dot)
	elseif not args.nodot then
		ins(".")
	end
	ins("</span>")
	if categories[1] then
		ins(format_categories(categories, lang, nil, nil, force_cat))
	end
	return concat(output)
end

return export