-- Module:siwa-headword
--
-- From Linguifex
-- Revision as of 15:15, 29 January 2021 by Sware (talk | contribs)
-- Jump to navigation Jump to search


local export = {}

-- Lua pattern matching a run of whitespace and/or punctuation characters.
-- Each such run in a headword is a candidate word break (see
-- export.head_is_multiword and export.add_multiword_links).
local spacingPunctuation = "[%s%p]+"
--[[ List of punctuation or spacing characters that are found inside of words.
These characters are excluded from the word-break pattern above by means of
the negated set built from this list below. Note that these are Lua patterns,
not regular expressions. ]]
local wordPunc = "-־׳״'.·*’་"
-- Lua pattern matching a run of characters that are NOT listed in wordPunc.
local notWordPunc = "[^" .. wordPunc .. "]+"
-- Lookup used by format_genders: maps a part-of-speech category (with any
-- leading "reconstructed " removed) to the part of speech that is handed to
-- the gender formatter. Categories missing from this table yield nil.
local pos_for_gender_number_cat = {
	["nouns"] = "nouns",
	["proper nouns"] = "nouns",
	-- We include verbs because impf and pf are valid "genders".
	["verbs"] = "verbs",
}

-- Format a single inflection (e.g. all comparative forms) for display after
-- the headword.
--
-- data:  the headword data table; data.lang is the fallback language for
--        each link.
-- parts: a list of forms carrying a `label` field that names the inflection.
--        Each form is either a plain term string or a table with the fields
--        `term`, `alt`, `lang`, `genders` and, optionally, `nolink`.
--
-- Returns the italicised label followed by the linked forms, separated by
-- " <i>or</i> ". The `parts` table passed in is not modified.
local function format_inflection_parts(data, parts)
	local m_links = require("Module:links")
	local formatted = {}
	
	for key, part in ipairs(parts) do
		if type(part) ~= "table" then
			part = {term = part}
		end
		
		-- Optional per-form flag: when set, the term is passed as display
		-- text only (alt) and no link target is given. Declared locally so
		-- that no undefined global is read.
		local nolink = part.nolink
		
		-- Convert the term into a full link
		-- Don't show a transliteration here, the consensus seems to be not to
		-- show them in headword lines to avoid clutter.
		formatted[key] = m_links.full_link(
			{
				term = not nolink and part.term or nil,
				alt = part.alt or (nolink and part.term or nil),
				lang = part.lang or data.lang,
				genders = part.genders,
			},
			nil, -- no explicit face is requested
			false
			)
		-- No qualifier prefix is prepended: nothing in this module supplies
		-- qualifiers, and concatenating an unset value raises an error.
	end
	
	local parts_output = ""
	
	if #formatted > 0 then
		parts_output = " " .. table.concat(formatted, " <i>or</i> ")
	end
	
	return "<i>" .. parts.label .. "</i>" .. parts_output
end

-- Render every inflection attached to the headword as one parenthesised,
-- comma-separated list. Each entry of data.inflections is overwritten with
-- its formatted text. Returns "" when there is nothing to show.
local function format_inflections(data)
	local inflections = data.inflections
	if not inflections or #inflections == 0 then
		return ""
	end
	
	-- Replace each raw inflection by its formatted string, in place
	for index, inflection in ipairs(inflections) do
		inflections[index] = format_inflection_parts(data, inflection)
	end
	
	local joined = table.concat(inflections, ", ")
	return " (" .. joined .. ")"
end

-- Format the gender specifications of the headword.
--
-- data: the headword data table. Reads data.genders, data.lang,
--       data.pos_category and data.nogendercat; categories returned by the
--       gender formatter are appended to data.categories.
--
-- Returns a non-breaking space followed by the formatted genders, or "" when
-- no genders are given.
local function format_genders(data)
	local genders = data.genders
	if not genders or #genders == 0 then
		return ""
	end
	
	local pos_for_cat
	if not data.nogendercat and data.pos_category then
		-- Read the part of speech from the data table, and treat
		-- reconstructed terms like their ordinary counterparts.
		local pos_category = data.pos_category:gsub("^reconstructed ", "")
		pos_for_cat = pos_for_gender_number_cat[pos_category]
	end
	
	-- The formatter is used as returning the display text and a list of
	-- category names.
	local gen = require("Module:getn")
	local text, cats = gen.format_genders(genders, data.lang, pos_for_cat)
	for _, cat in ipairs(cats or {}) do
		table.insert(data.categories, cat)
	end
	
	return "&nbsp;" .. text
end

-- The main entry point.
-- This is the only function that can be invoked from a template.
--
-- frame: the frame of the module invocation. The first invocation argument
--        is the part of speech; the arguments of the parent frame (the
--        template call) supply "head" and the per-part-of-speech parameters.
--
-- Returns the headword line produced by full_headword. Raises an error when
-- no part of speech is passed to the invocation.
function export.show(frame)
	local args = frame:getParent().args
	-- Kept local so that the page name does not leak into the global table
	local pagename = mw.title.getCurrentTitle().subpageText
	
	local head = args["head"]; if head == "" then head = nil end
	
	-- The part of speech. This is also the name of the category that
	-- entries go in. However, the two are separate (the "cat" parameter)
	-- because you sometimes want something to behave as an adjective without
	-- putting it in the adjectives category.
	local poscat = frame.args[1] or error("Part of speech has not been specified. Please pass parameter 1 to the module invocation.")
	
	local data = {pos_category = poscat, categories = {}, heads = {head}, genders = {}, inflections = {}}
	
	-- Parts of speech whose entries are treated as suffixes when the page
	-- name starts with a hyphen, and the category such a suffix goes in.
	local suffix_categories = {
		["adjectives"] = "Siwa adjective-forming suffixes",
		["adverbs"] = "Siwa adverb-forming suffixes",
		["nouns"] = "Siwa noun-forming suffixes",
		["verbs"] = "Siwa verb-forming suffixes",
	}
	local suffix_category = suffix_categories[poscat]
	if suffix_category and pagename:find("^-") then
		data.pos_category = "suffixes"
		data.categories = {suffix_category}
	end
	
	-- Collect the part-of-speech specific information. Determiners share the
	-- adjective parameters and proper nouns share the noun parameters; verbs
	-- have no additional parameters.
	if poscat == "adjectives" or poscat == "determiners" then
		adjective(args, data)
	elseif poscat == "adverbs" then
		adverb(args, data)
	elseif poscat == "nouns" or poscat == "proper nouns" then
		noun_gender(args, data)
	end
	
	return full_headword(data)
end

-- Collect the genders of a noun into data.genders.
-- Kept as its own function so that proper nouns can share it.
--
-- The first gender comes from positional parameter 1, further ones from g2,
-- g3 and so on; collection stops at the first of those that is missing or
-- empty. A missing first gender, or any unrecognised value, is recorded
-- as "?".
function noun_gender(args, data)
	local recognised = {
		["in"] = true,
		["an"] = true,
		["in-p"] = true,
		["an-p"] = true,
	}
	
	local gender = args[1] or ""
	if gender == "" then
		gender = "?"
	end
	
	local next_index = 2
	repeat
		if recognised[gender] then
			table.insert(data.genders, gender)
		else
			table.insert(data.genders, "?")
		end
		
		gender = args["g" .. next_index] or ""
		next_index = next_index + 1
	until gender == ""
end

-- Collect the inflections of an adjective into data.inflections: the derived
-- adverb (parameter "adv"), the comparative (parameter 1) and the superlative
-- (parameter 2), in that order. Missing or empty parameters are skipped.
function adjective(args, data)
	local candidates = {
		{"adverb", args["adv"]},
		{"comparative", args[1]},
		{"superlative", args[2]},
	}
	
	for _, candidate in ipairs(candidates) do
		local label, form = candidate[1], candidate[2]
		if form and form ~= "" then
			table.insert(data.inflections, {label = label, form})
		end
	end
end

-- Collect the inflections of an adverb into data.inflections: the related
-- adjective (parameter "adj"), the comparative (parameter 1) and the
-- superlative (parameter 2), in that order. Missing or empty parameters are
-- skipped.
function adverb(args, data)
	-- Append one labelled form unless the parameter is absent or empty
	local function add_form(label, form)
		if form and form ~= "" then
			table.insert(data.inflections, {label = label, form})
		end
	end
	
	add_form("adjective", args["adj"])
	add_form("comparative", args[1])
	add_form("superlative", args[2])
end




-- Format the headword(s): join all heads with " <i>or</i> ".
--
-- data: the headword data table; data.heads is the list of head strings.
--
-- Heads that contain "[[" (such as the linked multiword heads built by
-- export.add_multiword_links) are emitted unchanged, as they are already
-- wikitext. They must stay strings: table.concat raises an error on a table
-- element.
-- NOTE(review): upstream headword modules pass linked heads through
-- Module:links; that is not done here because this module never sets
-- data.lang. Confirm whether language-aware links are wanted.
local function format_headword(data)
	return table.concat(data.heads, " <i>or</i> ")
end

-- Add links to a multiword head.
--
-- head: the unlinked head text.
--
-- Every run of spacing or punctuation is treated as a break between links
-- into the "Contionary:" namespace, except for the characters listed in
-- wordPunc, which stay inside the linked word. Returns the head with each
-- word wrapped in [[Contionary:...]].
function export.add_multiword_links(head)
	-- Within one run of spacing/punctuation, close the current link before
	-- each stretch of real separators and open a new link after it.
	local function workaround_to_exclude_chars(s)
		return mw.ustring.gsub(s, notWordPunc, "]]%1[[Contionary:")
	end
	
	head = "[[Contionary:"
		.. mw.ustring.gsub(
			head,
			spacingPunctuation,
			workaround_to_exclude_chars
			)
		.. "]]"
	-- Separators at the start or end of the head, or next to each other,
	-- leave links without a target behind. Because every link opens with the
	-- namespace prefix, such a link reads "[[Contionary:]]" rather than
	-- "[[]]"; remove both spellings.
	head = mw.ustring.gsub(head, "%[%[Contionary:%]%]", "")
	head = mw.ustring.gsub(head, "%[%[%]%]", "")
	return head
end

-- Tell whether a head counts as multiword under the algorithm used in
-- full_headword(): it does when some run of spacing or punctuation contains
-- at least one character that is not a word-internal punctuation character.
function export.head_is_multiword(head)
	local has_word_break = false
	
	for separator_run in mw.ustring.gmatch(head, spacingPunctuation) do
		if mw.ustring.find(separator_run, notWordPunc) then
			has_word_break = true
			break
		end
	end
	
	return has_word_break
end

-- Normalise data.heads into a non-empty list of strings.
--
-- A non-table value is wrapped in a list, a missing or empty list becomes
-- {""}, and every empty-string head is replaced by the default head: the
-- text of the current page title, with links added when it is multiword.
-- The postype parameter is accepted but not used.
local function preprocess(data, postype)
	local heads = data.heads
	if type(heads) ~= "table" then
		heads = { heads }
	end
	if #heads == 0 then
		heads = {""}
	end
	data.heads = heads
	
	-- Work out the default head from the page title
	local title_text = mw.title.getCurrentTitle().text
	local fallback_head = title_text
	if export.head_is_multiword(title_text) then
		fallback_head = export.add_multiword_links(title_text)
	end
	
	-- Substitute the default for every head left empty
	for index, head in ipairs(heads) do
		if head == "" then
			heads[index] = fallback_head
		end
	end
end

-- Build the visible headword line: the head(s), the genders and the
-- inflections, followed by the output for this function's tracking
-- categories.
--
-- data: the headword data table. Its heads are normalised by preprocess, and
--       format_genders may append categories to data.categories.
--
-- NOTE(review): the part-of-speech category built from data.pos_category is
-- not added to data.categories here, so data.noposcat currently has no
-- effect. Confirm whether entries are meant to be categorised by part of
-- speech.
local function show_headword_line(data)
	-- No tracking categories are collected at present. The list is still
	-- declared here so that the category formatter receives a table rather
	-- than an unset value.
	local tracking_categories = {}
	
	-- Preprocess
	preprocess(data)
	
	-- Format and return all the gathered information
	return
		format_headword(data) ..
		format_genders(data) ..
		format_inflections(data) ..
		require("Module:utilities").format_categories(
			tracking_categories, data.lang, data.sort_key, nil,
			data.force_cat_output, data.sc
			)
end

-- Produce the complete headword output: the headword line followed by the
-- formatted categories.
--
-- data: the headword data table. When data.pos_category is not set, it is
--       taken from the first entry of data.categories if that entry starts
--       with the language name, and the entry is removed from the list.
--
-- Raises an error when no part-of-speech category can be determined.
function full_headword(data)
	local tracking_categories = {}
	
	-- Were any categories specified?
	if data.categories and #data.categories > 0 then
		
		if not data.pos_category
			and mw.ustring.find(data.categories[1], "^[sS]iwa")
				then
			data.pos_category = mw.ustring.gsub(data.categories[1], "^[sS]iwa ", "")
			table.remove(data.categories, 1)
		end
	end
	
	if not data.pos_category then
		error(
			'No valid part-of-speech categories were found in the list '
			.. 'of categories passed to the function "full_headword". '
			.. 'The part-of-speech category should consist of a language\'s '
			.. 'canonical name plus a part of speech.'
			)
	end
	
	-- This may add more categories (e.g. gender categories), so make sure it gets
	-- evaluated first.
	local text = show_headword_line(data)
	
	-- The force-output flag goes in the fifth position, matching the call in
	-- show_headword_line; the third and fourth positions (sort key and sort
	-- base there) are left unset.
	local m_utilities = require("Module:utilities")
	return
		text ..
		m_utilities.format_categories(
			data.categories, nil, nil, nil,
			data.force_cat_output
			) ..
		m_utilities.format_categories(
			tracking_categories, nil, nil, nil,
			data.force_cat_output
			)
end

return export