Module:siwa-headword: Difference between revisions

From Linguifex
Jump to navigation Jump to search
No edit summary
No edit summary
 
(101 intermediate revisions by the same user not shown)
Line 1: Line 1:
local export = {}
local export = {}
local pos_functions = {}


local spacingPunctuation = "[%s%p]+"
local sub = mw.ustring.sub
--[[ List of punctuation or spacing characters that are found inside of words.
local find = mw.ustring.find
Used to exclude characters from the regex above. ]]
local match = mw.ustring.match
local wordPunc = "-־׳״'.·*’་"
local gmatch = mw.ustring.gmatch
local notWordPunc = "[^" .. wordPunc .. "]+"
local gsub = mw.ustring.gsub
local u = mw.ustring.char
local split = mw.text.split
local gsplit = mw.text.gsplit


local isLemma = {
local PAGENAME = mw.title.getCurrentTitle().text
"abbreviations",
local NAMESPACE = mw.title.getCurrentTitle().nsText
"acronyms",
local SUBPAGENAME = mw.title.getCurrentTitle().subpageText
"adjectives",
 
"adnominals",
local gender_key = {
"adpositions",
["i"] = "in",
"adverbs",
["a"] = "an",
"affixes",
"ambipositions",
"articles",
"circumfixes",
"circumpositions",
"classifiers",
"cmavo",
"cmavo clusters",
"cmene",
"combining forms",
"conjunctions",
"counters",
"determiners",
"diacritical marks",
"equative adjectives",
"fu'ivla",
"gismu",
"Han characters",
"Han tu",
"hanzi",
"hanja",
"ideophones",
"idioms",
"infixes",
"interfixes",
"initialisms",
"interjections",
"kanji",
"letters",
"ligatures",
"lujvo",
"morphemes",
"non-constituents",
"nouns",
"numbers",
"numeral symbols",
"numerals",
"particles",
"phrases",
"postpositions",
"postpositional phrases",
"predicatives",
"prefixes",
"prepositions",
"prepositional phrases",
"preverbs",
"pronominal adverbs",
"pronouns",
"proverbs",
"proper nouns",
"punctuation marks",
"relatives",
"roots",
"stems",
"suffixes",
"syllables",
"symbols",
"verbs",
}
}


local isNonLemma = {
local verb_key = {
"active participles",
["aditr"] = "agentive ditransitive",
"adjectival participles",
["adit"] = "agentive ditransitive",
"adjective forms",
["ai"] = "agentive intransitive",
"adjective feminine forms",
["asubj"] = "agentive subjective",
"adjective plural forms",
["at"] = "agentive transitive",
"adverb forms",
["imp"] = "impersonal",
"adverbial participles",
["pass"] = "passive",
"agent participles",
["udit"] = "unagentive ditransitive",
"article forms",
["ui"] = "unagentive intransitive",
"circumfix forms",
["usubj"] = "unagentive subjective",
"combined forms",
["ut"] = "unagentive transitive",
"comparative adjective forms",
["utrans"] = "translative",
"comparative adjectives",
["?"] = "?",
"comparative adverb forms",
"comparative adverbs",
"contractions",
"converbs",
"determiner comparative forms",
"determiner forms",
"determiner superlative forms",
"diminutive nouns",
"equative adjective forms",
"equative adjectives",
"future participles",
"gerunds",
"infinitive forms",
"infinitives",
"interjection forms",
"jyutping",
"kanji readings",
"misspellings",
"negative participles",
"nominal participles",
"noun case forms",
"noun dual forms",
"noun forms",
"noun plural forms",
"noun possessive forms",
"noun singulative forms",
"numeral forms",
"participles",
"participle forms",
"particle forms",
"passive participles",
"past active participles",
"past participles",
"past participle forms",
"past passive participles",
"perfect active participles",
"perfect participles",
"perfect passive participles",
"pinyin",
"plurals",
"postposition forms",
"prefix forms",
"preposition contractions",
"preposition forms",
"prepositional pronouns",
"present active participles",
"present participles",
"present passive participles",
"pronoun forms",
"pronoun possessive forms",
"proper noun forms",
"proper noun plural forms",
"rafsi",
"romanizations",
"root forms",
"singulatives",
"suffix forms",
"superlative adjective forms",
"superlative adjectives",
"superlative adverb forms",
"superlative adverbs",
"verb forms",
"verbal nouns",
}
}


local lang = require("Module:languages").getByCode("siwa")
local function glossary_link(entry, text)
return "[[wikt:Appendix:Glossary#" .. entry .. "|" .. (text or entry) .. "]]"
end


-- The main entry point.
-- The main entry point.
-- This is the only function that can be invoked from a template.
-- This is the only function that can be invoked from a template.
function export.show(frame)
function export.show(frame)
local args = frame:getParent().args
if NAMESPACE == "Template" and SUBPAGENAME ~= "doc" then return end
PAGENAME = mw.title.getCurrentTitle().subpageText
local parent_args = frame:getParent().args
local head = args["head"]; if head == "" then head = nil end
-- The part of speech. This is also the name of the category that
-- entries go in. However, the two are separate (the "cat" parameter)
-- because you sometimes want something to behave as an adjective without
-- putting it in the adjectives category.
local poscat = frame.args[1] or error("Part of speech has not been specified. Please pass parameter 1 to the module invocation.")
local poscat = frame.args[1] or error("Part of speech has not been specified. Please pass parameter 1 to the module invocation.")
local class = frame.args[2]; if class == "" then class = nil end
local data = {pos_category = poscat, categories = {}, heads = {head}, genders = {}, inflections = {}}
local data = {
lang = lang,
heads = {},
inflections = {},
genders = {},
pos_category = poscat,
categories = {"Siwa " .. poscat}
}
if poscat == "adjectives" then
if pos_functions[poscat] then
if PAGENAME:find("^-") then
pos_functions[poscat](class, parent_args, data)
data.pos_category = "suffixes"
data.categories = {"Siwa adjective-forming suffixes"}
end
adjective(args, data)
elseif poscat == "adverbs" then
if PAGENAME:find("^-") then
data.pos_category = "suffixes"
data.categories = {"Siwa adverb-forming suffixes"}
end
adverb(args, data)
elseif poscat == "determiners" then
adjective(args, data)
elseif poscat == "nouns" then
if PAGENAME:find("^-") then
data.pos_category = "suffixes"
data.categories = {"Siwa noun-forming suffixes"}
end
noun_gender(args, data)
elseif poscat == "proper nouns" then
noun_gender(args, data)
elseif poscat == "verbs" then
if PAGENAME:find("^-") then
data.pos_category = "suffixes"
data.categories = {"Siwa verb-forming suffixes"}
end
end
end
return full_headword(data)
return require("Module:headword").full_headword(data)
end
end


-- Display information for a noun's gender
pos_functions.nouns = function(class, args, data)
-- This is separate so that it can also be used for proper nouns
local params = {
function noun_gender(args, data)
[1] = {required = true},
local valid_genders = {
[2] = {},
["in"] = true,
["m"] = {list = true},
["an"] = true,
["head"] = {default = PAGENAME},
["in-p"] = true,
["decl"] = {},
["an-p"] = true,
["cat2"] = {},
["cat3"] = {},
["sort"] = {},
["affix"] = {list = true},
}
}
-- Iterate over all gn parameters (g2, g3 and so on) until one is empty
local args = require("Module:parameters").process(args, params)
local g = args[1] or ""; if g == "" then g = "?" end
data.heads = {args["head"]}
local i = 2
data.affix = args["affix"]
while g ~= "" do
if not valid_genders[g] then
g = "?"
end
table.insert(data.genders, g)
g = args["g" .. i] or ""
i = i + 1
end
end
 
function adjective(args, data)
local adverb = args["adv"]; if adverb == "" then adverb = nil end
local comparative = args[1]; if comparative == "" then comparative = nil end
local superlative = args[2]; if superlative == "" then superlative = nil end
if adverb then
table.insert(data.inflections, {label = "adverb", adverb})
end
if comparative then
table.insert(data.inflections, {label = "comparative", comparative})
end
if superlative then
table.insert(data.genders, gender_key[args[1]] or args[1])
table.insert(data.inflections, {label = "superlative", superlative})
if args[2] then table.insert(data.genders, gender_key[args[2]] or args[2]) end
end
end
 
function adverb(args, data)
local adjective = args["adj"]; if adjective == "" then adjective = nil end
local comparative = args[1]; if comparative == "" then comparative = nil end
local superlative = args[2]; if superlative == "" then superlative = nil end
if adjective then
data.inflections[1] = args.m
table.insert(data.inflections, {label = "adjective", adjective})
data.inflections[1].label = "marked"
end
if comparative then
if args.decl then table.insert(data.categories, "Siwa " .. args.decl .. "-declension " .. data.pos_category) end
table.insert(data.inflections, {label = "comparative", comparative})
if args.cat2 then table.insert(data.categories, "Siwa " .. args["cat2"]) end
end
if args.cat3 then table.insert(data.categories, "Siwa " .. args["cat3"]) end
if superlative then
data.sort_key = args["sort"] or nil
table.insert(data.inflections, {label = "superlative", superlative})
end
end
end


pos_functions["proper nouns"] = pos_functions.nouns


pos_functions["proper nouns"] = pos_functions.nouns


 
pos_functions.verbs = function(class, args, data)
-- Format a headword with transliterations
local params = {
local function format_headword(data)
[1] = {required = true},
for i, head in ipairs(data.heads) do
[2] = {list = "inf", required = true},
[3] = {list = "p", required = true},
-- Apply processing to the headword, for formatting links and such
[4] = {type = "boolean"},
if head:find("[[", nil, true) then
["head"] = {},
head = {term = head, lang = data.lang}
["cat2"] = {},
end
["sort"] = {},
["affix"] = {list = true},
data.heads[i] = head
}
end
return table.concat(data.heads, " <i>or</i> ")
end
 
-- Add links to a multiword head.
function export.add_multiword_links(head)
local function workaround_to_exclude_chars(s)
return mw.ustring.gsub(s, notWordPunc, "]]%1[[Contionary:")
end
head = "[[Contionary:"
.. mw.ustring.gsub(
head,
spacingPunctuation,
workaround_to_exclude_chars
)
.. "]]"
head = mw.ustring.gsub(head, "%[%[%]%]", "")
return head
end
 
-- Return true if the given head is multiword according to the algorithm used
-- in full_headword().
function export.head_is_multiword(head)
for possibleWordBreak in mw.ustring.gmatch(head, spacingPunctuation) do
local args = require("Module:parameters").process(args, params)
if mw.ustring.find(possibleWordBreak, notWordPunc) then
data.heads = {args["head"]}
return true
end
end
 
return false
end
 
local function preprocess(data, postype)
if type(data.heads) ~= "table" then
data.heads = { data.heads }
end
if not data.heads or #data.heads == 0 then
data.heads = {""}
end
local default_head = mw.title.getCurrentTitle().text
local unmodified_default_head = default_head
 
-- Add links to multi-word page names when appropriate
if export.head_is_multiword(default_head) then
default_head = export.add_multiword_links(default_head)
end
-- If a head is the empty string "", then replace it with the default
for n, kind in ipairs(mw.text.split(args[1], "/")) do
for i, head in ipairs(data.heads) do
if kind ~= "?" then
if head == "" then
data.inflections[n] = {nil}
head = default_head
data.inflections[n].label = verb_key[kind]
table.insert(data.categories, "Siwa " .. verb_key[kind] .. " verbs")
end
end
data.heads[i] = head
end
end
-- Return "lemma" if the given POS is a lemma, "non-lemma form" if a non-lemma form, or nil
-- if unknown. The POS passed in must be in its plural form ("nouns", "prefixes", etc.).
-- If you have a POS in its singular form, call pluralize() in [[Module:string utilities]] to
-- pluralize it in a smart fashion that knows when to add '-s' and when to add '-es'.
--
-- If `best_guess` is given and the POS is in neither the lemma nor non-lemma list, guess
-- based on whether it ends in " forms"; otherwise, return nil.
function pos_lemma_or_nonlemma(plpos)
-- Is it a lemma category?
if isLemma[plpos] or isLemma[plpos:gsub("^reconstructed ", "")] then
return "lemma"
-- Is it a nonlemma category?
elseif isNonLemma[plpos] then
return "non-lemma form"
else
return nil
end
end
local function show_headword_line(data)
local namespace = mw.title.getCurrentTitle().nsText
if not data.noposcat then
local pos_category = "[sS]iwa " .. data.pos_category
end
end
-- Is it a lemma category?
args[2].label = "infinitive"
local postype = pos_lemma_or_nonlemma(data.pos_category)
table.insert(data.inflections, args[2])
if not data.noposcat then
table.insert(data.categories, 1, "[sS]iwa " .. postype .. "s")
end
 
-- Preprocess
preprocess(data, postype)
-- Format and return all the gathered information
args[3].label = "past"
return
table.insert(data.inflections, args[3])
format_headword(data) ..
format_genders(data) ..
format_inflections(data) ..
require("Module:utilities").format_categories(
tracking_categories, data.lang, data.sort_key, nil,
data.force_cat_output or test_force_categories, data.sc
)
end
 
function full_headword(data)
local tracking_categories = {}
-- Were any categories specified?
if args[4] then table.insert(data.categories, "Siwa irregular verbs") end
if data.categories and #data.categories > 0 then
if args.cat2 then table.insert(data.categories, "Siwa " .. args["cat2"]) end
if not data.pos_category
and mw.ustring.find(data.categories[1], "^[sS]iwa")
then
data.pos_category = mw.ustring.gsub(data.categories[1], "^[sS]iwa ", "")
table.remove(data.categories, 1)
end
end
if not data.pos_category then
if args[1] == "?" or args[2] == "?" or args[3] == "?" then table.insert(data.categories, "Contionary stubs") end
error(
'No valid part-of-speech categories were found in the list '
.. 'of categories passed to the function "full_headword". '
.. 'The part-of-speech category should consist of a language\'s '
.. 'canonical name plus a part of speech.'
)
end
-- This may add more categories (e.g. gender categories), so make sure it gets
data.sort_key = args["sort"] or nil
-- evaluated first.
data.affix = args["affix"]
local text = show_headword_line(data)
return
text ..
require("Module:utilities").format_categories(
data.categories, nil,
data.force_cat_output
) ..
require("Module:utilities").format_categories(
tracking_categories, nil,
data.force_cat_output
)
end
end


return export
return export

Latest revision as of 22:49, 4 July 2023



local export = {}
local pos_functions = {}

local sub = mw.ustring.sub
local find = mw.ustring.find
local match = mw.ustring.match
local gmatch = mw.ustring.gmatch
local gsub = mw.ustring.gsub
local u = mw.ustring.char
local split = mw.text.split
local gsplit = mw.text.gsplit

-- Properties of the page currently being rendered, read once at module load.
local current_title = mw.title.getCurrentTitle()
local PAGENAME = current_title.text
local NAMESPACE = current_title.nsText
local SUBPAGENAME = current_title.subpageText

-- Shorthand gender codes accepted by the noun handler, mapped to the codes
-- that are stored in `data.genders`. A code that is not listed here is passed
-- through unchanged by the handler.
-- NOTE(review): "in"/"an" presumably stand for inanimate/animate — confirm
-- against the gender codes understood by Module:headword.
local gender_key = {
	["i"] = "in",
	["a"] = "an",
}

-- Verb type codes accepted in parameter 1 of the verb handler (several codes
-- may be joined with "/"), mapped to the descriptive name that is used both as
-- an inflection label and inside the "Siwa <name> verbs" category.
-- "aditr" and "adit" are two spellings that map to the same name.
-- The "?" entry is not reached by the verb handler, which skips "?" codes
-- before looking them up.
local verb_key = {
	["aditr"] = "agentive ditransitive",
	["adit"] = "agentive ditransitive",
	["ai"] = "agentive intransitive",
	["asubj"] = "agentive subjective",
	["at"] = "agentive transitive",
	["imp"] = "impersonal",
	["pass"] = "passive",
	["udit"] = "unagentive ditransitive",
	["ui"] = "unagentive intransitive",
	["usubj"] = "unagentive subjective",
	["ut"] = "unagentive transitive",
	["utrans"] = "translative",
	["?"] = "?",
}

local lang = require("Module:languages").getByCode("siwa")

-- Builds a wikilink to an anchor in Wiktionary's glossary appendix.
-- `entry` is the anchor name; `text` is the optional display text and
-- defaults to `entry` itself.
local function glossary_link(entry, text)
	local label = text or entry
	local target = "[[wikt:Appendix:Glossary#" .. entry
	return target .. "|" .. label .. "]]"
end

-- Template-facing entry point; it is the only function meant to be invoked
-- from wikitext.
--
-- Invocation arguments (frame.args):
--   1  part of speech in the plural (e.g. "nouns"); also used to build the
--      initial "Siwa <pos>" category. Raises an error when absent.
--   2  optional class, passed through to the part-of-speech handler; an empty
--      string is treated as not given.
-- The template's own arguments are read from the parent frame and handed to
-- the handler registered in `pos_functions` for this part of speech, if any.
-- Returns whatever Module:headword's full_headword produces for the
-- collected data, or nothing on Template pages other than /doc subpages.
function export.show(frame)
	-- Render nothing on the template page itself, only on its documentation.
	if NAMESPACE == "Template" and SUBPAGENAME ~= "doc" then
		return
	end

	local parent_args = frame:getParent().args

	local poscat = frame.args[1]
	if not poscat then
		error("Part of speech has not been specified. Please pass parameter 1 to the module invocation.")
	end

	local class = frame.args[2]
	if class == "" then
		class = nil
	end

	local data = {
		lang = lang,
		pos_category = poscat,
		categories = {"Siwa " .. poscat},
		heads = {},
		genders = {},
		inflections = {},
	}

	-- Parts of speech without a registered handler get a bare headword line.
	local handler = pos_functions[poscat]
	if handler then
		handler(class, parent_args, data)
	end

	return require("Module:headword").full_headword(data)
end

-- Headword handler for nouns (also registered for proper nouns).
--
-- `class` is accepted for signature compatibility with the other handlers and
-- is not used here. `args` are the raw template arguments; `data` is the
-- headword data table, which is filled in place.
--
-- Template parameters:
--   1          gender code (required); shorthand in `gender_key` is expanded
--   2          optional second gender code
--   m, m2...   marked form(s)
--   head       headword to display; defaults to the page name
--   decl       declension name, adds "Siwa <decl>-declension <pos>"
--   cat2, cat3 extra categories, each prefixed with "Siwa "
--   sort       sort key
--   affix      list stored on `data.affix`
pos_functions.nouns = function(class, args, data)
	local params = {
		[1] = {required = true},
		[2] = {},
		["m"] = {list = true},
		["head"] = {default = PAGENAME},
		["decl"] = {},
		["cat2"] = {},
		["cat3"] = {},
		["sort"] = {},
		["affix"] = {list = true},
	}

	args = require("Module:parameters").process(args, params)
	data.heads = {args["head"]}
	data.affix = args["affix"]

	table.insert(data.genders, gender_key[args[1]] or args[1])
	if args[2] then table.insert(data.genders, gender_key[args[2]] or args[2]) end

	-- `m` is a list parameter, so it is always a table. Only attach it when at
	-- least one marked form was supplied; otherwise the headword line would
	-- carry a "marked" label with no form after it.
	local marked = args.m
	if marked[1] ~= nil then
		marked.label = "marked"
		data.inflections[1] = marked
	end

	if args.decl then table.insert(data.categories, "Siwa " .. args.decl .. "-declension " .. data.pos_category) end
	if args.cat2 then table.insert(data.categories, "Siwa " .. args["cat2"]) end
	if args.cat3 then table.insert(data.categories, "Siwa " .. args["cat3"]) end

	data.sort_key = args["sort"]
end

-- Proper nouns take the same parameters and produce the same output as nouns.
pos_functions["proper nouns"] = pos_functions.nouns

-- Returns true when the sequence `list` contains an element equal to `value`.
local function list_contains(list, value)
	for _, item in ipairs(list) do
		if item == value then
			return true
		end
	end
	return false
end

-- Headword handler for verbs.
--
-- `class` is accepted for signature compatibility with the other handlers and
-- is not used here. `args` are the raw template arguments; `data` is the
-- headword data table, which is filled in place.
--
-- Template parameters:
--   1            verb type code(s) from `verb_key`, joined with "/" (required);
--                "?" marks an unknown type and is skipped
--   2, inf2...   infinitive form(s) (required)
--   3, p2...     past form(s) (required)
--   4            boolean; adds "Siwa irregular verbs"
--   head         headword to display
--   cat2         extra category, prefixed with "Siwa "
--   sort         sort key
--   affix        list stored on `data.affix`
--
-- Raises an error when parameter 1 contains a code that is neither "?" nor a
-- key of `verb_key`.
pos_functions.verbs = function(class, args, data)
	local params = {
		[1] = {required = true},
		[2] = {list = "inf", required = true},
		[3] = {list = "p", required = true},
		[4] = {type = "boolean"},
		["head"] = {},
		["cat2"] = {},
		["sort"] = {},
		["affix"] = {list = true},
	}

	args = require("Module:parameters").process(args, params)
	data.heads = {args["head"]}

	for _, kind in ipairs(mw.text.split(args[1], "/")) do
		if kind ~= "?" then
			local label = verb_key[kind]
			if not label then
				error('Unrecognized verb type "' .. kind .. '" in parameter 1.')
			end
			-- Append rather than index by position, so that a skipped "?"
			-- code never leaves a hole in the inflection list.
			table.insert(data.inflections, {label = label})
			table.insert(data.categories, "Siwa " .. label .. " verbs")
		end
	end

	args[2].label = "infinitive"
	table.insert(data.inflections, args[2])

	args[3].label = "past"
	table.insert(data.inflections, args[3])

	if args[4] then table.insert(data.categories, "Siwa irregular verbs") end
	if args.cat2 then table.insert(data.categories, "Siwa " .. args["cat2"]) end

	-- Parameters 2 and 3 are lists, so their elements have to be inspected;
	-- comparing the list table itself with "?" can never be true.
	if args[1] == "?" or list_contains(args[2], "?") or list_contains(args[3], "?") then
		table.insert(data.categories, "Contionary stubs")
	end

	data.sort_key = args["sort"]
	data.affix = args["affix"]
end

return export