Module:siwa-headword: Difference between revisions

From Linguifex
Jump to navigation Jump to search
No edit summary
No edit summary
Line 1: Line 1:
local export = {}
local export = {}
local lang = require("Module:languages").getByCode("gem-pro")


-- The main entry point.
-- The main entry point.
Line 7: Line 5:
function export.show(frame)
function export.show(frame)
local args = frame:getParent().args
local args = frame:getParent().args
SUBPAGENAME = mw.title.getCurrentTitle().subpageText
PAGENAME = mw.title.getCurrentTitle().subpageText
local head = args["head"]; if head == "" then head = nil end
local head = args["head"]; if head == "" then head = nil end
Line 21: Line 19:
if poscat == "adjectives" then
if poscat == "adjectives" then
if SUBPAGENAME:find("^-") then
if PAGENAME:find("^-") then
data.pos_category = "suffixes"
data.pos_category = "suffixes"
data.categories = {"Proto-Germanic adjective-forming suffixes"}
data.categories = {"Siwa adjective-forming suffixes"}
end
end
adjective(args, data)
adjective(args, data)
elseif poscat == "adverbs" then
elseif poscat == "adverbs" then
if SUBPAGENAME:find("^-") then
if PAGENAME:find("^-") then
data.pos_category = "suffixes"
data.pos_category = "suffixes"
data.categories = {"Proto-Germanic adverb-forming suffixes"}
data.categories = {"Siwa adverb-forming suffixes"}
end
end
Line 37: Line 35:
adjective(args, data)
adjective(args, data)
elseif poscat == "nouns" then
elseif poscat == "nouns" then
if SUBPAGENAME:find("^-") then
if PAGENAME:find("^-") then
data.pos_category = "suffixes"
data.pos_category = "suffixes"
data.categories = {"Proto-Germanic noun-forming suffixes"}
data.categories = {"Siwa noun-forming suffixes"}
end
end
Line 46: Line 44:
noun_gender(args, data)
noun_gender(args, data)
elseif poscat == "verbs" then
elseif poscat == "verbs" then
if SUBPAGENAME:find("^-") then
if PAGENAME:find("^-") then
data.pos_category = "suffixes"
data.pos_category = "suffixes"
data.categories = {"Proto-Germanic verb-forming suffixes"}
data.categories = {"Siwa verb-forming suffixes"}
end
end
end
end
return require("Module:headword").full_headword(data)
return full_headword(data)
end
end


Line 59: Line 57:
function noun_gender(args, data)
function noun_gender(args, data)
local valid_genders = {
local valid_genders = {
["m"] = true,
["in"] = true,
["f"] = true,
["an"] = true,
["n"] = true,
["in-p"] = true,
["m-p"] = true,
["an-p"] = true,
["f-p"] = true,
}
["n-p"] = true}
-- Iterate over all gn parameters (g2, g3 and so on) until one is empty
-- Iterate over all gn parameters (g2, g3 and so on) until one is empty
Line 75: Line 72:
end
end
-- If any of the specifications is a "?", add the entry
-- to a cleanup category.
if g == "?" then
table.insert(data.categories, "Requests for gender in Proto-Germanic entries")
elseif g == "m-p" or g == "f-p" or g == "n-p" then
table.insert(data.categories, "Proto-Germanic pluralia tantum")
end
table.insert(data.genders, g)
table.insert(data.genders, g)
g = args["g" .. i] or ""
g = args["g" .. i] or ""
Line 124: Line 113:
end
end
end
end
function export.full_headword(data)
local tracking_categories = {}
-- Script-tags the topmost header.
local pagename = title.text
local fullPagename = title.fullText
local namespace = title.nsText
if not data.lang or type(data.lang) ~= "table" or not data.lang.getCode then
error("In data, the first argument to full_headword, data.lang should be a language object.")
end
if not data.sc then
data.sc = require("Module:scripts").findBestScript(data.heads and data.heads[1] ~= "" and data.heads[1] or pagename, data.lang)
else
-- Track uses of sc parameter
local best = require("Module:scripts").findBestScript(pagename, data.lang)
require("Module:debug").track("headword/sc")
if data.sc:getCode() == best:getCode() then
require("Module:debug").track("headword/sc/redundant")
require("Module:debug").track("headword/sc/redundant/" .. data.sc:getCode())
else
require("Module:debug").track("headword/sc/needed")
require("Module:debug").track("headword/sc/needed/" .. data.sc:getCode())
end
end
local displayTitle
-- Assumes that the scripts in "toBeTagged" will never occur in the Reconstruction namespace.
-- Avoid tagging ASCII as Hani even when it is tagged as Hani in the
-- headword, as in [[check]]. The check for ASCII might need to be expanded
-- to a check for any Latin characters and whitespace or punctuation.
if (namespace == "" and data.sc and toBeTagged[data.sc:getCode()]
and not pagename:find "^[%z\1-\127]+$")
or (data.sc:getCode() == "Jpan" and (test_script(pagename, "Hira") or test_script(pagename, "Kana"))) then
displayTitle = '<span class="' .. data.sc:getCode() .. '">' .. pagename .. '</span>'
elseif namespace == "Reconstruction" then
displayTitle, matched = mw.ustring.gsub(
fullPagename,
"^(Reconstruction:[^/]+/)(.+)$",
function(before, term)
return before ..
require("Module:script utilities").tag_text(
term,
data.lang,
data.sc
)
end
)
if matched == 0 then
displayTitle = nil
end
end
if displayTitle then
local frame = mw.getCurrentFrame()
frame:callParserFunction(
"DISPLAYTITLE",
displayTitle
)
end
if data.force_cat_output then
--[=[
[[Special:WhatLinksHere/Template:tracking/headword/force cat output]]
]=]
require("Module:debug").track("headword/force cat output")
end
if data.getCanonicalName then
error('The "data" variable supplied to "full_headword" should not be a language object.')
end
-- Were any categories specified?
if data.categories and #data.categories > 0 then
local lang_name = require("Module:string").pattern_escape(data.lang:getCanonicalName())
for _, cat in ipairs(data.categories) do
-- Does the category begin with the language name? If not, tag it with a tracking category.
if not mw.ustring.find(cat, "^" .. lang_name) then
mw.log(cat, data.lang:getCanonicalName())
table.insert(tracking_categories, "head tracking/no lang category")
--[=[
[[Special:WhatLinksHere/Template:tracking/head tracking/no lang category]]
]=]
require("Module:debug").track{
"headword/no lang category",
"headword/no lang category/lang/" .. data.lang:getCode()
}
end
end
if not data.pos_category
and mw.ustring.find(data.categories[1], "^" .. data.lang:getCanonicalName())
then
data.pos_category = mw.ustring.gsub(data.categories[1], "^" .. data.lang:getCanonicalName() .. " ", "")
table.remove(data.categories, 1)
end
end
if not data.pos_category then
error(
'No valid part-of-speech categories were found in the list '
.. 'of categories passed to the function "full_headword". '
.. 'The part-of-speech category should consist of a language\'s '
.. 'canonical name plus a part of speech.'
)
end
-- Categorise for unusual characters
local standard = data.lang:getStandardCharacters()
if standard then
if mw.ustring.len(title.subpageText) ~= 1 and not non_categorizable() then
for character in mw.ustring.gmatch(title.subpageText, "([^" .. standard .. "])") do
local upper = mw.ustring.upper(character)
if not mw.ustring.find(upper, "[" .. standard .. "]") then
character = upper
end
table.insert(
data.categories,
data.lang:getCanonicalName() .. " terms spelled with " .. character
)
end
end
end
-- Categorise for palindromes
if title.nsText ~= "Reconstruction" and mw.ustring.len(title.subpageText)>2
and require('Module:palindromes').is_palindrome(
title.subpageText, data.lang, data.sc
) then
table.insert(data.categories, data.lang:getCanonicalName() .. " palindromes")
end
-- This may add more categories (e.g. gender categories), so make sure it gets
-- evaluated first.
local text = show_headword_line(data)
return
text ..
require("Module:utilities").format_categories(
data.categories, data.lang, data.sort_key, nil,
data.force_cat_output or test_force_categories, data.sc
) ..
require("Module:utilities").format_categories(
tracking_categories, data.lang, data.sort_key, nil,
data.force_cat_output or test_force_categories, data.sc
)
end


return export
return export

Revision as of 14:27, 29 January 2021



-- Table collecting this module's public functions; it is returned at the end of the file.
local export = {}

-- The main entry point.
-- This is the only function that can be invoked from a template.
--
-- Parameters:
--   frame: the frame of the module invocation. frame.args[1] is the part of
--          speech (required). The parent frame's args supply "head", "type"
--          and the parameters read by adjective/adverb/noun_gender.
-- Returns: the result of export.full_headword for the assembled data table.
-- Raises: an error if parameter 1 of the invocation is missing.
function export.show(frame)
	local args = frame:getParent().args
	-- Local on purpose: nothing else in this module reads the page name.
	local pagename = mw.title.getCurrentTitle().subpageText
	
	local head = args["head"]; if head == "" then head = nil end
	
	-- The part of speech. This is also the name of the category that
	-- entries go in. However, the two are separate (the "cat" parameter)
	-- because you sometimes want something to behave as an adjective without
	-- putting it in the adjectives category.
	local poscat = frame.args[1] or error("Part of speech has not been specified. Please pass parameter 1 to the module invocation.")
	local postype = args["type"]; if postype == "" then postype = nil end
	
	-- NOTE(review): `lang` is not defined anywhere in this module, so it is
	-- nil here and full_headword will reject the data. A language object
	-- (presumably obtained from Module:languages) has to be supplied; the
	-- correct language code cannot be determined from this file - confirm.
	local data = {lang = lang, pos_category = (postype and postype .. " " or "") .. poscat, categories = {}, heads = {head}, genders = {}, inflections = {}}
	
	-- Parts of speech whose entries are recategorised as suffixes when the
	-- page name starts with a hyphen.
	local suffix_categories = {
		["adjectives"] = "Siwa adjective-forming suffixes",
		["adverbs"] = "Siwa adverb-forming suffixes",
		["nouns"] = "Siwa noun-forming suffixes",
		["verbs"] = "Siwa verb-forming suffixes",
	}
	
	local suffix_category = suffix_categories[poscat]
	if suffix_category and pagename:find("^-") then
		data.pos_category = "suffixes"
		data.categories = {suffix_category}
	end
	
	-- Part-of-speech specific parameters.
	if poscat == "adjectives" or poscat == "determiners" then
		adjective(args, data)
	elseif poscat == "adverbs" then
		adverb(args, data)
	elseif poscat == "nouns" or poscat == "proper nouns" then
		noun_gender(args, data)
	end
	
	-- full_headword is a field of the export table, not a global function,
	-- so it has to be reached through `export`.
	return export.full_headword(data)
end

-- Collects the gender specifications of a noun into data.genders.
-- Kept as its own function so that proper nouns can share it.
--
-- The first gender is read from args[1]; further ones from args["g2"],
-- args["g3"], ... until a missing or empty parameter is reached.
-- A missing first gender, or any value outside the recognised set,
-- is recorded as "?".
function noun_gender(args, data)
	local recognised = {
		["in"] = true,
		["an"] = true,
		["in-p"] = true,
		["an-p"] = true,
	}
	
	local current = args[1]
	if not current or current == "" then
		current = "?"
	end
	
	-- `current` is never empty on entry, so the body always runs at least once.
	local index = 2
	repeat
		if recognised[current] then
			table.insert(data.genders, current)
		else
			table.insert(data.genders, "?")
		end
		
		current = args["g" .. index] or ""
		index = index + 1
	until current == ""
end

-- Appends the adjective-related inflections to data.inflections.
-- Reads args["adv"] (adverb), args[1] (comparative) and args[2]
-- (superlative); empty or missing parameters are skipped. Entries are
-- appended in that order, each as {label = <name>, <form>}.
function adjective(args, data)
	local slots = {
		{label = "adverb", key = "adv"},
		{label = "comparative", key = 1},
		{label = "superlative", key = 2},
	}
	
	for _, slot in ipairs(slots) do
		local form = args[slot.key]
		if form and form ~= "" then
			table.insert(data.inflections, {label = slot.label, form})
		end
	end
end

-- Appends the adverb-related inflections to data.inflections.
-- Reads args["adj"] (adjective), args[1] (comparative) and args[2]
-- (superlative); empty or missing parameters are skipped. Entries are
-- appended in that order, each as {label = <name>, <form>}.
function adverb(args, data)
	local labels = {"adjective", "comparative", "superlative"}
	local keys = {"adj", 1, 2}
	
	for position = 1, #labels do
		local form = args[keys[position]]
		if form and form ~= "" then
			table.insert(data.inflections, {label = labels[position], form})
		end
	end
end

-- Builds the headword line for an entry together with its category markup.
--
-- Parameters:
--   data: table describing the headword. Fields read here: lang (language
--         object, required), sc (script object, detected when absent),
--         heads, categories, pos_category, sort_key, force_cat_output.
--         data.sc, data.pos_category and data.categories may be modified.
-- Returns: the text from show_headword_line followed by the formatted entry
--          categories and the formatted tracking categories.
-- Raises: an error if data.lang is not a language object, if data is itself
--         a language object, or if no part-of-speech category can be found.
--
-- NOTE(review): toBeTagged, test_script, non_categorizable,
-- show_headword_line and test_force_categories are not defined in this
-- module; they must be provided elsewhere for this function to run - confirm.
function export.full_headword(data)
	local tracking_categories = {}
	
	-- The current page's title object; it is not defined at module level in
	-- this file, so it is looked up here.
	local title = mw.title.getCurrentTitle()
	
	-- Script-tags the topmost header.
	local pagename = title.text
	local fullPagename = title.fullText
	local namespace = title.nsText
	
	if not data.lang or type(data.lang) ~= "table" or not data.lang.getCode then
		error("In data, the first argument to full_headword, data.lang should be a language object.")
	end
	
	if not data.sc then
		data.sc = require("Module:scripts").findBestScript(data.heads and data.heads[1] ~= "" and data.heads[1] or pagename, data.lang)
	else
		-- Track uses of sc parameter
		local best = require("Module:scripts").findBestScript(pagename, data.lang)
		require("Module:debug").track("headword/sc")
		
		if data.sc:getCode() == best:getCode() then
			require("Module:debug").track("headword/sc/redundant")
			require("Module:debug").track("headword/sc/redundant/" .. data.sc:getCode())
		else
			require("Module:debug").track("headword/sc/needed")
			require("Module:debug").track("headword/sc/needed/" .. data.sc:getCode())
		end
	end
	
	-- `matched` receives the substitution count from gsub below; declared
	-- local so it does not leak into the global environment.
	local displayTitle, matched
	-- Assumes that the scripts in "toBeTagged" will never occur in the Reconstruction namespace.
	-- Avoid tagging ASCII as Hani even when it is tagged as Hani in the
	-- headword, as in [[check]]. The check for ASCII might need to be expanded
	-- to a check for any Latin characters and whitespace or punctuation.
	if (namespace == "" and data.sc and toBeTagged[data.sc:getCode()]
			and not pagename:find "^[%z\1-\127]+$")
			or (data.sc:getCode() == "Jpan" and (test_script(pagename, "Hira") or test_script(pagename, "Kana"))) then
		displayTitle = '<span class="' .. data.sc:getCode() .. '">' .. pagename .. '</span>'
	elseif namespace == "Reconstruction" then
		displayTitle, matched = mw.ustring.gsub(
			fullPagename,
			"^(Reconstruction:[^/]+/)(.+)$",
			function(before, term)
				return before ..
					require("Module:script utilities").tag_text(
						term,
						data.lang,
						data.sc
					)
			end
		)
		
		-- No substitution means the title did not have the expected shape;
		-- leave the display title alone in that case.
		if matched == 0 then
			displayTitle = nil
		end
	end
	
	if displayTitle then
		local frame = mw.getCurrentFrame()
		frame:callParserFunction(
			"DISPLAYTITLE",
			displayTitle
		)
	end
	
	if data.force_cat_output then
		--[=[
		[[Special:WhatLinksHere/Template:tracking/headword/force cat output]]
		]=]
		require("Module:debug").track("headword/force cat output")
	end
	
	if data.getCanonicalName then
		error('The "data" variable supplied to "full_headword" should not be a language object.')
	end
		
	-- Were any categories specified?
	if data.categories and #data.categories > 0 then
		-- Escaped once and reused for every pattern below, so that magic
		-- characters in the language name (e.g. "-") are matched literally.
		local lang_name = require("Module:string").pattern_escape(data.lang:getCanonicalName())
		for _, cat in ipairs(data.categories) do
			-- Does the category begin with the language name? If not, tag it with a tracking category.
			if not mw.ustring.find(cat, "^" .. lang_name) then
				mw.log(cat, data.lang:getCanonicalName())
				table.insert(tracking_categories, "head tracking/no lang category")
				
				--[=[
				[[Special:WhatLinksHere/Template:tracking/head tracking/no lang category]]
				]=]
				require("Module:debug").track{
					"headword/no lang category",
					"headword/no lang category/lang/" .. data.lang:getCode()
				}
			end
		end
		
		-- With no explicit part of speech, derive it from the first category
		-- by stripping the leading language name, and drop that category.
		if not data.pos_category
			and mw.ustring.find(data.categories[1], "^" .. lang_name)
				then
			data.pos_category = mw.ustring.gsub(data.categories[1], "^" .. lang_name .. " ", "")
			table.remove(data.categories, 1)
		end
	end
	
	if not data.pos_category then
		error(
			'No valid part-of-speech categories were found in the list '
			.. 'of categories passed to the function "full_headword". '
			.. 'The part-of-speech category should consist of a language\'s '
			.. 'canonical name plus a part of speech.'
			)
	end
	
	-- Categorise for unusual characters
	local standard = data.lang:getStandardCharacters()
	
	if standard then
		if mw.ustring.len(title.subpageText) ~= 1 and not non_categorizable() then
			for character in mw.ustring.gmatch(title.subpageText, "([^" .. standard .. "])") do
				-- Use the uppercase form for the category name when the
				-- uppercase form is itself not a standard character.
				local upper = mw.ustring.upper(character)
				if not mw.ustring.find(upper, "[" .. standard .. "]") then
					character = upper
				end
				table.insert(
					data.categories,
					data.lang:getCanonicalName() .. " terms spelled with " .. character
				)
			end
		end
	end
	
	-- Categorise for palindromes
	if title.nsText ~= "Reconstruction" and mw.ustring.len(title.subpageText)>2
		and require('Module:palindromes').is_palindrome(
			title.subpageText, data.lang, data.sc
			) then
		table.insert(data.categories, data.lang:getCanonicalName() .. " palindromes")
	end

	-- This may add more categories (e.g. gender categories), so make sure it gets
	-- evaluated first.
	local text = show_headword_line(data)
	return
		text ..
		require("Module:utilities").format_categories(
			data.categories, data.lang, data.sort_key, nil,
			data.force_cat_output or test_force_categories, data.sc
			) ..
		require("Module:utilities").format_categories(
			tracking_categories, data.lang, data.sort_key, nil,
			data.force_cat_output or test_force_categories, data.sc
			)
end



-- Hand the table of public functions (show, full_headword) to the caller.
return export