Module:siwa-noun/common: Difference between revisions

From Linguifex
Jump to navigation Jump to search
No edit summary
No edit summary
Line 36: Line 36:


function export.coalescence(word, stress)
function export.coalescence(word, stress)
local diphthongs = { -- TODO: [ůy]V > ỷ in stressed
local diphthongs = {
["[aę]▫[aę]"] = "ả", ["e▫e"] = "ẻ", ["i▫i"] = "ỉ", ["u▫[yuů]"] = "ủ", ["[yů]▫u"] = "ủ",
["[aę]▫[aę]"] = "ả", ["e▫e"] = "ẻ", ["i▫i"] = "ỉ", ["u▫[yuů]"] = "ủ", ["[yů]▫u"] = "ủ",
["a▫e"] = "ai", ["a▫o"] = "au", ["ę▫[uů]"] = "ay",
["a▫e"] = "ai", ["a▫o"] = "au", ["ę▫[uů]"] = "ay",
Line 46: Line 46:
["([ae])▫ů"] = "%1y", ["([iou])▫ę"] = "%1a",  
["([ae])▫ů"] = "%1y", ["([iou])▫ę"] = "%1a",  
["[yů]▫[aę]"] = "ůa", ["[yů]▫[ei]"] = "ůi", -- to screen out stressed vs. unstressed
["[yů]▫[aę]"] = "ůa", ["[yů]▫[ei]"] = "ůi", -- to screen out stressed vs. unstressed
}
local triphthongs = {
["([aoe])i([aoue])"] = "%1į%2", ["([aoe])ii"] = "%1gįi", ["([aoe])ỉ"] = "%1gįi",
["ai[yů]"] = "ey", ["([eo])i[yů]"] = "%1įů",
["u[ie]([aoue])"] = "uį%1", ["u[ie]i"] = "ugįi", ["uỉ"] = "ugįi", ["[uů][ie][yů]"] = "ůgįů",
["ie([aouy])"] = "igį%1", ["ieů"] = "igįy", ["iee"] = "iẻ",
["ůi([aoe])"] = "ůį%1", ["ůii"] = "ůgįi", ["ůỉ"] = "ůgįi", ["ůiu"] = "ůgįů",
["ay([ae])"] = "ęm%1", ["ay([oiů])"] = "ęb%1", ["ay[uy]"] = "ębů",
["uo([aue])"] = "um%1", ["uo([oi])"] = "ub%1", ["uo[yů]"] = "ůbů",
["au([aue])"] = "am%1", ["au([oi])"] = "ab%1", ["au[yů]"] = "amů",
["o[ua]([aue])"] = "om%1", ["o[ua]([oi])"] = "ob%1", ["o[ua][yů]"] = "omů",
["[eẻ][uy]([ae])"] = "ům%1", ["[eẻ][uy]([oi])"] = "ůb%1", ["[eẻ][ua][yůu]"] = "ůbů",
["iu([ae])"] = "ivv%1", ["iu([oui])"] = "ib%1", ["iu[yů]"] = "ivvů",
["ả([aoue])"] = "ęį%1", ["ải"] = "ęgįi", ["[ảẻ][yů]"] = "ey",
["ỏ([aue])"] = "om%1", ["([ỏỷ])([oi])"] = "%1b%2", ["ỏ[yů]"] = "omů",
["([ủỷ])([ae])"] = "%1m%2", ["ủ([oui])"] = "ub%1", ["ủ[yů]"] = "ůbů",
["([ẻỉ])([aou])"] = "%1vv%2", ["ẻ([ie])"] = "egį%1",
["ỉi"] = "iddįi", ["ỷ[yů]"] = "yby",
}
}
Line 74: Line 94:
["kį"] = "c", ["gį"] = "ɟ", ["hh"] = "ħ", ["ḍḍ"] = "ð", ["dl"] = "ɬ", ["o̊"] = "ȯ",
["kį"] = "c", ["gį"] = "ɟ", ["hh"] = "ħ", ["ḍḍ"] = "ð", ["dl"] = "ɬ", ["o̊"] = "ȯ",
["õu"] = "ở", ["ẻu"] = "ử",
["õu"] = "ở", ["ẻu"] = "ử",
}
-- Lenition rules as an array of {pattern, replacement} pairs.
-- Being an array (not a hash), the rules have a fixed, listed order.
-- Several patterns use single characters (ɟ, ð, ɲ, ħ, ɬ) that appear as values
-- in export.digraphs_to_single.
-- NOTE(review): presumably the word is converted with that table before these
-- rules run, and rules are tried in listed order — the consumer is not visible
-- here, confirm against it.
export.lenition_patterns = {
{"bb", "b"}, {"dd", "d"}, {"gg", "g"}, {"ɟ", "į"}, {"mm", "m"}, {"ll", "l"}, {"nn", "n"},
{"rr", "r"}, {"bġ", "p"}, {"pr", "p"}, {"dġ", "t"}, {"tr", "t"}, {"ḍb", "p"}, {"ð", "hh"},
{"ḍg", "k"}, {"bm", "m"}, {"dn", "n"}, {"kn", "ng"}, {"([lr])pp", "%1p"}, {"([lr])tt", "%1t"}, {"([lrms])kk", "%1k"},
{"k([lvs])", "g%1"}, {"ps", "bs"}, {"(.)[vųbhḥg](.)", "%1%2"}, {"d[aou]", "l"}, {"ġ[aou]", "vv"}, {"[dġ][eůy]", ""}, {"[rġ]i", "ṡi"},
{"di", ""}, {"ɲi", "ɟi"}, {"ħį", "ṡ"}, {"[ou]ų", "ů"}, {"ɬ", "l"},
}
--[[
I'm sure someone more experienced than I am (not too difficult to achieve)
would do this more elegantly and more efficiently, but I couldn't come up with anything better.
]]
-- Container for the per-declension lookup tables defined below
-- (export.endings.a, .e, .i, .o, .u).
export.endings = {}
-- Lookup table for the "a" group: each key is a short letter sequence and each
-- value is a class label string.
-- NOTE(review): keys are presumably word-final consonant(s) matched against the
-- noun — the lookup code is not visible here, confirm.
-- Label format:
-- [gender]-[declension]-[subgroup]
export.endings.a = {
["m"] = "i-a-mV", ["n"] = "i-a-nV", ["p"] = "i-a-pV", ["b"] = "i-a-bV", ["t"] = "i-a-tV",
["r"] = "i-a-rV", ["vv"] = "i-a-vvV", ["lk"] = "i-a-CkV", ["rk"] = "i-a-CkV", ["sk"] = "i-a-CkV",
}
-- Lookup table for the "e" group: each key is a short letter sequence ending in
-- a vowel and each value is a class label string. Keys that differ only in the
-- final vowel (e.g. "na"/"ne") share one label; the bare-vowel keys ("a", "e",
-- "i", "o", "u", "y", "ů") map to labels without a subgroup part.
-- NOTE(review): keys are presumably word endings, with longer keys meant to be
-- tried before the bare-vowel ones — the lookup code is not visible, confirm.
-- Label format:
-- [gender]-[declension]-[final vowel(s)]-[subgroup]
export.endings.e = {
["na"] = "i-e-ae-n", ["ne"] = "i-e-ae-n",
["ba"] = "i-e-ae-b", ["be"] = "i-e-ae-b",
["ka"] = "i-e-ae-k", ["ke"] = "i-e-ae-k",
["ma"] = "i-e-ae-m", ["me"] = "i-e-ae-m",
["va"] = "i-e-ae-v", ["ve"] = "i-e-ae-v",
["tsa"] = "i-e-ae-ts", ["tse"] = "i-e-ae-ts",
["ta"] = "i-e-ae-t", ["te"] = "i-e-ae-t",
["sa"] = "i-e-ae-s", ["se"] = "i-e-ae-s",
["la"] = "i-e-ae-l", ["le"] = "i-e-ae-l",
["ha"] = "i-e-ae-h/ġ", ["he"] = "i-e-ae-h/ġ", ["ġa"] = "i-e-ae-h/ġ", ["ġe"] = "i-e-ae-h/ġ",
["a"] = "i-e-ae", ["e"] = "i-e-ae",
["mi"] = "i-e-i-m",
["pi"] = "i-e-i-p/b", ["bi"] = "i-e-i-p/b",
["vi"] = "i-e-i-v",
["ni"] = "i-e-i-n",
["ri"] = "i-e-i-r",
["li"] = "i-e-i-l",
["ki"] = "i-e-i-k/g", ["gi"] = "i-e-i-k/g",
["i"] = "i-e-i",
["o"] = "i-e-o",
-- U = front vowel
-- (in the keys below, "U" in the label covers the final vowels u, y and ů)
["mu"] = "i-e-U-m/n", ["my"] = "i-e-U-m/n", ["mů"] = "i-e-U-m/n", ["nu"] = "i-e-U-m/n", ["ny"] = "i-e-U-m/n", ["nů"] = "i-e-U-m/n",
["ku"] = "i-e-U-k", ["ky"] = "i-e-U-k", ["ků"] = "i-e-U-k",
["u"] = "i-e-U", ["y"] = "i-e-U", ["ů"] = "i-e-U",
}
-- Lookup table for the "i" group: each key is a short letter sequence and each
-- value is a class label string. Note that "g" and "s" both map to "i-i-gV".
-- NOTE(review): the trailing "else -V" remark suggests the caller falls back to
-- a generic "-V" class when no key matches — that fallback is not visible here,
-- confirm.
-- Label format:
-- [gender]-[declension]-[subgroup]
export.endings.i = {
["m"] = "i-i-mV", ["n"] = "i-i-nV", ["um"] = "i-i-uomnV", ["om"] = "i-i-uomnV", ["un"] = "i-i-uomnV", ["on"] = "i-i-uomnV",
["v"] = "i-i-vV", ["g"] = "i-i-gV", ["s"] = "i-i-gV", ["r"] = "i-i-rV", ["h"] = "i-i-hV", ["k"] = "i-i-kV",
["l"] = "i-i-lV", ["i"] = "i-i-i", -- else -V
}
-- Lookup table for the "o" group: each key is a short letter sequence and each
-- value is a class label string. Keys ending in a/e or i carry that vowel; the
-- bare-consonant keys ("b", "v", "r", "l", ...) map to the "ouyů" labels.
-- NOTE(review): the trailing "else -ouyů" remark suggests the caller falls back
-- to a generic "ouyů" class when no key matches — not visible here, confirm.
-- Label format:
-- [gender]-[declension]-[final vowel(s)]-[subgroup]
export.endings.o = {
["ba"] = "i-o-ae-b/t", ["be"] = "i-o-ae-b/t", ["ta"] = "i-o-ae-b/t", ["te"] = "i-o-ae-b/t",
["va"] = "i-o-ae-v/m/n", ["ve"] = "i-o-ae-v/m/n", ["ma"] = "i-o-ae-v/m/n", ["me"] = "i-o-ae-v/m/n", ["na"] = "i-o-ae-v/m/n", ["ne"] = "i-o-ae-v/m/n",
["ra"] = "i-o-ae-r/h/g", ["re"] = "i-o-ae-r/h/g", ["ha"] = "i-o-ae-r/h/g", ["he"] = "i-o-ae-r/h/g", ["ga"] = "i-o-ae-r/h/g", ["ge"] = "i-o-ae-r/h/g",
["ka"] = "i-o-ae-k", ["ke"] = "i-o-ae-k",
["la"] = "i-o-ae-l", ["le"] = "i-o-ae-l",
["mi"] = "i-o-i-m/n/k/g", ["ni"] = "i-o-i-m/n/k/g", ["ki"] = "i-o-i-m/n/k/g", ["gi"] = "i-o-i-m/n/k/g",
["lki"] = "i-o-i-lk",
["rki"] = "i-o-i-rk",
["ski"] = "i-o-i-sk", ["sġi"] = "i-o-i-sk",
["li"] = "i-o-i-l",
["ri"] = "i-o-i-r/h", ["hi"] = "i-o-i-r/h",
["ṡi"] = "i-o-i-ṡ", ["hhįi"] = "i-o-i-ṡ",
["b"] = "i-o-ouyů-b/p/t/d", ["p"] = "i-o-ouyů-b/p/t/d", ["t"] = "i-o-ouyů-b/p/t/d", ["d"] = "i-o-ouyů-b/p/t/d",
["v"] = "i-o-ouyů-v/m/n", ["m"] = "i-o-ouyů-v/m/n", ["n"] = "i-o-ouyů-v/m/n",
["r"] = "i-o-ouyů-r/h/g/k", ["h"] = "i-o-ouyů-r/h/g/k", ["g"] = "i-o-ouyů-r/h/g/k", ["k"] = "i-o-ouyů-r/h/g/k",
["l"] = "i-o-ouyů-l",
["a"] = "i-o-ae", ["e"] = "i-o-ae",
["i"] = "i-o-i", --else -ouyů
}
-- Lookup table for the "u" group: each key is a consonant+vowel sequence or a
-- bare vowel, each value a class label string.
-- NOTE(review): only the b/p/t/d + a/e/i keys carry a label ("i-u-aei-b/p/t/d");
-- every other key maps to the empty string. These look like placeholders still
-- to be filled in — confirm before relying on them, since "" is truthy in Lua
-- and a caller testing `if label then` will treat it as a valid hit.
export.endings.u = {
["ba"] = "i-u-aei-b/p/t/d", ["pa"] = "i-u-aei-b/p/t/d", ["ta"] = "i-u-aei-b/p/t/d", ["da"] = "i-u-aei-b/p/t/d",
["be"] = "i-u-aei-b/p/t/d", ["pe"] = "i-u-aei-b/p/t/d", ["te"] = "i-u-aei-b/p/t/d", ["de"] = "i-u-aei-b/p/t/d",
["bi"] = "i-u-aei-b/p/t/d", ["pi"] = "i-u-aei-b/p/t/d", ["ti"] = "i-u-aei-b/p/t/d", ["di"] = "i-u-aei-b/p/t/d",
["va"] = "", ["ma"] = "", ["na"] = "",
["ve"] = "", ["me"] = "", ["ne"] = "",
["vi"] = "", ["mi"] = "", ["ni"] = "",
["ra"] = "", ["ha"] = "", ["ga"] = "", ["ka"] = "",
["re"] = "", ["he"] = "", ["ge"] = "", ["ke"] = "",
["ri"] = "", ["hi"] = "", ["gi"] = "", ["ki"] = "",
["la"] = "", ["le"] = "", ["li"] = "",
["a"] = "", ["e"] = "", ["i"] = "",
["bo"] = "", ["bu"] = "", ["by"] = "", ["bů"] = "",
["po"] = "", ["pu"] = "", ["py"] = "", ["pů"] = "",
["to"] = "", ["tu"] = "", ["ty"] = "", ["tů"] = "",
["do"] = "", ["du"] = "", ["dy"] = "", ["dů"] = "",
["vo"] = "", ["vu"] = "", ["vy"] = "", ["vů"] = "",
["mo"] = "", ["mu"] = "", ["my"] = "", ["mů"] = "",
["no"] = "", ["nu"] = "", ["ny"] = "", ["nů"] = "",
["ro"] = "", ["ru"] = "", ["ry"] = "", ["rů"] = "",
["ho"] = "", ["hu"] = "", ["hy"] = "", ["hů"] = "",
["go"] = "", ["gu"] = "", ["gy"] = "", ["gů"] = "",
["ko"] = "", ["ku"] = "", ["ky"] = "", ["ků"] = "",
["lo"] = "", ["lu"] = "", ["ly"] = "", ["lů"] = "",
["o"] = "", ["u"] = "", ["y"] = "", ["ů"] = "",
}
}


return export
return export

Revision as of 22:44, 30 July 2021



local gsub = mw.ustring.gsub

local export = {}

--- Apply the first matching lenition rule to a word.
-- Rules are tried strictly in the order listed below. The first rule whose
-- substitution changes the word wins: only its first occurrence is rewritten
-- and no further rule is tried.
--
-- The rules used to live in a hash table walked with pairs(), whose traversal
-- order is unspecified; a word matching several rules (e.g. "dda", which fits
-- both "dd" and "d[aou]") could therefore lenite differently between runs.
-- An array walked with ipairs() makes the precedence explicit and stable.
--
-- @param word string to lenite
-- @return the lenited word, or `word` unchanged when no rule applies
function export.lenition(word)
	local v = "([aeiouyůõảẻỉỏủỷę])" -- one captured vowel
	-- Ordered {pattern, replacement} pairs; earlier entries take precedence.
	local lenition_patterns = {
		-- geminated voiced stops
		{"bb", "b"}, {"dd", "d"}, {"gg", "g"}, {"ġġ", "ġ"},
		-- long sonorants
		{"gį", "į"}, {"mm", "m"}, {"ll", "l"}, {"nn", "n"}, {"rr", "r"},
		-- uvular stops
		{"bġ", "p"}, {"pr", "p"}, {"dġ", "t"}, {"tr", "t"},
		-- glottalized stops
		{"bm", "m"}, {"dn", "n"}, {"kn", "ng"},
		-- consonant clusters
		{"dl", "l"}, {"([lr])pp", "%1p"}, {"([lr])tt", "%1t"}, {"([lrms])kk", "%1k"},
		{"ps", "bs"}, {"k([lvs])", "g%1"},
		-- voiced consonants (▫ is an arbitrary marker left between the two
		-- vowels so that export.coalescence can merge them afterwards)
		{"vv", "ų"}, {v .. "[vųbhḥg]" .. v, "%1▫%2"}, {"d[aou]", "l"},
		{v .. "[dġ][ieůy]", "%1▫"}, {v .. "ġ[aou]", "%1vv"},
		-- Ci#
		-- NOTE(review): "[rġ]i" overlaps with the vowel + "[dġ][ieůy]" rule
		-- above for inputs such as "aġi"; with this ordering the earlier rule
		-- wins. Confirm that is the intended precedence.
		{"[rġ]i", "ṡi"}, {"nįi", "gįi"}, {"hhį", "ṡ"},
		-- long voiceless consonants
		{"ḍb", "p"}, {"ḍḍ", "hh"}, {"ḍg", "k"},
		-- semi-vowels
		{"[ou]ų", "ů"},
	}

	for _, rule in ipairs(lenition_patterns) do
		-- Limit of 1: only the first occurrence of the pattern is replaced.
		local lenited = gsub(word, rule[1], rule[2], 1)
		if lenited ~= word then
			return lenited
		end
	end

	return word
end

--- Merge vowels separated by the ▫ marker into diphthongs or long vowels.
-- Every rule in `diphthongs` is applied to the whole word, one rule after the
-- other in the order listed. Then, if `stress` is truthy, every remaining
-- sequence of ů/y followed by a/ę/e/i is collapsed to "ỷ". Finally any ▫
-- markers that no rule consumed are removed.
--
-- The rules used to live in a hash table walked with pairs(), whose traversal
-- order is unspecified. Because the rules are applied cumulatively, a word
-- with chained markers (e.g. "a▫e▫e") could come out differently between
-- runs; an array walked with ipairs() fixes the order.
--
-- @param word   string, possibly containing ▫ markers (export.lenition inserts them)
-- @param stress truthy to apply the stressed-syllable collapse to "ỷ"
-- @return the word with markers resolved and removed
function export.coalescence(word, stress)
	-- Ordered {pattern, replacement} pairs; applied top to bottom.
	local diphthongs = {
		{"[aę]▫[aę]", "ả"}, {"e▫e", "ẻ"}, {"i▫i", "ỉ"}, {"u▫[yuů]", "ủ"}, {"[yů]▫u", "ủ"},
		{"a▫e", "ai"}, {"a▫o", "au"}, {"ę▫[uů]", "ay"},
		{"ę▫[ei]", "ei"}, {"ę▫o", "eu"},
		{"e▫[aę]", "ia"}, {"e▫o", "io"}, {"i▫y", "iů"},
		{"[uo]▫e", "oi"}, {"o▫ů", "ou"},
		{"[yůo]▫o", "uo"},
		{"[yů]▫[yů]", "ỷ"},
		{"([ae])▫ů", "%1y"}, {"([iou])▫ę", "%1a"},
		-- kept as two letters here so the stress step below can tell
		-- stressed from unstressed
		{"[yů]▫[aę]", "ůa"}, {"[yů]▫[ei]", "ůi"},
	}

	-- NOTE(review): this table is defined but never applied anywhere in this
	-- function. It is kept untouched because it holds work-in-progress data;
	-- wire it in or move it out once the triphthong step is implemented.
	local triphthongs = {
		["([aoe])i([aoue])"] = "%1į%2", ["([aoe])ii"] = "%1gįi", ["([aoe])ỉ"] = "%1gįi",
		["ai[yů]"] = "ey", ["([eo])i[yů]"] = "%1įů",
		["u[ie]([aoue])"] = "uį%1", ["u[ie]i"] = "ugįi", ["uỉ"] = "ugįi", ["[uů][ie][yů]"] = "ůgįů",
		["ie([aouy])"] = "igį%1", ["ieů"] = "igįy", ["iee"] = "iẻ",
		["ůi([aoe])"] = "ůį%1", ["ůii"] = "ůgįi", ["ůỉ"] = "ůgįi", ["ůiu"] = "ůgįů",
		["ay([ae])"] = "ęm%1", ["ay([oiů])"] = "ęb%1", ["ay[uy]"] = "ębů",
		["uo([aue])"] = "um%1", ["uo([oi])"] = "ub%1", ["uo[yů]"] = "ůbů",
		["au([aue])"] = "am%1", ["au([oi])"] = "ab%1", ["au[yů]"] = "amů",
		["o[ua]([aue])"] = "om%1", ["o[ua]([oi])"] = "ob%1", ["o[ua][yů]"] = "omů",
		["[eẻ][uy]([ae])"] = "ům%1", ["[eẻ][uy]([oi])"] = "ůb%1", ["[eẻ][ua][yůu]"] = "ůbů",
		["iu([ae])"] = "ivv%1", ["iu([oui])"] = "ib%1", ["iu[yů]"] = "ivvů",
		["ả([aoue])"] = "ęį%1", ["ải"] = "ęgįi", ["[ảẻ][yů]"] = "ey",
		["ỏ([aue])"] = "om%1", ["([ỏỷ])([oi])"] = "%1b%2", ["ỏ[yů]"] = "omů",
		["([ủỷ])([ae])"] = "%1m%2", ["ủ([oui])"] = "ub%1", ["ủ[yů]"] = "ůbů",
		["([ẻỉ])([aou])"] = "%1vv%2", ["ẻ([ie])"] = "egį%1",
		["ỉi"] = "iddįi", ["ỷ[yů]"] = "yby",
	}

	for _, rule in ipairs(diphthongs) do
		word = gsub(word, rule[1], rule[2])
	end

	if stress then
		-- Matches any ů/y + a/ę/e/i sequence in the word, including the
		-- "ůa"/"ůi" outputs produced by the rules above.
		word = gsub(word, "[ůy][aęei]", "ỷ")
	end

	-- Drop markers that no rule consumed.
	word = gsub(word, "▫", "")

	return word
end

-- Maps a vowel spelling (plain vowel, marked vowel or two-letter sequence) to
-- a single plain vowel letter. Built from one group per target letter so each
-- target is written only once.
export.stressed_vowels = {}
do
	local groups = {
		{"a", {"a", "ả", "au", "ai", "oa"}},
		{"e", {"e", "ẻ", "ę", "ei", "ay", "eu"}},
		{"i", {"i", "ỉ", "ia", "ie", "io", "iu"}},
		{"o", {"o", "ỏ", "õ", "õu", "oi", "ou"}},
		{"u", {"u", "ủ", "uo", "ui"}},
		{"y", {"y", "ỷ", "ů", "ẻu", "ey", "ůa", "ůi"}},
	}
	for _, group in ipairs(groups) do
		local target, spellings = group[1], group[2]
		for _, spelling in ipairs(spellings) do
			export.stressed_vowels[spelling] = target
		end
	end
end

-- Maps two-character sequences to single-character stand-ins.
-- NOTE(review): presumably used so that pattern matching can treat each
-- sequence as one unit — the consumer is not visible here, confirm.
export.digraphs_to_single = {
	-- consonant sequences
	["ts"] = "ʦ",
	["tṡ"] = "ʨ",
	["dį"] = "ʥ",
	["ng"] = "ŋ",
	["nį"] = "ɲ",
	["kį"] = "c",
	["gį"] = "ɟ",
	["hh"] = "ħ",
	["ḍḍ"] = "ð",
	["dl"] = "ɬ",
	-- vowel sequences (the first key is "o" plus a combining ring above)
	["o̊"] = "ȯ",
	["õu"] = "ở",
	["ẻu"] = "ử",
}

return export