Module:siwa-pron: Difference between revisions

From Linguifex
Jump to navigation Jump to search
No edit summary
No edit summary
Line 154: Line 154:
{"[ƀp]([msɕ])", "p%1ː"}, {"[ƀp]r", "px"},
{"[ƀp]([msɕ])", "p%1ː"}, {"[ƀp]r", "px"},
{"b([sɕ])", "p%1"},
{"b([sɕ])", "p%1"},
{"n[ꬶk]", "ŋk"}, {"([ðđʦłɕꬶr])v", "%1wː"},
{"n[ꬶk]", "ŋk"}, {"([ðđʦłɕꬶrkt])v", "%1wː"},
{"[đt]n", "tnː"}, {"[đt]r", "tx"}, {"[đt][ꬶk]v", "tkwː"},
{"[đt]n", "tnː"}, {"[đt]r", "tx"}, {"[đt][ꬶk]v", "tkwː"},
{"(ʦ[tđlmn])", "%1ː"}, {"ʦ[ꬶk]v", "ʦkwː"},
{"(ʦ[tđlmn])", "%1ː"}, {"ʦ[ꬶk]v", "ʦkwː"},

Revision as of 11:41, 3 August 2021



local m_sm = mw.loadData("Module:siwa-pron/data")

local sub = mw.ustring.sub
local find = mw.ustring.find
local gmatch = mw.ustring.gmatch
local gsub = mw.ustring.gsub
local u = mw.ustring.char
local split = mw.text.split
local gsplit = mw.text.gsplit

local export = {}

-- Pattern building blocks shared by the rule tables below.
-- UNR is the single code point U+031A; the preaspiration rules append it to a stop.
local UNR = u(0x031A) -- COMBINING LEFT ANGLE ABOVE. ̚

--obsolete ligatures and L with stroke used to remove two-character hassle. will replace later
-- `consonant` matches ONE character from the class, optionally followed by UNR
-- (the trailing "?" applies to the UNR code point). Some characters are listed
-- twice in the class (e.g. ʔ, ɣ); that is harmless inside a character class.
local consonant = "[mnɲŋpbtdcɟkɡʔvðsɕxɣhħʨʥrlɬłʣʦʔƀꝺđɣꬶɉʼ]" .. UNR .. "?"
-- Bare character lists (no brackets) so they can be embedded in larger classes.
local front_vowel = "iɪyeøɛœæ"
local back_vowel = "uɔõɑʊɤɯ"
-- `vowel` matches one character that is a front vowel, a back vowel, or "a".
local vowel = "[" .. front_vowel .. back_vowel .. "a]"

--- Build the pattern used by the "stressed consonants" rules in rules2.
-- Capture 1 is: an optional "·", the primary-stress mark "ˈ", any run of the
-- listed onset characters, an optional UNR, and exactly one vowel.
-- Then comes `c` itself, and capture 2 is one following character that is not "ː".
-- @param c pattern fragment for the consonant to look for after the vowel
-- @return the assembled pattern string
-- NOTE(review): declared as a global function, like the other helpers further
-- down in this module; left global here so the visible surface is unchanged.
function spat(c)
	local stressed_open = "(·?ˈ[mnɲŋpbtdcɟkɡʔvðsɕxɣhʨʥrlɬłʣjwʦ⁽ʰ⁾ʔːƀꝺđɣꬶɉ]*" .. UNR .. "?" .. vowel .. ")"
	local not_long = "([^ː])"
	return stressed_open .. c .. not_long
end

--- Turn a list of category names into concatenated category wikilinks.
-- @param categories sequence of category names (without the "Category:" prefix)
-- @return one string of "[[Category:NAME]]" links with no separator ("" for an empty list)
local function ncategories(categories)
	local links = {}
	for _, name in ipairs(categories) do
		links[#links + 1] = "[[Category:" .. name .. "]]"
	end
	return table.concat(links)
end

-- Open vowel -> closed counterpart. Already-closed vowels map to themselves so
-- that applying the conversion twice is harmless.
local closed_counterpart = {["ɑ"] = "a", ["æ"] = "æ", ["e"] = "ɛ", ["i"] = "ɪ",
	["ɔ"] = "ɔ", ["õ"] = "õ", ["u"] = "ʊ", ["y"] = "œ", ["ɯ"] = "ɯ",
	["ø"] = "ü", -- dialectal variation, will be changed later
	["a"] = "a", ["ɛ"] = "ɛ", ["ɪ"] = "ɪ",
	["ʊ"] = "ʊ", ["ü"] = "ü", ["œ"] = "œ",}

--- Replace every vowel in `v` by its closed counterpart.
-- Called from the "closed vowels" rules with the run of vowels that precedes a
-- closing consonant environment.
-- Characters without an entry in the map (for instance "ɤ", which the `vowel`
-- class accepts) are passed through unchanged. Previously they were handed to
-- gsub as a nil replacement, which raises an error.
-- @param v string of vowels (may be empty)
-- @return the converted string
local function open_to_closed(v)
	local otc = {}
	for vc in gmatch(v, ".") do
		otc[#otc + 1] = closed_counterpart[vc] or vc
	end
	return table.concat(otc)
end

--- Add stress marks to each "·"-separated morpheme of `word`.
-- * a morpheme listed in m_sm.suffix is left unmarked, but only when the word
--   already carries a stress mark ("ˈ" or "ˌ") somewhere;
-- * a morpheme listed in m_sm.prefix gets secondary stress "ˌ";
-- * anything else gets primary stress "ˈ".
-- The original code tested `gmatch(...)` in both conditions. gmatch returns an
-- iterator function, which is always truthy, so neither test checked anything.
-- The split is now unconditional (same result as before) and the suffix test
-- uses `find` so it really looks for an existing stress mark.
-- @param word term, with morphemes optionally separated by "·"
-- @return the term with stress marks inserted, morphemes re-joined with "·"
function export.morphemes(word)
	-- split() returns { word } when there is no separator, so no guard is needed.
	local pss = split(word, "·")

	for i, m in ipairs(pss) do
		if m_sm.suffix[m] and find(table.concat(pss), "[ˈˌ]") then
			-- suffix attached to an already stressed word: stays unstressed
		elseif m_sm.prefix[m] then
			pss[i] = "ˌ" .. m
		else
			pss[i] = "ˈ" .. m
		end
	end

	return table.concat(pss, "·")
end

--- Guess a dialect from spelling cues in `term`.
-- @param term the raw (orthographic) term
-- @return "e" if any of the first group of cues is found, otherwise "w" if one
--         of the second group of letters is found, otherwise nothing.
--         export.show labels "e" as Eastern and "w" as Western.
local function detect_dialect(term)
	local e_cues = {"̊", "ṡ$", "rg", "bġ"}
	for _, cue in ipairs(e_cues) do
		if find(term, cue) then
			return "e"
		end
	end

	if find(term, "[ṁṅłƛ]") then
		return "w"
	end
end

-- Cluster rewrites that insert "ɨ" into consonant clusters.
-- export.crux applies these only when its `w` flag is set, after rules1 and
-- before rules2. Each entry is {pattern, replacement}; entries are applied in
-- order with gsub over the whole term, so earlier entries can consume the
-- input of later ones.
-- NOTE(review): "ʦꬶ" is rewritten before "([^ˈ])ʦꬶv" and "đꬶ" before "đꬶv";
-- the longer patterns therefore look unreachable — confirm intended order.
local anaptyctic = {
	{"mn", "ːmɨnː"}, {"mʔk", "ːmɨʔk"},
	
	{"([^ˈ])ʦ([nxm])", "%1ːʣɨ%2ː"}, {"ʦꬶ", "ːʣɨʔk"}, {"([^ˈ])ʦꬶv", "%1ːʣɨkwː"},
	{"([^ˈ])ʦv", "%1ːʣɨwː"}, {"ʦđ", "ːʣɨʔt"},
	
	{"đꬶ", "ːdɨʔk"}, {"đꬶv", "ːdɨkwː"}, {"đ([xn])", "ːdɨ%1ː"},
	{"([^ˈ])đv", "%1ːdɨwː"},
	
	{"sꬶv", "skʔɨwː"}, {"sʔk", "ːsɨʔk"}, {"ɕꬶv", "ɕkɨwː"},
	
	{"([lr])(ʔ[kpt])", "ː%1ɨ%2"}, {"ł([mnx])", "ːłɨ%1ː"}, {"ꝺx", "ːðɨɣː"},
	
	{"ðꬶ", "ːðɨʔk"}, {"ɣ([nm])", "ːŋɨ%1ː"}, {"([łrð])v", "ː%1ɨwː"},
	
	{"x([mnl])", "ːɣɨ%1ː"}, {"xv", "ːɣɨwː"}, {"ƀ([msɕ])", "ːbɨ%1ː"},
	
	{"([^ˈ])ꬶ([msɕl])", "%1ːɡɨ%2ː"}, {"(.)ꬶv", "%1ːɡɨwː"}, {"ꬶsꬶ", "ːksɨʔk"},
}

-- First rewrite pass, applied by export.crux right after stress marking.
-- Each entry is {pattern, replacement}; entries run in order with gsub over
-- the whole term, so ORDER MATTERS: a rule only sees what earlier rules left.
-- The first entry uses a table replacement: every single character is looked
-- up in the table and replaced when it has an entry.
local rules1 = {
	{".", {
		["ḍ"] = "ð", ["ṡ"] = "ɕ",
		["ḥ"] = "ʔ", ["į"] = "j",
		["ġ"] = "x", ["g"] = "ɡ", -- IPA g
		["ų"] = "w",
	}},
	 {"o̊", "ɯ"}, {"t[ṡɕ]", "ʨ"}, {"dj", "ʥ"}, {"dl", "ł"}, {"kj", "c"}, {"nj", "ɲ"},  {"ts", "ʦ"},
	
	-- long consonants
	{"mm", "mː"}, {"bb", "pː"}, {"vv", "wː"}, {"nn", "nː"},
	{"dʥ", "ʥː"}, {"dd", "tː"}, {"ðð", "ðː"}, {"ss", "sː"},
	{"ɕɕ", "ɕː"}, {"rr", "rː"}, {"ll", "lː"}, {"ɡɡ", "kː"},
	{"xx", "xː"}, {"nɡ", "ŋː"}, {"hh", "hː"}, {"ʔʔ", "ʔː"},
	{"nɲ", "ɲː"}, {"hl", "ɬː"}, {"ƛƛ", "ɬː"}, 
	
	-- default all consonants to unstressed. [] with stroke and ꬶ (U+AB36) to tell apart natural and stress-borne
	{"p", "ƀ"}, {"d", "ꝺ"}, {"t", "đ"}, {"ɡ", "ɣ"}, {"k", "ꬶ"},
	
	-- default all vowels to open (open-closed distinctions are computed later); long vowels don't have closed counterparts
	{"a", "ɑ"}, {"ę", "æ"}, {"o", "ɔ"}, {"ů", "ø"}, -- õ is conserved to avoid two characters
	-- The digraph "ẻu" must be rewritten BEFORE the bare "ẻ": with the bare rule
	-- first, "ẻu" had already become "eːu" and the digraph rule never matched.
	{"ả", "æː"}, {"ẻu", "øː"}, {"ẻ", "eː"}, {"ỉ", "iː"}, {"ỏ", "ʊː"}, {"ủ", "uː"}, {"ỷ", "yː"}, {"õu", "õː̃"},
	
	-- diphthongs
	{"øɑ", "üa"}, {"øi", "üɪ"}, {"uɔ", "ʊɔ"},
	{"ɑy", "æœ"}, {"ey", "ɛœ"},
	
	{"^(ˈ)ꬶ([" .. front_vowel .. "])", "%1c%2"}, 
	{"^(ˈ)ɣ([" .. front_vowel .. "])", "%1ɟ%2"}, -- word-initial [k] and [g] palatalize before front vowels 
	{"^(ˈ[ƀđꬶc])", "%1ʰ"}, -- voiceless stops word-initially become aspirated
	{"^(.*·ˈ[ƀđꬶc])", "%1⁽ʰ⁾"}, -- although after a prefix they may or may not be
	{"^(ˈ)ɣj([" .. front_vowel .. "])", "%1ʣ%2"}, -- <gį> word-initially and before front vowels is pronounced [d͡z]
	{"^(ˈ)ɣj([" .. back_vowel .. "])", "%1ɟ%2"}, -- and [ɟ] before back vowels
	
	-- preaspirated consonants
	{"[hʔ](ʦ[kꬶx])", "ħ%1"}, {"[hʔ]([tđ][vx])", "ħ%1"},
	{"h(ː[wj])", "ħ%1"},
	{"[hʔ]([pbtdkmnlsrʦꬶđƀꝺɣ][^" .. UNR .. "])", "ħ%1"},
	
	{"ƀƀ", "ʔp"}, {"pƀ", "ʔp"},
	{"đđ", "ʔt"}, {"tđ", "ʔt"},
	{"ꬶꬶ", "ʔk"}, {"kꬶ", "ʔk"},
	{"bm", "ʔp" .. UNR .. "m"}, {"ꝺn", "ʔt" .. UNR .. "n"}, {"ꬶn", "ʔk" .. UNR .. "ŋ"},
	
	-- other stem- and/or word-initial configurations
	-- NOTE(review): "ɣj" is rewritten before "ɣjː", so the "ɣjː" rule looks
	-- unreachable as well — left untouched, confirm the intended order.
	{"đi", "ʨi"}, {"ꝺi", "ʥi"}, {"ɣi", "ɉi"}, {"ɣj", "jː"}, {"ɣjː", "ɟː"},	
	{"ˈƀ", "ˈp"}, {"ˈđ", "ˈt"}, {"ˈꬶ", "ˈk"}, {"ˈꝺ", "ˈd"}, {"ƛ", "ʔł"}, {"ˈɣ", "ˈɡ"}
}

-- Second rewrite pass, applied by export.crux after the dialect-specific
-- substitutions. Each entry is {pattern, replacement}; entries run in order
-- with gsub over the whole term. The last group uses function replacements
-- that pass the captured vowel run through open_to_closed and re-append the
-- captured consonant environment unchanged.
local rules2 = {
	-- stressed consonants (coda of stressed syllables without long vowels, diphthongs, or triphthongs)
	{spat("h"), "%1ʔ%2"}, {spat("ꬶ"), "%1k%2"}, {spat("ƀ"), "%1p%2"}, {spat("đ"), "%1t%2"}, {spat("ꝺ"), "%1d%2"},
	
	-- internal consonant clusters
	{"pp", "ʔp"},
	{"tt", "ʔt"},
	{"k[kc]", "ʔk"},-- {"kc", "ʔc"},
	{"mn", "mnː"}, {"mʔk", "mkː"},
	{"[ƀp]([msɕ])", "p%1ː"}, {"[ƀp]r", "px"},
	{"b([sɕ])", "p%1"},
	{"n[ꬶk]", "ŋk"}, {"([ðđʦłɕꬶrkt])v", "%1wː"},
	{"[đt]n", "tnː"}, {"[đt]r", "tx"}, {"[đt][ꬶk]v", "tkwː"},
	{"(ʦ[tđlmn])", "%1ː"}, {"ʦ[ꬶk]v", "ʦkwː"},
	{"ʦxv", "ʦxw"}, {"[đt]x", "tːx"},
	{"(ð[mꬶk])", "%1ː"}, {"ðɣ", "ðɡ"},
	{"[ꝺd]x", "ðx"},
	{"(s[ꬶk]l)", "%1ː"}, {"sʔk", "skː"}, {"sxv", "sxwː"}, {"s[ꬶk]", "sk"},
	{"([lr])ʔ([ptk])", "%1%2ː"},
	{"lʦ(x?)v", "ɬʦ%1w"}, {"lʦx", "ɬʦx"},
	{"(ł[mnꬶk])", "%1ː"},
	{"([ꬶk][msɕ])", "%1ː"}, {"[ꬶk]s[ꬶk]", "kskː"}, {"[ꬶk]sl", "ksł"},
	{"[ɣɡ]([mn])", "ŋ%1ː"}, {"[ɣɡ]([vsl])", "k%1"},

	-- closed vowels
	-- vowel run followed by two consonants (a "·" and/or "ˈ" may sit between them)
	{"(" .. vowel .. "*)(·?" .. consonant .. "·?ˈ?" .. consonant .. ")", function(s1, s2) return open_to_closed(s1) .. s2 end},
	-- vowel run followed by a single consonant at the very end of the term
	{"(" .. vowel .. "*)(" .. consonant .. ")$", function(s1, s2) return open_to_closed(s1) .. s2 end},
	-- vowel run followed by (optional "ː" +) one or two consonants and "ɨ"
	{"(" .. vowel .. "*)(ː?" .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end},
	{"(" .. vowel .. "*)(ː?" .. consonant .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end},
	-- term-final "ɑ" becomes "a"
	{"ɑ$", "a"},
}

-- Last rewrite pass, applied by export.crux after the "ü" substitution.
-- Each entry is {pattern, replacement}; entries run in order with gsub over
-- the whole term. This pass expands the single-character placeholders used by
-- the earlier passes and strips the internal markers.
local final = {
	-- k / ɡ / ꬶ directly before a front vowel become c / ɟ
	{"k(["..front_vowel.."])", "c%1"}, {"[ɡꬶ](["..front_vowel.."])", "ɟ%1"},
	{"(" .. vowel .. ")[đt]$", "%1ʔ%1"}, -- -Vt becomes -VʔV (or -Vht, not considered)
	
	--undo ligatures
	{"ʨ", "t͡ɕ"}, {"ʥ", "d͡ʑ"}, {"ł", "tɬ"}, {"ʣ", "d͡z"}, {"ʦ", "t͡s"},
	{"ƀ", "p"}, {"ꝺ", "d"}, {"đ", "t"}, {"ꬶ", "ɡ"}, {"ɉ", "ɟ"},  {"õ", "ɔ̃"},
	 {"ṁ", "m̥"}, {"ṅ", "n̥"}, {"ħ", "h"},
	 
	-- same k/ɡ rewrite again, now that the placeholders above have been expanded
	{"k(["..front_vowel.."])", "c%1"}, {"ɡ(["..front_vowel.."])", "ɟ%1"}, {"tɟ", "tc"},
	-- remove morpheme separator, possible double long vowel markers, and repeated secondary stress markers
	{"·", ""}, {"ːː", "ː"}, {"(ˈ[^ˌ]*)ˌ", "%1"}, {"-", "‿"},
}

--- Convert one orthographic word into its IPA rendering.
-- Pipeline: lowercase -> stress marking (export.morphemes) -> rules1 ->
-- dialect-specific substitutions -> rules2 -> "ü" resolution -> final.
-- @param term a single word (no spaces), morphemes optionally separated by "·"
-- @param a accepted for symmetry with the callers; never read in this function
-- @param e truthy to apply the extra substitutions of the `e` branch
-- @param w truthy to apply the `w` branch (takes priority over `e`)
-- @return the transcribed string (without surrounding brackets)
function export.crux(term, a, e, w)
	term = export.morphemes(mw.ustring.lower(term))

	-- Run an ordered list of {pattern, replacement} pairs over the term.
	local function apply(rules)
		for _, pair in ipairs(rules) do
			term = gsub(term, pair[1], pair[2])
		end
	end

	apply(rules1)

	if w then
		term = gsub(term, "ꬶl", "ʔł")
		apply(anaptyctic)
	else
		if e then
			term = gsub(term, "(.)⁽ʰ⁾", "%1ʼ")
			term = gsub(term, "([đƀ])r", "%1ʼqʼ")
			term = gsub(term, "ʦx", "ʦʼqʼ")
		end
		-- shared by the `e` branch and the default branch
		term = gsub(term, "ꬶl", "klː")
	end

	apply(rules2)

	-- "ü" is the placeholder written by open_to_closed / the diphthong rules
	term = gsub(term, "ü", w and "ɤ" or "œ")

	apply(final)

	return term
end



--- Prefix the rendered pronunciation spans with the "IPA (key):" label.
-- @param items list of tables with a `pron` field, forwarded to IPA_span
-- @return wikitext: label links followed by the spans
function format_IPA(items)
	local label = "[[w:IPA chart|IPA]]<sup>([[IPA for Siwa|key]])</sup>:&#32;"
	local spans = IPA_span(items)
	return label .. spans
end

--- Wrap each pronunciation in a styled <span>.
-- The style attribute was previously emitted without its closing double
-- quote (`...sans-serif>`), producing malformed markup; the quote is restored.
-- @param items list of tables, each with a `pron` string
-- @return the spans concatenated with no separator ("" for an empty list)
function IPA_span(items)
	local span_open = "<span style=\"font-size:110%;font-family:Gentium,'DejaVu Sans','Segoe UI',sans-serif\">"
	local bits = {}
	for _, item in ipairs(items) do
		bits[#bits + 1] = span_open .. item.pron .. "</span>"
	end
	return table.concat(bits)
end

--- Format one output line: dialect label(s) followed by the bracketed IPA.
-- @param pronunciation transcription without brackets
-- @param dialect sequence of dialect names, joined with ", " inside (''...'')
-- @return wikitext for the line (without the leading bullet)
function line_format(pronunciation, dialect)
	local bracketed = { pron = '[' .. pronunciation .. ']' }
	local rendered = format_IPA({ bracketed })
	local label = "(''" .. table.concat(dialect, ", ") .. "'')"
	return label .. ' ' .. rendered
end

--- Transcribe a multi-word term by running export.crux on each word.
-- @param term words separated by single spaces
-- @param a, e, w flags forwarded unchanged to export.crux
-- @return the transcribed words re-joined with single spaces
function separate_word(term, a, e, w)
	local pieces = {}
	for word in gsplit(term, " ") do
		pieces[#pieces + 1] = export.crux(word, a, e, w)
	end
	return table.concat(pieces, " ")
end

--- Template entry point: build the bulleted pronunciation list plus categories.
-- Arguments are read from the parent frame through Module:parameters:
--   1 = term (defaults to the page title, or to a sample word in the Template namespace)
--   a, e, w = booleans (default true) enabling the Aingo / Eastern / Western lines
-- Output is identical to the previous implementation. The page title, the
-- detected dialect and the Aingo baseline rendering are now computed once
-- instead of being recomputed at every use (the baseline lazily, only when a
-- branch actually needs it).
-- NOTE(review): when detect_dialect returns "e" or "w" and the matching
-- args.e / args.w comparison below also succeeds, the same dialect is rendered
-- twice (and with args.a false, without a line break) — confirm intended.
-- @param frame the Scribunto frame object
-- @return wikitext string
function export.show(frame)
	local title = mw.title.getCurrentTitle()
	local in_template_ns = title.nsText == 'Template'

	local params = {
		[1] = { default = in_template_ns and "uįo·sauṡṡi" or title.text },
		["a"] = {type = 'boolean', default = true},
		["e"] = {type = 'boolean', default = true},
		["w"] = {type = 'boolean', default = true},
	}

	local args = require("Module:parameters").process(frame:getParent().args, params)
	local term = args[1]
	local categories = {}

	-- Categories are only added outside the Template namespace.
	local function categorize(names)
		if in_template_ns then
			return
		end
		for _, name in ipairs(names) do
			table.insert(categories, name)
		end
	end

	-- Baseline (Aingo) rendering, memoised because up to three branches need it.
	local aingo_cache
	local function aingo()
		if aingo_cache == nil then
			aingo_cache = separate_word(term, true, false, false)
		end
		return aingo_cache
	end

	local ipa = "* "
	local dialect = detect_dialect(term)

	if dialect == "e" then
		ipa = ipa .. line_format(separate_word(term, false, true, false), {'Eastern'})
		categorize({"Eastern Siwa lemmas", "Siwa lemmas with Eastern IPA pronunciation"})
	elseif dialect == "w" then
		ipa = ipa .. line_format(separate_word(term, false, false, true), {'Western'})
		categorize({"Western Siwa lemmas", "Siwa lemmas with Western IPA pronunciation"})
	elseif args.a then
		ipa = ipa .. line_format(aingo(), {'Aingo'})
	end

	-- Eastern line: only shown when it differs from the baseline.
	if args.e and aingo() ~= separate_word(term, false, true, false) then
		if args.a then
			ipa = ipa .. "\n* "
		end
		ipa = ipa .. line_format(separate_word(term, true, true, false), {'Eastern'})
		categorize({"Siwa lemmas with Eastern IPA pronunciation"})
	end

	-- Western line: only shown when it differs from the baseline.
	if args.w and aingo() ~= separate_word(term, false, false, true) then
		if args.a or args.e then
			ipa = ipa .. "\n* "
		end
		ipa = ipa .. line_format(separate_word(term, true, false, true), {'Western'})
		-- If any "ɤ" is present in the output so far, add a nested Regna line with "ɵ" instead.
		if find(ipa, "ɤ") then
			ipa = ipa .. "\n** "
			ipa = ipa .. line_format(gsub(separate_word(term, false, false, true),"ɤ","ɵ"), {'Regna'})
		end
		categorize({"Siwa lemmas with Western IPA pronunciation"})
	end

	return ipa .. ncategories(categories)
end

return export