Module:siwa-pron: Difference between revisions

From Linguifex
Jump to navigation Jump to search
No edit summary
No edit summary
Line 287: Line 287:
ipa = ipa .. line_format(separate_word(term, false, false, true), {args.dia or 'Western'})
ipa = ipa .. line_format(separate_word(term, false, false, true), {args.dia or 'Western'})
if mw.title.getCurrentTitle().nsText ~= 'Template' then  
if mw.title.getCurrentTitle().nsText ~= 'Template' then  
table.insert(categories, "Western Siwa lemmas")
table.insert(categories, "Western Siwa terms")
table.insert(categories, "Siwa lemmas with Western IPA pronunciation")
table.insert(categories, "Siwa terms with Western IPA pronunciation")
end
end
elseif detect_dialect(term) == "e" then
elseif detect_dialect(term) == "e" then
Line 294: Line 294:
ipa = ipa .. line_format(separate_word(term, false, true, false), {args.dia or 'Eastern'})
ipa = ipa .. line_format(separate_word(term, false, true, false), {args.dia or 'Eastern'})
if mw.title.getCurrentTitle().nsText ~= 'Template' then  
if mw.title.getCurrentTitle().nsText ~= 'Template' then  
table.insert(categories, "Eastern Siwa lemmas")
table.insert(categories, "Eastern Siwa terms")
table.insert(categories, "Siwa lemmas with Eastern IPA pronunciation")
table.insert(categories, "Siwa terms with Eastern IPA pronunciation")
end
end
elseif args.a then
elseif args.a then
Line 307: Line 307:
ipa = ipa .. line_format(separate_word(term, true, true, false), {args.dia or 'Eastern'})
ipa = ipa .. line_format(separate_word(term, true, true, false), {args.dia or 'Eastern'})
if mw.title.getCurrentTitle().nsText ~= 'Template' then  
if mw.title.getCurrentTitle().nsText ~= 'Template' then  
table.insert(categories, "Siwa lemmas with Eastern IPA pronunciation")
table.insert(categories, "Siwa terms with Eastern IPA pronunciation")
end
end
end
end
Line 321: Line 321:
end
end
if mw.title.getCurrentTitle().nsText ~= 'Template' then  
if mw.title.getCurrentTitle().nsText ~= 'Template' then  
table.insert(categories, "Siwa lemmas with Western IPA pronunciation")
table.insert(categories, "Siwa terms with Western IPA pronunciation")
end
end
end
end

Revision as of 17:38, 21 August 2021



local m_sm = mw.loadData("Module:siwa-pron/data")

local sub = mw.ustring.sub
local find = mw.ustring.find
local gmatch = mw.ustring.gmatch
local gsub = mw.ustring.gsub
local u = mw.ustring.char
local split = mw.text.split
local gsplit = mw.text.gsplit

local export = {}

local UNR = u(0x031A) -- COMBINING LEFT ANGLE ABOVE. ̚

--obsolete ligatures and L with stroke used to remove two-character hassle. will replace later
local consonant = "[mnɲŋpbtdcɟkɡʔvðsɕxɣhħʨʥrlɬⱡłʣʦʔƀꝺđꬶɉʼⱦ]" .. UNR .. "?"
local front_vowel = "iɪyeøɛœæ"
local back_vowel = "uɔõɑʊɤɯ"
local vowel = "[" .. front_vowel .. back_vowel .. "a]"

local unrelaxed = {
	["ạ"] = "a", ["ẹ"] = "e", ["ị"] = "i", ["ọ"] = "o",	["ụ"] = "u", ["ỵ"] = "y",
}

function spat(c)
	return 	"(·?ˈ[mnɲŋpbtdcɟkɡʔvðsɕxɣhʨʥrlɬłⱡʣjwʦ⁽ʰ⁾ʔːƀꝺđꬶɉ]*"..UNR.."?"..vowel..")" .. c .. "([^ː])"
end	

local function ncategories(categories)
	local out_categories = {}
	for key, cat in ipairs(categories) do
		out_categories[key] = "[[Category:" .. cat .. "]]"
	end

	return table.concat(out_categories, "")
end

local function open_to_closed(v)
	local otc = {}
	local switch = {["ɑ"] = "a", ["æ"] = "æ", ["e"] = "ɛ", ["i"] = "ɪ",
		["ɔ"] = "ɔ", ["õ"] = "õ", ["u"] = "ʊ", ["y"] = "œ", ["ɯ"] = "ɯ",
		["ø"] = "ü", -- dialectal variation, will be changed later
		["a"] = "a", ["ɛ"] = "ɛ", ["ɪ"] = "ɪ",
		["ʊ"] = "ʊ", ["ü"] = "ü", ["œ"] = "œ",}
		 
	for vc in gmatch(v, ".") do
		vc = gsub(vc, vc, switch[vc])
		table.insert(otc, vc)
	end
	return table.concat(otc)
end

function export.morphemes(word)
	local pss = {}

	if gmatch(word,"·") then
		pss = split(word,"·")
	end
	
	for i, m in ipairs(pss) do
		if m_sm.suffix[m] and gmatch(table.concat(pss),"[ˈˌ]") then
		elseif m_sm.prefix[m] then
			pss[i] = "ˌ" .. pss[i]
		else
			pss[i] = "ˈ" .. pss[i]
		end
	end
	
	local _, n = gsub(table.concat(pss,"·"), "ˈ", "")
	
	return n>=2 and gsub(table.concat(pss,"·"), "ˈ", "", n-1) or table.concat(pss, "·")
end

local function detect_dialect(term)
	if find(term, "[ṁṅłƛ]") then
		return "w"
	elseif find(term, "̊") then
		return "e"
	end
end

local anaptyctic = {
	{"mn", "ːmɨnː"}, {"mʔk", "ːmɨʔkː"},
	
	{"([^ˈ])ʦ([nxm])", "%1ːʣɨ%2ː"}, {"ʦꬶ", "ːʣɨʔkː"}, {"([^ˈ])ʦꬶv", "%1ːʣɨkwː"},
	{"([^ˈ])ʦv", "%1ːʣɨwː"}, {"ʦđ", "ːʣɨʔtː"},
	
	{"đꬶ", "ːdɨʔkː"}, {"đꬶv", "ːdɨkwː"}, {"đ([xn])", "ːdɨ%1ː"},
	{"([^ˈ])đv", "%1ːdɨwː"},
	
	{"sꬶv", "skʔɨwː"}, {"sʔk", "ːsɨʔkː"}, {"ɕꬶv", "ɕkɨwː"},
	
	{"([lr])(ʔ[kpt])", "ː%1ɨ%2ː"}, {"ⱡ([mnx])", "ːⱡɨ%1ː"}, {"ꝺx", "ːðɨɣː"},
	
	{"ðꬶ", "ːðɨʔkː"}, {"ɣ([nm])", "ːŋɨ%1ː"}, {"([ⱡrð])v", "ː%1ɨwː"},
	
	{"x([mnl])", "ːɣɨ%1ː"}, {"xv", "ːɣɨwː"}, {"ƀ([msɕ])", "ːbɨ%1ː"},
	
	{"([^ˈ])ꬶ([msɕl])", "%1ːɡɨ%2ː"}, {"(.)ꬶv", "%1ːɡɨwː"}, {"ꬶsꬶ", "ːksɨʔkː"},
}

local rules1 = {
	{".", {
		["ḍ"] = "ð", ["ṡ"] = "ɕ",
		["ḥ"] = "ʔ", ["į"] = "j",
		["ġ"] = "x", ["g"] = "ɡ", -- IPA g
		["ų"] = "w", ["ł"] = "ɬ",
		
	}},
	{"t[ṡɕ]", "ʨ"}, {"dj", "ʥ"}, {"dl", "ⱡ"}, {"kj", "c"}, {"nj", "ɲ"},  {"ts", "ʦ"}, {"o̊", "ɯ"},
	
	{"([ạẹịọụỵ])", function(v) return unrelaxed[v] .. "ⱦ" end},
	
	-- long consonants
	{"mm", "mː"}, {"bb", "pː"}, {"vv", "wː"}, {"nn", "nː"},
	{"dʥ", "ʥː"}, {"dd", "tː"}, {"ðð", "ðː"}, {"ss", "sː"},
	{"ɕɕ", "ɕː"}, {"rr", "rː"}, {"ll", "lː"}, {"ɡɡ", "kː"},
	{"xx", "xː"}, {"nɡ", "ŋː"}, {"hh", "hː"}, {"ʔʔ", "ʔː"},
	{"nɲ", "ɲː"}, {"hl", "ɬː"}, {"ƛƛ", "ɬː"}, 
	
	-- default all consonants to unstressed. [] with stroke and ꬶ (U+AB36) to tell apart natural and stress-borne
	{"p", "ƀ"}, {"d", "ꝺ"}, {"t", "đ"}, {"ɡ", "ɣ"}, {"k", "ꬶ"},
	
	-- default all vowels to open (open-closed distinctions are computed later); long vowels don't have closed counterparts
	{"a", "ɑ"}, {"ę", "æ"}, {"o", "ɔ"}, {"ů", "ø"}, -- õ is conserved to avoid two characters
	{"ả", "æː"}, {"ẻu", "øː"}, {"ẻ", "eː"}, {"ỉ", "iː"}, {"ỏ", "ʊː"}, {"ủ", "uː"}, {"ỷ", "yː"}, {"õu", "õː"},
	{"ey", "eø"}, {"ɑy", "æø"}, -- y-final diphthongs
	
	{"^(ˈ?)ꬶ([" .. front_vowel .. "])", "%1c%2"}, 
	{"^(ˈ?)ɣ([" .. front_vowel .. "])", "%1ɟ%2"}, -- word-initial [k] and [g] palatalize before front vowels 
	{"^(ˈ?[ƀđꬶc])", "%1ʰ"}, -- voiceless stops word-initially become aspirated
	{"(.*·ˈ[ƀđꬶc])", "%1⁽ʰ⁾"}, -- although after a prefix they may or may not be
	{"^(ˈ?)ɣj([" .. front_vowel .. "])", "%1ʣ%2"}, -- <gį> word-initially and before front vowels is pronounced [d͡z]
	{"^(ˈ?)ɣj([" .. back_vowel .. "])", "%1ɟ%2"}, -- and [ɟ] before back vowels
	
	-- preaspirated consonants
	{"[hʔ](ʦ[kꬶx])", "ħ%1"}, {"[hʔ]([tđ][vx])", "ħ%1"},
	{"h(ː[wj])", "ħ%1"},
	{"[hʔ]([pbtdkmnlsrʦꬶđƀꝺɣ][^" .. UNR .. "])", "ħ%1"},
	
	{"ƀƀ", "ʔp"}, {"pƀ", "ʔp"},
	{"đđ", "ʔt"}, {"tđ", "ʔt"},
	{"ꬶꬶ", "ʔk"}, {"kꬶ", "ʔk"},
	{"bm", "ʔp" .. UNR .. "m"}, {"ꝺn", "ʔt" .. UNR .. "n"}, {"ꬶn", "ʔk" .. UNR .. "ŋ"},
	
	-- other stem- and/or word-initial configurations
	{"([^ˈ])đi", "%1ʨi"}, {"([^ˈ])ꝺi", "%1ʥi"}, {"([^ˈ])ɣj", "%1jː"}, {"([^ˈ])ɣjː", "%1ɟː"},
	{"^([ˈˌ]?)ƀ", "%1p"}, {"^([ˈˌ]?)đ", "%1t"}, {"^([ˈˌ]?)ꬶ", "%1k"}, {"^([ˈˌ]?)ꝺ", "%1d"}, {"([ˈˌ])ɣ", "%1ɡ"}
}

local rules2 = {
	-- stressed consonants (coda of stressed syllables without long vowels, diphthongs, or triphthongs)
	{spat("h"), "%1ʔ%2"}, {spat("ꬶ"), "%1k%2"}, {spat("ƀ"), "%1p%2"}, {spat("đ"), "%1t%2"}, {spat("ꝺ"), "%1d%2"}, {spat("ɣ"), "%1ɉ%2"},
	
	-- internal consonant clusters
	{"pp", "ʔp"},
	{"tt", "ʔt"},
	{"k[kc]", "ʔk"},-- {"kc", "ʔc"},
	{"mn", "mnː"}, {"mʔk", "mkː"},
	{"[ƀp]([msɕ])", "p%1ː"}, {"[ƀp]r", "px"},
	{"b([sɕ])", "p%1"},
	{"n[ꬶk]", "ŋk"}, {"([ðđʦⱡɕꬶrkt])v", "%1wː"},
	{"[đt]n", "tnː"}, {"[đt]r", "tx"}, {"[đt][ꬶk]v", "tkwː"},
	{"(ʦ[tđlmn])", "%1ː"}, {"ʦ[ꬶk]v", "ʦkwː"},
	{"ʦxv", "ʦxw"}, {"[đt]x", "tːx"},
	{"(ð[mꬶk])", "%1ː"}, {"ðɣ", "ðɡ"},
	{"[ꝺd]x", "ðx"},
	{"(s[ꬶk]l)", "%1ː"}, {"sʔk", "skː"}, {"sxv", "sxwː"}, {"s[ꬶk]", "sk"},
	{"([lr])ʔ([ptk])", "%1%2ː"},
	{"lʦ(x?)v", "ɬʦ%1w"}, {"lʦx", "ɬʦx"},
	{"(ⱡ[mnꬶk])", "%1ː"},
	{"([ꬶk][msɕ])", "%1ː"}, {"[ꬶk]s[ꬶk]", "kskː"}, {"[ꬶk]sl", "ksⱡ"},
	{"[ɣɡ]([mn])", "ŋ%1ː"}, {"[ɣɡ]([vsl])", "k%1"},

	-- closed vowels
	{"(" .. vowel .. "*)(·?" .. consonant .. "·?ˈ?" .. consonant .. ")", function(s1, s2) return open_to_closed(s1) .. s2 end},
	{"(" .. vowel .. "*)(" .. consonant .. ")$", function(s1, s2) return open_to_closed(s1) .. s2 end},
	{"(" .. vowel .. "*)(ː?" .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end},
	{"(" .. vowel .. "*)(ː?" .. consonant .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end},
	{"(" .. vowel .. "*)(" .. consonant .. "ː" .. consonant .. ")", function(s1, s2) return open_to_closed(s1) .. s2 end},
	{"ɑ$", "a"},
}

local final = {
	{"k(["..front_vowel.."])", "c%1"}, {"[ɡꬶ](["..front_vowel.."])", "ɟ%1"},
	{"([iɪ])[đt]$", "%1ʨ"}, {"(" .. vowel .. ")[đt]$", "%1ʔ%1̆"}, {"(" .. vowel .. "ː)[đt]$", "%1ʔ"}, -- -Vt becomes -VʔV̆ or -V̄ʔ
	{"õːʔ", "õʔ"},
	
	--undo ligatures
	{"ʨ", "t͡ɕ"}, {"ʥ", "d͡ʑ"}, {"ⱡ", "tɬ"}, {"ʣ", "d͡z"}, {"ʦ", "t͡s"},
	{"ƀ", "p"}, {"ꝺ", "d"}, {"đ", "t"}, {"ꬶ", "ɡ"}, {"õ", "ɔ̃"},
	{"ṁ", "m̥"}, {"ṅ", "n̥"}, {"ħ", "h"}, {"ɡ⁽", "k⁽"},
	 
	{"k(ː?["..front_vowel.."])", "c%1"}, {"ɡ(["..front_vowel.."])", "ɟ%1"}, {"ɡ(ː[" ..front_vowel.."])", "c%1"}, {"tɟ", "tc"}, {"ƛ", "tɬʼ"},
	{"ɉi", "ji"}, {"ɉ", "ɣ"},
	-- remove morpheme separator, possible double long vowel markers, and repeated secondary stress markers
	{"[·ⱦ]", ""}, {"ːː", "ː"}, {"(ˈ[^ˌ]*)ˌ", "%1"}, {"-", "‿"},
}

function export.crux(term, a, e, w)
	term=mw.ustring.lower(term)
	term=export.morphemes(term)
	
	for _, rule in ipairs(rules1) do
		term = gsub(term, rule[1], rule[2])
	end

	if w then
		term = gsub(term, "ꬶl", "ʔⱡ")
		for _, anap in ipairs(anaptyctic) do
			term = gsub(term, anap[1], anap[2])
		end
	elseif e then
		term = gsub(term, "(.)⁽ʰ⁾", "%1ʼ")
		term = gsub(term, "([đƀ])r", "%1ʼqʼ")
		term = gsub(term, "ʦx", "ʦʼqʼ")
		term = gsub(term, "ꬶl", "klː")
	else
		term = gsub(term, "ꬶl", "klː")
	end
	
	for _, rrule in ipairs(rules2) do
		term = gsub(term, rrule[1], rrule[2])
	end
	
	if w then
		term = gsub(term, "ü", "ɤ")
	else
		term = gsub(term, "ü", "œ")
	end
	
	for _, f in ipairs(final) do
		term = gsub(term, f[1], f[2])
	end
	
	return term
end

function format_IPA(items)
	return "[[w:IPA chart|IPA]]<sup>([[IPA for Siwa|key]])</sup>:&#32;" .. IPA_span(items)
end

function IPA_span(items)
	local bits = {}
	for _, item in ipairs(items) do
		local bit = "<span style=\"font-size:110%;font-family:Gentium,'DejaVu Sans','Segoe UI',sans-serif>" .. item.pron .. "</span>"
		table.insert(bits, bit)
	end
	return table.concat(bits)
end

function line_format(pronunciation, dialect)
	local full_pronunciations = {}
	local IPA_args = {{pron = '[' .. pronunciation .. ']'}}
	table.insert(full_pronunciations, format_IPA(IPA_args))
	return "(''" .. table.concat(dialect, ", ") .. "'')" .. ' ' .. table.concat(full_pronunciations, ' or ')
end

function separate_word(term, a, e, w)
	local result = {}
	
	for word in gsplit(term, " ") do
		table.insert(result, export.crux(word, a, e, w))
	end
	
	return table.concat(result, " ")
end

function export.show(frame)
	local params = {
		[1] = { default = mw.title.getCurrentTitle().nsText == 'Template' and "uįo·sauṡṡi" or mw.title.getCurrentTitle().text },
		["a"] = {type = 'boolean', default = true},
		["e"] = {type = 'boolean', default = true},
		["w"] = {type = 'boolean', default = true},
		["dia"] = {},
		["rs"] = {type = 'boolean'},
	}
	
	local args = require("Module:parameters").process(frame:getParent().args, params)
	local term = args[1]
	local categories = {}
	
	local ipa = "* "
	
	if detect_dialect(term) == "w" then
		args.e = false; args.a = false
		ipa = ipa .. line_format(separate_word(term, false, false, true), {args.dia or 'Western'})
		if mw.title.getCurrentTitle().nsText ~= 'Template' then 
			table.insert(categories, "Western Siwa terms")
			table.insert(categories, "Siwa terms with Western IPA pronunciation")
		end
	elseif detect_dialect(term) == "e" then
		args.w = false; args.a = false
		ipa = ipa .. line_format(separate_word(term, false, true, false), {args.dia or 'Eastern'})
		if mw.title.getCurrentTitle().nsText ~= 'Template' then 
			table.insert(categories, "Eastern Siwa terms")
			table.insert(categories, "Siwa terms with Eastern IPA pronunciation")
		end
	elseif args.a then
		ipa = ipa .. line_format(separate_word(term, true, false, false), {args.dia or 'Aingo'})
	end
	
	if args.e and separate_word(term, true, false, false) ~= separate_word(term, false, true, false) then
		if args.a  then
			ipa = ipa .. "\n* "
		end
		ipa = ipa .. line_format(separate_word(term, true, true, false), {args.dia or 'Eastern'})
		if mw.title.getCurrentTitle().nsText ~= 'Template' then 
			table.insert(categories, "Siwa terms with Eastern IPA pronunciation")
		end
	end
	
	if args.w and separate_word(term, true, false, false) ~= separate_word(term, false, false, true) then
		if args.a or args.e then
			ipa = ipa .. "\n* "
		end
		ipa = ipa .. line_format(separate_word(term, true, false, true), {args.dia or 'Western'})
		if find(ipa, "ɤ") then
			ipa = ipa .. "\n** "
			ipa = ipa .. line_format(gsub(separate_word(term, false, false, true),"ɤ","ɵ"), {args.dia or 'Regna'})
		end
		if mw.title.getCurrentTitle().nsText ~= 'Template' then 
			table.insert(categories, "Siwa terms with Western IPA pronunciation")
		end
	end
	
	if args.rs then ipa = gsub(ipa, "[ˌˈ]", "") end
	
return ipa .. ncategories(categories)
end

return export