Module:siwa-pron: Difference between revisions

From Linguifex
Jump to navigation Jump to search
No edit summary
No edit summary
Line 13: Line 13:
local u = mw.ustring.char
local u = mw.ustring.char
local split = mw.text.split
local split = mw.text.split
local function rsub(term, foo, bar, n)
local retval = gsub(term, foo, bar, n)
return retval
end


local UNRELEASED = u(0x031A) -- COMBINING LEFT ANGLE ABOVE. ̚
local UNRELEASED = u(0x031A) -- COMBINING LEFT ANGLE ABOVE. ̚

Revision as of 19:35, 8 January 2021



local export = {}

local lang = "Siwa"
local m_IPA = require("Module:IPA")
local m_su = require("Module:string utilities")
local m_table = require("Module:table")
local m_sm = mw.loadData("Module:siwa-pron/data")

local sub = mw.ustring.sub
local find = mw.ustring.find
local gmatch = mw.ustring.gmatch
local gsub = mw.ustring.gsub
local u = mw.ustring.char
local split = mw.text.split

-- Substitution helper: delegates to `gsub` and hands back only the rewritten
-- string, discarding the match count that `gsub` returns as a second value.
-- `term` is the subject, `foo` the pattern, `bar` the replacement and `n` an
-- optional cap on the number of replacements (all forwarded unchanged).
local function rsub(term, foo, bar, n)
	return (gsub(term, foo, bar, n))
end

-- Combining diacritics spliced into the patterns below.
local UNRELEASED = u(0x031A) -- COMBINING LEFT ANGLE ABOVE. ̚
local NASALIZED = u(0x0303) -- COMBINING TILDE. ̃

-- Obsolete ligatures and L with stroke are used as single-character stand-ins to
-- avoid the two-character hassle; export.crux expands them again in its
-- "undo ligatures" rules. (Original author's note: "will replace later".)
local consonant = "[mnɲŋpbtdcɟkɡʔvðsɕxɣhʨʥrlɬłʣjwʦʔƀꝺđɣꬶɉ]" .. UNRELEASED .. "?"
local front_vowel = "iɪyeøɛœæa"
-- front_vowel and back_vowel are only ever interpolated inside a [...] character
-- class, where "?" is a literal question mark rather than a quantifier. The
-- stray "?" that used to follow NASALIZED therefore made "?" count as a vowel;
-- the combining tilde itself stays in the class so it is matched like any other
-- class member.
local back_vowel = "uɔ" .. NASALIZED .. "ɑʊɤ"
local vowel = "[" .. front_vowel .. back_vowel .. "ɨ]"

-- Shared first capture of the spat* patterns: optional morpheme separator, primary
-- stress mark, any run of onset characters, optional unreleased mark, one vowel.
local stressed_onset = "(·?ˈ[mnɲŋpbtdcɟkɡʔvðsɕxɣhʨʥrlɬłʣjwʦ⁽ʰ⁾ʔːƀꝺđɣꬶɉ]*" .. UNRELEASED .. "?" .. vowel .. ")"

-- Builds a three-capture pattern: stressed onset + vowel, the given coda
-- character, and one following character that is not a length mark.
local function stressed_coda_pattern(coda)
	return stressed_onset .. "(" .. coda .. ")([^ː])"
end

local spat1 = stressed_coda_pattern("h")
local spat2 = stressed_coda_pattern("ꬶ")
local spat3 = stressed_coda_pattern("ƀ")
local spat4 = stressed_coda_pattern("đ")
local spat5 = stressed_coda_pattern("ꝺ")

-- Turns a list of category names into wikitext: every name is wrapped as
-- "[[Category:<name>]]" and the results are joined with no separator.
-- An empty list yields the empty string.
local function ncategories(categories)
	local links = {}
	for position, name in ipairs(categories) do
		local link = "[[Category:" .. name .. "]]"
		links[position] = link
	end
	return table.concat(links)
end

-- Converts every open vowel in `v` to its closed counterpart, character by character.
--   v: string of vowel characters (may be empty).
--   w: when truthy, "ø" closes to "ɤ"; otherwise it closes to "œ".
-- Returns the converted string. Characters with no entry in the mapping (already
-- closed vowels, combining marks, anything else) are passed through unchanged.
-- Previously such characters were handed to gsub with a nil replacement, which
-- raises an error.
local function open_to_closed(v, w)
	local closed_for = {
		["ɑ"] = "a", ["e"] = "ɛ", ["i"] = "ɪ", ["ɔ"] = "ɔ", ["u"] = "ʊ", ["y"] = "œ",
		["ø"] = w and "ɤ" or "œ",
	}

	local otc = {}
	for vc in gmatch(v, ".") do
		-- Plain table lookup: no pattern matching is needed for a single character.
		otc[#otc + 1] = closed_for[vc] or vc
	end
	return table.concat(otc)
end

-- Splits `word` on the morpheme separator "·" and prefixes each morpheme with a
-- stress mark, then joins the morphemes back together with "·".
--   * a morpheme listed in m_sm.suffix is left unmarked, but only when a stress
--     mark (ˈ or ˌ) is already present somewhere in the word;
--   * a morpheme listed in m_sm.prefix gets secondary stress "ˌ";
--   * everything else gets primary stress "ˈ".
-- The original tested `gmatch(...)` for truthiness; gmatch returns an iterator
-- function, which is always truthy, so suffixes were never stressed regardless
-- of context. `find` returns nil when there is no match, which is what the
-- condition needs.
function export.morphemes(word)
	-- split() returns the whole word as a single element when no "·" is present.
	local pss = split(word, "·")

	for i, m in ipairs(pss) do
		local is_suffix = m_sm.suffix[m]
		if is_suffix and find(table.concat(pss), "[ˈˌ]") then
			-- suffix following an already stressed morpheme: stays unstressed
		elseif m_sm.prefix[m] then
			pss[i] = "ˌ" .. m
		else
			pss[i] = "ˈ" .. m
		end
	end

	return table.concat(pss, "·")
end

-- Converts an orthographic term into a phonetic transcription by lowercasing it,
-- marking stress per morpheme (export.morphemes) and then applying an ordered
-- list of substitution rules.
--   term: the orthographic form, with "·" between morphemes.
--   e, w: dialect flags. A rule whose third element is a dialect list is applied
--         only when the matching flag is truthy ("e" for `e`, "w" for `w`);
--         untagged rules always apply.
-- Returns the transformed string.
--
-- Fixes relative to the previous version:
--   * the loop wrote to an undefined global `word` and returned `term` untouched;
--     it now rewrites and returns `term`;
--   * the dialect check ignored the `e`/`w` arguments, so rules tagged {"w"} ran
--     for every dialect and shadowed the general rule that follows them.
function export.crux(term, e, w)
	term=mw.ustring.lower(term)
	term=export.morphemes(term)
	
	-- Each rule is {pattern, replacement[, dialects]}; order matters.
	local rules = {
		{".", {
			["ḍ"] = "ð", ["ṡ"] = "ɕ",
			["ḥ"] = "ʔ", ["į"] = "j",
			["ġ"] = "x", ["g"] = "ɡ", -- IPA g
		}},
		{"t[ṡɕ]", "ʨ"}, {"dj", "ʥ"}, {"dl", "ł"}, {"kj", "c"}, {"nj", "ɲ"},  {"ts", "ʦ"},
		
		-- long consonants
		{"mm", "mː"}, {"bb", "pː"}, {"vv", "wː"}, {"nn", "nː"},
		{"dʥ", "ʥː"}, {"dd", "tː"}, {"ðð", "ðː"}, {"ss", "sː"},
		{"ɕɕ", "ɕː"}, {"rr", "rː"}, {"ll", "lː"}, {"ɡɡ", "kː"},
		{"xx", "xː"}, {"nɡ", "ŋː"}, {"hh", "hː"}, {"ʔʔ", "ʔː"},
		{"nɲ", "ɲː"}, {"hl", "ɬː"},
		
		-- default all consonants to unstressed. [] with stroke and ꬶ (U+AB36) to tell apart natural and stress-borne
		{"p", "ƀ"}, {"d", "ꝺ"}, {"t", "đ"}, {"ɡ", "ɣ"}, {"k", "ꬶ"},
		
		-- default all vowels to open (open-closed distinctions are computed later); long vowels don't have closed counterparts
		-- NOTE(review): "ẻu" and "õu" are listed after "ẻ" and "õ" have already been
		-- rewritten, so those two digraph rules look unreachable — confirm intended order.
		{"a", "ɑ"}, {"ę", "æ"}, {"o", "ɔ"}, {"ů", "ø"}, {"õ", "ɔ̃"},
		{"ả", "æː"}, {"ẻ", "eː"}, {"ỉ", "iː"}, {"ỏ", "ʊː"}, {"ủ", "uː"}, {"ỷ", "yː"}, {"ẻu", "øː"}, {"õu", "ɔ̃ː̃"},
		
		{"^(ˈ)ꬶ([" .. front_vowel .. "])", "%1c%2"}, -- word-initial [k] palatalizes before front-vowels
		{"^(ˈ[ƀđꬶc])", "%1ʰ"}, -- voiceless stops word-initially become aspirated
		{"^(.*·ˈ[ƀđꬶc])", "%1⁽ʰ⁾"}, -- although after a prefix they may or may not be
		{"^(ˈ)ɣj([" .. front_vowel .. "])", "%1ʣ%2"}, -- <gį> word-initially and before front vowels is pronounced [d͡z]
		{"^(ˈ)ɣj([" .. back_vowel .. "])", "%1ɟ%2"}, -- and [ɟ] before back vowels
		-- word-initial ɣ > ɟ before front vowels
		-- NOTE(review): the original comment read "#_[+back]" although the pattern uses front_vowel — confirm which is intended.
		{"^(ˈ)ɣ([" .. front_vowel .. "])", "%1ɟ%2"},

		-- other stem- and/or word-initial configurations
		-- NOTE(review): "ɣjː" follows "ɣj" and so can presumably never match — confirm intended order.
		{"đi", "ʨi"}, {"ꝺi", "ʥi"}, {"ɣi", "ɉi"}, {"ɣj", "jː"}, {"ɣjː", "ɟː"},
		{"ˈƀ", "ˈp"}, {"ˈđ", "ˈt"}, {"ˈꬶ", "ˈk"}, {"ˈꝺ", "ˈd"},

		-- stressed consonants (coda of stressed syllables without long vowels, diphthongs, or triphthongs)
		{spat1, "%1ʔ%3"}, {spat2, "%1k%3"}, {spat3, "%1p%3"}, {spat4, "%1t%3"}, {spat5, "%1ð%3"},
		
		-- internal consonant clusters
		{"ƀƀ", "ʔp"}, {"pƀ", "ʔp"},
		{"đđ", "ʔt"}, {"tđ", "ʔt"},
		{"ꬶꬶ", "ʔk"}, {"kꬶ", "ʔk"},
		{"bm", "ʔp̚m"}, {"ꝺn", "ʔt̚n"}, {"ꬶn", "ʔk̚ŋ"},
		{"mn", "mnː"}, {"mʔk", "mkː"},
		{"(p[msɕ])", "%1ː"}, {"pr", "px"},
		{"b([sɕ])", "p%1"},
		{"nꬶ", "ŋk"}, {"([ðđʦłɕꬶr])v", "%1wː"},
		{"đn", "tnː"}, {"đr", "tx"}, {"đꬶv", "tkwː"},
		{"(ʦ[đlmn])", "%1ː"}, {"ʦꬶv", "ʦkwː"},
		{"ʦxv", "ʦxw"}, {"đx", "tːx"},
		{"(ð[mꬶ])", "%1ː"},
		{"ꝺx", "ðx"},
		{"(skl)", "%1ː"}, {"sʔk", "skː"}, {"sxv", "sxwː"},
		{"([lr])ʔ([ptk])", "%1%2ː"},
		{"lʦ(x?)v", "ɬʦ%1w"}, {"lʦx", "ɬʦx"},
		{"(ł[mnꬶ])", "%1ː"},
		{"(ꬶ[msɕ])", "%1ː"}, {"ꬶl", "ʔł", {"w"}}, {"ꬶl", "klː"},
		{"ꬶsꬶ", "kskː"}, {"ꬶsl", "ksł"},
		{"ɣ([mn])", "ŋ%1ː"}, {"ɣ([vsl])", "k%1"},

		-- closed vowels
		{"(" .. vowel .. "*)(" .. consonant .. consonant .. ")", function(s1, s2) return open_to_closed(s1, w) .. s2 end, {"w"}},
		{"(" .. vowel .. "*)(" .. consonant .. consonant .. ")", function(s1, s2) return open_to_closed(s1) .. s2 end},
		{"(" .. vowel .. "*)(" .. consonant .. ")$", function(s1, s2) return open_to_closed(s1, w) .. s2 end, {"w"}},
		{"(" .. vowel .. "*)(" .. consonant .. ")$", function(s1, s2) return open_to_closed(s1) .. s2 end},
		{"ɑ$", "a"},
		{"(" .. vowel .. ")đ$", "%1ʔ%1"}, -- -Vt becomes -VʔV (or -Vht, not considered)
	
		--undo ligatures
		{"ʨ", "t͡ɕ"}, {"ʥ", "d͡ʑ"}, {"ł", "tɬ"}, {"ʣ", "d͡z"}, {"ʦ", "t͡s"},
		{"ƀ", "p"}, {"ꝺ", "d"}, {"đ", "t"}, {"ꬶ", "ɡ"}, {"ɉ", "ɟ"},
	
		-- remove morpheme separator and possible double long vowel markers
		{"·", ""}, {"ːː", "ː"},
	}
	
	-- True when a rule with the given dialect tag list applies to this call.
	local function applies(fordialect)
		if not fordialect then
			return true
		end
		if w and m_table.contains(fordialect, "w") then
			return true
		end
		if e and m_table.contains(fordialect, "e") then
			return true
		end
		return false
	end
	
	for _, rule in ipairs(rules) do
		if applies(rule[3]) then
			term = rsub(term, rule[1], rule[2])
		end
	end
	
	return term
end

-- Prefixes the rendered pronunciation spans with the linked "IPA (key)" label.
--   items: list passed straight through to IPA_span.
-- Returns the label followed by the concatenated spans.
function format_IPA(items)
	local label = "[[w:IPA chart|IPA]]<sup>([[IPA for Siwa|key]])</sup>:&#32;"
	local spans = IPA_span(items)
	return label .. spans
end

-- Wraps the `pron` field of every item in a styled <span> and concatenates the
-- results with no separator.
--   items: list of tables, each with a string `pron` field.
-- Returns the concatenated HTML; an empty list yields "".
-- Fix: the style attribute was never closed (the closing double quote before
-- ">" was missing), which produced malformed HTML.
-- Left as a global on purpose: format_IPA is defined earlier in the file and
-- resolves this name at call time.
function IPA_span(items)
	local open_tag = "<span style=\"font-size:110%;font-family:Gentium,'DejaVu Sans','Segoe UI',sans-serif\">"
	local bits = {}
	for _, item in ipairs(items) do
		bits[#bits + 1] = open_tag .. item.pron .. "</span>"
	end
	
	return table.concat(bits)
end

-- Builds one output line: the dialect labels in italics and parentheses,
-- a space, then the bracketed pronunciation rendered through format_IPA.
--   pronunciation: transcription string (square brackets are added here).
--   dialect: list of label strings, joined with ", ".
function line_format(pronunciation, dialect)
	local bracketed = '[' .. pronunciation .. ']'
	-- There is only ever one rendering per line, so no ' or ' joining is needed.
	local rendered = format_IPA({{pron = bracketed}})
	local labels = table.concat(dialect, ", ")
	return "(''" .. labels .. "'')" .. ' ' .. rendered
end

-- Template entry point: renders one bulleted pronunciation line per enabled
-- dialect, followed by the category links.
-- Parameters read from the parent frame:
--   1   : the term; defaults to a sample word in the Template namespace and to
--         the page title elsewhere.
--   a,e,w : booleans (default true) enabling the Aingo, Eastern and Western lines.
--   acc : list of accent labels; when non-empty it replaces the default label
--         on every line.
-- Fix: the label argument used to be `accent and #accent > 0 or {...}`, which
-- evaluates to the boolean `true` whenever accents are supplied and then fails
-- inside table.concat; the accent list itself is now passed.
function export.show(frame)
	local title = mw.title.getCurrentTitle()
	local params = {
		[1] = { default = title.nsText == 'Template' and "uįo·sauṡṡi" or title.text },
		a = {type = 'boolean', default = true},
		e = {type = 'boolean', default = true},
		w = {type = 'boolean', default = true},
		acc = {list = true},
	}
	
	local args = require("Module:parameters").process(frame:getParent().args, params)
	local term = args[1]
	local accent = args.acc
	local categories = {}
	
	-- User-supplied accent labels win; otherwise fall back to the dialect name.
	local function labels_or(default)
		if accent and #accent > 0 then
			return accent
		end
		return {default}
	end
	
	local lines = {}
	
	if args.a then
		table.insert(lines, line_format(export.crux(term, false, false), labels_or('Aingo')))
	end
	
	if args.e then
		table.insert(lines, line_format(export.crux(term, true, false), labels_or('Eastern')))
		table.insert(categories, "Siwa terms with Eastern IPA pronunciation")
	end
	
	if args.w then
		table.insert(lines, line_format(export.crux(term, false, true), labels_or('Western')))
		table.insert(categories, "Siwa terms with Western IPA pronunciation")
	end
	
	-- The leading "* " is emitted even when every dialect is disabled, as before.
	return "* " .. table.concat(lines, "\n* ") .. ncategories(categories)
end

return export