Module:mg-pron

From Linguifex
Revision as of 13:57, 5 September 2021 by Sware (talk | contribs)
Jump to navigation Jump to search


local sub = mw.ustring.sub
local find = mw.ustring.find
local match = mw.ustring.match
local gmatch = mw.ustring.gmatch
local gsub = mw.ustring.gsub
local u = mw.ustring.char
local split = mw.text.split
local gsplit = mw.text.gsplit

-- Combining diacritics that get attached to IPA base letters by the rules below.
local NASAL   = u(0x0303) -- U+0303 COMBINING TILDE
local NONSYLL = u(0x032F) -- U+032F COMBINING INVERTED BREVE BELOW
local ADV     = u(0x031F) -- U+031F COMBINING PLUS SIGN BELOW
local RET     = u(0x0320) -- U+0320 COMBINING MINUS SIGN BELOW
local CEN     = u(0x0308) -- U+0308 COMBINING DIAERESIS
local ACUTE   = u(0x0301) -- U+0301 COMBINING ACUTE ACCENT

-- Pattern character sets spliced into the gsub patterns further down.
local velar = "[kɡɣɫw]"
local palatal = "[ɲʧʃʎ]"
local consonants = "[bkdhjlmnɲprɾstʃθβðɡɣzʧɫʎ]"
-- Besides vowel letters this set holds the glide placeholders "w" and "J"
-- and the combining marks, so a vowel followed by its diacritics is matched
-- as one run by `vowels .. "*"`.
local vowels = "[áéíóúaɑɐeɪɛiɔʊouwJ" .. ADV .. RET .. CEN .. ACUTE .. "]"
local voiced = "[bdhʎjmnɲβðɡɣz]"

--- Tell whether two strings start with the same base letter.
-- Both arguments are decomposed (NFD) first, so a precomposed accented
-- letter compares equal to its unaccented counterpart.
-- @param foo first string
-- @param bar second string
-- @return boolean true when the first code points after decomposition match
local function same(foo, bar)
	local head_foo = match(mw.ustring.toNFD(foo), "^.")
	local head_bar = match(mw.ustring.toNFD(bar), "^.")
	return head_foo == head_bar
end

--- Strip every acute accent from a string.
-- The string is decomposed, the combining acute is deleted, and the result
-- is recomposed.
-- @param str string to clean
-- @param with_stress when truthy, the primary-stress mark "ˈ" is prepended
-- @return the accent-free string, optionally preceded by "ˈ"
local function remove_acute(str, with_stress)
	local decomposed = mw.ustring.toNFD(str)
	local stripped = gsub(decomposed, ACUTE, "")
	local recomposed = mw.ustring.toNFC(stripped)

	if with_stress then
		return "ˈ" .. recomposed
	end
	return "" .. recomposed
end

-- Table of functions returned by this module.
local export = {}

-- Maps each lenited consonant to the plain stop that the hardening entries of
-- `rules` substitute for it.
local desoften = {["β"] = "b", ["ð"] = "d", ["ɣ"] = "ɡ"}

-- Ordered {pattern, replacement} pairs that export.crux applies one after the
-- other with gsub, before syllabification, to turn the lowercased spelling
-- into working symbols. Order matters: each entry sees the output of the
-- previous ones (e.g. "ch" and "qu" are consumed before the bare "c" entry,
-- and "qu"/"gu?" are consumed before the "u" glide entries).
local prelims = {
	-- digraphs and single letters to consonant symbols
	{"r", "ɾ"}, {"ch", "ʧ"}, {"qu", "k"}, {"il", "ʎ"}, {"ñ", "ɲ"}, {"c", "k"},
	-- b/v, g (with an optional following u) and d become β, ɣ and ð
	{"[bv]", "β"}, {"gu?", "ɣ"}, {"d", "ð"},  
	-- z, x; then "i" between two vowels becomes the consonant j
	{"z", "θ"}, {"x", "ʃ"}, {"(" .. vowels .. ")i(" .. vowels .. ")", "%1j%2"},
	-- "u"/"i" next to a vowel become the glide placeholders "w"/"J"
	-- (both are later rewritten as non-syllabic vowels by `rules`)
	{"(" .. vowels .. ")u", "%1w"}, {"u(" .. vowels .. ")", "w%1"}, {"(" .. vowels .. ")i", "%1J"}, {"i(" .. vowels .. ")", "J%1"}, 
}

--- Insert syllable separators ("·") and a stress mark ("ˈ") into a term.
-- The term is expected to have been run through `prelims` already.
-- Stress placement:
--   * one syllable: a prefix chosen by part of speech ("(ˈ)" when the part
--     of speech is not in the lookup table);
--   * an acute accent somewhere in the term: every syllable that carries
--     one is stressed, and all acute accents are removed;
--   * otherwise: the penultimate syllable is stressed.
-- @param term string to syllabify
-- @param pos part of speech; required when the term is a single syllable
-- @return the syllables joined with "·", with stress marked
local function syllabify(term, pos)
	-- Each pass is a {pattern, replacement} pair, applied in order.
	local passes = {
		-- close a syllable after every consonant-run + vowel-run
		{"(" .. consonants .. "*)(" .. vowels .. "*)", "%1%2·"},
		-- tidy up doubled and trailing separators
		{"··", "·"},
		{"·$", ""},
		-- of two consonants after a separator, the first moves back as a coda
		{"·(" .. consonants .. ")(" .. consonants .. ")(" .. vowels .. "*)", "%1·%2%3"},
		-- a lone consonant syllable joins the preceding syllable
		{"·(" .. consonants .. ")$", "%1"},
		{"·(" .. consonants .. ")·", "%1·"},
	}
	for i = 1, #passes do
		term = gsub(term, passes[i][1], passes[i][2])
	end

	local syllables = split(term, "·")

	-- Monosyllables: the stress prefix depends on the part of speech.
	if #syllables == 1 then
		if not pos then error('Part of speech needed to determine stress') end
		local by_pos = {["n"] = "ˈ", ["pron"] = "", ["particle"] = "(ˈ)", ["prep"] = "(ˈ)", ["conj"] = "(ˈ)"}
		local prefix = by_pos[pos] or "(ˈ)"
		return prefix .. remove_acute(syllables[1])
	end

	-- No written accent: stress falls on the penultimate syllable.
	if not match(term, "[áéíóú]") then
		local penult = #syllables - 1
		syllables[penult] = "ˈ" .. syllables[penult]
		return table.concat(syllables, "·")
	end

	-- Written accent: stress the accented syllable(s) and drop the accent.
	local unaccented = {}
	for i = 1, #syllables do
		local syllable = syllables[i]
		local accented = match(syllable, "[áéíóú]") and true or false
		unaccented[i] = remove_acute(syllable, accented)
	end
	return table.concat(unaccented, "·")
end

-- Ordered {pattern, replacement} rewrites that export.crux applies, in
-- sequence, to the syllabified string ("·" separates syllables, "ˈ" precedes
-- a stressed syllable). A replacement may be a string, a lookup table or a
-- function, as accepted by gsub. Order matters: each rule sees the output of
-- the previous ones.
local rules = {
	-- rr and ss clusters across a syllable boundary are preaspirated.
	-- For a mixed pair (ɾ·s, s·ɾ) the callback returns nothing, so gsub keeps
	-- the whole match, separator included, and later rules still see "·".
	-- NOTE(review): the pattern has no "ˈ?" after "·", so a cluster right
	-- before a stressed syllable is not matched; confirm that is intended.
	{"([ɾs])·([ɾs])", function(s1, s2) if same(s1, s2) then return "·ʰ" .. s1 end end},
	-- last character of the term: final a/e/o are reduced and final n becomes
	-- a nasal diacritic; any other final character is left as is
	{".$", {["a"] = "ɐ", ["e"] = "ɪ", ["o"] = "ʊ", ["n"] = NASAL}},
	{"l·", "ɫ·"}, {"l$", "ɫ"}, -- velarized [l] in codas and word-finally
	-- [a] next to a velar consonant becomes ɑ + ADV
	{"a(" .. velar .. ")", "ɑ" .. ADV .. "%1"}, {"(" .. velar .. ")a", "%1ɑ" .. ADV},
	-- [a] next to a palatal consonant gets RET (in the second rule the capture
	-- already includes the "a", so the mark lands after it)
	{"a(" .. palatal .. ")", "a" .. RET .. "%1"}, {"(" .. palatal .. "a)", "%1" .. RET},
	{"s·(" .. voiced .. ")", "z·%1"}, {"(" .. vowels .. ")·s(" .. vowels .. ")", "%1·z%2"}, -- [s]-voicing
	{"[Jj]m$", "y" .. NASAL}, {"m$", "u" .. NASAL}, {"n·", NASAL .. "·"}, -- [m] and [n] behave as nasalizers in codas and word-finally

	-- e/o open to ɛ/ɔ in a closed syllable followed by a consonant-initial one
	{"e(" .. consonants .. ")·(" .. consonants .. ")", "ɛ%1·%2"}, {"o(" .. consonants .. ")·(" .. consonants .. ")", "ɔ%1·%2"},
	-- ... and in an open syllable when the next syllable is consonant + a-type
	-- vowel. Lua patterns cannot make a capture group optional ("(x)?" matches
	-- a literal "?"), so the coda-less case is spelled out here; the case with
	-- a coda is already covered by the pair above.
	{"e·(" .. consonants .. "[aɐɑ])", "ɛ·%1"}, {"o·(" .. consonants .. "[aɐɑ])", "ɔ·%1"},
	-- glides: vowel + i, and the "w"/"J" placeholders, become non-syllabic vowels
	{"(" .. vowels .. "i)", "%1" .. NONSYLL}, {"w", "u" .. NONSYLL}, {"J", "i" .. NONSYLL},

	-- β ð ɣ harden to b d ɡ word-initially (optionally after the stress mark)...
	{"^(ˈ?)([βðɣ])", function(foo, bar) return foo .. desoften[bar] end},
	-- ... before a consonant in the next syllable ...
	{"([βðɣ])(·ˈ?)(" .. consonants .. ")", function(foo, bar, baz) return desoften[foo] .. bar .. baz end},
	-- ... and after a consonant in the previous syllable
	{"(" .. consonants .. ")(·ˈ?)([βðɣ])", function(foo, bar, baz) return foo .. bar .. desoften[baz] end},

	-- ʧ is shown as (t)ʃ; any "a" not already followed by RET gets CEN, except
	-- that a nasalized one gets RET instead
	{"ʧ", "(t)ʃ"}, {"a([^" .. RET .. "])", "a" .. CEN .. "%1"},	{"a" .. CEN .. NASAL, "a" .. RET .. NASAL},
	-- moves a length mark after the nasal diacritic.
	-- NOTE(review): looks like a no-op at this point (the previous rule has
	-- consumed every a+CEN+NASAL and "ː" is only added later in export.crux);
	-- confirm
	{"a" .. CEN .. "(ː?)" .. NASAL, "a" .. CEN .. NASAL .. "%1"},
	-- the stress mark replaces the separator before it; remaining separators become "."
	{"·ˈ", "ˈ"}, {"·", "."},
}

--- Convert a single word to its phonetic transcription.
-- The word is lowercased, rewritten with `prelims`, syllabified and
-- stress-marked, rewritten with `rules`, and finally post-processed for the
-- treatment of ɡ/ɣ and for vowel length.
-- @param term the word, in ordinary spelling
-- @param pos part of speech, passed on to syllabify
-- @param g when truthy, ɡ/ɣ are kept (and become "j" before e/i);
--          otherwise they are replaced by "h"
-- @return the transcription string
function export.crux(term, pos, g)
	local word = mw.ustring.lower(term)

	for i = 1, #prelims do
		local pair = prelims[i]
		word = gsub(word, pair[1], pair[2])
	end

	word = syllabify(word, pos)

	for i = 1, #rules do
		local pair = rules[i]
		word = gsub(word, pair[1], pair[2])
	end

	-- ɡ/ɣ before e or i
	local before_front = g and "j" or "h"
	word = gsub(word, "[ɡɣ]([ei])", before_front .. "%1")
	-- every remaining ɡ/ɣ
	local elsewhere = g and "%1" or "h"
	word = gsub(word, "([ɡɣ])", elsewhere)
	-- an "h" is not velar: turn ɑ + ADV after it back into a + CEN ...
	word = gsub(word, "hɑ" .. ADV, "ha" .. CEN)
	-- ... and make sure a + CEN directly before a velar is ɑ + ADV
	word = gsub(word, "a" .. CEN .. "(" .. velar .. ")", "ɑ" .. ADV .. "%1")
	-- append a length mark to the first match of: stress mark, one or two
	-- characters, a/ɑ, and at most one following character
	word = gsub(word, "(ˈ..?[aɑ].?)", "%1ː")

	return word
end

--- Wrap each pronunciation in a styled <span>.
-- @param items sequence of tables, each with a `pron` string field
-- @return the concatenated <span> elements (empty string for an empty list)
-- NOTE(review): defined as a global, as in the original; consider `local`.
function IPA_span(items)
	-- The style attribute's closing quote was missing, which produced
	-- malformed HTML.
	local open_tag = "<span style=\"font-size:110%;font-family:'Gentium','DejaVu Sans','Segoe UI',sans-serif\">"
	local bits = {}
	for _, item in ipairs(items) do
		bits[#bits + 1] = open_tag .. item.pron .. "</span>"
	end
	return table.concat(bits)
end

--- Prefix the pronunciation spans with the "IPA (key):" wikitext label.
-- @param items sequence of tables with a `pron` field, passed to IPA_span
-- @return wikitext string: label followed by the spans
function format_IPA(items)
	local label = "[[w:IPA chart|IPA]]<sup>([[IPA for Modern Gallaecian|key]])</sup>:&#32;"
	local spans = IPA_span(items)
	return label .. spans
end

--- Build one output line: an italic register label followed by the
-- bracketed pronunciation formatted as IPA.
-- @param pronunciation transcription string, without brackets
-- @param register sequence of register names, joined with ", "
-- @return wikitext for the line (without the leading list marker)
function line_format(pronunciation, register)
	local bracketed = '[' .. pronunciation .. ']'
	local formatted = format_IPA({{pron = bracketed}})
	local label = "(''" .. table.concat(register, ", ") .. "'')"
	return label .. ' ' .. formatted
end

--- Transcribe a phrase word by word.
-- The phrase is split on single spaces, each piece goes through
-- export.crux, and the results are joined with single spaces again.
-- @param term word or phrase in ordinary spelling
-- @param pos part of speech, passed to export.crux for every word
-- @param g passed to export.crux for every word
-- @return the space-joined transcriptions
function separate_word(term, pos, g)
	local transcribed = {}

	for piece in gsplit(term, " ") do
		transcribed[#transcribed + 1] = export.crux(piece, pos, g)
	end

	return table.concat(transcribed, " ")
end

--- Template entry point: render the pronunciation list for a term.
-- The term is taken from the invocation's first argument; failing that, a
-- sample word is used on pages in the Template namespace; failing that, the
-- template's first parameter (which defaults to the page title) is used.
-- The part of speech comes from the invocation's second argument, the
-- template's |pos= parameter, or its second positional parameter.
-- @param frame the Scribunto frame object
-- @return wikitext: one bulleted line for the standard pronunciation, plus a
--         second line for the g–h distinction when the term contains "g"
function export.show(frame)
	local title = mw.title.getCurrentTitle()
	local params = {
		[1] = { default = title.text }, -- PAGENAME
		[2] = {},
		["pos"] = {},
	}
	local args = require("Module:parameters").process(frame:getParent().args, params)
	local term = frame.args[1] or title.nsText == 'Template' and "gueizuñe" or args[1]
	local pos = frame.args[2] or args.pos or args[2]

	-- export.crux lowercases its input, so a capital "G" is pronounced like
	-- "g"; test for it case-insensitively (plain find, no pattern matching).
	local is_g = find(mw.ustring.lower(term), "g", 1, true)
	local ipa = "* "

	ipa = ipa .. line_format(separate_word(term, pos), {'Standard Calá'})

	if is_g then
		ipa = ipa .. "\n* " .. line_format(separate_word(term, pos, true), {'g–h distinction'})
	end

	return ipa
end

return export