-- Module:xchc-pron
--
-- Source: Linguifex wiki.


local sub = mw.ustring.sub
local find = mw.ustring.find
local gmatch = mw.ustring.gmatch
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local u = mw.ustring.char
local split = mw.text.split
local gsplit = mw.text.gsplit

local lang = require("Module:languages").getByCode("xchc")
local c = require("Module:languages/data").chars
local m_IPA = require("Module:IPA")

-- Combining diacritics, read from the `chars` table of Module:languages/data.
local M = c.macron
local B = c.breve
local D = c.diaer

-- IPA consonant symbols as produced by the transliteration rules. Affricates
-- are still single ligature characters at this point; they are only expanded
-- to tie-bar sequences by `last_rules`.
local nb_cons = "mnŋɲptbdkɡfvszʃʒxɣhʧʦʤʣlrɾʎ"
local consonants = "[" .. nb_cons .. "]"

-- Cyrillic consonant letters as they occur in the untransliterated input.
-- "х" is included because it is transliterated as a consonant, and the
-- palochka is listed in both cases because input is lower-cased before it
-- reaches the harmony calculation.
local consonantsAsIs = "[мньӈбдгвзжғлрйпткфсшхӀӏчц]"
-- Vowel letters of the simplified input. "ɛ" and "ɔ" are the placeholders
-- that export.simplify substitutes for breve-marked "э" and "о".
local vowels = "[ыиэеаяүөуюоёӯӣɛɔ]"

local export = {}

-- Ordered {pattern, replacement} pairs that turn a lower-cased, NFD-normalised
-- Cyrillic word into a broad IPA string. export.crux applies them one after
-- another with mw.ustring.gsub, so the order is significant: multi-character
-- sequences must be consumed before the single letters they contain, and the
-- temporary marker "ь" emitted by several rules is only resolved near the end.
local first_rules = {
	-- Drop hyphens, turn the combining macron into a length mark, and move a
	-- length mark that landed before another combining diacritic to after it.
	{"%-", ""}, {M, "ː"}, {"ː([" .. M .. D .. B .. "])", "%1ː"},
	
	-- Letter + combining diacritic pairs (diaeresis, breve); these must run
	-- before the rules for the bare letters below.
	{"е" .. D, "ьo"}, {"о" .. B, "ɔ"}, {"э" .. B, "ɛ"}, {"и" .. B, "j"},
	
	-- Single letters, in alphabetical order. Digraphs ("дз", "дж", "х" +
	-- palochka) come before the plain letter on the same line.
	{"а", "a"},
	{"б", "b"},
	{"в", "v"},
	{"г", "ɡ"},
	{"ғ", "ɣ"},
	{"дз", "ʣ"}, {"дж", "ʤ"}, {"д", "d"},
	{"е", "ьe"},
	{"ж", "ʒ"},
	{"з", "z"},
	{"и", "ьi"},
	{"к", "k"},
	{"л", "l"},
	{"м", "m"},
	{"н", "n"},
	{"ӈ", "ŋ"},
	{"о", "o"},
	{"ө", "ø"},
	{"п", "p"},
	{"р", "r"},
	{"т", "t"},
	{"с", "s"},
	{"у", "u"},
	{"ү", "y"},
	{"ф", "f"},
	{"[хx]ӏ", "h"}, {"х", "x"},
	{"ц", "ʦ"},
	{"ч", "ʧ"},
	{"ш", "ʃ"},
	{"ы", "i"},
	{"э", "e"},
	{"ю", "ьu"},
	{"я", "ьa"},
	
	-- Resolve the "ь"/"і" marker: it fuses with a preceding l or n into a
	-- palatal consonant, and becomes "j" everywhere else.
	{"l[ьі]", "ʎ"}, {"n[ьі]", "ɲ"}, {"[ьі]", "j"},
	
	-- A doubled consonant becomes a single long consonant.
	{"(" .. consonants .. ")%1", "%1ː"},
	-- "r" at the end of the string becomes a tap; "a" next to one of
	-- ŋ k ɡ x ɣ h (optionally across a length mark on the vowel) becomes "ɑ".
	{"r$", "ɾ"}, {"([ŋkɡxɣh])a", "%1ɑ"}, {"a(ː?[ŋkɡxɣh])", "ɑ%1"}
}

-- Final pass applied by export.crux after `first_rules`: expand each affricate
-- ligature into its tie-bar sequence (voiced pair first, then voiceless).
local last_rules = {
	{"ʣ", "d͡z"}, {"ʤ", "d͡ʒ"},
	{"ʦ", "t͡s"}, {"ʧ", "t͡ʃ"},
}

-- Capture for a vowel from the listed set, optionally followed by a length mark.
local wnc = "([iyueøoɛɔœʌ]ː?)"

-- Rules for the variant labelled "Western" in export.show. Every rule rewrites
-- a single consonant that follows a `wnc` vowel at the very end of the string.
-- The fricative rules come before the stop rules, so a fricative produced by a
-- stop rule is not picked up again by a later rule.
local western_rules = {}
do
	-- {string-final consonant, replacement}, in application order.
	local final_pairs = {
		{"f", "p̪͡f"},
		{"v", "b̪͡v"},
		{"s", "t͡s"},
		{"z", "d͡z"},
		{"ʃ", "t͡ʃ"},
		{"ʒ", "d͡ʒ"},
		{"x", "k͡x"},
		{"ɣ", "ɡ͡ɣ"},
		{"h", "ʔ͡h"},
		{"p", "ɸ"},
		{"b", "β"},
		{"t", "s"},
		{"d", "z"},
		{"k", "x"},
		{"ɡ", "ɣ"},
	}
	for index = 1, #final_pairs do
		local pair = final_pairs[index]
		western_rules[index] = {wnc .. pair[1] .. "$", "%1" .. pair[2]}
	end
end

-- Rules for the variant labelled "Surgut" in export.show, applied in order.
local surgut_rules = {
	-- The palatal nasal and lateral are written as base consonant + "ʲ".
	{"ɲ", "nʲ"},
	{"ʎ", "lʲ"},
	-- Any consonant directly before i, y, e or ø receives "ʲ".
	{"(" .. consonants .. ")([iyeø])", "%1ʲ%2"},
}

-- Reduce a term to the plain vowel letters used by the harmony calculation:
-- macrons are dropped, breve-marked "э"/"о" become the placeholders "ɛ"/"ɔ",
-- and "ё" (precomposed or decomposed) becomes "о".
function export.simplify(term)
	-- Work on the decomposed form so the combining marks are separate characters.
	local decomposed = mw.ustring.toNFD(term)

	-- Extra parentheses keep only the string result of gsub, not the count.
	local stripped = (string.gsub(decomposed, c.macron, "")) -- vowel length
	stripped = (string.gsub(stripped, "э" .. c.breve, "ɛ"))
	stripped = (string.gsub(stripped, "о" .. c.breve, "ɔ"))

	-- Recompose, then fold both spellings of "ё" into "о".
	local composed = mw.ustring.toNFC(stripped)
	composed = (string.gsub(composed, "ё", "о"))
	composed = (string.gsub(composed, "е" .. c.diaer, "о"))

	return composed
end

-- Transliterate one term to IPA by running every rule of `first_rules` and
-- then every rule of `last_rules`, in order, with mw.ustring.gsub.
-- Returns the rewritten string.
function export.crux(term)
	local ipa = term

	for _, ruleset in ipairs({first_rules, last_rules}) do
		for index = 1, #ruleset do
			local pattern, replacement = ruleset[index][1], ruleset[index][2]
			-- Plain assignment keeps the string and drops gsub's match count.
			ipa = gsub(ipa, pattern, replacement)
		end
	end

	return ipa
end

-- Classify a term for vowel harmony.
--
-- The term is first passed through export.simplify. Returns a table with
--   roundness: "u" if an unrounded vowel is found, else "r" if a rounded
--              vowel is found, else "ar";
--   backness:  "f" if "ɛ" is found, else "b" if "ɔ" is found, else "ab".
--
-- All matching goes through the mw.ustring aliases (find/match/gsub declared
-- at the top of the file). The byte-based string methods must not be used
-- here: a character class such as "[ыи]" would match individual UTF-8 bytes,
-- so almost any Cyrillic word would match every class and gsub would strip
-- lone lead bytes, leaving invalid UTF-8 behind.
function export.harmony(term)
	term = export.simplify(term)
	local termR, termB = term, term

	local unrounded, rounded, neutralR = "[ыиӣеэ]", "[үөуӯюоё]", "[ɛɔая]"
	local front, back, neutralB = "ɛ", "ɔ", "[ыиӣеэүөуӯюоёая]"

	-- for words that violate vowel harmony (compounds and loanwords)
	local exception = (find(term, unrounded) and find(term, rounded))
		or (find(term, front) and find(term, back))
	if exception then
		-- Drop the vowels that are neutral for each dimension, then keep only
		-- the last remaining vowel plus the consonants that follow it, so the
		-- final relevant vowel decides. The term is still Cyrillic here, so
		-- the Cyrillic consonant class is used for both dimensions.
		local tail = "(" .. vowels .. consonantsAsIs .. "*)$"
		termR = gsub(term, neutralR, "")
		termB = gsub(term, neutralB, "")
		termR = match(termR, tail) or termR
		termB = match(termB, tail) or termB
	end

	return {
		roundness = find(termR, unrounded) and "u" or find(termR, rounded) and "r" or "ar",
		backness = find(termB, front) and "f" or find(termB, back) and "b" or "ab",
	}
end


-- Transcribe a phrase word by word.
--
-- The phrase is split on single spaces; each word is transliterated with
-- export.crux and then adjusted according to that word's own vowel harmony
-- (export.harmony). The results are joined with single spaces again.
--
-- Declared `local` so the helper does not leak into the global environment.
local function separate_word(term)
	local result = {}

	for word in gsplit(term, " ") do
		local ipa = export.crux(word)
		-- Compute the harmony class once per word instead of once per check.
		local harmony = export.harmony(word)

		-- Parentheses drop gsub's second return value (the match count).
		if harmony.backness == "b" then ipa = (ipa:gsub("a", "ɑ")) end
		if harmony.roundness == "r" then ipa = (ipa:gsub("ɛ", "œ")) end
		if harmony.roundness == "u" then ipa = (ipa:gsub("ɔ", "ʌ")) end

		result[#result + 1] = ipa
	end

	return table.concat(result, " ")
end

-- Build the optional bullet line for one labelled variant.
-- Applies `rules` (a list of {pattern, replacement}) to `phonetic`; if nothing
-- changed, returns "" so that no redundant line is shown, otherwise returns a
-- "\n** " line with the variant formatted by Module:IPA and tagged `label`.
local function dialect_line(phonetic, rules, label)
	local variant = phonetic
	for _, rule in ipairs(rules) do
		variant = gsub(variant, rule[1], rule[2])
	end

	if variant == phonetic then
		return ""
	end
	return "\n** " .. m_IPA.format_IPA_multiple(lang, {{pron = '[' .. variant .. ']', q = {label}}})
end

-- Template entry point.
-- Reads the first positional argument of the calling template; when it is
-- absent, falls back to the current page title, or to a fixed sample word in
-- the Template namespace. Returns wikitext: one "* " line with the base
-- transcription, followed by "Western" and "Surgut" lines where those
-- variants differ from it.
function export.show(frame)
	local parent_args = frame:getParent().args
	local title = mw.title.getCurrentTitle()
	local params = {
		[1] = { default = title.nsText == 'Template' and "чы̄ӈымэ̆ц" or title.text },
	}
	local args = require("Module:parameters").process(parent_args, params)
	-- Decompose and lower-case so the rules see base letters plus combining marks.
	local term = mw.ustring.lower(mw.ustring.toNFD(args[1]))

	local phonetic = separate_word(term)

	local ipa1 = dialect_line(phonetic, western_rules, "Western")
	local ipa2 = dialect_line(phonetic, surgut_rules, "Surgut")

	return "* " .. m_IPA.format_IPA_full{lang = lang, items = {{pron='[' .. phonetic .. ']'}}} .. ipa1 .. ipa2
end

return export