Module:qlu-pron

From Linguifex
Jump to navigation Jump to search

This module is still under development.
This module generates IPA pronunciation for Luthic words. Backend to {{qlu-IPA}}.
local sub = mw.ustring.sub
local find = mw.ustring.find
local gmatch = mw.ustring.gmatch
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local u = mw.ustring.char
local split = mw.text.split
local gsplit = mw.text.gsplit

local lang = require("Module:languages").getByCode("qlu")
local m_table = require("Module:table")
local m_IPA = require("Module:IPA")
local c = require("Module:languages/data").chars

local export = {}

-- Character inventories used below to build pattern character classes.
-- ǵ and ḱ are internal placeholder symbols; the rule tables further down
-- rewrite them (e.g. to gʷ / kʷ in phonetic_rules and last_rules).
local voiced = "mnɲŋbdgβvzðɣʣʤlʎrɹɾʁʒʥǵ"
local voiceless = "ptʈkɸfsθʃxʦʧʨḱ"
-- Bracketed character class: any consonant listed above, plus ʷ and ː.
local consonants = "[" .. voiced .. voiceless .. "ʷː]"

-- "I" is an internal placeholder: phonemic_rules produces it from [ĕe]i and
-- phonetic_rules / last_rules turn it back into "i".
local front = "eêĕéiïíîIɛɪæyʏøœ"
local back = "uúûüoóôɔʊʌɑɒ"

-- Diacritic characters supplied by Module:languages/data (`c`).
-- presumably these are the combining forms — TODO confirm against that module
local diacritics = c.acute .. c.breve .. c.circ .. c.diaer .. c.tilde
-- Bracketed character class: any vowel letter, any diacritic above, or j / w.
local vowels = "[aáâăɐ" .. front .. back .. diacritics .. "jw]"

--- Remove a diacritic from a string.
-- The input is decomposed (NFD) first, so a diacritic that is part of a
-- precomposed letter becomes a separate character that the pattern can hit.
-- The result is not recomposed.
-- @param str        text to clean
-- @param diacritic  pattern matching the diacritic(s) to delete
-- @return the results of gsub (the cleaned string, then the replacement count)
local function dediacv(str, diacritic)
	local decomposed = mw.ustring.toNFD(str)
	return gsub(decomposed, diacritic, "")
end

--- Tell whether two strings start with the same base character.
-- Both strings are decomposed (NFD) and only their first character is
-- compared, so diacritics on the first letter are ignored.
-- @param foo  first string
-- @param bar  second string
-- @return true when the first decomposed characters are equal, else false
local function same(foo, bar)
	local head_foo = match(mw.ustring.toNFD(foo), "^.")
	local head_bar = match(mw.ustring.toNFD(bar), "^.")
	return head_foo == head_bar
end

-- Spelling-level substitutions run by export.crux on the lowercased term,
-- before syllabification. Each entry is {pattern, replacement}; the entries
-- are applied in order with gsub, so later rules see the output of earlier ones.
local first_rules = {
	-- Greco-Roman digraphs
	{"^mn", "n"}, {"^tm", "m"}, {"^ps", "s"},
	
	-- ch, c and g: g before c/k/q/g becomes ŋ; sc/c/g before a front vowel
	-- letter become ʃ/ʧ/ʤ; any remaining c becomes k
	{"ch", "k"}, {"g([ckqg])", "ŋ%1"},
	{"sc([eêĕéiïíî])", "ʃ%1"}, {"c([eêĕéiïíî])", "ʧ%1"},
	{"g([eêĕéiïíî])", "ʤ%1"}, {"gh", "g"},
	{"c", "k"}, {"ŋʤ", "dʤ"}, {"ŋʧ", "ŋk"}, {"dz", "ʣ"},
	
	-- gu + vowel becomes the placeholder ǵ; gl(i) / gn(i) become ʎ / ɲ
	{"ŋgü", "gǵ"}, {"gu(" .. vowels .. ")", "ǵ%1"},
	{"gli?", "ʎ"}, {"gni?", "ɲ"},
	-- ʣ ʎ ɲ ʃ between two vowels are doubled
	{"(" .. vowels .. ")([ʣʎɲʃ])(" .. vowels .. ")", "%1%2%2%3"},
	-- i between ʧ/ʤ and a vowel is dropped; ï between two of ʧ ʤ ʎ ɲ becomes plain i
	{"([ʧʤ])i(" .. vowels .. ")", "%1%2"}, {"([ʧʤʎɲ])ï([ʧʤʎɲ])", "%1i%2"},
	
	-- remaining digraphs and single letters; h is deleted, q(u) becomes the placeholder ḱ
	{"ts", "ʦ"}, {"ph", "ɸ"}, {"th", "ʈ"}, {"h", ""},
	{"qu?", "ḱ"}, {"þ", "θ"}, {"v", "β"},
	
	--{"%-", ""},
}

-- Rules that export.crux applies to the syllabified, stress-marked term to
-- obtain the phonemic transcription. Each entry is {pattern, replacement},
-- applied in order with gsub.
local phonemic_rules = {
	-- vowel digraphs
	{"[ăa]e", "ɛ"}, {"[ăa]u", "ɔ"}, {"[ĕe]i", "I"},
	
	-- unstressed /a/
	-- (every a becomes ɐ, then the first ɐ that follows a stress mark with no
	-- · in between is turned back into a)
	{"a", "ɐ"}, {"([ˈˌ])([^ɐ·a]*)ɐ", "%1%2a"},
	
	-- diphthongs
	-- (i/I and u next to a vowel become the glides j and w)
	{"[iI](" .. vowels .. ")", "j%1"}, {"(" .. vowels .. ")[Ii]", "%1j"},
	{"u(" .. vowels .. ")", "w%1"}, {"(" .. vowels .. ")u", "%1w"},
	{"jw", "ju"}, {"wj", "wi"},
	
	-- nasal vowels
	-- (a tilde is added to a vowel before a doubled nasal, or before a nasal
	-- followed by one of ɸβfvszθðʃxɣ; in both cases one nasal is removed)
	{"(" .. vowels .. ")([mnŋɲ])([·ˈˌ]?)%2", "%1" .. c.tilde .. "%3%2"},
	{"(" .. vowels .. ")[mnŋɲ]([·ˈˌ]?)([ɸβfvszθðʃxɣ])", "%1" .. c.tilde .. "%2%3"},
	{"[ɛI]" .. c.tilde, "ẽ"}, {"ɔ" .. c.tilde, "õ"},
}

-- Combining diacritics built from their code points:
-- U+031F plus sign below, U+031E down tack below, U+0320 minus sign below,
-- U+031D up tack below, U+033B square below, U+032A bridge below.
local advanced = u(0x031F); local lowered = u(0x031E)
local retracted = u(0x0320); local raised = u(0x031D)
local laminal = u(0x033B); local dental = u(0x032A)
-- Rules that export.crux applies to the phonemic form to obtain the phonetic
-- transcription. Each entry is {pattern, replacement}, applied in order with gsub.
local phonetic_rules = {
	-- Gorgia Toscana
	-- (p b t d k g become ɸ β θ ð x ɣ between a vowel + · and a vowel, and word-finally)
	{"(" .. vowels .. "·)p(" .. vowels .. ")", "%1ɸ%2"}, {"p$", "ɸ"},
	{"(" .. vowels .. "·)b(" .. vowels .. ")", "%1β%2"}, {"b$", "β"},
	{"(" .. vowels .. "·)t(" .. vowels .. ")", "%1θ%2"}, {"t$", "θ"},
	{"(" .. vowels .. "·)d(" .. vowels .. ")", "%1ð%2"}, {"d$", "ð"},
	{"(" .. vowels .. "·)k(" .. vowels .. ")", "%1x%2"}, {"k$", "x"},
	{"(" .. vowels .. "·)g(" .. vowels .. ")", "%1ɣ%2"}, {"g$", "ɣ"},
	
	-- placeholder I back to i; ɸ / β become f / v word-initially and after a consonant
	{"I", "i"},
	{"^ɸ", "f"}, {"(" .. consonants .. ")([·ˈˌ]?)ɸ", "%1%2f"},
	{"^β", "v"}, {"(" .. consonants .. ")([·ˈˌ]?)β", "%1%2v"},
	-- placeholders ḱ / ǵ expanded, then k / g variants chosen by the following vowel
	{"ḱ", "kʷ"}, {"ǵ", "gʷ"}, 
	-- NOTE(review): the g rule writes a literal "˖" where the k rule uses the
	-- `advanced` combining mark; the ŋ rule below accepts both — confirm this is intended
	{"k([ieɛj])", "k" .. advanced .. "%1"}, {"g([ieɛj])", "g˖%1"},
	{"k([oɔu])", "q%1"}, {"g([oɔu])", "ɢ%1"},
	{"([kg])ʷ([ieɛj])", "%1ᶣ%2"},
	-- ŋ takes on the secondary articulation / place of the following stop
	{"ŋ([·ˈˌ]?)([kg])([ʷᶣ])", "ŋ%3%1%2%3"},
	{"ŋ([·ˈˌ]?)([qɢ])", "ɴ%1%2"}, {"ŋ([·ˈˌ]?)([kg])([" .. advanced .. "˖])", "ŋ˖%1%2%3"},
	
	-- vowels 
	{"iw", "ɪw"}, 
	{"ɐw", "ɒw"}, {"aw", "ɑw"},
	{"ij", "ɪj"}, 
	
	-- consonants
	{"ʈ", "t"},
	-- NOTE(review): this pattern has no capture group, so "%1" depends on gsub
	-- treating the whole match as capture 1 — confirm mw.ustring.gsub does so
	{"[ʤʧʃ]", "%1ʷ"}, 
}

-- Extra rules that export.crux applies only to the main phonetic transcription,
-- after the dialect forms have been derived. Each entry is
-- {pattern, replacement}, applied in order with gsub.
local narrow_phonetic_rules = {
	-- vowel qualities, mostly before the glides w / j
	{"ɔ", "ɔ" .. advanced}, {"uw", "u" .. lowered .. "w"}, {"ew", "e" .. c.diaer .. lowered .. "w"}, {"ow", "o" .. lowered .. "w"},
	{"ɛw", "æ" .. c.diaer .. "w"}, {"ɔ" .. advanced .. "w", "ʌw"}, {"ej", "e" .. advanced .. "j"}, {"oj", "o" .. advanced .. "j"},
	{"uj", "u" .. lowered .. "j"}, {"ɛj", "ɛ" .. raised .. "j"}, {"ɔ" .. advanced .. "j", "ɐ" .. raised .. "j"},
	{"ɐj", "ɔ" .. raised .. "j"}, {"aj", "a" .. c.diaer .. raised .. "j"},
	
	-- affricate ligatures are spelled out with a tie bar; n t d s z l get the
	-- `laminal` mark, which is swapped for `dental` on s / z right after a tie bar
    {"ʦ", "t͡s"}, {"ʣ", "d͡z"},
	{"([ntdszl])", "%1" .. laminal}, {"(͡[sz])" .. laminal, "%1" .. dental}, 
}

-- Rules that export.crux applies to the phonemic form to derive the
-- transcription stored under the "upper" dialect key. Each entry is
-- {pattern, replacement}, applied in order with gsub.
local upper_rules = {
	{"a", "ɐ"}, {"ɸ", "f"}, {"β", "v"}, {"θ", "t"}, {"ð", "d"},
	-- stress marks are removed: dropped word-initially, turned into plain
	-- syllable breaks elsewhere
	{"^[ˌˈ]+", ""}, {"[ˌˈ]+", "·"}, {"··", "·"}, {"ju", "y"}, {"ew", "y"}, {"[ɛI]", "ɐj"}, {"ɔ", "aw"},
	-- NOTE(review): every r has already become ɾ when the "r·r" rule runs, so
	-- that rule cannot match as written — confirm the intended order
	{"r", "ɾ"}, {"r·r", "ʀ"}, {"d·([ʣʤ])", "·%1"}, {"t·([ʦʧ])", "·%1"},
	{"ʣ", "z"}, {"ʦ", "s"}, {"ʧ", "ʃ"}, {"ʤ", "ʒ"},
	-- a consonant doubled across a syllable break is reduced to a single one
	{"(" .. consonants .. ")·%1", "·%1"},
	{"(" .. vowels .. ")·s(" .. vowels .. ")", "%1·z%2"},
	{"ḱ", "k"}, {"ǵ", "g"}, {"ʎ", "ʎ" .. raised},
	{"k([ieɛj])", "c%1"}, {"g([ieɛj])", "ɟ%1"},
	{"ã", "ɐᵑ"}, {"ẽ", "eᵑ"}, {"ĩ", "iᵑ"}, {"õ", "oᵑ"}, {"ũ", "uᵑ"}, 
	
	-- final devoicing
	{"b$", "p"}, {"d$", "t"}, {"g$", "k"}, {"β$", "ɸ"}, {"z$", "s"},
}

-- Rules that export.crux applies to the phonetic form (before the narrow
-- rules) to derive the transcription stored under the "bolognese" dialect key.
-- Each entry is {pattern, replacement}, applied in order with gsub.
local bolognese_rules = {
	{"ʧ", "ʦ"}, {"ʤ", "ʣ"},
	
	-- apophony
	-- (e ɛ o ɔ become i e u o when the next syllable contains one of i j u w;
	-- each vowel has one rule for a non-final and one for a word-final next syllable)
	{"e([^·ˈˌ]*[·ˈˌ][^·ˈˌ]*[ijuw]+[^·ˈˌ]*[·ˈˌ])", "i%1"}, {"e([^·ˈˌ]*[·ˈˌ][^·ˈˌ]*[ijuw]+[^·ˈˌ]*)$", "i%1"},
	{"ɛ([^·ˈˌ]*[·ˈˌ][^·ˈˌ]*[ijuw]+[^·ˈˌ]*[·ˈˌ])", "e%1"}, {"ɛ([^·ˈˌ]*[·ˈˌ][^·ˈˌ]*[ijuw]+[^·ˈˌ]*)$", "e%1"},
	{"o([^·ˈˌ]*[·ˈˌ][^·ˈˌ]*[ijuw]+[^·ˈˌ]*[·ˈˌ])", "u%1"}, {"o([^·ˈˌ]*[·ˈˌ][^·ˈˌ]*[ijuw]+[^·ˈˌ]*)$", "u%1"},
	{"ɔ([^·ˈˌ]*[·ˈˌ][^·ˈˌ]*[ijuw]+[^·ˈˌ]*[·ˈˌ])", "o%1"}, {"ɔ([^·ˈˌ]*[·ˈˌ][^·ˈˌ]*[ijuw]+[^·ˈˌ]*)$", "o%1"},
	
	-- f/ɸ, s and θ become affricates after the listed consonants
	{"([ntdsθzðʦʣlrmŋɲɴ][·ˈˌ]?)[fɸ]", "%1p͡f"}, {"([ntdsθzðʦʣlrmŋɲɴ][·ˈˌ]?)s", "%1ʦ"}, {"([ntdsθzðʦʣlrmŋɲɴ][·ˈˌ]?)θ", "%1t͡θ"},
	{"ɣ", "ɣ˕"}, {"ð", "ð" .. lowered}, {"v", "ʋ"},
	
	-- ɛ / ɔ are rewritten to e / o, and e / o then receive the `lowered` mark
	{"(·[^·]*)ɛ", "%1e"}, {"(·[^·]*)ɔ", "%1o"}, {"^([^·]*)ɛ", "%1e"}, {"^([^·]*)ɔ", "%1o"}, 
	{"(·[^·]*)([eo])", "%1%2" .. lowered}, {"^([^·]*)([eo])", "%1%2" .. lowered},
}

-- Rules that export.crux applies to the phonemic form to derive the
-- transcription stored under the "paulistan" dialect key. Each entry is
-- {pattern, replacement}, applied in order with gsub.
local paulistan_rules = {
	-- word-initial s + consonant gets a leading e; affricates are simplified
	{"^([ˈˌ]?)s(" .. consonants .. ")", "es%1%2"}, {"r", "ɾ"}, {"d·([ʣʤ])", "·%1"}, {"t·([ʦʧ])", "·%1"}, {"[ʧʦ]", "s"}, {"ʤ", "ʒ"},  
	-- doubled consonants are reduced; d / t become ʤ / ʧ before i ĩ j ɪ
	{"(" .. consonants .. ")([ˈ·ˌ])%1", "%2%1"}, {"ð", "d"}, {"d([iĩjɪ])", "ʤ%1"}, {"t([iĩjɪ])", "ʧ%1"},
	-- ɾ becomes ʁ word-initially and ɹ before a consonant; ĭ is added after a final stop
	{"^([ˈˌ]?)ɾ", "%1ʁ"}, {"ɾ([ˈˌ·]?)(" .. consonants .. ")", "ɹ%1%2"}, {"([pbtdkg])$", "%1ĭ"},
	-- a vowel between two nasals receives a tilde
	{"([mnŋɲ])(" .. vowels .. ")([ˈˌ·]?)([mnŋɲ])", "%1%2" .. c.tilde .. "%3%4"},
	
	{"ḱ", "kw"}, {"ǵ", "gw"}, {"(" .. vowels .. ")([ˈˌ·]?)ɲ", "%1" .. c.tilde .. "%2j̃"},
	-- glides and vowels in a syllable that starts with · (i.e. carries no stress mark)
	-- NOTE(review): the "^(·…" variants require the string to begin with ·;
	-- confirm whether that can happen at this point
	{"(·[^·ˈˌ]*)j", "%1ɪ̯"}, {"^(·[^·ˈˌ]*)j", "%1ɪ̯"},  
	{"(·[^·ˈˌ]*)w", "%1ʊ̯"}, {"^(·[^·ˈˌ]*)w", "%1ʊ̯"},  
	{"(·[^·ˈˌ]*)i", "%1ɪ"}, {"^(·[^·ˈˌ]*)i", "%1ɪ"},  
	{"(·[^·ˈˌ]*)u", "%1ʊ"}, {"^(·[^·ˈˌ]*)u", "%1ʊ"},  
	{"(·[^·ˈˌ]*)e", "%1i"}, {"^(·[^·ˈˌ]*)e", "%1i"}, 
	-- vowel + ŋ becomes nasalised vowel + nasalised w; doubled tildes are collapsed
	{"(" .. vowels .. ")([ˈˌ·]?)ŋ", "%1" .. c.tilde .. "%2w" .. c.tilde},
	{"ẽ", "ẽj" .. c.tilde}, {"õ", "õw" .. c.tilde}, {c.tilde .. c.tilde, c.tilde},
}

-- Final clean-up that export.crux applies to every transcription (phonemic,
-- phonetic and each dialect form). Each entry is {pattern, replacement},
-- applied in order with gsub.
local last_rules = {
	-- Escaped characters
	-- (single-character stand-ins are expanded to their final spelling)
	{"ʤ", "d͡ʒ"}, {"ʧ", "t͡ʃ"}, {"ʈ", "t"}, {"ʦ", "t͡s"}, {"ʣ", "d͡z"},
	-- NOTE(review): the first rule on the next line looks like a no-op as
	-- written (g -> g); possibly IPA ɡ (U+0261) was intended — confirm
	{"g", "g"}, {"ḱ", "kʷ"}, {"ǵ", "gʷ"}, {"I", "i"},
	
	-- a syllable break next to the primary stress mark is dropped; the
	-- remaining breaks are written as "."
	{"·?ˈ·?", "ˈ"}, {"·", "."},
}

--- Split a word into syllables (separated by "·") and mark its stress.
--
-- If the word contains no "·", syllable breaks are inserted automatically
-- using the onset clusters listed below, and hyphens are turned into breaks.
-- If it already contains "·", the given division is used as is.
--
-- Stress marking:
--   * a syllable containing one of âêîôû is prefixed with "ˈ" and loses its
--     circumflex;
--   * if the word has none of those vowels, a syllable containing one of
--     áéíóú is prefixed with "ˌ" and loses its acute;
--   * a syllable containing "ae", "au" or "ei" is prefixed with "ˈ";
--   * if no "ˈ" was placed at all, the penultimate syllable receives it (or
--     the only syllable when there is just one).
--
-- @param word       term already processed by first_rules
-- @param no_stress  when truthy, no stress is marked and acute/circumflex
--                   are simply stripped
-- @return the syllabified, stress-marked word
local function syllabify(word, no_stress)
	-- Consonant clusters that are kept together as a syllable onset.
	-- NOTE(review): the set is traversed with pairs(), so the order in which
	-- the clusters are tried is unspecified.
	local clusters = m_table.listToSet({
		"s[ptʈkfɸ]", "z[bdgβʤmnlr]",
		"[fɸvβpbtʈdkgθð]r", "[fɸvβpbkg]l",
		"[fɸvβszpbtʈdkgmnŋɲ][jw]",
		"s[ptʈkfɸ]r", "z[bdg]r",
		"s[pk]l", "zbl",
		"[fɸvβpbtʈdkg]r[jw]",
		"gn", "[θð]vβr",
	})
	
	if not match(word, "·") then
		-- wrap every (cluster or consonant) + vowels + optional consonant in breaks
		for cluster, _ in pairs(clusters) do
			word = gsub(word, "(" .. cluster .. vowels .. "+" .. consonants .. "?)", "·%1·")
		end
		
		word = gsub(word, "(" .. consonants .. vowels .. "+" .. consonants .. "?)", "·%1·")
		-- tidy up doubled and edge breaks, then adjust individual boundaries
		word = gsub(word, "··", "·"); word = gsub(word, "·$", ""); word = gsub(word, "^·", "")
		word = gsub(word, "(" .. consonants .. ")·(" .. vowels .. ")", "·%1%2")
		word = gsub(word, "(·?)(" .. consonants .. ")%2(" .. vowels .. "*)", "%2%1%2%3")
		word = gsub(word, "·(" .. consonants .. ")$", "%1"); word = gsub(word, "^(" .. consonants .. ")·", "%1")
		word = gsub(word, "·(.)·", "%1·")
		word = gsub(word, "·s(" .. voiceless .. ")", "s·%1")
		word = gsub(word, "(" .. consonants .. ")s·(" .. voiceless .. ")", "%1·s%2")
		word = gsub(word, "ï(" .. vowels .. ")", "i·%1"); word = gsub(word, "ḱ·ḱ", "k·ḱ")
		word = gsub(word, "·?d([ʤʣ])", "d·%1"); word = gsub(word, "·tʦ", "t·ʦ")
		word = gsub(word, "·?ŋg", "ŋ·g"); word = gsub(word, "··", "·")
		
		-- consonants split by a break are rejoined as an onset when together
		-- they match a known cluster
		for cluster, _ in pairs(clusters) do
			word = gsub(word, "(" .. consonants .. "+)·(" .. consonants .. "+)", function(c1,c2) return match(c1 .. c2, cluster) and "·" .. c1 .. c2 or c1 .. "·" .. c2 end)
		end
		
		word = gsub(word, "%-", "·"); word = gsub(word, "··", "·")
	end
	
	local syllables = split(word, "·");
	
	if no_stress or (#syllables == 1 and not match(word, "[" .. c.acute .. c.circ .. "]")) then
		return dediacv(word, "[" .. c.acute .. c.circ .. "]")
	end
	
	local first_stress = "[âêîôû]"
	local second_stress = "[áéíóú]"
	local unstressed = "[ăĕ]"
	for i, _ in ipairs(syllables) do
		if match(word, first_stress) then
			if match(syllables[i], first_stress) then
				syllables[i] = "ˈ" .. dediacv(syllables[i], c.circ)
			end
		elseif match(syllables[i], second_stress) then
			syllables[i] = "ˌ" .. dediacv(syllables[i], c.acute)
		end
		
		if match(word, "a[eu]") or match(word, "ei") then
			if match(syllables[i], "a[eu]") or match(syllables[i], "ei") then
				syllables[i] = "ˈ" .. syllables[i]
			end
		end 
	end
	
	local ret = syllables
	if not match(table.concat(ret, "·"), "ˈ") then
		-- Default stress falls on the penultimate syllable. A one-syllable word
		-- can get here (e.g. it carries a combining accent that the precomposed
		-- vowel classes above do not match); #ret - 1 would then be 0 and
		-- ret[0] is nil, so fall back to the only syllable.
		local target = #ret > 1 and #ret - 1 or 1
		ret[target] = "ˈ" .. ret[target]
	end
	
	return table.concat(ret, "·")
end

--- Convert a single word to its pronunciations.
--
-- The word is lowercased, run through first_rules, syllabified and
-- stress-marked, and then sent through the rule tables:
--   phonemic  = phonemic_rules(term)
--   phonetic  = narrow_phonetic_rules(phonetic_rules(phonemic))
--   upper     = upper_rules(phonemic)
--   bolognese = bolognese_rules(phonetic_rules(phonemic))
--   paulistan = paulistan_rules(phonemic)
-- last_rules is applied to every one of them at the end.
--
-- @param term       the word to transcribe
-- @param no_stress  passed on to syllabify; when truthy no stress is marked
-- @return a list {phonemic, phonetic}
-- @return a table with the keys "paulistan", "bolognese" and "upper"
function export.crux(term, no_stress)
	-- Run an ordered list of {pattern, replacement} rules over a string.
	local function apply_rules(rules, str)
		for _, rule in ipairs(rules) do
			str = gsub(str, rule[1], rule[2])
		end
		return str
	end
	
	term = apply_rules(first_rules, mw.ustring.lower(term))
	term = syllabify(term, no_stress)
	
	-- Fold a syllable break next to a stress mark into the stress mark, so that
	-- each boundary is a single character, as the rule tables expect
	-- ("[·ˈˌ]?"). This has to use the Unicode-aware gsub: with the byte-based
	-- string method the multibyte characters in "[ˈˌ]" and "·?" are read as
	-- separate bytes and the pattern does not match the intended text.
	term = gsub(term, "·?([ˈˌ])·?", "%1")
	term = gsub(term, "ˌˌ", "ˌ")
	
	local phonemic = apply_rules(phonemic_rules, term)
	local phonetic = apply_rules(phonetic_rules, phonemic)
	
	-- The dialect forms branch off before the narrow phonetic rules are applied.
	local upper = apply_rules(upper_rules, phonemic)
	local bolognese = apply_rules(bolognese_rules, phonetic)
	local paulistan = apply_rules(paulistan_rules, phonemic)
	
	phonetic = apply_rules(narrow_phonetic_rules, phonetic)
	
	phonemic = apply_rules(last_rules, phonemic)
	phonetic = apply_rules(last_rules, phonetic)
	upper = apply_rules(last_rules, upper)
	bolognese = apply_rules(last_rules, bolognese)
	paulistan = apply_rules(last_rules, paulistan)
	
	--[[if outputs then
		-- testcases
		if outputs == "phonemic" then
			return phonemic
		elseif not outputs or outputs == "phonetic" then
			return phonetic
		elseif outputs == "test" then
			return "/" .. phonemic .. "/ [" .. phonetic .. "]"
		end
	end]]

	local dialects = {
		paulistan = paulistan,
		bolognese = bolognese,
		upper = upper,
	}
	
	return {phonemic, phonetic}, dialects
end

--- Merge two string-valued tables key by key.
-- A key present in both tables gets "<foo value> <bar value>" (joined with one
-- space); a key present in only one table keeps that table's value.
-- Neither input is modified.
-- NOTE(review): this function is defined as a global; it is left that way so
-- that the module's global names stay the same.
-- @param foo  left-hand table
-- @param bar  right-hand table
-- @return a new table holding the merged entries
function merge_tables(foo, bar)
    local merged = {}

    -- Start from everything in bar ...
    for key, right in pairs(bar) do
        merged[key] = right
    end

    -- ... then overlay foo, joining the two values where bar has one as well.
    for key, left in pairs(foo) do
        local right = bar[key]
        merged[key] = right and (left .. " " .. right) or left
    end

    return merged
end


--- Transcribe a term that may consist of several space-separated words.
-- A term without spaces is handed straight to export.crux. Otherwise every
-- word is transcribed on its own and the results are joined with spaces, for
-- the phonemic and phonetic forms as well as for each dialect form.
-- NOTE(review): this function is defined as a global; it is left that way so
-- that the module's global names stay the same.
-- @param term       word or space-separated phrase
-- @param no_stress  passed on to export.crux
-- @return a list {phonemic, phonetic}
-- @return a table of dialect transcriptions keyed by dialect name
function separate_word(term, no_stress)
	if not match(term, " ") then
		return export.crux(term, no_stress)
	end
	
	local phonemic, phonetic = {}, {}
	local dialects
	
	for word in gsplit(term, " ") do
		local ret, word_dialects = export.crux(word, no_stress)
		table.insert(phonemic, ret[1])
		table.insert(phonetic, ret[2])
		
		if dialects then
			-- Keep the merged result: assigning it to a loop-local variable
			-- would throw away the dialect forms of every word but the first.
			dialects = merge_tables(dialects, word_dialects)
		else
			dialects = word_dialects
		end
	end
	
	return {table.concat(phonemic, " "), table.concat(phonetic, " ")}, dialects
end

--- Entry point for the template: build the formatted pronunciation line.
-- Reads the parent frame's arguments: parameter 1 is the term (it defaults to
-- "agghiu" when the current page is in the Template namespace, and to the page
-- title otherwise); the boolean parameter "rs" is passed on as no_stress.
-- The phonemic form is labelled "Standard Ravennese", the phonetic form
-- follows it, and every dialect form that differs from both is added on its
-- own sub-line, labelled with the first letter of its dialect key.
-- @param frame  the frame object of the invocation
-- @return a string starting with "* " followed by the output of
--         Module:IPA's format_IPA_full
function export.show(frame)
    local parent_args = frame:getParent().args

    local title = mw.title.getCurrentTitle()
    local default_term
    if title.nsText == 'Template' then
        default_term = "agghiu"
    else
        default_term = title.text
    end

    local args = require("Module:parameters").process(parent_args, {
        [1] = { default = default_term },
        ["rs"] = {type = 'boolean'},
    })

    local ret, dialects = separate_word(args[1], args.rs)
    local phonemic, phonetic = ret[1], ret[2]

    local IPA_args = {
        {pron = '/' .. (phonemic or "test") .. '/', a = {"Standard Ravennese"}},
        {pron = '[' .. (phonetic or "test2") .. ']'},
    }

    for key, value in pairs(dialects) do
        -- skip a dialect form that merely repeats one of the standard forms
        local duplicate = value == phonemic or value == phonetic
        if not duplicate then
            IPA_args[#IPA_args + 1] = {pron = '[' .. value .. ']', a = {sub(key,1,1)}, separator = "\n** "}
        end
    end

    return "* " .. m_IPA.format_IPA_full({ lang = lang, items = IPA_args })
end

return export