Module:qlu-pron: Difference between revisions

From Linguifex
Jump to navigation Jump to search
No edit summary
No edit summary
Line 313: Line 313:


     local ret = separate_word(term)
     local ret = separate_word(term)
     if not ret then return "oops" end
     if not ret then return "hay ret" end
    if not ret.phonemic then return "no hay phonemic" end
    if not ret.dialectal then return "no hay dialectal" end
      
      
     local IPA_args = {{pron = '/' .. (ret.phonemic or "test") .. '/', a = "Standard Ravennese"}, {pron = '[' .. (ret.phonetic or "test") .. ']'}}
     local IPA_args = {{pron = '/' .. (ret.phonemic or "test") .. '/', a = "Standard Ravennese"}, {pron = '[' .. (ret.phonetic or "test") .. ']'}}

Revision as of 12:33, 26 January 2025


This module is still under development.

This module generates IPA pronunciation for Luthic words. Backend to {{qlu-IPA}}.


local sub = mw.ustring.sub
local find = mw.ustring.find
local gmatch = mw.ustring.gmatch
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local u = mw.ustring.char
local split = mw.text.split
local gsplit = mw.text.gsplit

local lang = require("Module:languages").getByCode("qlu")
local m_table = require("Module:table")
local m_IPA = require("Module:IPA")
local c = require("Module:languages/data").chars

local export = {}

-- Inventories of consonant symbols, kept as plain strings so they can be
-- spliced into the character classes below. Besides IPA letters they contain
-- the internal one-character placeholders used by the rule tables (e.g. ǵ, ḱ).
local voiced = "mnɲŋbdgβvzðɣʣʤlʎrɹɾʁʒʥǵ"
local voiceless = "ptʈkɸfsθʃxʦʧʨḱ"
-- Character class matching a single consonant symbol; ʷ and ː are part of the
-- class, so they are matched as consonant characters as well.
local consonants = "[" .. voiced .. voiceless .. "ʷː]"

-- Front and back vowel symbols. "I" is an internal placeholder: it is produced
-- by the "[ĕe]i" rule in phonemic_rules and resolved again by later rules.
local front = "eêĕéiïíîIɛɪæyʏøœ"
local back = "uúûüoóôɔʊʌɑɒ"

-- Diacritic characters from the shared character data (Module:languages/data);
-- presumably the combining forms, since dediacv strips them from NFD text.
local diacritics = c.acute .. c.breve .. c.circ .. c.diaer .. c.tilde
-- Character class matching a single vowel symbol, one of the diacritics above,
-- or a glide (j, w).
local vowels = "[aáâăɐ" .. front .. back .. diacritics .. "jw]"

-- Strip a diacritic from a string.
-- The string is first decomposed (NFD) so that the diacritic becomes a
-- separate character, then every match of the pattern `diacritic` is removed.
-- The result is left in decomposed form. As with gsub itself, the number of
-- removals is returned as a second value.
local function dediacv(str, diacritic)
	local decomposed = mw.ustring.toNFD(str)
	return gsub(decomposed, diacritic, "")
end

-- Map each character of `v` through a fixed substitution table
-- (e -> i, i -> ɪ, u -> ʊ); every other character is copied unchanged.
-- Not called anywhere in this file as shown.
local function laxen(v)
	local lax_of = {["e"] = "i", ["i"] = "ɪ", ["u"] = "ʊ"}
	local pieces = {}

	for ch in gmatch(v, ".") do
		pieces[#pieces + 1] = lax_of[ch] or ch
	end

	return table.concat(pieces)
end

-- Report whether two strings start with the same base letter.
-- Both strings are decomposed (NFD) so that diacritics become separate
-- characters, and only the first character of each is compared.
-- Not called anywhere in this file as shown.
local function same(foo, bar)
	local foo_nfd, bar_nfd = mw.ustring.toNFD(foo), mw.ustring.toNFD(bar)
	local foo_head = match(foo_nfd, "^.")
	local bar_head = match(bar_nfd, "^.")
	return foo_head == bar_head
end

-- Orthography-to-symbol rules. Each entry is a {pattern, replacement} pair;
-- export.crux applies them in order with gsub to the lower-cased term before
-- syllabification, so later rules see the output of earlier ones.
-- Several replacements are single-character placeholders (e.g. ʧ, ʤ, ḱ, ǵ)
-- that later rule tables expand or rewrite.
local first_rules = {
	-- Greco-Roman digraphs
	{"^mn", "n"}, {"^tm", "m"}, {"^ps", "s"},
	
	-- c/g/sc before front vowels, velar nasal before velars, and related digraphs
	{"ch", "k"}, {"g([ckqg])", "ŋ%1"},
	{"sc([eêĕéiïíî])", "ʃ%1"}, {"c([eêĕéiïíî])", "ʧ%1"},
	{"g([eêĕéiïíî])", "ʤ%1"}, {"gh", "g"},
	{"c", "k"}, {"ŋʤ", "dʤ"}, {"ŋʧ", "ŋk"}, {"dz", "ʣ"},
	
	-- gu + vowel, gl(i), gn(i); ʣ ʎ ɲ ʃ are doubled between vowels
	{"ŋgü", "gǵ"}, {"gu(" .. vowels .. ")", "ǵ%1"},
	{"gli?", "ʎ"}, {"gni?", "ɲ"},
	{"(" .. vowels .. ")([ʣʎɲʃ])(" .. vowels .. ")", "%1%2%2%3"},
	{"([ʧʤ])i(" .. vowels .. ")", "%1%2"}, {"([ʧʤʎɲ])ï([ʧʤʎɲ])", "%1i%2"},
	
	-- remaining digraphs and single letters; "h" is dropped
	{"ts", "ʦ"}, {"ph", "ɸ"}, {"th", "ʈ"}, {"h", ""},
	{"qu?", "ḱ"}, {"þ", "θ"}, {"v", "β"},
	
	-- hyphens are removed
	{"%-", ""},
}

-- Rules producing the phonemic form. Each entry is a {pattern, replacement}
-- pair; export.crux applies them in order with gsub to the syllabified,
-- stress-marked term ("·" separates syllables, ˈ and ˌ mark stress).
local phonemic_rules = {
	-- vowel digraphs
	{"[ăa]e", "ɛ"}, {"[ăa]u", "ɔ"}, {"[ĕe]i", "I"},
	
	-- unstressed /a/
	-- (every "a" becomes ɐ first; the second rule then restores "a" for the
	-- first ɐ that follows a stress mark without an intervening "·")
	{"a", "ɐ"}, {"([ˈˌ])([^ɐ·a]*)ɐ", "%1%2a"},
	
	-- diphthongs
	{"[iI](" .. vowels .. ")", "j%1"}, {"(" .. vowels .. ")[Ii]", "%1j"},
	{"u(" .. vowels .. ")", "w%1"}, {"(" .. vowels .. ")u", "%1w"},
	{"jw", "ju"}, {"wj", "wi"},
	
	-- nasal vowels
	-- (vowel + doubled nasal, or vowel + nasal + fricative, gets a tilde)
	{"(" .. vowels .. ")([mnŋɲ])([·ˈˌ]?)%2", "%1" .. c.tilde .. "%3%2"},
	{"(" .. vowels .. ")[mnŋɲ]([·ˈˌ]?)([ɸβfvszθðʃxɣ])", "%1" .. c.tilde .. "%2%3"},
	{"[ɛI]" .. c.tilde, "ẽ"}, {"ɔ" .. c.tilde, "õ"},
}

-- IPA diacritics built from their code points (U+031F, U+031E, U+0320,
-- U+031D, U+033B, U+032A). `retracted` is not referenced in this file as shown.
local advanced = u(0x031F); local lowered = u(0x031E)
local retracted = u(0x0320); local raised = u(0x031D)
local laminal = u(0x033B); local dental = u(0x032A)
-- Rules producing the narrow phonetic form. Each entry is a
-- {pattern, replacement} pair; export.crux applies them in order with gsub,
-- starting from the phonemic form.
local phonetic_rules = {
	-- Gorgia Toscana
	-- (a stop between vowels, right after a syllable break, becomes a fricative)
	{"(" .. vowels .. "·)p(" .. vowels .. ")", "%1ɸ%2"},
	{"(" .. vowels .. "·)b(" .. vowels .. ")", "%1β%2"},
	{"(" .. vowels .. "·)t(" .. vowels .. ")", "%1θ%2"},
	{"(" .. vowels .. "·)d(" .. vowels .. ")", "%1ð%2"},
	{"(" .. vowels .. "·)k(" .. vowels .. ")", "%1x%2"},
	{"(" .. vowels .. "·)g(" .. vowels .. ")", "%1ɣ%2"},
	
	-- placeholder "I" resolves to ɛ; ɸ/β become f/v word-initially and after
	-- a consonant; velars are rewritten according to the following vowel
	{"I", "ɛ"},
	{"^ɸ", "f"}, {"(" .. consonants .. ")([·ˈˌ]?)ɸ", "%1%2f"},
	{"^β", "v"}, {"(" .. consonants .. ")([·ˈˌ]?)β", "%1%2v"},
	{"ḱ", "kʷ"}, {"ǵ", "ɡʷ"}, 
	{"k([ieɛj])", "k" .. advanced .. "%1"}, {"g([ieɛj])", "g˖%1"},
	{"k([oɔu])", "q%1"}, {"g([oɔu])", "ɢ%1"},
	{"([kg])ʷ([ieɛj])", "%1ᶣ%2"},
	{"ŋ([·ˈˌ]?)([kg])([ʷᶣ])", "ŋ%3%1%2%3"},
	{"ŋ([·ˈˌ]?)([qɢ])", "ɴ%1%2"}, {"ŋ([·ˈˌ]?)([kg])([" .. advanced .. "˖])", "ŋ˖%1%2%3"},
	
	-- vowels 
	{"ɔ", "ɔ" .. advanced}, 
	{"iw", "ɪw"}, {"uw", "u" .. lowered .. "w"},
	{"ew", "e" .. c.diaer .. lowered .. "w"}, {"ow", "o" .. lowered .. "w"},
	{"ɛw", "æ" .. c.diaer .. "w"}, {"ɔ" .. advanced .. "w", "ʌw"},
	{"ɐw", "ɒw"}, {"aw", "ɑw"},
	{"ij", "ɪj"}, {"uj", "u" .. lowered .. "j"},
	{"ej", "e" .. advanced .. "j"}, {"oj", "o" .. advanced .. "j"},
	{"ɛj", "ɛ" .. raised .. "j"}, {"ɔ" .. advanced .. "j", "ɐ" .. raised .. "j"},
	{"ɐj", "ɔ" .. raised .. "j"}, {"aj", "a" .. c.diaer .. raised .. "j"},
	
	-- consonants
	{"ʈ", "t"}, {"ʦ", "t͡s"}, {"ʣ", "d͡z"},
	{"([ntdszl])", "%1" .. laminal}, {"(͡[sz])" .. laminal, "%1" .. dental}, 
	-- NOTE(review): the pattern below has no capture group, so "%1" relies on
	-- gsub treating it as the whole match; confirm for mw.ustring.gsub.
	{"[ʤʧʃ]", "%1ʷ"}, 
}

-- Rules producing the "upper" dialectal form. Each entry is a
-- {pattern, replacement} pair; export.crux applies them in order with gsub,
-- starting from the phonemic form, and stores the result in
-- ret.dialectal.upper.
-- Visible effects: stress marks are dropped (word-initial) or turned into
-- plain syllable breaks, affricates become fricatives, and nasalised vowels
-- are rewritten as vowel + ᵑ.
local upper_rules = {
	{"a", "ɐ"}, {"ɸ", "f"}, {"β", "v"}, {"θ", "t"}, {"ð", "d"},
	{"^[ˌˈ]", ""}, {"[ˌˈ]", "·"}, {"ju", "y"}, {"[ɛI]", "ɐj"}, {"ɔ", "aw"},
	-- NOTE(review): "r" is rewritten to ɾ before "r·r" is tested, so the
	-- "r·r" rule looks unreachable as ordered; confirm the intended order.
	{"r", "ɾ"}, {"r·r", "ʀ"}, {"d·([ʣʤ])", "·%1"}, {"t·([ʦʧ])", "·%1"},
	{"ʣ", "z"}, {"ʦ", "s"}, {"ʧ", "ʃ"}, {"ʤ", "ʒ"},
	-- a doubled consonant across a syllable break is reduced to a single one
	{"(" .. consonants .. ")·%1", "·%1"},
	{"(" .. vowels .. ")·s(" .. vowels .. ")", "%1·z%2"},
	{"ḱ", "k"}, {"ǵ", "g"}, {"ʎ", "ʎ" .. raised},
	{"k([ieɛj])", "c%1"}, {"g([ieɛj])", "ɟ%1"},
	{"ã", "ɐᵑ"}, {"ẽ", "eᵑ"}, {"ĩ", "iᵑ"}, {"õ", "oᵑ"}, {"ũ", "uᵑ"}, 
}

-- Final clean-up rules. Each entry is a {pattern, replacement} pair;
-- export.crux applies them in order with gsub to every output form
-- (phonemic, phonetic and all dialectal forms).
local last_rules = {
	-- Escaped characters
	-- (single-character placeholders are expanded to their IPA spelling, and
	-- ASCII "g" is replaced by the IPA letter ɡ)
	{"ʤ", "d͡ʒ"}, {"ʧ", "t͡ʃ"}, {"ʈ", "t"}, {"ʦ", "t͡s"}, {"ʣ", "d͡z"},
	{"g", "ɡ"}, {"ḱ", "kʷ"}, {"ǵ", "ɡʷ"}, 
	
	-- the internal syllable separator becomes the IPA syllable break
	{"·", "."},
}

-- Split a word into syllables and mark stress.
--
-- `word` is the term after first_rules have been applied. If it already
-- contains "·" the breaks are taken as given; otherwise they are inserted by
-- the gsub chain below, whose steps depend on each other's output and must
-- stay in this order.
--
-- Returns the syllables joined with "·", with ˈ / ˌ prefixed to stressed
-- syllables. A word with a single syllable is returned without any stress
-- mark.
local function syllabify(word)
	-- Onset clusters (as patterns) that should stay together in one syllable.
	-- NOTE(review): listToSet turns the list into a set, so the pairs() loop
	-- below visits the patterns in an unspecified order; confirm that the
	-- result does not depend on that order.
	-- NOTE(review): "[θð]vβr" matches the literal sequence "vβr"; possibly
	-- "[θð][vβ]r" was intended — confirm.
	local clusters = m_table.listToSet({
		"s[ptʈkfɸ]", "z[bdgβʤmnlr]",
		"[fɸvβpbtʈdkgθð]r", "[fɸvβpbkg]l",
		"[fɸvβszpbtʈdkgmnŋɲ][jw]",
		"s[ptʈkfɸ]r", "z[bdg]r",
		"s[pk]l", "zbl",
		"[fɸvβpbtʈdkg]r[jw]",
		"gn", "[θð]vβr",
	})
	
	-- Only syllabify automatically when no manual "·" break is present.
	if not match(word, "·") then
		-- Wrap every "cluster + vowel(s) + optional consonant" in separators.
		for cluster, _ in pairs(clusters) do
			word = gsub(word, "(" .. cluster .. vowels .. "+" .. consonants .. "?)", "·%1·")
		end
		
		-- Same for a single consonant onset, then tidy doubled, leading and
		-- trailing separators.
		word = gsub(word, "(" .. consonants .. vowels .. "+" .. consonants .. "?)", "·%1·")
		word = gsub(word, "··", "·"); word = gsub(word, "·$", ""); word = gsub(word, "^·", "")
		-- A consonant left before a break followed by a vowel moves to the next syllable.
		word = gsub(word, "(" .. consonants .. ")·(" .. vowels .. ")", "·%1%2")
		-- A doubled consonant is split across the syllable boundary.
		word = gsub(word, "·?(" .. consonants .. ")%1(" .. vowels .. "*)", "%1·%1%2")
		-- A lone consonant at the very end or very start joins its neighbour.
		word = gsub(word, "·(" .. consonants .. ")$", "%1"); word = gsub(word, "^(" .. consonants .. ")·", "%1")
		-- A single character isolated between two breaks joins the preceding syllable.
		word = gsub(word, "·(.)·", "%1·")
		--word = gsub(word, "·s(" .. voiceless .. ")", "s·%1")
		--word = gsub(word, "(" .. consonants .. ")s·(" .. voiceless .. ")", "%1·s%2")
		-- "ï" before a vowel becomes "i" followed by a syllable break.
		word = gsub(word, "ï(" .. vowels .. ")", "i·%1")
		-- d + ʤ/ʣ, t + ʦ and ŋ + g are split between the two consonants.
		word = gsub(word, "·?d([ʤʣ])", "d·%1"); word = gsub(word, "·tʦ", "t·ʦ")
		word = gsub(word, "·?ŋg", "ŋ·g"); word = gsub(word, "··", "·")
	end
	
	local syllables = split(word, "·");
	
	if #syllables == 1 then return table.concat(syllables) end -- account for monosyllables
	
	local first_stress = "[âêîôû]"
	local second_stress = "[áéíóú]"
	local unstressed = "[ăĕ]" -- not referenced below
	for i, _ in ipairs(syllables) do
		-- A circumflex marks primary stress: prefix ˈ and strip the circumflex.
		-- NOTE(review): because of the elseif, acute (secondary stress)
		-- syllables are only processed when the word has no circumflex vowel
		-- at all; confirm this is intended.
		if match(word, first_stress) then
			if match(syllables[i], first_stress) then
				syllables[i] = "ˈ" .. dediacv(syllables[i], c.circ)
			end
		elseif match(syllables[i], second_stress) then
			syllables[i] = "ˌ" .. dediacv(syllables[i], c.acute)
		end
		
		-- A syllable containing the digraph ae, au or ei also receives ˈ.
		if match(word, "a[eu]") or match(word, "ei") then
			if match(syllables[i], "a[eu]") or match(syllables[i], "ei") then
				syllables[i] = "ˈ" .. syllables[i]
			end
		end 
	end
	
	-- Default: when nothing received ˈ, stress the penultimate syllable.
	local ret = syllables
	if not match(table.concat(ret, "·"), "ˈ") then
		ret[#ret-1] = "ˈ" .. ret[#ret-1]
	end
	
	return table.concat(ret, "·")
end

-- Convert a single orthographic word into its pronunciation(s).
--
-- Pipeline: lower-case the term, apply first_rules, syllabify and mark
-- stress, then derive the phonemic form (phonemic_rules), the phonetic form
-- (phonetic_rules, from the phonemic form) and the dialectal forms (from the
-- phonemic form), and finally apply last_rules to every form.
--
-- `outputs` selects the return value:
--   "phonemic"          -> the phonemic string
--   nil or "phonetic"   -> the phonetic string
--   "test"              -> "/phonemic/ [phonetic]"
--   any other value     -> a table { phonemic = ..., phonetic = ...,
--                          dialectal = { upper = ..., bolognese = ...,
--                          paulistan = ... } }
-- The bolognese and paulistan rule sets are currently disabled, so those two
-- forms only differ from the phonemic form by last_rules (which the phonemic
-- form receives as well).
function export.crux(term, outputs)
	local ret = {["dialectal"] = {}}
	term = mw.ustring.lower(term)
	
	for _, rule in ipairs(first_rules) do
		term = gsub(term, rule[1], rule[2])
	end
	
	term = syllabify(term)
	-- A stress mark already implies a syllable boundary, so the separator in
	-- front of it is dropped. This must use the Unicode-aware gsub: with the
	-- byte-oriented string.gsub the class [ˈˌ] is a set of single bytes and
	-- would also match the lead byte of other U+02C0–U+02FF characters
	-- (such as ː), deleting the separator in front of them.
	term = gsub(term, "·([ˈˌ])", "%1")
	term = gsub(term, "ˌˌ", "ˌ")
	
	local phonemic = term
	for _, rule in ipairs(phonemic_rules) do
		phonemic = gsub(phonemic, rule[1], rule[2])
	end
	
	local phonetic = phonemic
	for _, rule in ipairs(phonetic_rules) do
		phonetic = gsub(phonetic, rule[1], rule[2])
	end
	
	local upper, bolognese, paulistan = phonemic, phonemic, phonemic
	for _, rule in ipairs(upper_rules) do
		upper = gsub(upper, rule[1], rule[2])
	end
	
	--[[for _, rule in ipairs(bolognese_rules) do
		bolognese = gsub(bolognese, rule[1], rule[2])
	end
	
	for _, rule in ipairs(paulistan_rules) do
		paulistan = gsub(paulistan, rule[1], rule[2])
	end]]
	
	for _, rule in ipairs(last_rules) do
		phonemic = gsub(phonemic, rule[1], rule[2])
		phonetic = gsub(phonetic, rule[1], rule[2])
		upper = gsub(upper, rule[1], rule[2])
		bolognese = gsub(bolognese, rule[1], rule[2])
		paulistan = gsub(paulistan, rule[1], rule[2])
	end

	-- testcases
	if outputs == "phonemic" then
		return phonemic
	elseif not outputs or outputs == "phonetic" then
		return phonetic
	elseif outputs == "test" then
		return "/" .. phonemic .. "/ [" .. phonetic .. "]"
	end
	
	ret.phonemic = phonemic
	ret.phonetic = phonetic
	ret.dialectal.upper = upper
	ret.dialectal.bolognese = bolognese
	ret.dialectal.paulistan = paulistan
	
	mw.logObject(ret)
	
	return ret
end

-- Merge two pronunciation tables into a new table, leaving both inputs
-- untouched.
--   * a key holding a table on both sides is merged recursively;
--   * a key holding a string on both sides is joined with a single space;
--   * otherwise foo's value is kept (bar's is used only if foo's is false);
--   * keys present only in bar are copied over.
-- NOTE(review): this is defined as a global function; consider whether it
-- should be local to the module.
function merge_tables(foo, bar)
    local merged = {}

    for key, left in pairs(foo) do
        local right = bar[key]
        local left_kind, right_kind = type(left), type(right)

        if left_kind == "table" and right_kind == "table" then
            merged[key] = merge_tables(left, right)
        elseif left_kind == "string" and right_kind == "string" then
            merged[key] = left .. " " .. right
        elseif left then
            merged[key] = left
        else
            merged[key] = right
        end
    end

    -- Fill in whatever foo did not provide.
    for key, right in pairs(bar) do
        if merged[key] == nil then
            merged[key] = right
        end
    end

    return merged
end

-- Compute the pronunciation of a (possibly multi-word) term.
-- The term is split on single spaces, every word is converted with
-- export.crux, and the per-word result tables are combined with
-- merge_tables, which joins the corresponding strings with a space.
-- Returns the combined table { phonemic, phonetic, dialectal = {...} },
-- or nil if the split yields no word at all.
-- NOTE(review): this is defined as a global function; consider whether it
-- should be local to the module.
function separate_word(term)
    local final_ret = nil

    for word in gsplit(term, " ") do
        -- export.crux returns a bare phonetic string when `outputs` is nil.
        -- A string has no .phonemic/.dialectal fields and cannot be passed to
        -- merge_tables, so an `outputs` value that matches none of the
        -- string modes is passed to obtain the full result table.
        local pron = export.crux(word, "all")

        if final_ret == nil then
            final_ret = pron
        else
            final_ret = merge_tables(final_ret, pron)
        end
    end

    return final_ret
end

-- Entry point for the template: formats the pronunciation of a term as a
-- wikitext list item.
-- The term is the first parameter of the calling template. It defaults to
-- the page title, or to "aggiu" when the current page is in the Template
-- namespace.
-- Output: the phonemic form (labelled "Standard Ravennese") and the phonetic
-- form, followed by one sub-item per dialectal form that differs from the
-- phonemic form, labelled with the first letter of the dialect key.
-- If the pronunciation data is missing, a short diagnostic string is
-- returned instead.
function export.show(frame)
    local parent_args = frame:getParent().args
    local title = mw.title.getCurrentTitle()
    local params = {
        [1] = { default = title.nsText == 'Template' and "aggiu" or title.text },
    }
    local args = require("Module:parameters").process(parent_args, params)
    local term = args[1]

    local ret = separate_word(term)
    -- Diagnostics for missing data; the first message used to read "hay ret"
    -- although it is returned precisely when there is no result.
    if not ret then return "no hay ret" end
    if not ret.phonemic then return "no hay phonemic" end
    if not ret.dialectal then return "no hay dialectal" end

    local IPA_args = {
        {pron = '/' .. ret.phonemic .. '/', a = "Standard Ravennese"},
        {pron = '[' .. (ret.phonetic or "test") .. ']'},
    }

    -- pairs() visits keys in an unspecified order, so the dialect names are
    -- sorted first to keep the rendered list stable.
    local dialects = {}
    for key in pairs(ret.dialectal) do
        dialects[#dialects + 1] = key
    end
    table.sort(dialects)

    for _, key in ipairs(dialects) do
        local value = ret.dialectal[key]
        -- Forms identical to the phonemic one add no information; skip them.
        if value ~= ret.phonemic then
            table.insert(IPA_args, {pron = '[' .. value .. ']', a = sub(key, 1, 1), separator = "\n** "})
        end
    end

    return "* " .. m_IPA.format_IPA_full({ lang = lang, items = IPA_args })
end

return export