Module:zm-pron: Difference between revisions

From Linguifex
Jump to navigation Jump to search
No edit summary
No edit summary
 
(82 intermediate revisions by 2 users not shown)
Line 7: Line 7:
local split = mw.text.split
local split = mw.text.split
local gsplit = mw.text.gsplit
local gsplit = mw.text.gsplit


-- To avoid weird annoying cursor behavior
-- To avoid weird annoying cursor behavior
local TILDE, NASAL = u(0x0303), u(0x0303) -- COMBINING TILDE ̃◌
local TILDEBELOW, CREAKY = u(0x0330), u(0x0330) -- COMBINING TILDE BELOW ̰◌
local SPH = CREAKY .. CREAKY -- sphincteric or strident vowel
local GRAVE = u(0x0300) -- COMBINING GRAVE ACCENT ̀◌
local GRAVE = u(0x0300) -- COMBINING GRAVE ACCENT ̀◌
local HIGHFALL = "˥˦"
local ACUTE = u(0x0301) -- COMBINING ACUTE ACCENT
local SYLLABIC = u(0x0329) -- COMBINING VERTICAL LINE BELOW ̩◌
local SYLL = u(0x0329) -- COMBINING VERTICAL LINE BELOW ̩◌
local SYLLABICA = u(0x030D) -- COMBINING VERTICAL LINE ABOVE ̍◌
local VL = u(0x0325) -- COMBINING RING BELOW ̥◌
local DENTAL = u(0x032A) -- COMBINING BRIDGE BELOW ̪◌
local INTERDENTAL = DENTAL .. u(0x0346) -- COMBINING BRIDGE BELOW AND ABOVE ̪͆◌
local VOICELESS = u(0x0325) -- COMBINING RING BELOW ̥◌
local AFFR = u(0x0361) -- COMBINING DOUBLE INVERTED BREVE ͡
local AFFR = u(0x0361) -- COMBINING DOUBLE INVERTED BREVE ͡


local back_vowel = "aouɔ"
local back_vowel = "aouɔǫō"
local front_vowel = "ieɛ"
local front_vowel = "ieɛɪ"
local vowel = "[" .. back_vowel .. front_vowel .. "]"
local vowels = "[" .. back_vowel .. front_vowel .. "]"
local oral_to_nasal = {["a"] = "ã", ["i"] = "ĩ", ["ɔ"] = "ṍ", ["u"] = "ᴍ"} -- ṍ = ɔ̃
local acuted = "[áéíóúýÉÓ]"
local nasal_to_oral = {["ã"] = "a" .. NASAL, ["ĩ"] = "i" .. NASAL, ["ṍ"] = "ɔ" .. NASAL}
local oral = "áéíóúýÉÓaouɔieɛ"
local modal_to_glottal = {["a"] = "à", ["e"] = "è", ["i"] = "ì", ["ɔ"] = "ò", ["u"] = "ù"}
local palatal = "[ʨjʎʃʒʥʤʧʦʣɕʑ]"
local glottal_to_modal = {["à"] = "a" .. SPH, ["è"] = "e" .. SPH, ["ì"] = "i" .. SPH, ["ò"] = "ɔ" .. CREAKY, ["ù"] = "u" .. CREAKY}
local nasalized = "[ãĩṍᴍ]"
local glottalic = "[àèìòù]"
local oral = "[aeiou]"
local palatal = "[ʨjʎʃʒʥʤʧ]"
local sonorant = "[rl]"
local sonorant = "[rl]"
local click = "ǀǃʘǂǁ"
local plosives = "[ptkbdg]"
local SYLLA = u(0x0329)
local velar = "[ɡkx]"
local SYLL = "[]"
local consonants = "[mnptkbdɡfvszxrljɲŋʋʎʨjʎʃʒʥʤʧʤʣʦɕʑ]"
local consonant = "[mnptkbdgfvszšžhrljćźč]"
 
 
local function same(foo, bar)
foo, bar = mw.ustring.toNFD(foo), mw.ustring.toNFD(bar) -- decompose diacritics
foo, bar = match(foo, "^."), match(bar, "^.") -- sort out the letter
return foo == bar and true or false
end


local export = {}
local export = {}


local rules = {
local rules = {
{"h", "x"}, {"ai", "ɛ"}, {"au", "ɔ"}, {"è", "ɛ"}, {"ò", "ɔ"}, {"dź", "ʥ"}, {"dž", "ʤ"},
{"ai", "ɛ"}, {"aí", "É"}, {"au", "ɔ"}, {"aú", "Ó"}, {"è", "ɛ"}, {"ò", "ɔ"}, {"y", "ɪ"},
{"ć", "ʨ"}, {"nj", "ɲ"}, {"č", "ʧ"}, {"ž", "ʒ"},  {"š", "ʃ"},
{"dź", "ʥ"}, {"dž", "ʤ"}, {"c", "ʦ"}, {"ć", "ʨ"}, {"nj", "ɲ"}, {"č", "ʧ"}, {"ž", "ʒ"},  {"š", "ʃ"}, {"lj", "ʎ"},
{"(" .. palatal .. ")e", "ɛ%1"},
{"h", "x"}, {"g", "ɡ"}, {"ǫ", "o"}, {"ō", "o"}, {"sč", "ʃt͡ʃ"}, {"ou", "u"}, {"ś", "ɕ"}, {"ź", "ʑ"},
{"(" .. palatal .. ")o", "ɔ%1"},
{"r(" .. consonant .. ")", "r̩%1"},
{"(" .. vowel .. ")r̩", "r%1"},
{"ŕ(" .. consonant .. ")", "ŕ̩%1"},
{"(" .. vowel .. ")r̩", "ŕ%1"},
{"(" .. consonant .. ")r", "ər%1"},
{"(" .. vowels .. ")[lv]$", "%1ʋ"}, {"(" .. acuted .. ")[lv]$", "%1ʋ"},
{"(" .. vowel .. ")̩", "r%1"},
{"(" .. consonant .. ")ŕ", "ə́r%1"},
{"r̩(" .. vowel .. ")", "r%1"}
,
{"(" .. palatal .. ")e", "ɛ%1"},
{"([ḛḭṵaɔ]" .. TILDEBELOW .. "?)", {["ḛ"] = "è", ["ḭ"] = "ì", ["ṵ"] = "ù", ["a" .. TILDEBELOW] = "à", ["ɔ" .. TILDEBELOW] = "ò"}},
{"(" .. plosives .. ")[ptkbdg]$", "[ptkbdg]"},
{"([" .. click .. "])(" .. glottalic .. ")", "%1ˀ%2"}, -- ꞰV̰ = ꞰˀV̰
{"pt" , "t"}, {"pk" , "k"}, {"tp" , "p"}, {"tk" , "k"}, {"kp" , "p"}, {"kt" , "t"},
-- tell apart between regular and syllabic <m>
{"bd" , "d"}, {"bg" , "g"}, {"db" , "b"}, {"dg" , "g"}, {"gb" , "b"}, {"gd" , "d"},
{"(" .. sonorant .. ")(" .. glottalic .. ")", "%1" .. CREAKY .. "%2"}, -- MV̰ > M̰V̰
{"[mᴟ](" .. vowel .. ")", "ᴟᵇ%1"}, {"ŋ(" .. vowel .. ")", "ŋᶢ%1"}, {"ɳ(" .. vowel .. ")", "ᶯɖ%1"},
{"(" .. vowel .. ")(".. vowel .. ")", function(s1, s2) return same(s1, s2) and s1 .. "ː˧" or s1 .. s2 .. "˧" end},
{"(" .. vowel .. ")(" .. nasalized .. ")",
function(s1, s2)
return same(s1, s2) and s2 .. "ː" .. HIGHFALL or oral_to_nasal[s1] .. s2 .. HIGHFALL
end
},
{"(" .. glottalic .. ")([" .. front_vowel .. "])",
function(s1, s2)
return same(s1, s2) and s1 .. "ː˦" or s1 .. modal_to_glottal[s2] .. "˦"
end
},
{"(" .. glottalic .. ")([" .. back_vowel .. "])",
function(s1, s2)
return same(s1, s2) and s1 .. "ː˨" or s1 .. modal_to_glottal[s2] .. "˨"
end
},
{"(" .. glottalic .. ")(" .. nasalized .. ")",
function(s1, s2)
return same(s1, s2) and glottal_to_modal[s1] .. NASAL .. "ː˧" or glottal_to_modal[s1] .. NASAL .. s2 .. "˧"
end
},
{"(" .. glottalic .. "ʼ[" .. front_vowel .. "])", "%1˦"}, {"(" .. glottalic .. "ʼ[" .. back_vowel .. "])", "%1˨"},
{"(" .. glottalic .. ")ʼ(" .. nasalized .. ")", function(s1, s2) return glottal_to_modal[s1] .. NASAL .. "ʔ" .. s2 .. HIGHFALL end},
{"([^uɯɔɑieaʼ])m([^uɯɔɑieaʼ])", "%1ᴍ%2"}, {"u" .. CREAKY .. NASAL, "" .. CREAKY},
{"(" .. consonants ..")([rŕ])(" .. consonants ..")", "%1ə%2" .. SYLL .. "%3"}, {"(" .. consonants ..")r$", "%1ər"},
{"ᴍᴍ", "m" .. SYLLABIC .. "ː"}, {"ùᴍ", "m" .. CREAKY .. SYLLABICA .. "ː˧"},
{"n(" .. velar .. ")", "ŋ%1"},
{"ᴍʼᴍ", "m" .. SYLLABICA .. CREAKY .. "ʔm" .. SYLLABIC .. "˧"}, {"" .. CREAKY .. "", "m" .. CREAKY .. SYLLABICA .. "ː"},
{"p(" .. vowels .. ")", "pʰ%1"}, {"t(" .. vowels .. ")", "tʰ%1"}, {"k(" .. vowels .. ")", "kʰ%1"},
{"ᴍ" .. CREAKY, "m" .. CREAKY .. SYLLABICA}, {"ṍṍ", "ṍː"}, {"", "ɔ" .. NASAL}, {"(" .. glottalic .. ")", function(s1) return glottal_to_modal[s1] end},
{"p(" .. acuted .. ")", "pʰ%1"}, {"t(" .. acuted .. ")", "tʰ%1"}, {"k(" .. acuted .. ")", "kʰ%1"},
{"ᴍ", "m" .. SYLLABIC},
{"(" .. palatal .. ")e", "%1ɛ"}, {"(" .. palatal .. ")é", "%1É"},
{"(" .. palatal .. ")o", "%1ɔ"}, {"(" .. palatal .. ")ó", "%1Ó"},
{"É", "ɛ" .. ACUTE}, {"Ó", "ɔ" .. ACUTE}, {"ý", "ɪ" .. ACUTE},
}
}


function export.crux(term)
local affricates = {
term = term:gsub("N", "ɴ"); term = term:gsub("Ɠ", "ʛ")
{"ʥ", "d͡ʑ"}, {"ʤ", "d͡ʒ"}, {"ʦ", "t͡s"}, {"ʨ", "t͡ɕ"}, {"ʧ", "t͡ʃ"}, {"ʣ", "d͡z"},
}
 
function export.crux(term, d)
term = mw.ustring.lower(term)
term = mw.ustring.lower(term)
for _, rule in ipairs(rules) do
for _, rule in ipairs(rules) do
term = gsub(term, rule[1], rule[2])
term = gsub(term, rule[1], rule[2])
end
if d == "c" then term = gsub(term, "ʨ", "ʧ"); term = gsub(term, "ʥ", "ʤ") end
if d == "s" then term = gsub(term, "ʨ", "ʦ"); term = gsub(term, "ʥ", "ʣ"); term = gsub(term, "ʰ", "") end
if d == "s" then term = gsub(term, "aj", "ɛː"); term =gsub(term, "ej", "ɛː") end
if d == "c" then term = gsub(term, "aj", "ɛː"); term =gsub(term, "ej", "ɛː") end
if d == "c" then term = gsub(term, "a", "ɐ"); term = gsub(term, "e", "ɪ"); term = gsub(term, "i", "ɪ"); term = gsub(term, "o", "ɔ"); term = gsub(term, "u", "ʊ")  end
if d == "s" then term = gsub(term, "a", "ɐ"); term = gsub(term, "e", "ɛ"); term = gsub(term, "i", "e"); term = gsub(term, "o", "ɔ"); term = gsub(term, "u", "o"); term = gsub(term, "ɪ", "i"); term = gsub(term, "ɪ́", "í"); term = gsub(term, "ų", "u")  end
if d == "n" then term = gsub(term, "a", "ə"); term = gsub(term, "e", "ə"); term = gsub(term, "i", "y"); term = gsub(term, "o", "ɔ"); term = gsub(term, "u", "ʊ"); term = gsub(term, "ɪ", "ɨ"); term = gsub(term, "ɪ́", "ɨ́"); term = gsub(term, "y", "ɪ")  end
for _, aff in ipairs(affricates) do
term = gsub(term, aff[1], aff[2])
end
end
return term
return term
end
end


function IPA_span(items)
function IPA_span(items)
Line 123: Line 90:


function format_IPA(items)
function format_IPA(items)
return "[[w:IPA chart|IPA]]<sup>([[IPA for e|key]])</sup>:&#32;" .. IPA_span(items)
return "[[w:IPA chart|IPA]]<sup>([[IPA for Zemljask|key]])</sup>:&#32;" .. IPA_span(items)
end
end


function line_format(pronunciation)
function line_format(pronunciation, dialect)
local full_pronunciations = {}
local full_pronunciations = {}
local IPA_args = {{pron = '[' .. pronunciation .. ']'}}
local IPA_args = {{pron = '[' .. pronunciation .. ']'}}
table.insert(full_pronunciations, format_IPA(IPA_args))
table.insert(full_pronunciations, format_IPA(IPA_args))
return table.concat(full_pronunciations)
return "(''" .. table.concat(dialect, ", ") .. "'')" .. ' ' .. table.concat(full_pronunciations, ' or ')
end
end


function separate_word(term)
function separate_word(term, d)
local result = {}
local result = {}
for word in gsplit(term, " ") do
for word in gsplit(term, " ") do
table.insert(result, export.crux(word))
table.insert(result, export.crux(word, d))
end
end
Line 145: Line 112:
function export.show(frame)
function export.show(frame)
local params = {
local params = {
[1] = { default = mw.title.getCurrentTitle().nsText == 'Template' and "ǂAː Ṇṵĩ" or mw.title.getCurrentTitle().text },
[1] = { default = mw.title.getCurrentTitle().nsText == 'Template' and "dźélo" or mw.title.getCurrentTitle().text },
}
}
local args = require("Module:parameters").process(frame:getParent().args, params)
local args = require("Module:parameters").process(frame:getParent().args, params)
Line 151: Line 118:
local ipa = "* "
local ipa = "* "
ipa = ipa .. line_format(separate_word(term))
ipa = ipa .. line_format(separate_word(term), {'Standard'})
if export.crux(term, "c") ~= export.crux(term) then
ipa = ipa .. "\n* "
ipa = ipa .. line_format(separate_word(term, "n"), {'Northern'})
end
if export.crux(term, "s") ~= export.crux(term) then
ipa = ipa .. "\n* "
ipa = ipa .. line_format(separate_word(term, "c"), {'Central'})
end
if export.crux(term, "n") ~= export.crux(term) then
ipa = ipa .. "\n* "
ipa = ipa .. line_format(separate_word(term, "s"), {'Southern'})
end
return ipa
return ipa

Latest revision as of 02:48, 20 July 2022



local sub = mw.ustring.sub
local find = mw.ustring.find
local match = mw.ustring.match
local gmatch = mw.ustring.gmatch
local gsub = mw.ustring.gsub
local u = mw.ustring.char
local split = mw.text.split
local gsplit = mw.text.gsplit


-- To avoid weird annoying cursor behavior
-- Combining diacritics are built from their code points with `u`
-- (mw.ustring.char) instead of being typed literally into string literals.
-- ACUTE and SYLL are used by the `rules` table below; GRAVE, VL and AFFR are
-- not referenced anywhere else in this module as shown.
local GRAVE = u(0x0300) -- COMBINING GRAVE ACCENT ̀◌
local ACUTE = u(0x0301) -- COMBINING ACUTE ACCENT
local SYLL = u(0x0329) -- COMBINING VERTICAL LINE BELOW ̩◌
local VL = u(0x0325) -- COMBINING RING BELOW ̥◌
local AFFR = u(0x0361) -- COMBINING DOUBLE INVERTED BREVE ͡

-- Character inventories used to build the patterns in `rules`.
-- back_vowel and front_vowel are bare character lists; the names that are
-- wrapped in [...] are ready-made pattern character classes.
local back_vowel = "aouɔǫō"
local front_vowel = "ieɛɪ"
-- Any unaccented vowel (back or front) as a character class.
local vowels = "[" .. back_vowel .. "" .. front_vowel .. "]"
-- Acute-accented vowels, plus the capital placeholders É and Ó that `rules`
-- produces for <aí>/<aú> and expands in its final entries.
local acuted = "[áéíóúýÉÓ]"
-- Not referenced anywhere else in this module as shown.
local oral = "áéíóúýÉÓaouɔieɛ"
-- Consonants after which `rules` rewrites e/é/o/ó to ɛ/É/ɔ/Ó.
local palatal = "[ʨjʎʃʒʥʤʧʦʣɕʑ]"
-- Not referenced anywhere else in this module as shown.
local sonorant = "[rl]"
-- NOTE(review): this class uses ASCII "g", but `rules` rewrites g to IPA "ɡ"
-- before the only rule that uses `plosives` runs, so "g" can no longer occur
-- at that point -- confirm whether "ɡ" was intended here.
local plosives = "[ptkbdg]"
-- Consonants before which `rules` turns n into ŋ.
local velar = "[ɡkx]"
-- Consonant class used for the r-between-consonants and final -r rules.
-- j, ʎ and ʤ are listed twice, which is harmless inside a character class.
local consonants = "[mnptkbdɡfvszxrljɲŋʋʎʨjʎʃʒʥʤʧʤʣʦɕʑ]"

-- Table returned at the end of the module; public functions are attached to it.
local export = {}

-- Ordered list of {pattern, replacement} pairs. export.crux applies them one
-- after another with mw.ustring.gsub, so every rule sees the output of all
-- rules before it: multi-letter spellings must be listed before the single
-- letters they contain, and later rules must match the IPA symbols produced by
-- earlier ones rather than the original spelling.
local rules = {
	-- Vowel digraphs, graved vowels and <y>. Capital É/Ó are temporary
	-- placeholders (export.crux lower-cases its input, so they cannot come from
	-- the term itself); the last line of this table expands them.
	{"ai", "ɛ"}, {"aí", "É"}, {"au", "ɔ"}, {"aú", "Ó"}, {"è", "ɛ"}, {"ò", "ɔ"}, {"y", "ɪ"},

	-- <sč> must be handled before <č>: once č has become ʧ the spelling "sč"
	-- no longer exists and this rule could never fire.
	{"sč", "ʃt͡ʃ"},
	{"dź", "ʥ"}, {"dž", "ʤ"}, {"c", "ʦ"}, {"ć", "ʨ"}, {"nj", "ɲ"}, {"č", "ʧ"}, {"ž", "ʒ"},  {"š", "ʃ"},  {"lj", "ʎ"},
	{"h", "x"}, {"g", "ɡ"}, {"ǫ", "o"}, {"ō", "o"}, {"ou", "u"}, {"ś", "ɕ"}, {"ź", "ʑ"},

	-- Word-final l/v after a vowel becomes ʋ.
	{"(" .. vowels .. ")[lv]$", "%1ʋ"}, {"(" .. acuted .. ")[lv]$", "%1ʋ"},

	-- NOTE(review): a replacement string is not a pattern, so this rule rewrites
	-- a word-final pair of plosives to the literal text "[ptkbdg]". That is
	-- almost certainly unintended, but the intended output cannot be determined
	-- from this file, so the rule is left exactly as it was.
	{"(" .. plosives .. ")[ptkbdg]$", "[ptkbdg]"},

	-- Plosive clusters reduce to their second member. The voiced set is written
	-- with IPA ɡ because the g -> ɡ rule above has already run; with ASCII g
	-- these entries could never match.
	{"pt" , "t"}, {"pk" , "k"}, {"tp" , "p"}, {"tk" , "k"}, {"kp" , "p"}, {"kt" , "t"},
	{"bd" , "d"}, {"bɡ" , "ɡ"}, {"db" , "b"}, {"dɡ" , "ɡ"}, {"ɡb" , "b"}, {"ɡd" , "d"},

	-- r/ŕ between consonants becomes syllabic with a preceding schwa; a final
	-- r after a consonant gets a schwa.
	{"(" .. consonants ..")([rŕ])(" .. consonants ..")", "%1ə%2" .. SYLL .. "%3"}, {"(" .. consonants ..")r$", "%1ər"},
	-- n assimilates to ŋ before a velar.
	{"n(" .. velar .. ")", "ŋ%1"},
	-- Voiceless plosives are aspirated before a vowel.
	{"p(" .. vowels .. ")", "pʰ%1"}, {"t(" .. vowels .. ")", "tʰ%1"}, {"k(" .. vowels .. ")", "kʰ%1"},
	{"p(" .. acuted .. ")", "pʰ%1"}, {"t(" .. acuted .. ")", "tʰ%1"}, {"k(" .. acuted .. ")", "kʰ%1"},

	-- e/o open to ɛ/ɔ after a palatal consonant (accented ones via placeholders).
	{"(" .. palatal .. ")e", "%1ɛ"}, {"(" .. palatal .. ")é", "%1É"},
	{"(" .. palatal .. ")o", "%1ɔ"}, {"(" .. palatal .. ")ó", "%1Ó"},

	-- Expand the placeholders and <ý> into base vowel + combining acute.
	{"É", "ɛ" .. ACUTE}, {"Ó", "ɔ" .. ACUTE}, {"ý", "ɪ" .. ACUTE},
}

-- Single-character affricate ligatures and the tie-bar sequences that
-- export.crux substitutes for them as its final step, in this order.
local affricates = {
	{"ʥ", "d͡ʑ"},
	{"ʤ", "d͡ʒ"},
	{"ʦ", "t͡s"},
	{"ʨ", "t͡ɕ"},
	{"ʧ", "t͡ʃ"},
	{"ʣ", "d͡z"},
}

-- Dialect-specific substitutions, keyed by the dialect code passed as `d`.
-- Each list is applied top to bottom and every step sees the result of the
-- previous one, so the order inside a list is significant (for example, in
-- "n" i is first parked as y and only turned into ɪ after ɪ has become ɨ).
local dialect_shifts = {
	c = {
		{"ʨ", "ʧ"}, {"ʥ", "ʤ"},
		{"aj", "ɛː"}, {"ej", "ɛː"},
		{"a", "ɐ"}, {"e", "ɪ"}, {"i", "ɪ"}, {"o", "ɔ"}, {"u", "ʊ"},
	},
	s = {
		{"ʨ", "ʦ"}, {"ʥ", "ʣ"}, {"ʰ", ""},
		{"aj", "ɛː"}, {"ej", "ɛː"},
		{"a", "ɐ"}, {"e", "ɛ"}, {"i", "e"}, {"o", "ɔ"}, {"u", "o"},
		{"ɪ", "i"}, {"ɪ́", "í"}, {"ų", "u"},
	},
	n = {
		{"a", "ə"}, {"e", "ə"}, {"i", "y"}, {"o", "ɔ"}, {"u", "ʊ"},
		{"ɪ", "ɨ"}, {"ɪ́", "ɨ́"}, {"y", "ɪ"},
	},
}

-- Convert one spelled term to its IPA transcription.
-- term: the spelling to convert; it is lower-cased first.
-- d:    optional dialect code ("c", "s" or "n"); any other value, including
--       nil, applies no dialect-specific substitutions.
-- Returns the transcription string.
function export.crux(term, d)
	local ipa = mw.ustring.lower(term)

	-- Base spelling-to-sound rules, in table order.
	for i = 1, #rules do
		ipa = gsub(ipa, rules[i][1], rules[i][2])
	end

	-- Dialect shifts, if the code is one of the known ones.
	local shifts = dialect_shifts[d]
	if shifts then
		for i = 1, #shifts do
			ipa = gsub(ipa, shifts[i][1], shifts[i][2])
		end
	end

	-- Finally spell the affricate ligatures out with tie bars.
	for i = 1, #affricates do
		ipa = gsub(ipa, affricates[i][1], affricates[i][2])
	end

	return ipa
end


-- Wrap each pronunciation in an HTML <span> that selects IPA-capable fonts.
-- items: sequence of tables, each with a string field `pron`.
-- Returns all spans concatenated without a separator ("" for an empty list).
function IPA_span(items)
	local bits = {}
	for _, item in ipairs(items) do
		-- The style attribute value has to be closed with a double quote before
		-- the tag's ">"; without it the generated HTML is malformed.
		local bit = "<span style=\"font-size:110%;font-family:'Gentium','DejaVu Sans','Segoe UI',sans-serif\">" .. item.pron .. "</span>"
		table.insert(bits, bit)
	end
	return table.concat(bits)
end

-- Prefix the rendered pronunciation spans with the linked "IPA(key):" label.
-- items: passed through unchanged to IPA_span.
-- Returns the wikitext label followed by the spans.
function format_IPA(items)
	local label = "[[w:IPA chart|IPA]]<sup>([[IPA for Zemljask|key]])</sup>:&#32;"
	return label .. IPA_span(items)
end

-- Build one output line: an italic dialect label followed by the formatted
-- pronunciation.
-- pronunciation: transcription string; it is wrapped in [ ] before formatting.
-- dialect:       sequence of dialect names, joined with ", " inside the label.
-- Returns the wikitext for the line (without the leading list bullet).
function line_format(pronunciation, dialect)
	local bracketed = '[' .. pronunciation .. ']'
	local variants = { format_IPA({ { pron = bracketed } }) }
	local label = "(''" .. table.concat(dialect, ", ") .. "'')"
	return label .. ' ' .. table.concat(variants, ' or ')
end

-- Transcribe a term word by word.
-- term: the spelling; it is split on single spaces.
-- d:    optional dialect code, forwarded unchanged to export.crux.
-- Returns the per-word transcriptions joined back together with single spaces.
function separate_word(term, d)
	local pieces = {}

	for word in gsplit(term, " ") do
		pieces[#pieces + 1] = export.crux(word, d)
	end

	return table.concat(pieces, " ")
end

-- Template entry point: render the pronunciation list for a term.
-- frame: the Scribunto frame; the term is the first positional argument of the
--        parent frame. When it is absent the default is the current page
--        title, or the sample word "dźélo" in the Template namespace.
-- Returns wikitext with one "* " bullet per line: the Standard pronunciation
-- always, then Northern, Central and Southern, each only when its
-- transcription differs from the Standard one.
function export.show(frame)
	local params = {
		[1] = { default = mw.title.getCurrentTitle().nsText == 'Template' and "dźélo" or mw.title.getCurrentTitle().text },
	}
	local args = require("Module:parameters").process(frame:getParent().args, params)
	local term = args[1]

	local standard = separate_word(term)
	local lines = { "* " .. line_format(standard, {'Standard'}) }

	-- Each dialect is compared using the same code that is then rendered.
	-- Previously the code tested and the code displayed did not match (for
	-- example "c" was tested but the Northern "n" line was shown), so a dialect
	-- could be omitted although it differed, or shown although it was identical.
	-- The comparison also uses the per-word transcription that is actually
	-- displayed rather than a transcription of the whole unsplit term.
	local dialects = {
		{ code = "n", label = 'Northern' },
		{ code = "c", label = 'Central' },
		{ code = "s", label = 'Southern' },
	}
	for _, dialect in ipairs(dialects) do
		local pronunciation = separate_word(term, dialect.code)
		if pronunciation ~= standard then
			lines[#lines + 1] = "* " .. line_format(pronunciation, { dialect.label })
		end
	end

	return table.concat(lines, "\n")
end

return export