Module:zm-pron: Difference between revisions

From Linguifex
Jump to navigation Jump to search
No edit summary
No edit summary
Line 58: Line 58:
{"ál", "áʋ"}, {"él", "éʋ"}, {"ɛ́l", "ɛ́ʋ"}, {"íl", "íʋ"}, {"ól", "óʋ"}, {"ɔ́l", "ɔ́ʋ"}, {"úl", "úʋ"},
{"ál", "áʋ"}, {"él", "éʋ"}, {"ɛ́l", "ɛ́ʋ"}, {"íl", "íʋ"}, {"ól", "óʋ"}, {"ɔ́l", "ɔ́ʋ"}, {"úl", "úʋ"},
{"áv", "áʋ"}, {"év", "éʋ"}, {"ɛ́v", "ɛ́ʋ"}, {"ív", "íʋ"}, {"óv", "óʋ"}, {"ɔ́v", "ɔ,́ʋ"}, {"úv", "úʋ"},
{"áv", "áʋ"}, {"év", "éʋ"}, {"ɛ́v", "ɛ́ʋ"}, {"ív", "íʋ"}, {"óv", "óʋ"}, {"ɔ́v", "ɔ,́ʋ"}, {"úv", "úʋ"},
{"(" .. vowel .. ")ʋ", "l%1"},
{"ʋ(" .. vowel .. ")", "l%1"},
{"ʋ(" .. vowel .. ")", "l%1"},

Revision as of 10:24, 29 April 2022



-- Short local aliases for the Unicode-aware mw.ustring helpers and the
-- mw.text splitters.
-- NOTE(review): sub, find, gmatch and split are not referenced anywhere in the
-- visible part of this file.
local sub = mw.ustring.sub
local find = mw.ustring.find
local match = mw.ustring.match
local gmatch = mw.ustring.gmatch
local gsub = mw.ustring.gsub
local u = mw.ustring.char
local split = mw.text.split
local gsplit = mw.text.gsplit

-- Combining diacritics are built from their code points rather than typed
-- literally, to avoid awkward cursor behaviour when editing this source.
-- NOTE(review): TILDE, GRAVE, INTERDENTAL, VOICELESS and AFFR are not used in
-- the visible part of this file (DENTAL only feeds INTERDENTAL).
local TILDE, NASAL = u(0x0303), u(0x0303) -- U+0303 COMBINING TILDE (both names hold the same mark)
local TILDEBELOW, CREAKY = u(0x0330), u(0x0330) -- U+0330 COMBINING TILDE BELOW (both names hold the same mark)
local SPH = CREAKY .. CREAKY -- two tilde-below marks: sphincteric or strident vowel
local GRAVE = u(0x0300) -- U+0300 COMBINING GRAVE ACCENT
local HIGHFALL = "˥˦" -- pair of tone letters appended by the nasal-vowel rules
local SYLLABIC = u(0x0329) -- U+0329 COMBINING VERTICAL LINE BELOW
local SYLLABICA = u(0x030D) -- U+030D COMBINING VERTICAL LINE ABOVE
local DENTAL = u(0x032A) -- U+032A COMBINING BRIDGE BELOW
local INTERDENTAL = DENTAL .. u(0x0346) -- bridge below followed by U+0346 COMBINING BRIDGE ABOVE
local VOICELESS = u(0x0325) -- U+0325 COMBINING RING BELOW
local AFFR = u(0x0361) -- U+0361 COMBINING DOUBLE INVERTED BREVE

-- Vowel inventories. back_vowel and front_vowel are bare character lists that
-- get wrapped in [...] where they are used; `vowel` is already a character class.
local back_vowel = "aouɔ"
local front_vowel = "ieɛ"
local vowel = "[" .. back_vowel .. front_vowel .. "]"
-- Single-character stand-ins used while the rules run: a nasalised vowel is
-- held as one precomposed letter, and "ᴍ" stands for nasalised u (the rules
-- later rewrite "ᴍ" to a syllabic m). Only a, i, ɔ and u have entries.
local oral_to_nasal = {["a"] = "ã", ["i"] = "ĩ", ["ɔ"] = "ṍ", ["u"] = "ᴍ"} -- ṍ is the stand-in for nasalised ɔ
-- Reverse mapping from stand-in to vowel + combining tilde.
-- NOTE(review): not referenced in the visible part of this file.
local nasal_to_oral = {["ã"] = "a" .. NASAL, ["ĩ"] = "i" .. NASAL, ["ṍ"] = "ɔ" .. NASAL}
-- Grave-accented letters are the stand-ins for glottalised vowels.
-- Only a, e, i, ɔ and u have entries.
local modal_to_glottal = {["a"] = "à", ["e"] = "è", ["i"] = "ì", ["ɔ"] = "ò", ["u"] = "ù"}
-- Expands a stand-in back to vowel + diacritic. à, è and ì receive SPH (two
-- marks) while ò and ù receive a single CREAKY.
-- NOTE(review): confirm this asymmetry is intended.
local glottal_to_modal = {["à"] = "a" .. SPH, ["è"] = "e" .. SPH, ["ì"] = "i" .. SPH, ["ò"] = "ɔ" .. CREAKY, ["ù"] = "u" .. CREAKY}
-- Character classes over the stand-in letters above.
local nasalized = "[ãĩṍᴍ]"
local glottalic = "[àèìòù]"
local oral = "[aeiou]" -- NOTE(review): unused in the visible part of this file
-- IPA palatal consonants, i.e. the symbols produced by the first rules.
local palatal = "[ʨjʎʃʒʥʤʧ]"
local sonorant = "[rl]"
local click = "ǀǃʘǂǁ" -- bare list; wrapped in [...] where it is used
local SYLLA = u(0x0329) -- same code point as SYLLABIC; unused in the visible part of this file
local SYLL = "[r̩]" -- unused in the visible part of this file
-- Orthographic consonant letters, used by the r / ŕ rules.
-- NOTE(review): by the time those rules run, several of these letters have
-- already been rewritten to IPA symbols that are not in this class.
local consonant = "[mnptkbdgfvszšžhrljćźč]"


--- Tell whether two strings begin with the same base letter.
-- Each argument is canonically decomposed first, so a leading letter that
-- carries diacritics compares equal to the bare letter.
-- @param foo first string
-- @param bar second string
-- @return true when the first code point of both decomposed strings is equal
local function same(foo, bar)
	local head_of_foo = match(mw.ustring.toNFD(foo), "^.")
	local head_of_bar = match(mw.ustring.toNFD(bar), "^.")
	return head_of_foo == head_of_bar
end

local export = {}

-- Ordered rewrite rules that turn a lower-cased spelling into IPA.
-- Every entry is {pattern, replacement}. export.crux hands each pair, in this
-- order, to mw.ustring.gsub, so a replacement can be a string (with %1-style
-- capture references), a lookup table or a function. Each rule sees the output
-- of the rules before it, so the order is significant.
local rules = {
	-- Letters and digraphs to their IPA symbols.
	{"h", "x"}, {"ai", "ɛ"}, {"au", "ɔ"}, {"è", "ɛ"}, {"ò", "ɔ"}, {"dź", "ʥ"}, {"dž", "ʤ"},
	{"ć", "ʨ"}, {"nj", "ɲ"}, {"č", "ʧ"}, {"ž", "ʒ"},  {"š", "ʃ"},  {"lj", "ʎ"},

	-- e/o (plain or acute) become ɛ/ɔ after any consonant in `palatal`.
	-- These four class-based rules stand in for the former per-consonant lists.
	-- They also replace later repeats of the same rule whose replacements named
	-- a capture that does not exist (%2) or put the vowel before the consonant.
	{"(" .. palatal .. ")e", "%1ɛ"}, {"(" .. palatal .. ")o", "%1ɔ"},
	{"(" .. palatal .. ")é", "%1ɛ́"}, {"(" .. palatal .. ")ó", "%1ɔ́"},

	-- l and v after a vowel become ʋ ...
	{"al", "aʋ"}, {"el", "eʋ"}, {"ɛl", "ɛʋ"}, {"il", "iʋ"}, {"ol", "oʋ"}, {"ɔl", "ɔʋ"}, {"ul", "uʋ"},
	{"av", "aʋ"}, {"ev", "eʋ"}, {"ɛv", "ɛʋ"}, {"iv", "iʋ"}, {"ov", "oʋ"}, {"ɔv", "ɔʋ"}, {"uv", "uʋ"},
	{"ál", "áʋ"}, {"él", "éʋ"}, {"ɛ́l", "ɛ́ʋ"}, {"íl", "íʋ"}, {"ól", "óʋ"}, {"ɔ́l", "ɔ́ʋ"}, {"úl", "úʋ"},
	-- (the ɔ́v entry used to carry a stray comma inside its replacement)
	{"áv", "áʋ"}, {"év", "éʋ"}, {"ɛ́v", "ɛ́ʋ"}, {"ív", "íʋ"}, {"óv", "óʋ"}, {"ɔ́v", "ɔ́ʋ"}, {"úv", "úʋ"},
	-- ... and ʋ directly before a plain vowel goes back to l.
	-- NOTE(review): this also turns an original v between vowels into l
	-- ("ava" ends up as "ala"); confirm whether that is wanted.
	{"ʋ(" .. vowel .. ")", "l%1"},

	-- r / ŕ directly before a consonant letter gets a schwa in front of it.
	{"r(" .. consonant .. ")", "ər%1"},
	-- Straight after a vowel the schwa is taken out again. The replacement
	-- keeps the vowel in front of the r; it previously emitted "r" first, which
	-- swapped the two sounds.
	-- NOTE(review): confirm that vowel + r is the intended output here.
	{"(" .. vowel .. ")ər", "%1r"},
	{"ŕ(" .. consonant .. ")", "ə́r%1"},
	{"(" .. vowel .. ")ə́r", "%1r"},
	-- No schwa when a vowel follows the r.
	{"ər(" .. vowel .. ")", "r%1"},

	-- Creaky vowels (letter with tilde below) are folded into their
	-- grave-accented stand-ins; anything without a table entry is left as is.
	{"([ḛḭṵaɔ]" .. TILDEBELOW .. "?)", {["ḛ"] = "è", ["ḭ"] = "ì", ["ṵ"] = "ù", ["a" .. TILDEBELOW] = "à", ["ɔ" .. TILDEBELOW] = "ò"}},

	-- NOTE(review): the remaining rules deal with clicks, glottalised and
	-- nasalised vowels, prenasalised stops and syllabic m; the lookup tables
	-- they index do not have an entry for every vowel in the classes used in
	-- their patterns. Confirm they are still wanted for this language.
	{"([" .. click .. "])(" .. glottalic .. ")", "%1ˀ%2"}, -- click + glottalised vowel: insert ˀ between them
	 -- tell apart between regular and syllabic <m>
	{"(" .. sonorant .. ")(" .. glottalic .. ")", "%1" .. CREAKY .. "%2"}, -- sonorant before a glottalised vowel also takes the creaky mark
	{"[mᴟ](" .. vowel .. ")", "ᴟᵇ%1"},  {"ŋ(" .. vowel .. ")", "ŋᶢ%1"},  {"ɳ(" .. vowel .. ")", "ᶯɖ%1"},
	{"(" .. vowel .. ")(".. vowel .. ")", function(s1, s2) return same(s1, s2) and s1 .. "ː˧" or s1 .. s2 .. "˧" end},
	{"(" .. vowel .. ")(" .. nasalized .. ")",
		function(s1, s2)
			return same(s1, s2) and s2 .. "ː" .. HIGHFALL or oral_to_nasal[s1] .. s2 .. HIGHFALL
		end
	},
	{"(" .. glottalic .. ")([" .. front_vowel .. "])",
		function(s1, s2)
			return same(s1, s2) and s1 .. "ː˦" or s1 .. modal_to_glottal[s2] .. "˦"
		end
	},
	{"(" .. glottalic .. ")([" .. back_vowel .. "])",
		function(s1, s2)
			return same(s1, s2) and s1 .. "ː˨" or s1 .. modal_to_glottal[s2] .. "˨"
		end
	},
	{"(" .. glottalic .. ")(" .. nasalized .. ")",
		function(s1, s2)
			return same(s1, s2) and glottal_to_modal[s1] .. NASAL .. "ː˧" or glottal_to_modal[s1] .. NASAL .. s2 .. "˧"
		end
	},
	{"(" .. glottalic .. "ʼ[" .. front_vowel .. "])", "%1˦"}, {"(" .. glottalic .. "ʼ[" .. back_vowel .. "])", "%1˨"},
	{"(" .. glottalic .. ")ʼ(" .. nasalized .. ")", function(s1, s2) return glottal_to_modal[s1] .. NASAL .. "ʔ" .. s2 .. HIGHFALL end},
	
	{"([^uɯɔɑieaʼ])m([^uɯɔɑieaʼ])", "%1ᴍ%2"}, {"u" .. CREAKY .. NASAL, "ᴍ" .. CREAKY},
	{"ᴍᴍ", "m" .. SYLLABIC .. "ː"}, {"ùᴍ", "m" .. CREAKY .. SYLLABICA .. "ː˧"},
	{"ᴍʼᴍ", "m" .. SYLLABICA .. CREAKY .. "ʔm" .. SYLLABIC .. "˧"},  {"ᴍ" .. CREAKY .. "ᴍ", "m" .. CREAKY .. SYLLABICA .. "ː"},
	{"ᴍ" .. CREAKY, "m" .. CREAKY .. SYLLABICA}, {"ṍṍ", "ṍː"}, {"ṍ", "ɔ" .. NASAL}, {"(" .. glottalic .. ")", function(s1) return glottal_to_modal[s1] end},
	{"ᴍ", "m" .. SYLLABIC},
	
}

--- Convert one word from spelling to IPA by running it through `rules`.
-- @param term the word to convert
-- @return the converted string
function export.crux(term)
	-- Capital N and Ɠ are swapped for ɴ and ʛ before the text is lower-cased.
	-- The parentheses keep only the string that gsub returns.
	local text = (term:gsub("N", "ɴ"))
	text = (text:gsub("Ɠ", "ʛ"))
	text = mw.ustring.lower(text)

	-- Apply every {pattern, replacement} pair in table order.
	for index = 1, #rules do
		local pattern, replacement = rules[index][1], rules[index][2]
		text = gsub(text, pattern, replacement)
	end

	return text
end

--- Wrap every pronunciation in an IPA-styled <span> and join the results.
-- Declared local so the helper does not end up in the global table.
-- @param items sequence of tables, each carrying a `pron` string
-- @return the concatenated <span> elements ("" for an empty sequence)
local function IPA_span(items)
	local bits = {}
	for i, item in ipairs(items) do
		-- The style attribute now ends with its own closing quote; the markup
		-- used to run straight into ">" with the quote still open.
		bits[i] = "<span style=\"font-size:110%;font-family:'Gentium','DejaVu Sans','Segoe UI',sans-serif\">" .. item.pron .. "</span>"
	end
	return table.concat(bits)
end

--- Put the linked "IPA(key):" label in front of the rendered pronunciations.
-- Declared local so the helper does not end up in the global table.
-- @param items sequence of tables with a `pron` field, handed to IPA_span
-- @return wikitext string: label, a &#32; space entity, then the spans
local function format_IPA(items)
	local label = "[[w:IPA chart|IPA]]<sup>([[IPA for e|key]])</sup>:&#32;"
	return label .. IPA_span(items)
end

--- Format a single pronunciation as one labelled IPA line.
-- The pronunciation is wrapped in square brackets and passed to format_IPA.
-- Declared local so the helper does not end up in the global table.
-- @param pronunciation the IPA string, without brackets
-- @return the wikitext produced by format_IPA
local function line_format(pronunciation)
	-- Only one pronunciation is ever formatted, so the result of format_IPA is
	-- returned directly instead of going through a one-element list.
	return format_IPA({{pron = '[' .. pronunciation .. ']'}})
end

--- Convert a phrase word by word.
-- The text is split on single spaces, each piece goes through export.crux,
-- and the pieces are joined with single spaces again.
-- Declared local so the helper does not end up in the global table.
-- @param term the phrase to convert
-- @return the converted phrase
local function separate_word(term)
	local converted = {}

	for word in gsplit(term, " ") do
		converted[#converted + 1] = export.crux(word)
	end

	return table.concat(converted, " ")
end

--- Template entry point: render the pronunciation line for a term.
-- The term is the first argument passed to the calling template. When it is
-- missing, the default is the current page title, or a fixed sample word when
-- the current page is in the Template namespace.
-- @param frame the frame object; arguments are read from its parent frame
-- @return a wikitext bullet ("* ") followed by the formatted IPA line
function export.show(frame)
	local title = mw.title.getCurrentTitle()

	local fallback
	if title.nsText == 'Template' then
		fallback = "ǂAː Ṇṵĩ"
	else
		fallback = title.text
	end

	local params = {
		[1] = { default = fallback },
	}
	local args = require("Module:parameters").process(frame:getParent().args, params)

	return "* " .. line_format(separate_word(args[1]))
end

return export