-- Module:ábḫ-ipa
--
-- From Linguifex


-- apologies to anyone horrified by my primitive coding ability

local export = {}

-- Character classes over the intermediate alphabet produced by ORTHOGRAPHY
-- (one character per phoneme). Vowels in ACCENTED_VOWEL open a syllable that
-- is marked with "ˈ"; vowels in PLAIN_VOWEL open one that is marked with ".".
local PLAIN_VOWEL = "[iīeēoōIaāAĀOŌ]"
local ACCENTED_VOWEL = "[íîǐéêěóôǒÍáâǎÁÂǍÓÔǑ]"
local ANY_VOWEL = "[iīeēoōIaāAĀOŌíîǐéêěóôǒÍáâǎÁÂǍÓÔǑ]"
local INITIAL_CONSONANT = "[mnjBDGPpTtKkbzdsJHhry]"

-- Orthography -> one-character-per-phoneme map. Rules are applied in order
-- with string.gsub, so every entry is a Lua pattern (only "^mb" uses magic).
local ORTHOGRAPHY = {
	{ "aa", "ā" },
	{ "áa", "â" },
	{ "aá", "ǎ" },
	-- "aai", "áai", "aái": the long vowel has already been collapsed by the
	-- three rules above, so match the collapsed vowel followed by "i".
	-- (Matching the raw trigraphs at this point could never succeed.)
	{ "āi", "Ā" },
	{ "âi", "Â" },
	{ "ǎi", "Ǎ" },
	{ "ai", "A" },
	{ "ái", "Á" },
	{ "^mb", "B" },
	{ "ei", "ē" },
	{ "éi", "ê" },
	{ "eí", "ě" },
	{ "nd", "D" },
	{ "ng", "G" },
	{ "ḫ", "H" },
	{ "ii", "ī" },
	{ "íi", "î" },
	{ "ií", "ǐ" },
	{ "oi", "I" },
	{ "ói", "Í" },
	{ "jh", "J" },
	{ "nkk", "Kk" },
	{ "ou", "ō" },
	{ "óu", "ô" },
	{ "oú", "ǒ" },
	-- "aao", "áao", "aáo": same reasoning as the "aai" group above.
	{ "āo", "Ō" },
	{ "âo", "Ô" },
	{ "ǎo", "Ǒ" },
	{ "ao", "O" },
	{ "áo", "Ó" },
	{ "mp", "P" },
	{ "ntt", "Tt" },
}

-- One-character phonetic map -> IPA. Order matters: "x" must be expanded
-- before "H" is rewritten to "x", "b" before "B"/"8" produce a new "b", "j"
-- before "y" produces a new "j", and so on.
local IPA = {
	{ "x", "ɐɔ̯" },
	{ "ā", "aː" },
	{ "â", "âː" },
	{ "ǎ", "ǎː" },
	{ "A", "aɪ̯" },
	{ "Á", "áɪ̯" },
	{ "Ā", "aːɪ̯" },
	{ "Â", "âːɪ̯" },
	{ "Ǎ", "ǎːɪ̯" },
	{ "b", "β" },
	{ "B", "ᵐb" },
	{ "d", "ð" },
	{ "D", "ⁿd̪" },
	{ "ē", "eːɪ̯" },
	{ "ê", "êːɪ̯" },
	{ "ě", "ěːɪ̯" },
	{ "G", "ᵑg" },
	{ "h", "ɦ" },
	{ "H", "x" },
	{ "ī", "iː" },
	{ "î", "îː" },
	{ "ǐ", "ǐː" },
	{ "I", "ɔɪ̯" },
	{ "Í", "ɔ́ɪ̯" },
	{ "j", "ȵ" },
	{ "J", "ʑ" },
	{ "K", "ŋk" },
	{ "n", "n̪" },
	{ "N", "ŋ" },
	{ "o", "ɔ" },
	{ "ó", "ɔ́" },
	{ "ō", "ɔːʊ̯" },
	{ "ô", "ɔ̂ːʊ̯" },
	{ "ǒ", "ɔ̌ːʊ̯" },
	{ "O", "aɔ̯" },
	{ "Ó", "áɔ̯" },
	{ "Ō", "aːɔ̯" },
	{ "Ô", "âːɔ̯" },
	{ "Ǒ", "ǎːɔ̯" },
	{ "P", "mp" },
	{ "r", "ɺ" },
	{ "s", "ɕ" },
	{ "t", "t̪" },
	{ "T", "n̪t̪" },
	{ "u", "ʲɪ" },
	{ "ú", "íɪ̯" },
	{ "ū", "iːɪ̯" },
	{ "û", "îːɪ̯" },
	{ "ǔ", "ǐːɪ̯" },
	{ "Ú", "ʊ̯ɔ́" },
	{ "Û", "ʊ̯ɔ̂ːʊ̯" },
	{ "Ǔ", "ʊ̯ɔ̌ːʊ̯" },
	{ "w", "ɐ" },
	{ "W", "ɐɪ̯" },
	{ "X", "ʊ̯ɔ́ɪ̯" },
	{ "y", "j" },
	{ "z", "θ̱" },
	{ "Z", "ᶮȡ" },
	{ "1", "ɪ" },
	{ "2", "d̪" },
	{ "3", "ɛ" },
	{ "4", "g" },
	{ "6", "ȵ̩" },
	{ "7", "ŋ̩ʲ" },
	{ "8", "b" },
}

-- Vowels that get a different symbol in a syllable marked "." (no stress).
local REDUCED = { a = "w", A = "W", O = "x" }

-- i-type vowels that get a different symbol before "H", regardless of stress.
local BEFORE_H = { ["í"] = "ú", ["ī"] = "ū", ["î"] = "û", ["ǐ"] = "ǔ" }

-- Accented o-type vowels that get a different symbol directly after m, p or B.
-- NOTE(review): "Ó" has no entry here and no matching IPA rule; the original
-- code tested "ó" twice, so a case may have been intended for it -- confirm.
local AFTER_LABIAL = { ["ó"] = "Ú", ["ô"] = "Û", ["ǒ"] = "Ǔ", ["Í"] = "X" }

-- Applies an ordered list of { pattern, replacement } pairs with string.gsub.
local function apply_rules(text, rules)
	for _, rule in ipairs(rules) do
		-- Plain assignment keeps only the first result (the new string).
		text = string.gsub(text, rule[1], rule[2])
	end
	return text
end

-- True when ch is a string matching the given character class; nil-safe so
-- callers can look past either end of the word.
local function in_class(ch, class)
	return ch ~= nil and mw.ustring.match(ch, class) ~= nil
end

-- True when ch is one of the syllable boundary / stress marks.
local function is_boundary(ch)
	return ch == "." or ch == "ˌ" or ch == "ˈ"
end

-- Inserts a boundary mark ("." or "ˈ") in front of the character that opens
-- each syllable and returns the resulting flat list of marks and characters.
local function syllabify(chars)
	local segs = {}
	for i, ch in ipairs(chars) do
		local prev, nxt = chars[i - 1], chars[i + 1]
		local mark
		if prev == nil then
			-- The first character always gets a mark.
			if in_class(ch, ACCENTED_VOWEL)
				or (in_class(ch, INITIAL_CONSONANT) and in_class(nxt, ACCENTED_VOWEL)) then
				mark = "ˈ"
			else
				mark = "."
			end
		elseif in_class(ch, PLAIN_VOWEL) then
			-- A vowel directly after another vowel starts a new syllable.
			if in_class(prev, ANY_VOWEL) then
				mark = "."
			end
		elseif in_class(ch, ACCENTED_VOWEL) then
			if in_class(prev, ANY_VOWEL) then
				mark = "ˈ"
			end
		elseif in_class(nxt, PLAIN_VOWEL) then
			-- Anything else directly before a vowel opens that vowel's syllable.
			mark = "."
		elseif in_class(nxt, ACCENTED_VOWEL) then
			mark = "ˈ"
		end
		if mark then
			segs[#segs + 1] = mark
		end
		segs[#segs + 1] = ch
	end
	return segs
end

-- Returns the list of boundary marks found in segs, in order, with secondary
-- stress ("ˌ") assigned: every "." at an even distance from the last "ˈ" is
-- promoted, except the mark of the final syllable.
local function stress_marks(segs)
	local marks = {}
	local primary = 0
	for _, seg in ipairs(segs) do
		if seg == "." or seg == "ˈ" then
			marks[#marks + 1] = seg
			if seg == "ˈ" then
				primary = #marks
			end
		end
	end

	local count = #marks
	for i = 1, count do
		if marks[i] == "." and i ~= count and (i - primary) % 2 == 0 then
			marks[i] = "ˌ"
		end
	end
	return marks
end

-- Returns the one-character phonetic symbol for the non-mark segment segs[i].
-- mark is the (possibly promoted) mark of the syllable the segment sits in.
local function realize(segs, i, mark)
	local seg = segs[i]
	local prev, prev2 = segs[i - 1], segs[i - 2]
	local nxt, nxt2 = segs[i + 1], segs[i + 2]
	local unstressed = mark == "."

	-- Vowels first.
	local reduced = REDUCED[seg]
	if reduced then
		if unstressed then
			return reduced
		end
		return seg
	end

	if seg == "e" then
		-- Looks at the next consonant, skipping one boundary mark if present.
		if unstressed and (in_class(nxt, "[ntzdr]")
			or (is_boundary(nxt) and in_class(nxt2, "[ntzdr]"))) then
			return "3"
		end
		return "e"
	end

	if seg == "i" then
		local before_h = nxt == "H" or (is_boundary(nxt) and nxt2 == "H")
		if not unstressed then
			if before_h then
				return "u"
			end
			return "i"
		end
		if before_h then
			return "1"
		end
		if (prev == "m" or prev == "n" or prev == "j") and nxt ~= nil then
			if in_class(nxt, "[mnjPTt]") then
				return "6"
			end
			if nxt == "k" or nxt == "K" then
				return "7"
			end
			if is_boundary(nxt) then
				-- NOTE(review): this class omits "P" and "T", unlike the
				-- same-syllable test above; kept as written -- confirm.
				if in_class(nxt2, "[mnjt]") then
					return "6"
				end
				if nxt2 == "k" or nxt2 == "K" then
					return "7"
				end
			end
		end
		return "i"
	end

	local h_variant = BEFORE_H[seg]
	if h_variant then
		if nxt == "H" or (is_boundary(nxt) and nxt2 == "H") then
			return h_variant
		end
		return seg
	end

	local labial_variant = AFTER_LABIAL[seg]
	if labial_variant then
		if prev == "m" or prev == "p" or prev == "B" then
			return labial_variant
		end
		return seg
	end

	-- Now the consonants.
	if seg == "n" then
		if is_boundary(nxt) and nxt2 == "j" then
			return "j"
		end
		if is_boundary(nxt) and nxt2 == "k" then
			return "N"
		end
		return "n"
	end

	if seg == "j" then
		-- Only directly after a "ˈ" mark that is word-initial or follows a
		-- plain vowel.
		if prev == "ˈ" and (prev2 == nil or in_class(prev2, PLAIN_VOWEL)) then
			return "Z"
		end
		return "j"
	end

	if seg == "t" then
		if is_boundary(prev) and prev2 == "n" then
			return "2"
		end
		return "t"
	end

	if seg == "k" then
		if is_boundary(prev) and prev2 == "n" then
			return "4"
		end
		return "k"
	end

	if seg == "b" then
		if is_boundary(prev) and prev2 == "m" then
			return "8"
		end
		if nxt == nil then
			-- Word-final b.
			return "v"
		end
		return "b"
	end

	return seg
end

-- Converts a word in the ábḫ orthography to a bracketed IPA transcription.
--
-- frame: either a frame-like table whose args[1] holds the word (as passed
--        by {{#invoke:}}), or the word itself.
-- Returns the transcription as a string of the form "[...]".
-- Raises an error when no word is supplied.
function export.convert(frame)
	local word
	if type(frame) == "table" then
		word = frame.args[1]
	else
		word = frame
	end
	if word == nil then
		-- Previously this fell through to string.gsub with the frame table
		-- and failed with an unrelated "string expected" message.
		error("ábḫ-ipa: no word given to convert")
	end

	-- Orthography -> one character per phoneme, then one list entry per
	-- character (the empty separator splits between characters).
	local chars = mw.text.split(apply_rules(word, ORTHOGRAPHY), "", true)

	local segs = syllabify(chars)
	local marks = stress_marks(segs)

	local out = {}
	local next_mark = 1
	for i, seg in ipairs(segs) do
		if i == 1 and seg == "." and marks[1] == "." then
			-- A leading unstressed boundary is not shown in the output.
			next_mark = next_mark + 1
		elseif seg == "." or seg == "ˈ" then
			out[#out + 1] = marks[next_mark]
			next_mark = next_mark + 1
		else
			-- marks[next_mark - 1] is the mark of the current syllable.
			out[#out + 1] = realize(segs, i, marks[next_mark - 1])
		end
	end

	return "[" .. apply_rules(table.concat(out), IPA) .. "]"
end

return export