Module:mai-Tirh-translit

Documentation for this module may be created at Module:mai-Tirh-translit/doc

-- Transliteration for Maithili in Tirhuta script

local export = {}
local gsub = mw.ustring.gsub
local match = mw.ustring.match

-- Lookup table from a source character (optionally followed by the
-- modifier sign that appears as the second character of the two-character
-- keys below) to its Latin output. It is passed as the replacement table
-- to the final gsub in export.tr, so any matched text that has no entry
-- here is left unchanged by that gsub.
local conv = {
	-- consonants
	["๐‘’"] = "k", ["๐‘’"] = "kh", ["๐‘’‘"] = "g", ["๐‘’’"] = "gh", ["๐‘’“"] = "แน…",
	["๐‘’”"] = "c", ["๐‘’•"] = "ch", ["๐‘’–"] = "j", ["๐‘’—"] = "jh", ["๐‘’˜"] = "รฑ",
	-- The second entry of this row is the aspirated partner of the first, so
	-- it carries a trailing "h" like every other aspirate pair in the table.
	["๐‘’™"] = "แนญ", ["๐‘’š"] = "แนญh", ["๐‘’›"] = "แธ", ["๐‘’œ"] = "แธh", ["๐‘’"] = "แน‡",
	["๐‘’ž"] = "t", ["๐‘’Ÿ"] = "th", ["๐‘’ "] = "d", ["๐‘’ก"] = "dh", ["๐‘’ข"] = "n",
	["๐‘’ฃ"] = "p", ["๐‘’ค"] = "ph", ["๐‘’ฅ"] = "b", ["๐‘’ฆ"] = "bh", ["๐‘’ง"] = "m",
	["๐‘’จ"] = "y", ["๐‘’ฉ"] = "r", ["๐‘’ช"] = "l", ["๐‘’ซ"] = "v", ["๐‘’ช๐‘“ƒ"] = "แธท",
	["๐‘’ฌ"] = "ล›", ["๐‘’ญ"] = "แนฃ", ["๐‘’ฎ"] = "s", ["๐‘’ฏ"] = "h",
	["๐‘’›๐‘“ƒ"] = "แน›", ["๐‘’œ๐‘“ƒ"] = "แน›h",

	-- vowel diacritics
	["๐‘’ฑ"] = "i", ["๐‘’ณ"] = "u", ["๐‘’น"] = "ฤ“", ["๐‘’บ"] = "e", ["๐‘’ผ"] = "ล", ["๐‘’ฝ"] = "o", ["๐‘’ฐ"] = "ฤ", ["๐‘’ฒ"] = "ฤซ", ["๐‘’ด"] = "ลซ",
	["๐‘’ต"] = "rฬฅ", ["๐‘’ถ"] = "rฬฅฬ„", ["๐‘’ป"] = "ai", ["๐‘’พ"] = "au", ["๐‘’ท"] = "lฬฅ", ["๐‘’ธ"] = "lฬฅฬ„",

	-- vowels
	["๐‘’"] = "a", ["๐‘’‚"] = "ฤ", ["๐‘’ƒ"] = "i", ["๐‘’„"] = "ฤซ", ["๐‘’…"] = "u", ["๐‘’†"] = "ลซ",
	["๐‘’‡"] = "rฬฅ", ["๐‘’ˆ"] = "rฬฅฬ„", ["๐‘’‰"] = "lฬฅ", ["๐‘’Š"] = "lฬฅฬ„",
	["๐‘’‹"] = "ฤ“", ["๐‘’Œ"] = "ai", ["๐‘’"] = "ล", ["๐‘’Ž"] = "au",
	["เฅจ"] = "โ€™", ["๐‘’๐‘’บ"] = "รช",["๐‘’๐‘’ฝ"] = "รด",

	["เคฝ"] = "ยฒ", -- avagraha
	["๐‘’ฟ"] = "ฬƒ", -- chandrabindu
	["๐‘“€"] = "ฬƒ", -- anusvara
	["๐‘“…"] = "ฬƒ", -- gvang
	["๐‘“"] = "แธฅ", -- visarga
	["๐‘“‚"] = "", -- virama
	["๐‘“‡"] = "omฬ", -- om

	-- numerals
	["๐‘“"] = "0", ["๐‘“‘"] = "1", ["๐‘“’"] = "2", ["๐‘““"] = "3", ["๐‘“”"] = "4", ["๐‘“•"] = "5", ["๐‘“–"] = "6", ["๐‘“—"] = "7", ["๐‘“˜"] = "8", ["๐‘“™"] = "9",

	-- punctuation
	["เฅค"] = ".", -- danda
	["เฅฅ"] = ".", -- double danda
	["+"] = "", -- compound separator
	-- abbreviation sign
	["เฅฐ"] = "."
}

-- Anusvara assimilation table. In export.tr it is indexed with the
-- character that follows an anusvara in the original word order (the
-- character captured before it in the reversed word); the value found is
-- written in place of the anusvara. Characters without an entry fall back
-- to the combining mark used in the same expression in export.tr.
-- Each row below groups the keys that share one replacement value.
local nasal_assim = {
	["๐‘’"] = "๐‘’“", ["๐‘’"] = "๐‘’“", ["๐‘’‘"] = "๐‘’“", ["๐‘’’"] = "๐‘’“",
	["๐‘’”"] = "๐‘’˜", ["๐‘’•"] = "๐‘’˜", ["๐‘’–"] = "๐‘’˜", ["๐‘’—"] = "๐‘’˜", ["๐‘’˜"] = "๐‘’˜",
	["๐‘’™"] = "๐‘’", ["๐‘’š"] = "๐‘’", ["๐‘’›"] = "๐‘’", ["๐‘’œ"] = "๐‘’",
	["๐‘’ž"] = "๐‘’ข", ["๐‘’Ÿ"] = "๐‘’ข", ["๐‘’ "] = "๐‘’ข", ["๐‘’ก"] = "๐‘’ข", ["๐‘’ข"] = "๐‘’ข",
	["๐‘’ฃ"] = "๐‘’ง", ["๐‘’ค"] = "๐‘’ง", ["๐‘’ฅ"] = "๐‘’ง", ["๐‘’ฆ"] = "๐‘’ง", ["๐‘’ง"] = "๐‘’ง"
}
-- Set of three-character sequences (consonant, virama, consonant) that are
-- exempt from the word-final "a" retention rule in export.tr. Keys are
-- compared against text taken from the REVERSED word, so each key is
-- written in reversed order as well.
-- NOTE(review): the name presumably stands for "permitted cluster" - confirm.
local perm_cl = {
	["๐‘’ง๐‘“‚๐‘’ช"] = true
}
-- Character lists that are spliced into pattern character classes below and
-- in export.tr:
--   all_cons     - characters treated as consonants (receive an inserted "a",
--                  and flank the "a" targeted by syncope_pattern).
--   special_cons - consonants tested by the word-final "a" rule in export.tr.
--   vowel        - "a" plus the characters that suppress the inserted "a"
--                  when they directly follow a consonant.
--   vowel_sign   - further characters accepted as the outer context of
--                  syncope_pattern.
-- NOTE(review): judging by the section comments in `conv`, `vowel` holds the
-- dependent vowel diacritics and `vowel_sign` the independent vowel letters,
-- i.e. the two names look swapped - confirm before renaming.
local all_cons, special_cons = "๐‘’๐‘’๐‘’‘๐‘’’๐‘’“๐‘’”๐‘’•๐‘’–๐‘’—๐‘’˜๐‘’™๐‘’š๐‘’›๐‘’œ๐‘’๐‘’ž๐‘’Ÿ๐‘’ ๐‘’ก๐‘’ข๐‘’ฃ๐‘’ค๐‘’ฅ๐‘’ฆ๐‘’ง๐‘’จ๐‘’ฉ๐‘’ช๐‘’ซ๐‘’ฎ๐‘’ฌ๐‘’ญ๐‘’ฏ๐‘’ฉ", "๐‘’•๐‘’๐‘’๐‘’‘๐‘’ž๐‘’ฎ๐‘’ฏ๐‘’จ๐‘’Ÿ๐‘’›๐‘’œ๐‘’š๐‘’ฃ๐‘’ ๐‘’๐‘’ก๐‘’ฉ๐‘’ญ๐‘’™๐‘’ช๐‘’ซ๐‘’ฅ๐‘’ฆ๐‘’›๐‘’”๐‘’ข๐‘’ฌ๐‘’ง"
local vowel, vowel_sign = "a๐‘’ฑ๐‘’ณ๐‘’ต๐‘’น๐‘’ผ๐‘’ฐ๐‘’ฒ๐‘’ด๐‘’ป๐‘’พ๐‘’ฐ๐‘’ป๐‘’ฝ๐‘’ฐ๐‘’ฐ๐‘’ฝ๐‘’บ", "๐‘’๐‘’ƒ๐‘’…๐‘’‹๐‘’๐‘’‚๐‘’„๐‘’†๐‘’‡๐‘’ˆ๐‘’๐‘’บ๐‘’๐‘’ฝ๐‘’ซ๐‘“ƒ๐‘’จ๐‘“ƒ๐‘’๐‘’‚๐‘’Œ๐‘’Ž๐‘’‚๐‘’‹"
-- Matches an "a" that sits between two consonants (each optionally preceded
-- by the modifier sign) with a vowel-class character on either outer side.
-- It is applied to the reversed word inside export.tr.
local syncope_pattern = '([' .. vowel .. vowel_sign .. '])(๐‘“ƒ?[' .. all_cons .. '])a(๐‘“ƒ?[' .. all_cons .. '])([๐‘“€๐‘’ฟ]?[' .. vowel .. vowel_sign .. '])'
--- Reverse a string character by character.
-- Works on the units counted by mw.ustring.len / extracted by mw.ustring.sub
-- rather than on raw bytes, so multi-byte characters stay intact.
-- @param text string to reverse
-- @return the reversed string
local function rev_string(text)
	local n = mw.ustring.len(text)
	local chars = {}
	for pos = 1, n do
		-- The character at position pos lands at the mirrored slot.
		chars[n - pos + 1] = mw.ustring.sub(text, pos, pos)
	end
	return table.concat(chars)
end

--- Transliterate text into Latin script using the tables defined above.
--
-- Processing steps:
--   1. Insert an explicit "a" after every consonant that is not followed by
--      a character from `vowel` or by the virama.
--   2. For each run matched by the word pattern: reverse it, decide whether
--      the word-final "a" is kept, mark medial "a" matched by
--      `syncope_pattern`, resolve anusvara, then write the result back.
--   3. Clean up doubled vowels, map the remaining characters through `conv`
--      and return the NFC-normalised result.
--
-- @param text string to transliterate
-- @param lang unused by this function
-- @param sc unused by this function
-- @return the transliterated, NFC-normalised string
function export.tr(text, lang, sc)
	-- Step 1: make the implicit vowel explicit. `d` is empty when the
	-- consonant is followed by neither a vowel-class character nor a virama.
	text =
		gsub(
		text,
		"([" .. all_cons .. "]๐‘“ƒ?)([" .. vowel .. "๐‘“‚]?)",
		function(c, d)
			return c .. (d == "" and "a" or d)
		end
	)

	for word in mw.ustring.gmatch(text, "[๐‘’ฟ-เฅฅa]+") do
		local orig_word = word

		-- The rules below are anchored at the end of the word, so they are
		-- applied to the reversed string, where the end becomes `^`.
		word = rev_string(word)

		-- Word-final "a": the leading "a" of the reversed word is dropped
		-- unless (the last consonant is in special_cons AND is preceded by a
		-- virama AND the resulting cluster is not listed in perm_cl) OR the
		-- two-character test on first .. second matches.
		word = gsub(
			word,
			'^a(๐‘“ƒ?)([' .. all_cons .. '])(.)(.?)',
			function(opt, first, second, third)
				local a = ""
				if match(first, '[' .. special_cons .. ']')
					and match(second, '๐‘“‚')
					and not perm_cl[first..second..third]
					or match(first .. second, '๐‘’จ[๐‘’ฒ๐‘’บ]') then
						a = "a"
				end

				return a .. opt .. first .. second .. third
			end
		)

		-- Replace every medial "a" matched by syncope_pattern with the marker
		-- in the replacement string. Matches can overlap, hence the loop
		-- until nothing matches any more.
		while match(word, syncope_pattern) do
			word = gsub(word, syncope_pattern, "%1%2แตŠ%3%4")
		end

		-- Anusvara. In the reversed word `succ` is the character that FOLLOWS
		-- the anusvara in the original order and `prev` the one that precedes
		-- it. A word-final anusvara after "a" becomes a consonant plus virama;
		-- before a consonant listed in nasal_assim it becomes that table's
		-- value; in every other case it becomes the combining mark.
		word =
			gsub(
			word,
			"(.?)๐‘“€(.)",
			function(succ, prev)
				return succ ..
					(succ .. prev == "a" and "๐‘“‚๐‘’ง" or
						(succ == "" and match(prev, "[" .. vowel .. "]") and "ฬƒ" or nasal_assim[succ] or "ฬƒ")) ..
						prev
			end
		)

		-- Write the processed word back in its original order.
		text = gsub(text, orig_word, rev_string(word))
		-- NOTE(review): this replacement does not depend on `word` and is
		-- repeated on every iteration; it is left inside the loop because
		-- moving it could change which later words are still found in `text`.
		text = gsub(text, "๐‘’–๐‘“‚๐‘’˜", "gy")
	end
	-- Step 3: post-processing of vowel sequences created by the steps above.
	text = gsub(text, "ฤ([iu])ฬƒ", "ฤอ %1")
	text = gsub(text, "uu", "u")
	text = gsub(text, "aรข", "รข")
	text = gsub(text, "ii", "i")
	-- Drop an inserted "a" that directly follows another vowel.
	text = gsub(text, "([iฤซaฤuลซeoรข])a", "%1")
	text = gsub(text, "[<>]", "")
	-- Map each remaining character (with its optional modifier sign) through
	-- conv; characters without an entry are kept unchanged.
	text = gsub(text, ".๐‘“ƒ?", conv)
	return mw.ustring.toNFC(text)
end

return export