-- Module:mai-Tirh-translit
-- Documentation for this module may be created at Module:mai-Tirh-translit/doc
-- Transliteration for Maithili in Tirhuta script
local export = {}
local gsub = mw.ustring.gsub
local match = mw.ustring.match
-- Lookup table from a source character (or a two-character sequence) to the
-- string that replaces it. export.tr passes this table as the replacement
-- argument of its final gsub call, so each match is looked up here as a key.
-- NOTE(review): many keys below render as the same glyph and some literals are
-- split across lines, which suggests the file's encoding was damaged in
-- transit - verify every entry against the upstream module before relying on it.
local conv = {
-- consonants
["๐"] = "k", ["๐"] = "kh", ["๐"] = "g", ["๐"] = "gh", ["๐"] = "แน
",
["๐"] = "c", ["๐"] = "ch", ["๐"] = "j", ["๐"] = "jh", ["๐"] = "รฑ",
["๐"] = "แนญ", ["๐"] = "แนญ", ["๐"] = "แธ", ["๐"] = "แธh", ["๐"] = "แน",
["๐"] = "t", ["๐"] = "th", ["๐ "] = "d", ["๐ก"] = "dh", ["๐ข"] = "n",
["๐ฃ"] = "p", ["๐ค"] = "ph", ["๐ฅ"] = "b", ["๐ฆ"] = "bh", ["๐ง"] = "m",
["๐จ"] = "y", ["๐ฉ"] = "r", ["๐ช"] = "l", ["๐ซ"] = "v", ["๐ช๐"] = "แธท",
["๐ฌ"] = "ล", ["๐ญ"] = "แนฃ", ["๐ฎ"] = "s", ["๐ฏ"] = "h",
["๐๐"] = "แน", ["๐๐"] = "แนh",
-- vowel diacritics
["๐ฑ"] = "i", ["๐ณ"] = "u", ["๐น"] = "ฤ", ["๐บ"] = "e", ["๐ผ"] = "ล", ["๐ฝ"] = "o", ["๐ฐ"] = "ฤ", ["๐ฒ"] = "ฤซ", ["๐ด"] = "ลซ",
["๐ต"] = "rฬฅ", ["๐ถ"] = "rฬฅฬ", ["๐ป"] = "ai", ["๐พ"] = "au", ["๐ท"] = "lฬฅ", ["๐ธ"] = "lฬฅฬ",
-- vowels
["๐"] = "a", ["๐"] = "ฤ", ["๐"] = "i", ["๐"] = "ฤซ", ["๐
"] = "u", ["๐"] = "ลซ",
["๐"] = "rฬฅ", ["๐"] = "rฬฅฬ", ["๐"] = "lฬฅ", ["๐"] = "lฬฅฬ",
["๐"] = "ฤ", ["๐"] = "ai", ["๐"] = "ล", ["๐"] = "au",
["เฅจ"] = "โ", ["๐๐บ"] = "รช",["๐๐ฝ"] = "รด",
["เคฝ"] = "ยฒ", -- avagraha
["๐ฟ"] = "ฬ", -- chandrabindu
["๐"] = "ฬ", -- anusvara
["๐
"] = "ฬ", -- gvang
["๐"] = "แธฅ", -- visarga
["๐"] = "", -- virama
["๐"] = "omฬ", -- om
-- numerals
["๐"] = "0", ["๐"] = "1", ["๐"] = "2", ["๐"] = "3", ["๐"] = "4", ["๐"] = "5", ["๐"] = "6", ["๐"] = "7", ["๐"] = "8", ["๐"] = "9",
-- punctuation
["เฅค"] = ".", -- danda
["เฅฅ"] = ".", -- double danda
["+"] = "", -- compound separator
-- abbreviation sign
["เฅฐ"] = "."
}
-- Lookup consulted by export.tr as nasal_assim[succ]: when the character next
-- to the sign rewritten there is a key of this table, the mapped value is
-- emitted in place of that sign. Presumably each row maps a class of
-- consonants to its homorganic nasal - confirm against the upstream module.
-- NOTE(review): the keys in the first three rows render identically here
-- (likely encoding damage), so the row contents cannot be checked from this copy.
local nasal_assim = {
["๐"] = "๐", ["๐"] = "๐", ["๐"] = "๐", ["๐"] = "๐",
["๐"] = "๐", ["๐"] = "๐", ["๐"] = "๐", ["๐"] = "๐", ["๐"] = "๐",
["๐"] = "๐", ["๐"] = "๐", ["๐"] = "๐", ["๐"] = "๐",
["๐"] = "๐ข", ["๐"] = "๐ข", ["๐ "] = "๐ข", ["๐ก"] = "๐ข", ["๐ข"] = "๐ข",
["๐ฃ"] = "๐ง", ["๐ค"] = "๐ง", ["๐ฅ"] = "๐ง", ["๐ฆ"] = "๐ง", ["๐ง"] = "๐ง"
}
-- Set of three-character sequences checked by export.tr as
-- perm_cl[first..second..third] (characters taken from the reversed word).
-- A hit makes the first alternative of the word-final test there fail, so that
-- alternative does not retain the final "a".
-- Presumably "permitted clusters" - confirm the intended meaning upstream.
local perm_cl = {
["๐ง๐๐ช"] = true
}
-- Character lists that are spliced into [...] pattern classes below and in
-- export.tr. all_cons is used wherever a consonant is matched; special_cons is
-- only used by the word-final test in export.tr.
-- NOTE(review): the literals appear to have suffered encoding damage (repeated
-- identical glyphs, one literal split across two lines) - verify upstream.
local all_cons, special_cons = "๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐ ๐ก๐ข๐ฃ๐ค๐ฅ๐ฆ๐ง๐จ๐ฉ๐ช๐ซ๐ฎ๐ฌ๐ญ๐ฏ๐ฉ", "๐๐๐๐๐๐ฎ๐ฏ๐จ๐๐๐๐๐ฃ๐ ๐๐ก๐ฉ๐ญ๐๐ช๐ซ๐ฅ๐ฆ๐๐๐ข๐ฌ๐ง"
-- vowel starts with the ASCII letter "a" (the inherent vowel inserted by
-- export.tr) followed by further signs; vowel_sign is a second list that is
-- always used together with vowel in the pattern below.
local vowel, vowel_sign = "a๐ฑ๐ณ๐ต๐น๐ผ๐ฐ๐ฒ๐ด๐ป๐พ๐ฐ๐ป๐ฝ๐ฐ๐ฐ๐ฝ๐บ", "๐๐๐
๐๐๐๐๐๐๐๐๐บ๐๐ฝ๐ซ๐๐จ๐๐๐๐๐๐๐"
-- Pattern with four captures around a literal "a":
--   %1 one character from vowel/vowel_sign
--   %2 an optional sign followed by one consonant
--   (literal "a" - the part export.tr replaces)
--   %3 an optional sign followed by one consonant
--   %4 an optional mark followed by one character from vowel/vowel_sign
-- export.tr applies it to the reversed word.
local syncope_pattern = '([' .. vowel .. vowel_sign .. '])(๐?[' .. all_cons .. '])a(๐?[' .. all_cons .. '])([๐๐ฟ]?[' .. vowel .. vowel_sign .. '])'
-- Return `text` with its characters in reverse order. Characters are the units
-- that mw.ustring.len counts and mw.ustring.sub extracts, so multi-byte
-- characters are moved as a whole rather than byte by byte.
local function rev_string(text)
	local total = mw.ustring.len(text)
	local pieces = {}
	-- Walk forwards and drop each character into its mirrored slot.
	for pos = 1, total do
		pieces[total - pos + 1] = mw.ustring.sub(text, pos, pos)
	end
	return table.concat(pieces)
end
-- Transliterate `text` (Maithili in Tirhuta script, per the header of this file).
--   text     string to transliterate.
--   lang, sc accepted but never read inside this function.
-- Returns the processed text after mw.ustring.toNFC.
-- NOTE(review): the pattern literals below appear to have suffered encoding
-- damage (several distinct signs render as the same glyph); the descriptions
-- here only state what the visible code does - verify against upstream.
function export.tr(text, lang, sc)
-- Step 1: after every consonant (plus an optional following sign) append the
-- ASCII letter "a" unless the next character is already in the class built from
-- `vowel` and the extra sign listed in the pattern; in that case the character
-- is kept unchanged.
text =
gsub(
text,
"([" .. all_cons .. "]๐?)([" .. vowel .. "๐]?)",
function(c, d)
return c .. (d == "" and "a" or d)
end
)
-- Step 2: process each run of characters matched by the class below.
for word in mw.ustring.gmatch(text, "[๐ฟ-เฅฅa]+") do
local orig_word = word
-- Work on the reversed word so that patterns anchored with ^ act on its end.
word = rev_string(word)
-- Word-final "a": the pattern consumes the leading "a" of the reversed word
-- and the callback decides whether to put it back.
word = gsub(
word,
'^a(๐?)([' .. all_cons .. '])(.)(.?)',
function(opt, first, second, third)
local a = ""
-- Keep the "a" when either
--   (a) `first` is in special_cons, `second` matches the sign tested below and
--       the three-character sequence is not listed in perm_cl, or
--   (b) first..second matches the second pattern.
-- `and` binds tighter than `or`, so this is (a) or (b).
if match(first, '[' .. special_cons .. ']')
and match(second, '๐')
and not perm_cl[first..second..third]
or match(first .. second, '๐จ[๐ฒ๐บ]') then
a = "a"
end
return a .. opt .. first .. second .. third
end
)
-- Repeat until the pattern no longer matches: each pass replaces the literal
-- "a" between captures 2 and 3 with the marker in the replacement string.
while match(word, syncope_pattern) do
word = gsub(word, syncope_pattern, "%1%2แต%3%4")
end
-- Rewrite the sign sitting between `succ` (optional) and `prev` in the reversed
-- word. The replacement for the sign is chosen as follows:
--   * succ..prev == "a" (i.e. succ is empty and prev is "a"): the first literal;
--   * succ is empty and prev is in `vowel`: the second literal;
--   * otherwise nasal_assim[succ], falling back to the last literal.
word =
gsub(
word,
"(.?)๐(.)",
function(succ, prev)
return succ ..
(succ .. prev == "a" and "๐๐ง" or
(succ == "" and match(prev, "[" .. vowel .. "]") and "ฬ" or nasal_assim[succ] or "ฬ")) ..
prev
end
)
-- NOTE(review): escaped_orig_word is assigned here but never read; the next
-- line passes orig_word itself to gsub as the pattern. Confirm whether the
-- escaped form was meant to be used there.
local escaped_orig_word = gsub(orig_word, "%+", "")
-- Put the processed word (reversed back to normal order) into the text.
text = gsub(text, orig_word, rev_string(word))
-- NOTE(review): this substitution does not depend on `word` and is repeated on
-- every iteration of the loop.
text = gsub(text, "๐๐๐", "gy")
end
-- Step 3: clean-up substitutions on the whole text. The second, fourth and
-- fifth collapse "uu" to "u", "ii" to "i", and drop an "a" that directly
-- follows one of the listed vowel letters; the last one strips "<" and ">".
text = gsub(text, "ฤ([iu])ฬ", "ฤอ %1")
text = gsub(text, "uu", "u")
text = gsub(text, "aรข", "รข")
text = gsub(text, "ii", "i")
text = gsub(text, "([iฤซaฤuลซeoรข])a", "%1")
text = gsub(text, "[<>]", "")
-- Step 4: look up every character, together with an optional following sign, in
-- conv. With a table replacement, string.gsub semantics leave a match unchanged
-- when the table has no entry for it.
text = gsub(text, ".๐?", conv)
return mw.ustring.toNFC(text)
end
return export