Module:tevo-translit: Difference between revisions
No edit summary |
No edit summary |
||
| (57 intermediate revisions by 2 users not shown) | |||
| Line 1: | Line 1: | ||
-- Table of functions exported by this module.
local export = {}

-- Shared string helpers; gsub and char are taken from this module.
local m_str_utils = require("Module:string utilities")

local gsub = m_str_utils.gsub
local toNFC = mw.ustring.toNFC
local U = m_str_utils.char

-- Characters built from their code points.
-- grave, acute and diaeresis are used by export.tr when placing accents;
-- svar, anud and d_svar are not referenced by the code visible in this module.
local grave = U(0x300)
local acute = U(0x301)
local diaeresis = U(0x308)
local svar = U(0x951)
local anud = U(0x952)
local d_svar = U(0x1CDA) -- double svarita, sometimes used for long vowel with svarita
-- Latin transliteration of each consonant letter, without any vowel.
-- export.tr looks up the text matched by '[क-ह]़?' (a letter optionally
-- followed by the combining dot ़) in this table and then appends either
-- 'a' or the value of the following vowel sign. A matched letter with no
-- entry here has no transliteration.
local consonants = {
	['क']='k',
	['ख']='kh',
	['ग']='g',
	['घ']='gh',
	['च']='c',
	['छ']='ch',
	['ज']='j',
	['झ']='jh',
	['ञ']='ñ',
	['श']='ś',
	['झ़']='ź',
	['ट']='ṭ',
	['ठ']='ṭh',
	['ड']='ḍ',
	['ढ']='ḍh',
	['ण']='ṇ',
	['ष']='ṣ',
	['ढ़']='ẓ',
	['त']='t',
	['थ']='th',
	['द']='d',
	['ध']='dh',
	['न']='n',
	['स']='s',
	['ज़']='z',
	['प']='p',
	['फ']='ph',
	['ब']='b',
	['भ']='bh',
	['म']='m',
	['य']='y',
	['र']='r',
	['ल']='l',
	['व']='v',
	['ह']='h',
}
-- Latin value of each dependent sign that can follow a consonant.
-- export.tr appends the value found here to the consonant's transliteration
-- in place of the default 'a'; the empty string for '्' therefore leaves the
-- consonant without any vowel.
local diacritics = {
	['ा']='ā',
	['ि']='i',
	['ी']='ī',
	['ु']='u',
	['ू']='ū',
	['ॆ']='ei',
	['े']='ēi',
	['ॅ']='e',
	['ै']='ē',
	['ॊ']='ou',
	['ो']='ōu',
	['ॉ']='o',
	['ौ']='ō',
	['्']='',
}
-- Single-character replacements. export.tr uses this table in two ways:
-- to look up an independent vowel that directly follows a consonant, and as
-- the replacement table of a character-by-character gsub over the whole text.
-- Characters without an entry are left as they are by that gsub.
local tt = {
	-- vowels
	['अ']='a',
	['आ']='ā',
	['इ']='i',
	['ई']='ī',
	['उ']='u',
	['ऊ']='ū',
	['ऎ']='ei',
	['ए']='ēi',
	['ऍ']='e',
	['ऐ']='ē',
	['ऒ']='ou',
	['ओ']='ōu',
	['ऑ']='o',
	['औ']='ō',
	-- anusvara
	['ं']='̣',
	-- visarga
	['ः']='h',
	-- avagraha
	['ऽ']='ʼ',
	-- a combining dot ़ left over after consonant handling is dropped
	['़']='',
	--numerals
	['०']='0', ['१']='1', ['२']='2', ['३']='3', ['४']='4', ['५']='5', ['६']='6', ['७']='7', ['८']='8', ['९']='9',
	-- accent signs, replaced by a combining accent mark
	['॒']='́',
	['॑']='́',
	--punctuation
	-- ['॥']='.', --double danda
	-- ['।']='.', --danda
	--Om
	['ॐ']='oṃ',
	--reconstructed
	-- ['*'] = '',
}
--- Transliterate Devanagari text into Latin script.
--
-- Steps, in order:
--   1. each consonant cluster (consonant, optional vowel sign, optional
--      independent vowel) is replaced using `consonants`, `diacritics`
--      and `tt`;
--   2. every remaining character is replaced through `tt`;
--   3. accent marks are moved onto the first vowel of a vowel + i/u sequence,
--      dandas become full stops, and the result is normalised to NFC.
--
-- @param text  string to transliterate
-- @param lang  language code; not used by this function
-- @param sc    script code; anything other than "Deva" is rejected
-- @return the transliteration as an NFC string, or nil if sc is not "Deva"
function export.tr(text, lang, sc)
	if sc ~= "Deva" then
		return nil
	end

	-- Character class matching either combining accent.
	local accent = '[' .. acute .. grave .. ']'

	text = gsub(text, '([क-ह]़?)'..'([ािीुूृॄॢॣेैोौ्ॅॆॊॉ]?)'..'([अ-औ]?)', function(c, d, e)
		local cons = consonants[c]
		-- No vowel sign means the consonant carries the default 'a'.
		local sign = d == "" and 'a' or diacritics[d]
		local vowel = e == "" and "" or tt[e]
		-- The patterns above match more characters than the tables define
		-- (for example ङ, ृ or ऋ). Returning nil makes gsub keep the
		-- original cluster instead of failing on a nil concatenation.
		if not (cons and sign and vowel) then
			return nil
		end
		local result = cons .. sign .. vowel
		-- A bare i/u after the default 'a' gets a diaeresis so that it is
		-- not read together with the 'a'.
		if d == "" and (vowel == "i" or vowel == "u") then
			result = result .. diaeresis
		end
		return result
	end)

	-- Visarga carrying the accent sign: the accent goes before the 'h'.
	text = gsub(text, "ः॑","́h")
	-- Same diaeresis rule for an a/अ followed by an independent इ or उ.
	text = gsub(text, '([aअ]' .. accent .. '?[इउ])', '%1' .. diaeresis)
	-- Everything not consumed above is mapped character by character.
	text = gsub(text, '.', tt)

	-- Move an accent written after the second vowel onto the first one.
	-- The pairs are applied in this fixed order.
	local accent_shifts = {
		{ 'a', '[iu]' },
		{ 'e', '[i]' },
		{ 'o', '[u]' },
		{ 'ē', '[i]' },
		{ 'ō', '[u]' },
	}
	for _, shift in ipairs(accent_shifts) do
		local first, second = shift[1], shift[2]
		text = gsub(text, first .. '(' .. second .. ')(' .. accent .. ')', first .. '%2%1')
	end

	text = gsub(text, " ?[।॥]", ".")
	text = gsub(text, "(ā" .. acute .. "3[iu])" .. acute, "%1") -- for pluti vowels
	text = toNFC(text)
	return text
end

return export