Module:tevo-translit: Difference between revisions

Melinoë (talk | contribs)
No edit summary
Melinoë (talk | contribs)
No edit summary
 
(52 intermediate revisions by 2 users not shown)
Line 15: Line 15:


local consonants = {
local consonants = {
['क']='k', ['ख']='kh', ['ग']='g', ['घ']='gh', ['ङ']='ṅ',
['क']='k',  
['च']='c', ['छ']='ch', ['ज']='j', ['झ']='jh', ['ञ']='ñ',  
['ख']='kh',  
['ट']='ṭ', ['ठ']='ṭh', ['ड']='ḍ', ['ढ']='ḍh', ['ण']='ṇ',  
['ग']='g',  
['']='t', ['']='th', ['']='d', ['']='dh', ['']='n',  
['घ']='gh',  
['']='p', ['']='ph', ['']='b', ['']='bh', ['']='m',
['च']='c',  
['']='y', ['']='r', ['']='l', ['']='v', ['']='',
['छ']='ch',  
['']='ś', ['']='', ['']='s', ['ह']='h',
['ज']='j',  
['झ']='jh',  
['ञ']='ñ',
['श']='ś',
['झ़']='ź',
['ट']='ṭ',  
['ठ']='ṭh',  
['ड']='ḍ',  
['ढ']='ḍh',  
['ण']='ṇ',  
['']='',
['ढ़']='',
['']='t',  
['']='th',  
['']='d',  
['']='dh',  
['']='n',  
['']='s',
['ज़']='z',
['']='p',  
['']='ph',  
['']='b',  
['']='bh',  
['']='m',
['']='y',  
['']='r',  
['']='l',  
['']='v',  
['ह']='h',
}
}


Line 39: Line 67:
['ौ']='ō',   
['ौ']='ō',   
['्']='',
['्']='',
['॔']='́',
['॓']='́',
}
}


Line 65: Line 91:
-- avagraha
-- avagraha
['ऽ']='ʼ',
['ऽ']='ʼ',
['़']='',
--numerals
--numerals
['०']='0', ['१']='1', ['२']='2', ['३']='3', ['४']='4', ['५']='5', ['६']='6', ['७']='7', ['८']='8', ['९']='9',
['०']='0', ['१']='1', ['२']='2', ['३']='3', ['४']='4', ['५']='5', ['६']='6', ['७']='7', ['८']='8', ['९']='9',
['॒']='́',
['॑']='́',
--punctuation         
--punctuation         
--  ['॥']='.', --double danda
--  ['॥']='.', --double danda
Line 80: Line 109:
return nil
return nil
end
end
-- Vedic accent handling
text = gsub(text,'([क-ह]?)'..'([ािीुूृॄॢॣेैोौ्ॅॆॊॉ]?)'..'([अ-औ]?)',function(c, d, e)
if text:match(anud) or text:match(svar) or text:match(d_svar) then
-- insert 'a' after consonants without vowel diacritic or virama
text = gsub(text, '([क-ह])([ा-्ॢॣ]?)',
function(c,d)
if d == "" then return c .. 'a' else return c .. d end
end)
local vow_list = "aअ-औा-ौॠ-ॣ"
local vow = "[" .. vow_list .. "]"
local extra_list = "ःंँ" -- visarga, anusvara, candrabindu
local extra = "[" .. extra_list .. "]"
local acc_list = svar .. anud .. d_svar
-- local cons_list = "क-हᳵᳶऽ् \'" -- consonants + avagraha + virama + space + apostrophe (from e.g. bold formatting)
-- Workaround: the consonants (plus a few other signs, see the commented-out 'local cons_list')
-- are defined by negating the non-consonants, so as to include
-- the munged versions of formatting characters (e.g. bold formatting)
        local cons = "[^" .. vow_list .. acc_list .. extra_list .. "।॥१३ॐ]"
        -- independent svarita before udatta or other independent svarita (indicated by १/३ with both svarita and anudatta sign)
text = gsub(text, "(" .. extra .. "?)" .. anud .. "?[१३][" .. anud .. svar .. d_svar .. "]+(" ..
cons .. "*" .. vow .. ")(" .. extra .. "?)([" .. svar .. d_svar .. "]?)",
function(a,b,c,d)
if d ~= "" then
return grave .. a .. b .. grave .. c -- 2 × independent svarita
else
return grave .. a .. b .. acute .. c -- independent svarita + udatta
end
end)
-- optional: a few non-Rigvedic ways to mark the independent svarita (but compatible with Rigvedic system)
-- 1) ᳡ (U+1CE1) used by Atharvavedic Śaunakīya Saṃhitā
-- 2) ᳖ (U+1CD6) used by Śuklayajurveda Mādhyandina-Saṃhitā for 'standard' independent svarita
-- 3) ᳕ (U+1CD5) used by Śuklayajurveda Mādhyandina-Saṃhitā for 'aggravated' independent svarita (before udatta)
-- note that the Rigvedic system doesn't distinguish between dependent vs. independent
-- svarita after udatta (the latter needs manual addition of grave `, see documentation)
text = gsub(text, "(" .. extra .. "?)[᳡᳖`]", grave .. "%1")
text = gsub(text, "(" .. extra .. "?)᳕(" .. cons .. "*" .. vow ..")", grave .. "%1%2" .. acute)
-- initial udatta/svarita
text = gsub(text, "^(" .. cons .. "*" .. vow .. ")(३?" .. extra .. "?)([^" .. anud .. grave .. extra_list .. "])",
function(a,b,c)
if c == svar or c == d_svar then
return a .. grave .. b -- initial svarita
else
return a .. acute .. b .. c -- initial udatta
end
end)
-- the same, after (double) danda or 'om'
text = gsub(text, "([।॥ॐ]" .. cons .. "*" .. vow .. ")(३?" .. extra .. "?)([^" .. anud .. grave .. extra_list .. "])",
function(a,b,c)
if c == svar or c == d_svar then
return a .. grave .. b -- initial svarita
else
return a .. acute .. b .. c -- initial udatta
end
end)
-- in case of anudatta sign not before other anudatta sign (nor before grave accent from १/३)
text = gsub(text, "(" .. vow .. extra .. "?" .. anud .. cons .. "*" ..
vow .. ")(" .. extra .. "?)([^" .. anud .. grave .. extra_list .. "])",
function(a,b,c)
if c == svar or c == d_svar then 
return a .. grave .. b -- independent svarita
else
return a .. acute .. b .. c -- udatta
end
end)
-- and again (excluding acute on next vowel), in case of overlapping patterns (if 'c' above happens to be another vowel with anudatta)
text = gsub(text, "(" .. vow .. extra .. "?" .. anud .. cons .. "*" ..
vow .. ")(" .. extra .. "?)([^" .. anud .. grave .. acute .. extra_list .. "])",
function(a,b,c)
if c == svar or c == d_svar then 
return a .. grave .. b -- independent svarita
else
return a .. acute .. b .. c -- udatta
end
end)
-- the same, string final
text = gsub(text, "(" .. vow .. extra .. "?" .. anud .. cons .. "*" ..
vow .. ")(" .. extra .. "?)([" .. svar .. d_svar .. "]?)$",
function(a,b,c)
if c ~= "" then 
return a .. grave .. b -- independent svarita
else
return a .. acute .. b -- udatta
end
end)
-- unmarked vowel after udatta is also udatta
text = gsub(text, "(" .. vow .. acute .. "३?" .. extra .. "?" .. cons .. "*" ..
vow .. ")(" .. extra .. "?[^" .. acc_list .. extra_list .. "])", "%1" .. acute .. "%2")
-- and again, in case of three udattas in a row
text = gsub(text, "(" .. vow .. acute .. "३?" .. extra .. "?" .. cons .. "*" ..
vow .. ")(" .. extra .. "?[^" .. acc_list .. extra_list .. "])", "%1" .. acute .. "%2")
-- yet again: 4 udattas in a row occur in RV.1.164.39
text = gsub(text, "(" .. vow .. acute .. "३?" .. extra .. "?" .. cons .. "*" ..
vow .. ")(" .. extra .. "?[^" .. acc_list .. extra_list .. "])", "%1" .. acute .. "%2")
-- the same, string final
text = gsub(text, "(" .. vow .. acute .. "३?" .. extra .. "?" .. cons .. "*" ..
vow .. ")(" .. extra .. "?)$", "%1" .. acute .. "%2")
-- remove remaining anudatta and svarita signs
text = gsub(text, "[" .. anud .. svar .. d_svar .. "]", "")
text = gsub(text, '.', consonants)
text = gsub(text, '.', diacritics)
else -- no Vedic accents
text = gsub(
text,
'([क-ह])'..
'([ािीुूृॄॢॣेैोौ्]?)'..
'([अ-औ]?)',
function(c, d, e)
if d == "" and e ~= "" then         
if d == "" and e ~= "" then         
if tt[e] == "i" or tt[e] == "u" then return consonants[c] .. 'a' .. tt[e] .. diaeresis
if tt[e] == "i" or tt[e] == "u" then return consonants[c] .. 'a' .. tt[e] .. diaeresis
Line 198: Line 121:
end
end
end)
end)
end
text = gsub(text, "ः॑","́h")
text = gsub(text, '([aअ][' .. acute .. grave .. ']?[इउ])', '%1' .. diaeresis)
text = gsub(text, '([aअ][' .. acute .. grave .. ']?[इउ])', '%1' .. diaeresis)
text = gsub(text, '.', tt)
text = gsub(text, '.', tt)
text = gsub(text, 'a([iu])([' .. acute .. grave .. '])', 'a%2%1')
text = gsub(text, 'a([iu])([' .. acute .. grave .. '])', 'a%2%1')
text = gsub(text, 'e([i])([' .. acute .. grave .. '])', 'e%2%1')
text = gsub(text, 'o([u])([' .. acute .. grave .. '])', 'o%2%1')
text = gsub(text, 'ē([i])([' .. acute .. grave .. '])', 'ē%2%1')
text = gsub(text, 'ō([u])([' .. acute .. grave .. '])', 'ō%2%1')
text = gsub(text, " ?[।॥]", ".")
text = gsub(text, " ?[।॥]", ".")
text = gsub(text, "(ā" .. acute .. "3[iu])" .. acute, "%1") -- for pluti vowels
text = gsub(text, "(ā" .. acute .. "3[iu])" .. acute, "%1") -- for pluti vowels