|
|
| (56 intermediate revisions by 2 users not shown) |
| Line 15: |
Line 15: |
|
| |
|
| local consonants = { | | local consonants = { |
| ['क']='k', ['ख']='kh', ['ग']='g', ['घ']='gh', ['ङ']='ṅ', | | ['क']='k', |
| ['च']='c', ['छ']='ch', ['ज']='j', ['झ']='jh', ['ञ']='ñ', | | ['ख']='kh', |
| ['ट']='ṭ', ['ठ']='ṭh', ['ड']='ḍ', ['ढ']='ḍh', ['ण']='ṇ', | | ['ग']='g', |
| ['त']='t', ['थ']='th', ['द']='d', ['ध']='dh', ['न']='n', | | ['घ']='gh', |
| ['प']='p', ['फ']='ph', ['ब']='b', ['भ']='bh', ['म']='m', | | ['च']='c', |
| ['य']='y', ['र']='r', ['ल']='l', ['व']='v', ['ळ']='ḷ', | | ['छ']='ch', |
| ['श']='ś', ['ष']='ṣ', ['स']='s', ['ह']='h', | | ['ज']='j', |
| | ['झ']='jh', |
| | ['ञ']='ñ', |
| | ['श']='ś', |
| | ['झ़']='ź', |
| | ['ट']='ṭ', |
| | ['ठ']='ṭh', |
| | ['ड']='ḍ', |
| | ['ढ']='ḍh', |
| | ['ण']='ṇ', |
| | ['ष']='ṣ', |
| | ['ढ़']='ẓ', |
| | ['त']='t', |
| | ['थ']='th', |
| | ['द']='d', |
| | ['ध']='dh', |
| | ['न']='n', |
| | ['स']='s', |
| | ['ज़']='z', |
| | ['प']='p', |
| | ['फ']='ph', |
| | ['ब']='b', |
| | ['भ']='bh', |
| | ['म']='m', |
| | ['य']='y', |
| | ['र']='r', |
| | ['ल']='l', |
| | ['व']='v', |
| | ['ह']='h', |
| } | | } |
|
| |
|
| local diacritics = { | | local diacritics = { |
| ['ा']='ā', ['ि']='i', ['ी']='ī', ['ु']='u', ['ू']='ū', ['ृ']='ṛ', ['ॄ']='ṝ', | | ['ा']='ā', |
| ['ॢ']='ḷ', ['ॣ']='ḹ', ['े']='e', ['ै']='ai', ['ो']='o', ['ौ']='au', ['्']='', | | ['ि']='i', |
| | ['ी']='ī', |
| | ['ु']='u', |
| | ['ू']='ū', |
| | ['ॆ']='ei', |
| | ['े']='ēi', |
| | ['ॅ']='e', |
| | ['ै']='ē', |
| | ['ॊ']='ou', |
| | ['ो']='ōu', |
| | ['ॉ']='o', |
| | ['ौ']='ō', |
| | ['्']='', |
| } | | } |
|
| |
|
| local tt = { | | local tt = { |
| -- vowels | | -- vowels |
| ['अ']='a', ['आ']='ā', ['इ']='i', ['ई']='ī', ['उ']='u', ['ऊ']='ū', ['ऋ']='ṛ', ['ॠ']='ṝ', | | ['अ']='a', |
| ['ऌ']='ḷ', ['ॡ']='ḹ', ['ए']='e', ['ऐ']='ai', ['ओ']='o', ['औ']='au', | | ['आ']='ā', |
| -- chandrabindu
| | ['इ']='i', |
| ['ँ']='m̐', --until a better method is found | | ['ई']='ī', |
| | ['उ']='u', |
| | ['ऊ']='ū', |
| | ['ऎ']='ei', |
| | ['ए']='ēi', |
| | ['ऍ']='e', |
| | ['ऐ']='ē', |
| | ['ऒ']='ou', |
| | ['ओ']='ōu', |
| | ['ऑ']='o', |
| | ['औ']='ō', |
| -- anusvara | | -- anusvara |
| ['ं']='ṃ', --until a better method is found | | ['ं']='̣', |
| ['ꣳ']='ṃ', -- candrabindu virama
| |
| -- visarga | | -- visarga |
| ['ः']='ḥ', | | ['ः']='h', |
| -- avagraha | | -- avagraha |
| ['ऽ']='ʼ', | | ['ऽ']='ʼ', |
| | ['़']='', |
| --numerals | | --numerals |
| ['०']='0', ['१']='1', ['२']='2', ['३']='3', ['४']='4', ['५']='5', ['६']='6', ['७']='7', ['८']='8', ['९']='9', | | ['०']='0', ['१']='1', ['२']='2', ['३']='3', ['४']='4', ['५']='5', ['६']='6', ['७']='7', ['८']='8', ['९']='9', |
| | ['॒']='́', |
| | ['॑']='́', |
| --punctuation | | --punctuation |
| -- ['॥']='.', --double danda | | -- ['॥']='.', --double danda |
| -- ['।']='.', --danda | | -- ['।']='.', --danda |
| --Vedic extensions
| |
| ['ᳵ']='x', ['ᳶ']='f',
| |
| --Om | | --Om |
| ['ॐ']='oṃ', | | ['ॐ']='oṃ', |
| Line 59: |
Line 109: |
| return nil | | return nil |
| end | | end |
| | | text = gsub(text,'([क-ह]़?)'..'([ािीुूृॄॢॣेैोौ्ॅॆॊॉ]?)'..'([अ-औ]?)',function(c, d, e) |
| -- Vedic accent handling
| |
| if text:match(anud) or text:match(svar) or text:match(d_svar) then
| |
| -- insert 'a' after consonants without vowel diacritic or virama
| |
| text = gsub(text, '([क-ह])([ा-्ॢॣ]?)', | |
| function(c,d)
| |
| if d == "" then return c .. 'a' else return c .. d end
| |
| end)
| |
| local vow_list = "aअ-औा-ौॠ-ॣ"
| |
| local vow = "[" .. vow_list .. "]"
| |
| local extra_list = "ःंँ" -- visarga, anusvara, candrabindu
| |
| local extra = "[" .. extra_list .. "]"
| |
| local acc_list = grave .. acute .. svar .. anud .. d_svar
| |
| -- local cons_list = "क-हᳵᳶऽ् \'" -- consonants + avagraha + virama + space + apostrophe (from e.g. bold formatting)
| |
| -- Workaround: the consonants (plus a few other signs, see the commented-out 'local cons_list')
| |
| -- are defined by negating the non-consonants, so as to include
| |
| -- the munged versions of formatting characters (e.g. bold formatting)
| |
| local cons = "[^" .. vow_list .. acc_list .. extra_list .. "।॥१३ॐ]"
| |
| -- independent svarita before udatta or other independent svarita (indicated by १/३ with both svarita and anudatta sign)
| |
| text = gsub(text, "(" .. extra .. "?)" .. anud .. "?[१३][" .. anud .. svar .. d_svar .. "]+(" ..
| |
| cons .. "*" .. vow .. ")(" .. extra .. "?)([" .. svar .. d_svar .. "]?)",
| |
| function(a,b,c,d)
| |
| if d ~= "" then
| |
| return grave .. a .. b .. grave .. c -- 2 × independent svarita
| |
| else
| |
| return grave .. a .. b .. acute .. c -- independent svarita + udatta
| |
| end
| |
| end)
| |
| -- optional: a few non-Rigvedic ways to mark the independent svarita (but compatible with Rigvedic system)
| |
| -- 1) ᳡ (U+1CE1) used by Atharvavedic Śaunakīya Saṃhitā
| |
| -- 2) ᳖ (U+1CD6) used by Śuklayajurveda Mādhyandina-Saṃhitā for 'standard' independent svarita
| |
| -- 3) ᳕ (U+1CD5) used by Śuklayajurveda Mādhyandina-Saṃhitā for 'aggravated' independent svarita (before udatta)
| |
| -- note that the Rigvedic system doesn't distinguish between dependent vs. independent
| |
| -- svarita after udatta (the latter needs manual addition of grave `, see documentation)
| |
| text = gsub(text, "(" .. extra .. "?)[᳡᳖`]", grave .. "%1")
| |
| text = gsub(text, "(" .. extra .. "?)᳕(" .. cons .. "*" .. vow ..")", grave .. "%1%2" .. acute)
| |
| -- initial udatta/svarita
| |
| text = gsub(text, "^(" .. cons .. "*" .. vow .. ")(३?" .. extra .. "?)([^" .. anud .. grave .. extra_list .. "])",
| |
| function(a,b,c)
| |
| if c == svar or c == d_svar then
| |
| return a .. grave .. b -- initial svarita
| |
| else
| |
| return a .. acute .. b .. c -- initial udatta
| |
| end
| |
| end)
| |
| -- the same, after (double) danda or 'om'
| |
| text = gsub(text, "([।॥ॐ]" .. cons .. "*" .. vow .. ")(३?" .. extra .. "?)([^" .. anud .. grave .. extra_list .. "])",
| |
| function(a,b,c)
| |
| if c == svar or c == d_svar then
| |
| return a .. grave .. b -- initial svarita
| |
| else
| |
| return a .. acute .. b .. c -- initial udatta
| |
| end
| |
| end)
| |
| -- in case of anudatta sign not before other anudatta sign (nor before grave accent from १/३)
| |
| text = gsub(text, "(" .. vow .. extra .. "?" .. anud .. cons .. "*" ..
| |
| vow .. ")(" .. extra .. "?)([^" .. anud .. grave .. extra_list .. "])",
| |
| function(a,b,c)
| |
| if c == svar or c == d_svar then
| |
| return a .. grave .. b -- independent svarita
| |
| else
| |
| return a .. acute .. b .. c -- udatta
| |
| end
| |
| end)
| |
| -- and again (excluding acute on next vowel), in case of overlapping patterns (if 'c' above happens to be another vowel with anudatta)
| |
| text = gsub(text, "(" .. vow .. extra .. "?" .. anud .. cons .. "*" ..
| |
| vow .. ")(" .. extra .. "?)([^" .. anud .. grave .. acute .. extra_list .. "])",
| |
| function(a,b,c)
| |
| if c == svar or c == d_svar then
| |
| return a .. grave .. b -- independent svarita
| |
| else
| |
| return a .. acute .. b .. c -- udatta
| |
| end
| |
| end)
| |
| -- the same, string final
| |
| text = gsub(text, "(" .. vow .. extra .. "?" .. anud .. cons .. "*" ..
| |
| vow .. ")(" .. extra .. "?)([" .. svar .. d_svar .. "]?)$",
| |
| function(a,b,c)
| |
| if c ~= "" then
| |
| return a .. grave .. b -- independent svarita
| |
| else
| |
| return a .. acute .. b -- udatta
| |
| end
| |
| end)
| |
| -- unmarked vowel after udatta is also udatta
| |
| text = gsub(text, "(" .. vow .. acute .. "३?" .. extra .. "?" .. cons .. "*" ..
| |
| vow .. ")(" .. extra .. "?[^" .. acc_list .. extra_list .. "])", "%1" .. acute .. "%2")
| |
| -- and again, in case of three udattas in a row
| |
| text = gsub(text, "(" .. vow .. acute .. "३?" .. extra .. "?" .. cons .. "*" ..
| |
| vow .. ")(" .. extra .. "?[^" .. acc_list .. extra_list .. "])", "%1" .. acute .. "%2")
| |
| -- yet again: four udattas in a row occur in RV.1.164.39
| |
| text = gsub(text, "(" .. vow .. acute .. "३?" .. extra .. "?" .. cons .. "*" ..
| |
| vow .. ")(" .. extra .. "?[^" .. acc_list .. extra_list .. "])", "%1" .. acute .. "%2")
| |
| -- the same, string final
| |
| text = gsub(text, "(" .. vow .. acute .. "३?" .. extra .. "?" .. cons .. "*" ..
| |
| vow .. ")(" .. extra .. "?)$", "%1" .. acute .. "%2")
| |
| -- remove remaining anudatta and svarita signs
| |
| text = gsub(text, "[" .. anud .. svar .. d_svar .. "]", "")
| |
|
| |
| text = gsub(text, '.', consonants)
| |
| text = gsub(text, '.', diacritics)
| |
| else -- no Vedic accents
| |
| text = gsub(
| |
| text,
| |
| '([क-ह])'..
| |
| '([ािीुूृॄॢॣेैोौ्]?)'..
| |
| '([अ-औ]?)',
| |
| function(c, d, e)
| |
| if d == "" and e ~= "" then | | if d == "" and e ~= "" then |
| if tt[e] == "i" or tt[e] == "u" then return consonants[c] .. 'a' .. tt[e] .. diaeresis | | if tt[e] == "i" or tt[e] == "u" then return consonants[c] .. 'a' .. tt[e] .. diaeresis |
| Line 178: |
Line 121: |
| end | | end |
| end) | | end) |
| end
| |
| | | |
| | text = gsub(text, "ः॑","́h") |
| text = gsub(text, '([aअ][' .. acute .. grave .. ']?[इउ])', '%1' .. diaeresis) | | text = gsub(text, '([aअ][' .. acute .. grave .. ']?[इउ])', '%1' .. diaeresis) |
| text = gsub(text, '.', tt) | | text = gsub(text, '.', tt) |
| text = gsub(text, 'a([iu])([' .. acute .. grave .. '])', 'a%2%1') | | text = gsub(text, 'a([iu])([' .. acute .. grave .. '])', 'a%2%1') |
| | text = gsub(text, 'e([i])([' .. acute .. grave .. '])', 'e%2%1') |
| | text = gsub(text, 'o([u])([' .. acute .. grave .. '])', 'o%2%1') |
| | text = gsub(text, 'ē([i])([' .. acute .. grave .. '])', 'ē%2%1') |
| | text = gsub(text, 'ō([u])([' .. acute .. grave .. '])', 'ō%2%1') |
| text = gsub(text, " ?[।॥]", ".") | | text = gsub(text, " ?[।॥]", ".") |
| text = gsub(text, "(ā" .. acute .. "3[iu])" .. acute, "%1") -- for pluti vowels | | text = gsub(text, "(ā" .. acute .. "3[iu])" .. acute, "%1") -- for pluti vowels |
| Line 189: |
Line 136: |
| return text | | return text |
| end | | end |
|
| | |
| return export | | return export |