Home
Random

Log in

Settings

About Linguifex
Disclaimers

Module:tevo-translit: Difference between revisions

Language
Watch
View history
View source

@@ Line 15: / Line 15: @@
 local consonants = {
-	['क']='k', ['ख']='kh', ['ग']='g', ['घ']='gh', ['ङ']='ṅ',
+	['क']='k',
-	['च']='c', ['छ']='ch', ['ज']='j', ['झ']='jh', ['ञ']='ñ',
+	['ख']='kh',
-	['ट']='ṭ', ['ठ']='ṭh', ['ड']='ḍ', ['ढ']='ḍh', ['ण']='ṇ',
+	['ग']='g',
-	['त']='t', ['थ']='th', ['द']='d', ['ध']='dh', ['न']='n',
+	['घ']='gh',
-	['प']='p', ['फ']='ph', ['ब']='b', ['भ']='bh', ['म']='m',
+	['च']='c',
-	['य']='y', ['र']='r', ['ल']='l', ['व']='v', ['ळ']='ḷ',
+	['छ']='ch',
-	['श']='ś', ['ष']='ṣ', ['स']='s', ['ह']='h',
+	['ज']='j',
+	['झ']='jh',
+	['ञ']='ñ',
+	['श']='ś',
+	['झ़']='ź',
+	['ट']='ṭ',
+	['ठ']='ṭh',
+	['ड']='ḍ',
+	['ढ']='ḍh',
+	['ण']='ṇ',
+	['ष']='ṣ',
+	['ढ़']='ẓ',
+	['त']='t',
+	['थ']='th',
+	['द']='d',
+	['ध']='dh',
+	['न']='n',
+	['स']='s',
+	['ज़']='z',
+	['प']='p',
+	['फ']='ph',
+	['ब']='b',
+	['भ']='bh',
+	['म']='m',
+	['य']='y',
+	['र']='r',
+	['ल']='l',
+	['व']='v',
+	['ह']='h',
 }
 local diacritics = {
-	['ा']='ā', ['ि']='i', ['ी']='ī', ['ु']='u', ['ू']='ū', ['ृ']='ṛ', ['ॄ']='ṝ',
+	['ा']='ā',
-	['ॢ']='ḷ', ['ॣ']='ḹ', ['े']='e', ['ै']='ai', ['ो']='o', ['ौ']='au',  ['्']='',
+	['ि']='i',
+	['ी']='ī',
+	['ु']='u',
+	['ू']='ū',
+	['ॆ']='ei',
+	['े']='ēi',
+	['ॅ']='e',
+	['ै']='ē',
+	['ॊ']='ou',
+	['ो']='ōu',
+	['ॉ']='o',
+	['ौ']='ō',
+	['्']='',
 }
 local tt = {
 	-- vowels
-	['अ']='a', ['आ']='ā', ['इ']='i', ['ई']='ī', ['उ']='u', ['ऊ']='ū', ['ऋ']='ṛ', ['ॠ']='ṝ',
+	['अ']='a',
-	['ऌ']='ḷ', ['ॡ']='ḹ', ['ए']='e', ['ऐ']='ai', ['ओ']='o', ['औ']='au',
+	['आ']='ā',
-	-- chandrabindu
+	['इ']='i',
-	['ँ']='m̐', --until a better method is found
+	['ई']='ī',
+	['उ']='u',
+	['ऊ']='ū',
+	['ऎ']='ei',
+	['ए']='ēi',
+	['ऍ']='e',
+	['ऐ']='ē',
+	['ऒ']='ou',
+	['ओ']='ōu',
+	['ऑ']='o',
+	['औ']='ō',
 	-- anusvara
-	['ं']='ṃ', --until a better method is found
+	['ं']='̣',
-	['ꣳ']='ṃ',  -- candrabindu virama
 	-- visarga
-	['ः']='ḥ',
+	['ः']='h',
 	-- avagraha
 	['ऽ']='ʼ',
+	['़']='',
 	--numerals
 	['०']='0', ['१']='1', ['२']='2', ['३']='3', ['४']='4', ['५']='5', ['६']='6', ['७']='7', ['८']='8', ['९']='9',
+	['॒']='́',
+	['॑']='́',
 	--punctuation
 --  ['॥']='.', --double danda
 --	['।']='.', --danda
-    --Vedic extensions
-    ['ᳵ']='x', ['ᳶ']='f',
      --Om
      ['ॐ']='oṃ',
@@ Line 59: / Line 109: @@
 		return nil
 	end
+		text = gsub(text,'([क-ह]़?)'..'([ािीुूृॄॢॣेैोौ्ॅॆॊॉ]?)'..'([अ-औ]?)',function(c, d, e)
-	-- Vedic accent handling
-	if text:match(anud) or text:match(svar) or text:match(d_svar) then
-		-- insert 'a' after consonants without vowel diacritic or virama
-		text = gsub(text, '([क-ह])([ा-्ॢॣ]?)',
-			function(c,d)
-				if d == "" then return c .. 'a' else return c .. d end
-			end)
-		local vow_list = "aअ-औा-ौॠ-ॣ"
-		local vow = "[" .. vow_list .. "]"
-		local extra_list = "ःंँ" -- visarga, anusvara, candrabindu
-		local extra = "[" .. extra_list .. "]"
-		local acc_list = grave .. acute .. svar .. anud .. d_svar
---		local cons_list = "क-हᳵᳶऽ् \'" -- consonants + avagraha + virama + space + apostrophe (from e.g. bold formatting)
-		-- Workaround: the consonants (plus a few other signs, see commented-out 'local cons_list')
-		-- are defined by negating the non-consonants, so as to include
-		-- the munged versions of formatting characters (e.g. bold formatting)
-        local cons = "[^" .. vow_list .. acc_list .. extra_list .. "।॥१३ॐ]"
-        -- independent svarita before udatta or other independent svarita (indicated by १/३ with both svarita and anudatta sign)
-		text = gsub(text, "(" .. extra .. "?)" .. anud .. "?[१३][" .. anud .. svar .. d_svar .. "]+(" ..
-			cons .. "*" .. vow .. ")(" .. extra .. "?)([" .. svar .. d_svar .. "]?)",
-			function(a,b,c,d)
-				if d ~= "" then
-					return grave .. a .. b .. grave .. c	-- 2 × independent svarita
-				else
-					return grave .. a .. b .. acute .. c	-- independent svarita + udatta
-				end
-			end)
-		-- optional: a few non-Rigvedic ways to mark the independent svarita (but compatible with Rigvedic system)
-		-- 1) ᳡ (U+1CE1) used by Atharvavedic Śaunakīya Saṃhitā
-		-- 2) ᳖ (U+1CD6) used by Śuklayajurveda Mādhyandina-Saṃhitā for 'standard' independent svarita
-		-- 3) ᳕ (U+1CD5) used by Śuklayajurveda Mādhyandina-Saṃhitā for 'aggravated' independent svarita (before udatta)
-		-- note that the Rigvedic system doesn't distinguish between dependent vs. independent
-		-- svarita after udatta (the latter needs manual addition of grave `, see documentation)
-		text = gsub(text, "(" .. extra .. "?)[᳡᳖`]", grave .. "%1")
-		text = gsub(text, "(" .. extra .. "?)᳕(" .. cons .. "*" .. vow ..")", grave .. "%1%2" .. acute)
-		-- initial udatta/svarita
-		text = gsub(text, "^(" .. cons .. "*" .. vow .. ")(३?" .. extra .. "?)([^" .. anud .. grave .. extra_list .. "])",
-			function(a,b,c)
-				if c == svar or c == d_svar then
-					return a .. grave .. b -- initial svarita
-				else
-					return a .. acute .. b .. c -- initial udatta
-				end
-			end)
-		-- the same, after (double) danda or 'om'
-		text = gsub(text, "([।॥ॐ]" .. cons .. "*" .. vow .. ")(३?" .. extra .. "?)([^" .. anud .. grave .. extra_list .. "])",
-			function(a,b,c)
-				if c == svar or c == d_svar then
-					return a .. grave .. b -- initial svarita
-				else
-					return a .. acute .. b .. c -- initial udatta
-				end
-			end)
-		-- in case of anudatta sign not before other anudatta sign (nor before grave accent from १/३)
-		text = gsub(text, "(" .. vow .. extra .. "?" .. anud .. cons .. "*" ..
-			vow .. ")(" .. extra .. "?)([^" .. anud .. grave .. extra_list .. "])",
-			function(a,b,c)
-				if c == svar or c == d_svar then
-					return a .. grave .. b -- independent svarita
-				else
-					return a .. acute .. b .. c -- udatta
-				end
-			end)
-		-- and again (excluding acute on next vowel), in case of overlapping patterns (if 'c' above happens to be another vowel with anudatta)
-		text = gsub(text, "(" .. vow .. extra .. "?" .. anud .. cons .. "*" ..
-			vow .. ")(" .. extra .. "?)([^" .. anud .. grave .. acute .. extra_list .. "])",
-			function(a,b,c)
-				if c == svar or c == d_svar then
-					return a .. grave .. b -- independent svarita
-				else
-					return a .. acute .. b .. c -- udatta
-				end
-			end)
-		-- the same, string final
-		text = gsub(text, "(" .. vow .. extra .. "?" .. anud .. cons .. "*" ..
-			vow .. ")(" .. extra .. "?)([" .. svar .. d_svar .. "]?)$",
-			function(a,b,c)
-				if c ~= "" then
-					return a .. grave .. b -- independent svarita
-				else
-					return a .. acute .. b -- udatta
-				end
-			end)
-		-- unmarked vowel after udatta is also udatta
-		text = gsub(text, "(" .. vow .. acute .. "३?" .. extra .. "?" .. cons .. "*" ..
-			vow .. ")(" .. extra .. "?[^" .. acc_list .. extra_list .. "])", "%1" .. acute .. "%2")
-		-- and again, in case of three udatta's in a row
-		text = gsub(text, "(" .. vow .. acute .. "३?" .. extra .. "?" .. cons .. "*" ..
-			vow .. ")(" .. extra .. "?[^" .. acc_list .. extra_list .. "])", "%1" .. acute .. "%2")
-		-- yet again: 4 udattas in a row occur in RV.1.164.39
-		text = gsub(text, "(" .. vow .. acute .. "३?" .. extra .. "?" .. cons .. "*" ..
-			vow .. ")(" .. extra .. "?[^" .. acc_list .. extra_list .. "])", "%1" .. acute .. "%2")
-		-- the same, string final
-		text = gsub(text, "(" .. vow .. acute .. "३?" .. extra .. "?" .. cons .. "*" ..
-			vow .. ")(" .. extra .. "?)$", "%1" .. acute .. "%2")
-		-- remove remaining anudatta and svarita signs
-		text = gsub(text, "[" .. anud .. svar .. d_svar .. "]", "")
-		text = gsub(text, '.', consonants)
-		text = gsub(text, '.', diacritics)
-	else -- no Vedic accents
-		text = gsub(
-		text,
-		'([क-ह])'..
-		'([ािीुूृॄॢॣेैोौ्]?)'..
-		'([अ-औ]?)',
-		function(c, d, e)
 			if d == "" and e ~= "" then
 				if tt[e] == "i" or tt[e] == "u" then return consonants[c] .. 'a' .. tt[e] .. diaeresis
@@ Line 178: / Line 121: @@
 			end
 		end)
-	end
+	text = gsub(text, "ः॑","́h")
 	text = gsub(text, '([aअ][' .. acute .. grave .. ']?[इउ])', '%1' .. diaeresis)
 	text = gsub(text, '.', tt)
 	text = gsub(text, 'a([iu])([' .. acute .. grave .. '])', 'a%2%1')
+	text = gsub(text, 'e([i])([' .. acute .. grave .. '])', 'e%2%1')
+	text = gsub(text, 'o([u])([' .. acute .. grave .. '])', 'o%2%1')
+	text = gsub(text, 'ē([i])([' .. acute .. grave .. '])', 'ē%2%1')
+	text = gsub(text, 'ō([u])([' .. acute .. grave .. '])', 'ō%2%1')
 	text = gsub(text, " ?[।॥]", ".")
 	text = gsub(text, "(ā" .. acute .. "3[iu])" .. acute, "%1") -- for pluti vowels
@@ Line 189: / Line 136: @@
 	return text
 end
 return export

Retrieved from "https://linguifex.com/wiki/Module:tevo-translit"

Languages

This page is not available in other languages.

Linguifex

Privacy policy
About Linguifex
Disclaimers
Desktop