Module:sa-Taml-translit

From Linguifex
Jump to navigation Jump to search

This module transliterates Sanskrit-language text written in the Tamil script. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:sa-Taml-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the transliteration fails, returns nil.

-- Module table: public functions are attached to it and it is returned at the end of the file.
local export = {}
--- "Drop carrier": remove every carrier consonant from a string.
-- Literals in this file write combining signs on top of a carrier letter
-- (Tamil க or Grantha 𑌕); this helper strips both carriers so that only the
-- bare signs remain.
-- @param text  string possibly containing carrier letters
-- @return      the string with all carriers removed (exactly one value; the
--              match count that string.gsub also produces is discarded, so
--              the result is safe to pass as the last argument of a call)
local function dc(text)
	local stripped = string.gsub(text, 'க', '')
	stripped = string.gsub(stripped, '𑌕', '')
	return stripped
end
 
-- Romanisation of consonant letters, either bare or followed by a spacing
-- superscript/subscript digit that selects the Sanskrit sound.
local consonants = {
	-- Bare letters.
	['க'] = 'k', ['ங'] = 'ṅ',
	['ச'] = 'c', ['ஞ'] = 'ñ',
	['ட'] = 'ṭ', ['ண'] = 'ṇ',
	['த'] = 't', ['ந'] = 'n',
	['ப'] = 'p', ['ம'] = 'm',
	['ய'] = 'y', ['ர'] = 'r', ['ல'] = 'l', ['வ'] = 'v',
	['ழ'] = 'ḻ', ['ள'] = 'ḷ', ['ற'] = 'ṟ',
	-- Same romanisation as ந, so the two are told apart only by context.
	['ன'] = 'n',
	['ஶ'] = 'ś', ['ஜ'] = 'j', ['ஷ'] = 'ṣ',
	['ஸ'] = 's', ['ஹ'] = 'h',
	-- Disabled combinations with a preceding ஃ:
	--	['ஃப']='f' , ['ஃஜ']='z', ['ஃஸ']='ks' , ['ஃக ']='x',
	['ஃ'] = 'ḥ', ['ௐ'] = 'о̄m',

	-- Letter + superscript digit.  Deliberately liberal about what is accepted.
	['க¹'] = 'k', ['க²'] = 'kh', ['க³'] = 'g', ['க⁴'] = 'gh',
	['ச¹'] = 'c', ['ச²'] = 'ch', ['ச³'] = 'j', ['ச⁴'] = 'jh',
	['ஜ¹'] = 'j', ['ஜ²'] = 'jh',
	['ட¹'] = 'ṭ', ['ட²'] = 'ṭh', ['ட³'] = 'ḍ', ['ட⁴'] = 'ḍh',
	['த¹'] = 't', ['த²'] = 'th', ['த³'] = 'd', ['த⁴'] = 'dh',
	['ப¹'] = 'p', ['ப²'] = 'ph', ['ப³'] = 'b', ['ப⁴'] = 'bh',
	['ம²'] = 'ṃ', ['ம³'] = 'm̐',
	-- Ⓡ and Ⓛ are placeholders that are resolved later via the syll2 table.
	['ர²'] = 'Ⓡ', ['ல²'] = 'Ⓛ',

	-- Letter + subscript digit.  Same values as the superscript forms.
	['க₁'] = 'k', ['க₂'] = 'kh', ['க₃'] = 'g', ['க₄'] = 'gh',
	['ச₁'] = 'c', ['ச₂'] = 'ch', ['ச₃'] = 'j', ['ச₄'] = 'jh',
	['ஜ₁'] = 'j', ['ஜ₂'] = 'jh',
	['ட₁'] = 'ṭ', ['ட₂'] = 'ṭh', ['ட₃'] = 'ḍ', ['ட₄'] = 'ḍh',
	['த₁'] = 't', ['த₂'] = 'th', ['த₃'] = 'd', ['த₄'] = 'dh',
	['ப₁'] = 'p', ['ப₂'] = 'ph', ['ப₃'] = 'b', ['ப₄'] = 'bh',
	['ம₂'] = 'ṃ', ['ம₃'] = 'm̐',
	['ர₂'] = 'Ⓡ', ['ல₂'] = 'Ⓛ',
}

-- Dependent vowel signs and what they contribute after a consonant.
-- Long ē/ō are still kept apart from e/o here; export.tr merges them afterwards.
local diacritics = {
	-- No sign at all: the inherent vowel.
	[''] = 'a',
	-- Pulli: suppresses the inherent vowel "a".
	['்'] = '',
	-- Tamil vowel signs.
	['ா'] = 'ā',
	['ி'] = 'i', ['ீ'] = 'ī',
	['ு'] = 'u', ['ூ'] = 'ū',
	['ெ'] = 'e', ['ே'] = 'ē', ['ை'] = 'ai',
	['ொ'] = 'o', ['ோ'] = 'ō', ['ௌ'] = 'au',
	-- Grantha signs used for the syllabic consonants.
	['𑍃'] = 'ṛ', ['𑍄'] = 'ṝ',
	['𑍢'] = 'ḷ', ['𑍣'] = 'ḹ',
}

-- Characters that are not handled by the consonant pattern in export.tr and
-- are converted one code point at a time afterwards.
local nonconsonants = {
	-- Independent vowels.  The leading ’ is a marker that export.tr removes at
	-- the start of the text and after whitespace or punctuation.
	['அ']='’a' , ['ஆ']='’ā' , ['இ']='’i' , ['ஈ']='’ī' , ['உ']='’u' , ['ஊ']='’ū' , 
	['எ']='’e' , ['ஏ']='’ē' , ['ஐ']='’ai' , ['ஒ']='’o' , ['ஓ']='’ō' , ['ஔ']='’au' , ['ௐ']='о̄m',
	-- Other symbols.  Combining signs are written on a carrier and passed
	-- through dc() so that the key is the bare sign.
	['ஃ']='ḥ', [dc('கஂ')] = 'ṃ', ['𑌃'] = 'ḥ',
	-- Grantha anusvara: export.tr's anusvara pattern accepts it next to the
	-- Tamil one and passes it through unchanged, so it needs a mapping here
	-- or it would be left untransliterated in the output.
	[dc('𑌕𑌂')] = 'ṃ',
	-- Syllabic consonants (Grantha independent forms).
	['𑌋']='ṛ', ['𑍠']='ṝ', ['𑌌']='ḷ', ['𑍡']='ḹ',
}

-- Second-pass table: the placeholders Ⓡ / Ⓛ (emitted by the consonants table
-- for digit-marked ர / ல) followed by u or ū become syllabic consonants.
local syll2 = {
	['Ⓡu'] = 'ṛ',
	['Ⓡū'] = 'ṝ',
	['Ⓛu'] = 'ḷ',
	['Ⓛū'] = 'ḹ',
}

-- translit any words or phrases
--- Transliterate any word or phrase.
-- @param text  string to transliterate
-- @param lang  language code; not consulted by this implementation
-- @param sc    script code; not consulted by this implementation
-- @return      the transliterated string
function export.tr(text, lang, sc)
	-- Special vowel-killing diacritics.  The character class must contain only
	-- the bare marks, so the carrier letters are stripped with dc() before the
	-- pattern is assembled (a literal "dc(...)" inside the pattern string would
	-- not be evaluated and would add d, c, parentheses and the carriers to the
	-- class).
	local vowel_killers = dc('கஂ⃰𑌕𑌁')
	text = mw.ustring.gsub(text, '[மயலவ][' .. vowel_killers .. ']', {
		['மஂ'] = "ṃ", -- Desirable to get more and independent examples.
		['ம⃰'] = " ṃ ", 
		['ய𑌁'] = "y̐", ['ல𑌁'] = "l̐", ['வ𑌁'] = "v̐", 
	})

	-- Pattern pieces.  An optional spacing digit ("nukta") is accepted in four
	-- positions: after the consonant, after the vowel sign, after a second
	-- vowel-sign part, and after the anusvara.
	local nukta = '([¹²³⁴₁₂₃₄]?)'
	local anusvara = dc('([கஂ𑌕𑌂]?)')
	local vowel = dc('([கா-க்𑌕𑍃𑌕𑍄𑌕𑍢𑌕𑍣]?)')
	text = mw.ustring.gsub(
		text,
		'(ஃ?)([க-ஹ])'..nukta..vowel..nukta..'([ாௗ]?)'..nukta..anusvara..nukta,
		function(h, c, n1, d, n2, d2, n3, av, n4)
			-- Whichever digit slot was filled, attach it to the consonant.
			local cn = c .. n1 .. n2 .. n3 .. n4
			-- A vowel sign typed in two parts is recomposed so that it can be
			-- looked up as a single sign.
			local da = d..d2
			if d2 ~= "" then da = mw.ustring.toNFC(da) end
			-- Prefer an entry for the whole ஃ + consonant sequence; otherwise
			-- convert the two separately, keeping unknown consonants as-is.
			-- The anusvara is passed through and converted in the next step.
			return (consonants[h..cn] or (consonants[h] or "") .. (consonants[cn] or cn)) ..
					(diacritics[da] or da) .. av
		end)
	
	-- Everything that is not part of a consonant cluster, one character at a time.
	text = mw.ustring.gsub(text, '.', nonconsonants)
	-- Obliterate the Tamil length contrast in the mid vowels.
	text = string.gsub(text, 'ē', 'e')
	text = string.gsub(text, 'ō', 'o')
	-- Resolve the Ⓡ/Ⓛ placeholders followed by u/ū into syllabic consonants.
	text = mw.ustring.gsub(text, '[ⓇⓁ][uū]', syll2)
	-- Drop the independent-vowel marker at the start of the text and after
	-- whitespace or punctuation; it is kept elsewhere.
	text = string.gsub(text, '^’', '')
	text = mw.ustring.gsub(text, '([%s%p])’', '%1')
	
	return text
end
 
-- Hand the module table (carrying export.tr) back to the code that loads this module.
return export