Module:ta-colloquial-translit

From Linguifex
Jump to navigation Jump to search

Documentation for this module may be created at Module:ta-colloquial-translit/doc

-- Module table: functions attached to it are what the `return export` at the end of the file exposes.
local export = {}
 
-- Latin equivalents of Tamil consonant letters.
-- export.tr looks keys up as (optional ஃ) .. (one consonant letter), so a key
-- must consist of exactly those characters: any stray whitespace inside a key
-- makes the entry unreachable.
local consonants = {
	['க']='k' , ['ங']='ṅ' , ['ச']='c' , ['ஞ']='ñ' , ['ட']='ṭ' , ['ண']='ṇ' , ['த']='t' ,
	['ந']='n' , ['ப']='p', ['ம']='m' , ['ய']='y' , ['ர']='r' , ['ல']='l' , ['வ']='v' ,
	['ழ']='ḻ' , ['ள']='ḷ' , ['ற']='ṟ' , ['ன']='ṉ' , ['ஶ']='ś' , ['ஜ']='j' , ['ஷ']='ṣ' , 
	-- ஃ + consonant pairs that get a dedicated Latin letter
	['ஸ']='s' , ['ஹ']='h' , ['ஃப']='f' , ['ஃஜ']='z', ['ஃஸ']='ks' , ['ஃக']='x',
	-- ஃ on its own is used by export.tr as the fallback prefix for any other ஃ + consonant pair
	['ஃ']='ḥ' , ['ௐ']='о̄m',
}

-- Latin vowel that each dependent vowel sign contributes after a consonant.
local diacritics = {
	["ா"] = "ā",
	["ி"] = "i",
	["ீ"] = "ī",
	["ு"] = "u",
	["ூ"] = "ū",
	["ெ"] = "e",
	["ே"] = "ē",
	["ை"] = "ai",
	["ொ"] = "o",
	["ோ"] = "ō",
	["ௌ"] = "au",
	-- halant: suppresses the inherent vowel "a"
	["்"] = "",
	-- no sign at all: the consonant keeps its inherent vowel
	[""] = "a",
}

-- Latin equivalents of characters that are not consonants.
-- Every independent vowel is prefixed with ’; export.tr removes that mark
-- again where the vowel begins a word.
local nonconsonants = {
	-- independent vowels
	["அ"] = "’a",
	["ஆ"] = "’ā",
	["இ"] = "’i",
	["ஈ"] = "’ī",
	["உ"] = "’u",
	["ஊ"] = "’ū",
	["எ"] = "’e",
	["ஏ"] = "’ē",
	["ஐ"] = "’ai",
	["ஒ"] = "’o",
	["ஓ"] = "’ō",
	["ஔ"] = "’au",
	-- NOTE(review): export.tr only looks up characters in the range அ-ஔ, so
	-- this entry does not appear to be reached — confirm.
	["ௐ"] = "о̄m",
	-- other symbols (disabled)
--	['ஃ']='' , ['ௐ']='о̄m',
}

-- Pattern fragments shared by the respelling rules in export.tr.
local CONS = "[kgṅcñṭḍṇtdnpbmyrlvḻḷṟṉśjṣshfzḥ]"	-- romanised consonant letters
local VOWEL = "[aeiouāīūēō]"	-- short and long vowels
local SHORT_VOWEL = "[aeiou]"
local ONSET = "(" .. CONS .. "?)"	-- captured optional word-initial consonant
local BOUNDARY = "([%s%p])"	-- captured whitespace or punctuation character

-- Applies one substitution twice and returns only the resulting string.
-- A single gsub pass consumes the last character of each match, so when a
-- pattern both starts and ends on the same kind of context character (a vowel,
-- or a word separator) the match that would begin on that shared character is
-- skipped. The skipped matches never touch each other, so one extra pass
-- catches all of them.
local function gsub_twice(text, pattern, repl)
	text = mw.ustring.gsub(text, pattern, repl)
	return (mw.ustring.gsub(text, pattern, repl))
end

-- Transliterates any word or phrase.
-- Tamil letters in `text` are first replaced by Latin letters using the
-- consonants, diacritics and nonconsonants tables; the result is then passed
-- through a fixed sequence of respelling rules, applied in order.
-- @param text  string to transliterate
-- @param lang  accepted for interface compatibility; not used here
-- @param sc    accepted for interface compatibility; not used here
-- @return the transliterated string
function export.tr(text, lang, sc)
	local gsub = mw.ustring.gsub

	-- Consonant (optionally preceded by ஃ) followed by an optional vowel sign or halant.
	text = gsub(
		text,
		'(ஃ?)([க-ஹ])([ா-்]?)',
		function(h, c, d)
			local latin = consonants[h .. c]
			if not latin then
				-- The character ranges above also cover code points that have no
				-- table entry; pass those through unchanged instead of raising
				-- "attempt to concatenate a nil value".
				latin = (consonants[h] or h) .. (consonants[c] or c)
			end
			return latin .. (diacritics[d] or d)
		end)

	-- Independent vowel letters.
	text = gsub(text, '[அ-ஔ]', nonconsonants)

	-- Remove the ’ prefix where the vowel starts the text or follows a separator.
	text = gsub(text, '^’', '')
	text = gsub(text, BOUNDARY .. '’', '%1')

	-- Word-initial syllable: i -> e and u -> o when followed by consonant + a.
	text = gsub(text, "^" .. ONSET .. "i(" .. CONS .. ")a", '%1e%2a')
	text = gsub(text, BOUNDARY .. ONSET .. "i(" .. CONS .. ")a", '%1%2e%3a')
	text = gsub(text, "^" .. ONSET .. "u(" .. CONS .. ")a", '%1o%2a')
	text = gsub(text, BOUNDARY .. ONSET .. "u(" .. CONS .. ")a", '%1%2o%3a')

	-- tt / nt after i or ī become cc / ñc.
	text = gsub(text, "([iī])tt", '%1cc')
	text = gsub(text, "([iī])nt", '%1ñc')

	-- Stop after its matching nasal is respelled with the voiced letter.
	text = gsub(text, 'ṅk', 'ṅg')
	text = gsub(text, 'ñc', 'ñj')
	text = gsub(text, 'ṇṭ', 'ṇḍ')
	text = gsub(text, 'nt', 'nd')
	text = gsub(text, 'mp', 'mb')

	text = gsub(text, 'ṟṟ', 'tt')

	-- Single stop between two vowels. Consecutive syllables share a vowel
	-- (e.g. "atata"), hence the second pass.
	text = gsub_twice(text, '(' .. VOWEL .. ')k(' .. VOWEL .. ')', '%1h%2')
	text = gsub_twice(text, '(' .. VOWEL .. ')c(' .. VOWEL .. ')', '%1s%2')
	text = gsub_twice(text, '(' .. VOWEL .. ')ṭ(' .. VOWEL .. ')', '%1ḍ%2')
	text = gsub_twice(text, '(' .. VOWEL .. ')t(' .. VOWEL .. ')', '%1d%2')
	text = gsub_twice(text, '(' .. VOWEL .. ')p(' .. VOWEL .. ')', '%1b%2')

	-- Word-initial c -> s.
	text = gsub(text, '^c', 's')
	text = gsub(text, BOUNDARY .. 'c', '%1s')

	-- Word-final l, ḷ, ṇ, r get a trailing u.
	text = gsub(text, '([lḷṇr])$', '%1u')
	text = gsub(text, '([lḷṇr])' .. BOUNDARY, '%1u%2')

	-- Whole word of the shape (C)V + l/ḷ/ṇ + u with a short vowel: double the consonant.
	-- Neighbouring words share a separator, hence the second pass on the last rule.
	local short_word = "(" .. SHORT_VOWEL .. ")([lḷṇ])u"
	text = gsub(text, "^" .. ONSET .. short_word .. "$", '%1%2%3%3u')
	text = gsub(text, BOUNDARY .. ONSET .. short_word .. "$", '%1%2%3%4%4u')
	text = gsub(text, "^" .. ONSET .. short_word .. BOUNDARY, '%1%2%3%3u%4')
	text = gsub_twice(text, BOUNDARY .. ONSET .. short_word .. BOUNDARY, '%1%2%3%4%4u%5')

	-- Whole word of the shape (C)ai becomes (C)ayyi; elsewhere aiy -> ayy and ai -> e.
	text = gsub(text, "^" .. ONSET .. "ai$", '%1ayyi')
	text = gsub(text, BOUNDARY .. ONSET .. "ai$", '%1%2ayyi')
	text = gsub(text, "^" .. ONSET .. "ai" .. BOUNDARY, '%1ayyi%2')
	text = gsub_twice(text, BOUNDARY .. ONSET .. "ai" .. BOUNDARY, '%1%2ayyi%3')
	text = gsub(text, 'aiy', 'ayy')
	text = gsub(text, 'ai', 'e')

	-- Whole word of the shape (C)V + y becomes (C)V + yyi.
	local vowel_y = "(" .. VOWEL .. ")y"
	text = gsub(text, "^" .. ONSET .. vowel_y .. "$", '%1%2yyi')
	text = gsub(text, BOUNDARY .. ONSET .. vowel_y .. "$", '%1%2%3yyi')
	text = gsub(text, "^" .. ONSET .. vowel_y .. BOUNDARY, '%1%2yyi%3')
	text = gsub_twice(text, BOUNDARY .. ONSET .. vowel_y .. BOUNDARY, '%1%2%3yyi%4')

	-- l/ḷ/r/ṟ before a stop is dropped: a voiceless stop is doubled, a voiced
	-- one (with its optional nasal) is kept as is.
	text = gsub(text, '(' .. VOWEL .. ')([lḷrṟ])([kcṭtp])', '%1%3%3')
	text = gsub(text, '(' .. VOWEL .. ')([lḷrṟ])([ṅnñṇm]?)([gjḍdbs])', '%1%3%4')
	text = gsub(text, '([ṭṟ])k', 'kk')

	return text
end
 
-- Expose the module table (and with it export.tr) to whatever loads this module.
return export