Module:tl-translit

From Linguifex
Jump to navigation Jump to search

This module transliterates Tagalog-language text written in the Baybayin script (script code Tglg) into the Latin alphabet. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:tl-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and in the language specified by the code lang.
When the transliteration fails (for example, when sc is not Tglg), returns nil.

-- Table of functions exported by this module; it is returned at the end of the file.
local export = {}

-- Base consonant letters and the Latin consonant(s) each one stands for.
-- The vowel that follows the consonant is decided in export.tr, not here.
local consonants = {
	['ᜃ'] = 'k',
	['ᜄ'] = 'g',
	['ᜅ'] = 'ng',
	['ᜆ'] = 't',
	['ᜇ'] = 'd',
	['ᜈ'] = 'n',
	['ᜉ'] = 'p',
	['ᜊ'] = 'b',
	['ᜋ'] = 'm',
	['ᜌ'] = 'y',
	['ᜎ'] = 'l',
	['ᜏ'] = 'w',
	['ᜐ'] = 's',
	['ᜑ'] = 'h',
	-- Two distinct letters both map to "r".
	['ᜍ'] = 'r',
	['ᜟ'] = 'r',
}

-- Marks that may follow a consonant letter, and what replaces the
-- consonant's default "a": a vowel, or nothing at all.
local diacritics = {
	['ᜒ'] = 'i',
	['ᜓ'] = 'u',
	-- These two marks leave the consonant without any vowel.
	['᜔'] = '',
	['᜕'] = '',
}

-- One-to-one replacements for characters that are neither consonant letters
-- nor marks: the independent vowel letters and the punctuation marks.
local tt = {
	-- independent vowel letters
	['ᜀ'] = 'a',
	['ᜁ'] = 'i',
	['ᜂ'] = 'u',
	-- punctuation: the double mark becomes a period, the single mark a comma
	['᜶'] = '.',
	['᜵'] = ',',
}

-- Runs every {pattern, replacement} pair of `rules` over `str`, in list
-- order, with mw.ustring.gsub, and returns the rewritten string.
local function apply_rules(str, rules)
	for _, rule in ipairs(rules) do
		str = mw.ustring.gsub(str, rule[1], rule[2])
	end
	return str
end

-- "d" -> "r" rewrites (labelled a Classical Tagalog d~r rule by the original
-- author, used in place of a plain intervocalic rule). export.tr applies the
-- whole list again and again until the text stops changing.
local d_to_r_rules = {
	-- "d" directly after a, i, u, y or w
	{"([aiuyw])d", "%1r"},
	-- "d" after a consonant plus b/k/g/p/t, and before a vowel
	{"([bkdghlmnprstwy])([bkgpt])d([aiu])", "%1%2r%3"},
	-- the same cluster at the very start of the text
	{"^([bkgpt])d([aiu])", "%1r%2"},
	-- a doubled "d" after a non-vowel, or at the very start of the text
	{"([^aiu])dd", "%1dr"},
	{"^dd", "dr"},
}

-- Final clean-up rewrites, applied once and in this order.
local closing_rules = {
	-- Use "o" rather than "u" when only non-vowel, non-space characters
	-- separate the "u" from the next space or from the end of the text.
	{"([u])([^aeiou ]*)([ ])", "o%2%3"},
	{"([u])([^aeiou ]*)$", "o%2"},
	-- A "u" immediately before an "o" becomes "o" as well.
	{"([u])([o])", "o%2"},
	-- "iy" and "uw" that are not followed by a vowel are assumed to stand
	-- for "ey" and "ow".
	{"iy([^aeiou])", "ey%1"},
	{"iy$", "ey"},
	{"uw([^aeiou])", "ow%1"},
	{"uw$", "ow"},
	-- A dotted-circle placeholder becomes "-a".
	{"◌", "-a"},
	-- Drop the space in front of a comma or period.
	{" ([,.])", "%1"},
}

-- Transliterates `text` into Latin letters.
--
-- text:     the string to transliterate.
-- lang:     language code; accepted but not used by this function.
-- sc:       script code; anything other than "Tglg" makes the function
--           return nil without looking at `text`.
-- override: accepted but not used by this function.
--
-- Returns the transliterated string, or nil when `sc` is not "Tglg".
function export.tr(text, lang, sc, override)
	if sc ~= "Tglg" then
		return nil
	end

	-- When the input contains the letter ᜍ (mapped to "r"), the d -> r
	-- rewriting further down is skipped.
	local has_ra_letter = string.find(text, 'ᜍ') ~= nil

	local gsub = mw.ustring.gsub

	-- Put a hyphen between any character of the first set and a following
	-- independent vowel letter. The set is a bracket class, so it matches a
	-- single character: one of the listed consonant letters or the ᜔ mark.
	-- Hyphens that end up between two vowels are removed again below.
	text = gsub(text, '([ᜃ᜔ᜄ᜔ᜅ᜔ᜆ᜔ᜈ᜔ᜉ᜔ᜊ᜔ᜋ᜔ᜌ᜔ᜎ᜔ᜏ᜔ᜐ᜔])([ᜀᜁᜂ])', '%1-%2')

	-- Main pass: a consonant letter, an optional mark, and an optional
	-- independent vowel letter are turned into Latin letters together.
	text = gsub(
		text,
		'([ᜃ-ᜑᜟ])([ᜒᜓ᜔᜕]?)([ᜀ-ᜂ]?)',
		function(letter, mark, vowel)
			local latin = consonants[letter]
			if mark == "" then
				-- No mark: the consonant carries its default "a".
				latin = latin .. 'a'
			else
				latin = latin .. diacritics[mark]
			end
			if vowel ~= "" then
				latin = latin .. tt[vowel]
			end
			return latin
		end)

	-- Everything that is left (independent vowels, punctuation) is looked up
	-- character by character; characters missing from `tt` stay as they are.
	text = gsub(text, '.', tt)

	if not has_ra_letter then
		local previous
		repeat
			previous = text
			text = apply_rules(text, d_to_r_rules)
		until text == previous
	end

	-- Remove a hyphen that sits between two vowels.
	text = gsub(text, "([aiu])-([aiu])", "%1%2")

	-- Marks that were not consumed by the main pass (no consonant letter in
	-- front of them) are written as a hyphen followed by their value.
	text = gsub(text, '([ᜒᜓ᜔᜕])', function(mark)
		return '-' .. diacritics[mark]
	end)

	text = apply_rules(text, closing_rules)

	return text
end

-- Hand the table of exported functions back to whoever loads this module.
return export