Module:tg-Latn-Cyrl-translit

From Linguifex
Jump to navigation Jump to search

This module transliterates Tajik-language text from the Latin script into Cyrillic. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:tg-Latn-Cyrl-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the transliteration fails, returns nil.

-- Module table: public functions are attached to it and it is returned at
-- the end of the file. It starts out empty (no reference to any global).
local export = {}

-- Single-letter Latin -> Cyrillic correspondences.
-- Each row holds the lowercase mapping followed by its capital counterpart.
-- fa-IPA doesn't support letter case, so the capitals are only here as a
-- precaution.
local tt = {
	["a"] = "а", ["A"] = "А",
	["b"] = "б", ["B"] = "Б",
	["v"] = "в", ["V"] = "В",
	["g"] = "г", ["G"] = "Г",
	["ġ"] = "ғ", ["Ġ"] = "Ғ",
	["d"] = "д", ["D"] = "Д",
	["e"] = "е", ["E"] = "Е",
	["ž"] = "ж", ["Ž"] = "Ж",
	["z"] = "з", ["Z"] = "З",
	["i"] = "и", ["I"] = "И",
	["y"] = "й", ["Y"] = "Й",
	["k"] = "к", ["K"] = "К",
	["q"] = "қ", ["Q"] = "Қ",
	["l"] = "л", ["L"] = "Л",
	["m"] = "м", ["M"] = "М",
	["n"] = "н", ["N"] = "Н",
	["o"] = "о", ["O"] = "О",
	["p"] = "п", ["P"] = "П",
	["r"] = "р", ["R"] = "Р",
	["s"] = "с", ["S"] = "С",
	["t"] = "т", ["T"] = "Т",
	["u"] = "у", ["U"] = "У",
	["ü"] = "ӯ", ["Ü"] = "Ӯ",
	["f"] = "ф", ["F"] = "Ф",
	["x"] = "х", ["X"] = "Х",
	["h"] = "ҳ", ["H"] = "Ҳ",
	["č"] = "ч", ["Č"] = "Ч",
	["j"] = "ҷ", ["J"] = "Ҷ",
	["š"] = "ш", ["Š"] = "Ш",
	-- The apostrophe has no capital form in Latin, so there is no
	-- uppercase entry (no ["ʾ"] = "Ъ").
	["\'"] = "ъ",
}

-- Two-letter "y + vowel" sequences and the single Cyrillic letter each one
-- is written with.
local iodated = {
	ya = "я",
	ye = "е",
	yi = "и",
	yo = "ё",
	yu = "ю",
	-- Capitalised variants: fa-IPA doesn't support letter case, so these
	-- are purely precautionary.
	Ya = "Я",
	Ye = "Е",
	Yi = "И",
	Yo = "Ё",
	Yu = "Ю",
}

-- Letter inventories of the Latin input, in both cases. They are spliced
-- into character classes, so only membership matters, not order.
-- The consonant list also contains the apostrophe.
local allcons, allvowels =
	"BbVvGgĠġDdŽžZzYyKkQqLlMmNnRrPpSsTtFfXxHhČčJjŠš\'",
	"AaEeIiOoUuÜü"
-- Every character that counts as part of a word.
local allchar = allcons .. allvowels
local gsub = mw.ustring.gsub
local rsplit = mw.text.split
local romanize_tg = require("Module:fa-IPA").romanize_tg -- use dialect conversion built into fa-IPA

-- Converts Tajik text written in the Latin script into Cyrillic.
--
-- text: the string to convert, or a table with an `args` field (the case
--       when the function is invoked from a template), in which case
--       args[1] is used as the string.
-- lang: language code; accepted for interface compatibility, not used here.
-- sc:   script code of `text`; when omitted it is looked up through
--       Module:languages (findBestScript for "tg").
--
-- Returns the converted string. Input tagged "Arab"/"fa-Arab" is handed to
-- Module:fa-cls-translit and that module's result is returned as is; input
-- tagged "Cyrl"/"tg-Cyrl" yields nil.
function export.tr(text, lang, sc)
	if type(text) == "table" then
		-- Only args[1] is consumed. Nothing is written to global variables.
		text = text.args[1]
	end

	if not sc then
		sc = require("Module:languages").getByCode("tg"):findBestScript(text):getCode()
	end

	-- Script dispatch. Written as a flat if/elseif so that the Cyrillic
	-- check is actually reachable (a nested `else if` would tie it to the
	-- Latin case only).
	if sc == "Arab" or sc == "fa-Arab" then
		return require('Module:fa-cls-translit').tr(text)
	elseif sc == "Cyrl" or sc == "tg-Cyrl" then
		return nil
	end

	local letter = "[" .. allchar .. "]"
	local nonletter = "[^" .. allchar .. "]"

	-- to prevent the endings -ī and -i from getting mixed up, we must do this first
	text = gsub(text, "%-ī", "ī")

	-- if the input is Classical Persian, fix it: drop percent signs, then let
	-- fa-IPA's romanize_tg do its dialect conversion
	text = gsub(text, "%%", "")
	text = romanize_tg(text)

	-- Mark word edges: "##" goes between every letter/non-letter transition,
	-- spaces (and " | " separators) get an extra "#" on each side, and the
	-- whole string is wrapped in "##". The markers are stripped again below.
	text = gsub(text, "(" .. letter .. ")(" .. nonletter .. ")", "%1##%2")
	text = gsub(text, "(" .. nonletter .. ")(" .. letter .. ")", "%1##%2")
	text = gsub(text, " | ", "# | #")
	text = "##" .. gsub(text, " ", "# #") .. "##"

	-- glottal stop isn't typically written when it's obvious
	text = gsub(text, "([AaEeIiOoUuÜü])'([AaOoUuÜü])", "%1%2")
	-- glottal stop is almost always lost after historical ā'i, or a'i
	text = gsub(text, "([AaOo])'([Ii])", "%1%2")
	-- initial "e" forms
	text = gsub(text, "#e", "#э")
	text = gsub(text, "#E", "#Э")
	-- underlying geminate (only geminated when inflected): a doubled
	-- consonant right before a word edge is reduced to a single one
	text = gsub(text, "([" .. allcons .. "])%1##", "%1##")
	-- a free-standing "u" is attached to the end of the preceding word
	text = gsub(text, "(" .. letter .. ")### ###u##", "%1u##")
	-- tajik orthography uses dashes but NOT the way fa-IPA does,
	-- so they need to be removed
	-- (y)i directly before "-letter": written "и", the dash is dropped
	text = gsub(text, "([y]?)i##%-##(" .. letter .. ")", "и##%2")
	-- a word consisting only of (y)i: written "и"
	text = gsub(text, "##([y]?)i#", "и#")
	-- any other (y)i at a word edge: written "ӣ"
	text = gsub(text, "([y]?)i#", "ӣ#")
	text = gsub(text, "%-", "")
	text = gsub(text, "#", "")

	-- y + vowel after a vowel or another y becomes one iotated letter.
	-- Pairs that have no entry in `iodated` (for example "YA" or "yE") are
	-- left untouched: returning nothing makes gsub keep the original match,
	-- where concatenating the missing entry would raise an error.
	text = gsub(text,
		"([" .. allvowels .. "Yy])([Yy][AaEeIiOoUu])",
		function(before, pair)
			local cyr = iodated[pair]
			if cyr then
				return before .. cyr
			end
		end)

	-- remaining y + e/o/u/a pairs (table lookup; unknown pairs stay as is)
	text = gsub(text, "[Yy][eoua]", iodated)
	-- everything else is mapped character by character; characters without
	-- an entry in `tt` are kept
	text = gsub(text, ".", tt)
	text = gsub(text, "ӣи", "ии")

	return text
end

-- Hand the module table (with export.tr) back to whoever loads this module.
return export