Module:tru-translit

From Linguifex
Jump to navigation Jump to search

This module will transliterate Turoyo language text. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:tru-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the transliteration fails, returns nil.

-- Table of functions exposed by this module; it is returned at the end of the file.
local export = {}

-- Character constructor: called below as U(codepoint) to build the single-character strings used in patterns.
local U = require("Module:string/char")
-- Unicode-aware gsub from the Scribunto ustring library; every substitution in this module goes through it.
local rsub = mw.ustring.gsub
local unpack = unpack or table.unpack -- Lua 5.2 compatibility
-- Run rsub(term, foo, bar) over and over until a pass leaves the string unchanged, then return that string.
-- A single pass does not revisit characters it has already consumed, so patterns whose matches share a
-- character (e.g. consonant-waw-consonant chains) need several passes.
--   term: the string to rewrite
--   foo:  the pattern handed to rsub
--   bar:  the replacement handed to rsub
local function rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = rsub(previous, foo, bar)
	until term == previous
	return term
end

-- Syriac vowel diacritics (combining marks), listed by code point. The Latin value of each is given by tt_next.
local pthaha = U(0x730) -- U+0730 SYRIAC PTHAHA ABOVE
local pthaha_below = U(0x731) -- U+0731 SYRIAC PTHAHA BELOW
local zqapha = U(0x733) -- U+0733 SYRIAC ZQAPHA ABOVE
local rbasa = U(0x736) -- U+0736 SYRIAC RBASA ABOVE
local rbasa_below = U(0x737) -- U+0737 SYRIAC RBASA BELOW
-- Capturing character class that matches exactly one of the five vowel diacritics above.
local vowel_diacritics_capture = "("
	.. "[" .. table.concat({ rbasa_below, pthaha_below, rbasa, zqapha, pthaha }) .. "]"
	.. ")"

-- The consonants that can stand for vowels (matres lectionis) are declared as named constants instead of being typed
-- as literals. Mixing right-to-left and left-to-right characters on one line is displayed differently in an IDE than
-- on the wiki: because these letters are joined with the .. operator, "ܘ" .. "ܐ" as shown on the wiki would appear as
-- "ܐ" .. "ܘ" in an IDE.
local alaph = U(0x710) -- U+0710 SYRIAC LETTER ALAPH
local waw = U(0x718) -- U+0718 SYRIAC LETTER WAW
local yudh = U(0x71D) -- U+071D SYRIAC LETTER YUDH

-- Other combining marks handled by export.tr.
local combining_diaeresis = U(0x308) -- U+0308 COMBINING DIAERESIS; deleted outright by export.tr
local combining_tilde_below = U(0x330) -- U+0330 COMBINING TILDE BELOW; after ܫ / ܙ it yields č / ž
local qushshaya = U(0x741) -- U+0741 SYRIAC QUSHSHAYA; after ܦ it yields p (bare ܦ is f)
local rukkakha = U(0x742) -- U+0742 SYRIAC RUKKAKHA; after ܒ ܬ ܕ ܟ ܓ it yields v ṯ ḏ x ġ

-- Punctuation map applied by export.tr in a single character-by-character pass (characters that are not keys are
-- left untouched). Opening and closing curly quotes are exchanged, and the remaining keys are mapped to Latin
-- punctuation. The keys are also collected into a character class so that punctuation can be fenced off with "#".
local tt_transpose_punc = {
	-- left/right single/double quotes (each is swapped with its counterpart)
	["“"] = "”",
	["”"] = "“",
	["‘"] = "’",
	["’"] = "‘",
	["؟"] = "?", -- question mark
	["«"] = '“', -- quotation mark
	["»"] = '”', -- quotation mark
	["،"] = ",", -- comma
	["؛"] = ";", -- semicolon
	-- skewed colons from https://r12a.github.io/scripts/syrc/tru.html#phrase
	["܇"] = ',',
	["܆"] = ';'
}

-- All keys of tt_transpose_punc joined into one string, for use inside a character class.
-- pairs() gives no ordering guarantee, which is fine because order is irrelevant inside [...].
local tt_transpose_punc_keys
do
	local collected = {}
	for punct in pairs(tt_transpose_punc) do
		collected[#collected + 1] = punct
	end
	tt_transpose_punc_keys = table.concat(collected)
end

-- Normalisation rules, each a { pattern, replacement } pair that export.tr applies in list order.
local fix = {}

-- When a vowel diacritic has been typed before one of these modifier marks, swap the two so the modifier sits
-- directly after its base letter (the letter+modifier patterns in export.tr rely on that adjacency).
for _, mark in ipairs({ qushshaya, rukkakha, combining_tilde_below }) do
	fix[#fix + 1] = { vowel_diacritics_capture .. mark, mark .. "%1" }
end

-- partition punctuation marks so "starts with" and "ends with" substitutions work
fix[#fix + 1] = { "([" .. tt_transpose_punc_keys .. "()!.:\"'])", "#%1#" }

-- One-to-one map from a bare Syriac consonant to its Latin letter. export.tr applies it only after the
-- letter + modifier combinations (p, v, ṯ, ḏ, x, ġ, č, ž) have been replaced, so the values here are the readings
-- of the unmarked letters. Alaph, waw and yudh are deliberately absent: they are handled by context rules and tt_next.
local tt = {
	["ܦ"] = "f", ["ܒ"] = "b", ["ܬ"] = "t", ["ܛ"] = "ṭ", ["ܕ"] = "d", ["ܟ"] = "k",
	["ܓ"] = "g", ["ܩ"] = "q", ["ܔ"] = "j", ["ܣ"] = "s", ["ܨ"] = "ṣ", ["ܙ"] = "z",
	["ܫ"] = "š", ["ܚ"] = "ḥ", ["ܥ"] = "c", ["ܗ"] = "h", ["ܡ"] = "m", ["ܢ"] = "n",
	["ܪ"] = "r", ["ܠ"] = "l",
}

-- Second-stage map, applied near the end of export.tr once the context-dependent rules have run.
local tt_next = {
	-- any waw / yudh that was not turned into the vowel u / i by an earlier rule is read as a consonant
	[waw] = "w",
	[yudh] = "y",

	-- vowel diacritics
	[rbasa_below] = "ë",
	[pthaha_below] = "ä",
	[rbasa] = "e",
	[zqapha] = "o",
	[pthaha] = "a",
}

-- Characters that count as consonants when export.tr decides whether a waw / yudh between two of them is a vowel.
-- These are the Latin letters produced by the earlier substitution steps, plus Syriac yudh and waw themselves
-- (still untransliterated at that stage). "v" is listed twice, which is harmless inside a character class.
local consonants = "fbtṭdkgqjsṣzščḥchmnrlvžpvṯḏxġ" .. yudh .. waw
-- Capturing character class matching exactly one consonant.
local consonants_group = "([" .. consonants .. "])"

-- Whole-word exceptions, applied by export.tr before any letter is transliterated.
local special_cases = {
	-- { matching_syriac_text, latin_substitution }
	--
	-- the # symbol pads the start and end of a word; consider the following examples for matching_syriac_text
	-- #float#    only float matches
	-- #float     words starting with float like float or floats match
	-- float#     words ending with float like float or afloat match
	-- float      words containing float like float, floats, afloat and refloats match

	-- word-final ܡܳܪܝ is written "mor"; the general rules would otherwise turn the final yudh into "i"
	{"ܡܳܪܝ#", "mor#"},
}

-- Applies every { pattern, replacement } pair of `rules` to `str`, strictly in list order, and returns the result.
local function apply_rules(str, rules)
	for i = 1, #rules do
		str = rsub(str, unpack(rules[i]))
	end
	return str
end

-- Characters that are simply deleted.
local strip_rules = {
	{ "ـ", "" }, -- elongation stroke (presumably U+0640 tatweel; confirm)
	{ combining_diaeresis, "" },
}

-- Letter + modifier-mark combinations first, then the one-to-one punctuation and consonant tables.
local letter_rules = {
	{ "ܫ" .. combining_tilde_below, "č" },
	{ "ܙ" .. combining_tilde_below, "ž" },

	{ "ܦ" .. qushshaya, "p" },

	{ "ܒ" .. rukkakha, "v" },
	{ "ܬ" .. rukkakha, "ṯ" },
	{ "ܕ" .. rukkakha, "ḏ" },
	{ "ܟ" .. rukkakha, "x" },
	{ "ܓ" .. rukkakha, "ġ" },

	{ ".", tt_transpose_punc },
	{ ".", tt },
}

-- Word-boundary rules for alaph / waw / yudh, followed by the final clean-up. Order matters throughout.
local boundary_rules = {
	-- word-initial waw / yudh before a consonant is a vowel
	{ "#" .. waw .. consonants_group, "#u%1" },
	{ "#" .. yudh .. consonants_group, "#i%1" }, -- this needs a test case

	-- NOTE(review): these two patterns are anchored at the END of a word, yet the replacement puts the "#"
	-- marker in FRONT of "aw" / "ay". Kept exactly as found; confirm whether a word-initial pattern
	-- ("#" .. alaph .. pthaha .. waw) was intended.
	{ alaph .. pthaha .. waw .. "#", "#aw" },
	{ alaph .. pthaha .. yudh .. "#", "#ay" },

	-- word-initial alaph carrying waw / yudh
	{ "#" .. alaph .. waw, "#u" },
	{ "#" .. alaph .. yudh, "#i" },

	-- word-final diphthongs keep the glide
	{ pthaha .. waw .. "#", "aw#" }, --<ܝܳܬܰܘ> = yotaw, not yotau
	{ pthaha .. yudh .. "#", "ay#" }, --<ܚܙܰܝ> = ḥzay, not ḥzai

	-- any other word-final waw / yudh is a vowel
	{ waw .. "#", "u#" },
	{ yudh .. "#", "i#" },

	-- word-final alaph: silent after a written vowel, otherwise "o"; every remaining alaph is dropped
	{ pthaha .. alaph .. "#", "a#" },
	{ rbasa .. alaph .. "#", "e#" },
	{ zqapha .. alaph .. "#", "o#" },
	{ alaph .. "#", "o#" },
	{ alaph, "" },

	-- leftover waw / yudh become w / y, vowel diacritics become Latin vowels
	{ ".", tt_next },

	-- these doubled letters are written single
	{ "cc", "c" },
	{ "ḥḥ", "ḥ" },
	{ "šš", "š" },
	{ "ṯṯ", "ṯ" },
	{ "xx", "x" },

	-- finally remove the word-boundary markers
	{ "#", "" },
}

-- Transliterates Syriac-script Turoyo `text` into Latin script and returns the resulting string.
-- `lang` and `sc` are accepted for interface compatibility but are not consulted by this function.
--
-- Steps: pad every word with "#" markers, delete ignorable marks, normalise mark order and fence off punctuation
-- (fix), apply whole-word exceptions (special_cases), replace consonants, resolve waw / yudh / alaph from their
-- context, then strip the markers.
function export.tr(text, lang, sc)
	-- mark word boundaries: around " | " separators, around plain spaces, and at both ends of the input
	local bar_marked = rsub(text, " | ", "# | #")
	local space_marked = rsub(bar_marked, " ", "# #")
	local result = "##" .. space_marked .. "##"

	result = apply_rules(result, strip_rules)
	result = apply_rules(result, fix)

	-- Special cases
	result = apply_rules(result, special_cases)

	result = apply_rules(result, letter_rules)

	-- waw / yudh between two consonants are vowels; repeated because neighbouring matches share a consonant
	result = rsub_repeatedly(result, consonants_group .. waw .. consonants_group, "%1u%2")
	result = rsub_repeatedly(result, consonants_group .. yudh .. consonants_group, "%1i%2")

	result = apply_rules(result, boundary_rules)

	return result
end

return export