Module:mdf-translit

From Linguifex
Revision as of 19:03, 31 March 2026 by wikt>Thadh
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

Documentation for this module may be created at Module:mdf-translit/doc

local export = {}

local gsub = mw.ustring.gsub
local lower = mw.ustring.lower
local trim = mw.text.trim

-- Run gsub(term, pattern, repl) over and over, feeding each result back in,
-- and return the string once a pass leaves it unchanged.
local function gsub_repeatedly(term, pattern, repl)
	local previous
	repeat
		previous = term
		-- only the first result of gsub (the string) is kept
		term = gsub(previous, pattern, repl)
	until term == previous
	return term
end

-- Default one-to-one mapping from a Cyrillic character to its Latin
-- replacement; each row holds the upper- and lower-case form of one letter.
local letters = {
	["А"] = "A", ["а"] = "a",
	["Б"] = "B", ["б"] = "b",
	["В"] = "V", ["в"] = "v",
	["Г"] = "G", ["г"] = "g",
	["Д"] = "D", ["д"] = "d",
	["Е"] = "E", ["е"] = "e",
	["Ё"] = "O", ["ё"] = "o",
	["Ж"] = "Ž", ["ж"] = "ž",
	["З"] = "Z", ["з"] = "z",
	["И"] = "I", ["и"] = "i",
	["Й"] = "J", ["й"] = "j",
	["К"] = "K", ["к"] = "k",
	["Л"] = "L", ["л"] = "l",
	["М"] = "M", ["м"] = "m",
	["Н"] = "N", ["н"] = "n",
	["О"] = "O", ["о"] = "o",
	["П"] = "P", ["п"] = "p",
	["Р"] = "R", ["р"] = "r",
	["С"] = "S", ["с"] = "s",
	["Т"] = "T", ["т"] = "t",
	["У"] = "U", ["у"] = "u",
	["Ф"] = "F", ["ф"] = "f",
	["Х"] = "H", ["х"] = "h",
	["Ц"] = "C", ["ц"] = "c",
	["Ч"] = "Č", ["ч"] = "č",
	["Ш"] = "Š", ["ш"] = "š",
	["Щ"] = "Šč", ["щ"] = "šč",
	["Ъ"] = "Ə", ["ъ"] = "ə",
	["Ы"] = "Ï", ["ы"] = "ï",
	["Ь"] = "", ["ь"] = "",
	["Э"] = "Ë", ["э"] = "ë",
	["Ю"] = "U", ["ю"] = "u",
	["Я"] = "A", ["я"] = "a",
	["Ӕ"] = "Ä", ["ӕ"] = "ä",
	["Ѣ"] = "E", ["ѣ"] = "e",
	["Ҥ"] = "Ŋ", ["ҥ"] = "ŋ",
	["І"] = "I", ["і"] = "i",
	["Ѳ"] = "Kv", ["ѳ"] = "kv",
}

-- Consonants that have a dedicated palatalised Latin letter, keyed by the
-- Cyrillic consonant; each row holds the upper- and lower-case form.
local palatals = {
	["Д"] = "Ď", ["д"] = "ď",
	["З"] = "Ź", ["з"] = "ź",
	["Л"] = "Ľ", ["л"] = "ľ",
	["Н"] = "Ń", ["н"] = "ń",
	["Р"] = "Ŕ", ["р"] = "ŕ",
	["С"] = "Ś", ["с"] = "ś",
	["Т"] = "Ť", ["т"] = "ť",
	["Ц"] = "Ć", ["ц"] = "ć",
}

-- The four strings below are spliced into [...] character classes by
-- export.tr, so the order of the characters inside them is irrelevant.

-- Characters after which е/ё/и/ю/я receive an inserted j. The soft sign ь is
-- listed here too, so it triggers the same j-insertion.
local vowels = "аоуыэяёюиеьАОУЫЭЯЁЮИЕЬ"
-- Vowels before which a consonant from `palatals` is written palatalised.
local front = "ЕЁИЮЯІӔӒеёиюяіӕӓ"
-- Combining marks allowed between a vowel and the following letter.
-- NOTE(review): presumably U+0301 (acute) and U+0300 (grave); confirm the bytes.
local accents = "́̀"
-- Consonants after which я and ё are rewritten to ä and ö.
local nonpalatals = "бвгжкпмшБВГЖКПМШ"

--- Transliterate Cyrillic text to Latin script.
--
-- Context-dependent rewrites (j-insertion, ä/ö after non-palatal consonants,
-- palatalised consonants, hard sign) are applied first; every character that
-- is still a key of `letters` is then replaced through that table.
--
-- @param text string to transliterate
-- @param lang not used by this function
-- @param sc not used by this function
-- @return the transliterated string with the outer whitespace trimmed
function export.tr(text, lang, sc)
	-- make all word borders have a space, so the word-initial rules below can
	-- match a preceding [%s%p] even at the very start of the input
	text = " " .. text .. " "

	local accent = "[" .. accents .. "]?"
	local after_vowel = "([" .. vowels .. "]" .. accent .. ")"

	-- front vowels after another vowel are written with initial j; repeated
	-- because a vowel consumed as the tail of one match cannot also be the
	-- head of the next match within the same pass
	text = gsub_repeatedly(text, after_vowel .. "([еёию])", "%1j%2")
	text = gsub_repeatedly(text, after_vowel .. "([ЕЁИЮ])", "%1J%2")
	text = gsub_repeatedly(text, after_vowel .. "я", "%1jä")
	text = gsub_repeatedly(text, after_vowel .. "Я", "%1JÄ")

	-- е, ё, ю, я at the beginning of a word are also written with initial j
	text = gsub(text, "([%s%p])([еёюя])", "%1j%2")
	text = gsub(text, "([%s%p])([ЕЁЮЯ])", "%1J%2")

	-- э at the beginning of a word is written as e
	text = gsub(text, "([%s%p])э", "%1e")
	text = gsub(text, "([%s%p])Э", "%1E")

	-- я after non-palatal consonants becomes ä
	text = gsub(text, "([" .. nonpalatals .. "])я", "%1ä")
	text = gsub(text, "([" .. nonpalatals .. "])Я", "%1Ä")

	-- ё after non-palatal consonants becomes ö
	text = gsub(text, "([" .. nonpalatals .. "])ё", "%1ö")
	text = gsub(text, "([" .. nonpalatals .. "])Ё", "%1Ö")

	-- make Е, Ё, Ю, Я lowercase when the word is not written in capitals, so
	-- that an inserted J carries the capital instead ("JЕль" -> "Jель").
	-- (1) the vowel is followed, optionally across an accent, by a lowercase
	--     letter; a following space, punctuation mark or bare accent does not
	--     count, otherwise the last letter of an all-caps word would be
	--     lowercased
	text = gsub(text, "([ЕЁЮЯ])(" .. accent .. "%l)", function(v, rest)
		return lower(v) .. rest
	end)
	-- (2) the vowel is a whole one-letter word (word border, inserted J,
	--     vowel, word border); repeated because adjacent one-letter words
	--     share the border character between them
	text = gsub_repeatedly(text, "([%s%p]J)([ЕЁЮЯ])(" .. accent .. "[%s%p])", function(pre, v, post)
		return pre .. lower(v) .. post
	end)

	-- consonants before ь and certain vowels are palatalised; the keys of
	-- `palatals` are handled independently, so iteration order is irrelevant
	local front_vowel = "([" .. front .. "])"
	for cyr, lat in pairs(palatals) do
		-- consonant + ь collapses into the palatalised letter; after this
		-- line the consonant is never directly followed by ь any more
		text = gsub(text, cyr .. "ь", lat)

		-- special handling of я
		text = gsub(text, cyr .. "я", lat .. "a")
		text = gsub(text, cyr .. "Я", lat .. "A")

		-- other front vowels
		text = gsub(text, cyr .. front_vowel, lat .. "%1")
	end

	-- ъ before е, ё, ю, я (either case) is written j instead of its default
	-- value from `letters`
	text = gsub(text, "ъ([еёюяЕЁЮЯ])", "j%1")
	text = gsub(text, "Ъ([еёюяЕЁЮЯ])", "J%1")

	-- gsub returns the new string and the number of substitutions; the extra
	-- parentheses drop the count so it is not passed on to trim as its
	-- second argument
	return trim((gsub(text, ".", letters)))
end

return export