Module:be-translit

From Linguifex
Jump to navigation Jump to search

This module transliterates Belarusian-language text. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:be-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc and in the language specified by the code lang.
When the transliteration fails, returns nil.

-- Table that collects this module's public functions; it is returned at the
-- end of the file.
local export = {}

-- Acute accent (code point 0x0301), appended after a vowel as a stress mark.
local AC = require("Module:string/char")(0x0301)

-- MediaWiki (Scribunto) ustring versions of gsub and find.
local rsubn = mw.ustring.gsub
-- NOTE(review): rfind is not referenced in the code visible here.
local rfind = mw.ustring.find

-- Same substitution as rsubn(), but yields only the resulting string; the
-- extra parentheses drop every return value after the first.
local function rsub(term, foo, bar)
	return (rsubn(term, foo, bar))
end

-- Single-character lookup table from Cyrillic (plus a few punctuation marks)
-- to the Latin transliteration. Each row lists the upper-case and lower-case
-- form of one letter.
local tt = {
	["А"] = "A", ["а"] = "a",
	["Б"] = "B", ["б"] = "b",
	["В"] = "V", ["в"] = "v",
	["Г"] = "H", ["г"] = "h",
	["Д"] = "D", ["д"] = "d",
	["Е"] = "Je", ["е"] = "je",
	["Ё"] = "Jo", ["ё"] = "jo",
	["Ж"] = "Ž", ["ж"] = "ž",
	["З"] = "Z", ["з"] = "z",
	["І"] = "I", ["і"] = "i",
	["Й"] = "J", ["й"] = "j",
	["К"] = "K", ["к"] = "k",
	["Л"] = "L", ["л"] = "l",
	["М"] = "M", ["м"] = "m",
	["Н"] = "N", ["н"] = "n",
	["О"] = "O", ["о"] = "o",
	["П"] = "P", ["п"] = "p",
	["Р"] = "R", ["р"] = "r",
	["С"] = "S", ["с"] = "s",
	["Т"] = "T", ["т"] = "t",
	["У"] = "U", ["у"] = "u",
	["Ў"] = "W", ["ў"] = "w",
	["Ф"] = "F", ["ф"] = "f",
	["Х"] = "X", ["х"] = "x",
	["Ц"] = "C", ["ц"] = "c",
	["Ч"] = "Č", ["ч"] = "č",
	["Ш"] = "Š", ["ш"] = "š",
	["Ы"] = "Y", ["ы"] = "y",
	["Ь"] = "ʹ", ["ь"] = "ʹ",
	["Э"] = "E", ["э"] = "e",
	["Ю"] = "Ju", ["ю"] = "ju",
	["Я"] = "Ja", ["я"] = "ja",

	-- apostrophe variants
	["’"] = "ʺ",
	["ʼ"] = "ʺ",

	-- currently non-standard, used in some older norms
	["Ґ"] = "G", ["ґ"] = "g",
	["И"] = "I", ["и"] = "i",
	["Ј"] = "J", ["ј"] = "j",

	-- Belarusian style quotes
	["«"] = "“",
	["»"] = "”",
}

-- Latin vowel letters other than o, in both cases, and the character class
-- built from them. export.tr requires one of these in a word before it marks
-- that word's o with the acute accent.
local unstressed_vowels = "aeiyuAEIYU"
local unstressed_vowel = string.format("[%s]", unstressed_vowels)

-- Maps each precomposed acute-accented Latin vowel to its base letter
-- followed by the separate acute accent AC.
local acute_decomposer = {}
for accented, base in pairs({
	["á"] = "a", ["é"] = "e", ["í"] = "i",
	["ó"] = "o", ["ú"] = "u", ["ý"] = "y",
	["Á"] = "A", ["É"] = "E", ["Í"] = "I",
	["Ó"] = "O", ["Ú"] = "U", ["Ý"] = "Y",
}) do
	acute_decomposer[accented] = base .. AC
end

--- Transliterates Belarusian Cyrillic text into Latin script.
-- Characters are mapped one by one through tt. Afterwards, in every
-- whitespace-delimited word that contains exactly one o/O, no acute accent
-- and at least one other vowel, that o is marked with the acute accent.
-- Note that every "#" in the text is removed by the final step, because "#"
-- serves as the internal word-boundary marker.
-- @param text string to transliterate
-- @param lang language code (not used by this implementation)
-- @param sc script code (not used by this implementation)
-- @return the transliterated string
function export.tr(text, lang, sc)
	-- Building blocks for the stress patterns: a run of characters that are
	-- neither a boundary marker, an o, nor an acute accent.
	local filler = "[^#Oo" .. AC .. "]*"
	local with_vowel = filler .. unstressed_vowel .. filler
	local accented = "%1%2" .. AC .. "%3"

	-- A lone neutral apostrophe becomes ʺ; a run of two or more has no entry
	-- in the replacement table and is therefore left untouched.
	text = rsub(text, "'+", { ["'"] = 'ʺ' })
	-- Characters without an entry in tt are kept as they are.
	text = rsub(text, '.', tt)

	-- Fence every word with "#" on both sides.
	text = "#" .. rsub(text, "(%s+)", "#%1#") .. "#"

	-- Sole o followed somewhere in the word by another vowel.
	text = rsub(text, "(#" .. filler .. ")([Oo])(" .. with_vowel .. "#)", accented)
	-- Sole o preceded somewhere in the word by another vowel.
	text = rsub(text, "(#" .. with_vowel .. ")([Oo])(" .. filler .. "#)", accented)

	-- Drop the boundary markers again.
	return rsub(text, "#", "")
end

--- Reverse-transliterates Latin-script words or phrases back into Cyrillic.
-- The reverse table is derived from tt by swapping keys and values. Several
-- Cyrillic characters share one Latin value in tt, and pairs() visits keys in
-- an unspecified order, so every such collision is resolved explicitly below.
-- @param text Latin-script string to convert
-- @return the string with every recognised Latin sequence replaced
function export.reverse_tr(text)
	local reverse_tt = {}
	for k, v in pairs(tt) do
		reverse_tt[v] = k
	end

	-- Explicit winners for Latin values that more than one tt key maps to.
	reverse_tt['ʺ'] = "'"
	reverse_tt['ʹ'] = "ь"
	reverse_tt['i'] = "і"
	reverse_tt['I'] = "І"
	-- tt maps both Й and the non-standard Ј to "J" (likewise й/ј to "j");
	-- without these two lines the result would depend on pairs() order.
	reverse_tt['j'] = "й"
	reverse_tt['J'] = "Й"

	-- Split precomposed accented vowels so the base letter can be looked up;
	-- the accent itself has no table entry and passes through unchanged.
	text = rsub(text, '.', acute_decomposer)
	-- Two-letter sequences (Ja/ja, Je/je, Jo/jo, Ju/ju) must be replaced
	-- before the single-character pass consumes their letters separately.
	text = rsub(text, '[Jj][aeou]', reverse_tt)
	text = rsub(text, '.', reverse_tt)
	return text
end

return export