-- Module:Cyrs-Glag-translit
--
-- From Linguifex
-- Jump to navigation Jump to search


-- Table of functions handed back to whoever loads this module.
local export = {}

-- Lookup tables indexed by script code ("Cyrs", "Glag").
local letters, digraphs = {}, {}

-- U+030F as a UTF-8 string; appended to the transliteration of the letters
-- that carry a double grave.
local double_grave = mw.ustring.char(0x30F)

-- Single-letter transliteration table for Old Cyrillic (script code "Cyrs").
-- Keys are individual letters as UTF-8 strings, values are their Latin
-- transliterations. export.tr hands this table to string.gsub as the
-- replacement argument, so a character with no entry here is left unchanged.
-- Upper- and lowercase are listed separately, and variant letterforms share
-- one output (e.g. Е and Є both give E).
letters["Cyrs"] = {
	["А"]='A', ["а"]='a',
	["Б"]='B', ["б"]='b',
	["В"]='V', ["в"]='v',
	["Г"]='G', ["г"]='g', 
	["Д"]='D', ["д"]='d',
	["Е"]='E', ["е"]='e', ["Є"]='E', ["є"]='e', 
	["Ж"]='Ž', ["ж"]='ž',
	["Ѕ"]='Dz', ["ѕ"]='dz', ["Ꙃ"]='Dz', ["ꙃ"]='dz',
	["З"]='Z', ["з"]='z', ["Ꙁ"]='Z', ["ꙁ"]='z',
	["И"]='I', ["и"]='i', ["І"]='I', ["і"]='i', ["Ї"]='I', ["ї"]='i', ["Ѝ"]='Ì', ["ѝ"]='ì',
	["Ꙉ"]='Đ', ["ꙉ"]='đ',
	["К"]='K', ["к"]='k',
	["Л"]='L', ["л"]='l', 
	["М"]='M', ["м"]='m',
	["Н"]='N', ["н"]='n',
	-- All O variants collapse to O/o, except Ѿ/ѿ which expands to three
	-- letters; ꙮ is listed in lowercase only.
	["О"]='O', ["о"]='o', ["Ѡ"]='O', ["ѡ"]='o', ["Ѿ"]='Otŭ', ["ѿ"]='otŭ', ["Ꙩ"]='O', ["ꙩ"]='o', ["Ꙫ"]='O', ["ꙫ"]='o', ["Ꙭ"]='O', ["ꙭ"]='o', ["ꙮ"]='o', ["Ѻ"]='O', ["ѻ"]='o', ["Ꙍ"]='O', ["ꙍ"]='o',
	["П"]='P', ["п"]='p', 
	["Р"]='R', ["р"]='r',
	["С"]='S', ["с"]='s',
	["Т"]='T', ["т"]='t',
	["Ѹ"]='U', ["ѹ"]='u', ["Ꙋ"]='U', ["ꙋ"]='u', ["У"]='U', ["у"]='u',
	["Ф"]='F', ["ф"]='f',
	["Х"]='X', ["х"]='x',
	
	["Ц"]='C', ["ц"]='c',
	["Ч"]='Č', ["ч"]='č', 
	["Ш"]='Š', ["ш"]='š',
	-- Щ/щ are deliberately absent here: export.tr assigns them on every call,
	-- because their value depends on the language code.
	["Ъ"]='Ŭ', ["ъ"]='ŭ',
	["Ꙑ"]='Y', ["ꙑ"]='y', ["Ы"]='Y', ["ы"]='y',
	["Ь"]='Ĭ', ["ь"]='ĭ',
	["Ѣ"]='Ě', ["ѣ"]='ě',
	
	["Ю"]='Ju', ["ю"]='ju', 
	["Ꙗ"]='Ja', ["ꙗ"]='ja', ["Я"]='Ja', ["я"]='ja',
	["Ѥ"]='Je', ["ѥ"]='je',
	["Ѧ"]='Ę', ["ѧ"]='ę', ["Ꙙ"]='Ę', ["ꙙ"]='ę',
	["Ѩ"]='Ję', ["ѩ"]='ję', ["Ꙝ"]='Ję', ["ꙝ"]='ję',
	["Ѫ"]='Ǫ', ["ѫ"]='ǫ',
	["Ѭ"]='Jǫ', ["ѭ"]='jǫ',
	["Ꙓ"]='Jě', ["ꙓ"]='jě',
	
	["Ѯ"]='Ks', ["ѯ"]='ks',
	["Ѱ"]='Ps', ["ѱ"]='ps',
	["Ѳ"]='Θ', ["ѳ"]='θ',
	-- Ѷ/ѷ reuse the Ѵ/ѵ output and append the combining mark in double_grave.
	["Ѵ"]='Ü', ["ѵ"]='ü', ["Ѷ"]='Ü' .. double_grave, ["ѷ"]='ü' .. double_grave,
	-- newer letters
	["Й"]='J', ["й"]='j', -- starting from 15th century
}

-- Two-letter Old Cyrillic sequences that transliterate as one Latin letter.
-- Keys are patterns (note the character classes) that export.tr passes to
-- mw.ustring.gsub before the single-letter pass; values are the replacements.
-- An uppercase first letter matches with either case of the second letter;
-- the lowercase patterns match lowercase only.
-- NOTE(review): only И/и is accepted after Ъ/ъ here, whereas the Glagolitic
-- digraph table accepts all three of its I letters after Ⱏ — confirm whether
-- ъ + І/Ї should be handled as well.
digraphs["Cyrs"] = {
	["О[УѴуѵ]"]="U", ["о[уѵ]"]="u",
	["Ъ[Ии]"]="Y", ["ъи"]="y",
}

-- Single-letter transliteration table for Glagolitic (script code "Glag").
-- Keys are individual letters as UTF-8 strings, values are their Latin
-- transliterations. export.tr hands this table to string.gsub as the
-- replacement argument, so a character with no entry here is left unchanged.
-- Upper- and lowercase are listed separately, and several letters share one
-- output (e.g. Ⰹ, Ⰺ and Ⰻ all give I).
letters["Glag"] = {
	["Ⰰ"]='A', ["ⰰ"]='a', ["Ⱝ"]='A', ["ⱝ"]='a',
	["Ⰱ"]='B', ["ⰱ"]='b',
	["Ⰲ"]='V', ["ⰲ"]='v',
	["Ⰳ"]='G', ["ⰳ"]='g', 
	["Ⰴ"]='D', ["ⰴ"]='d',
	["Ⰵ"]='E', ["ⰵ"]='e',
	["Ⰶ"]='Ž', ["ⰶ"]='ž',
	["Ⰷ"]='Dz', ["ⰷ"]='dz',
	["Ⰸ"]='Z', ["ⰸ"]='z',
	["Ⰹ"]='I', ["ⰹ"]='i', ["Ⰺ"]='I', ["ⰺ"]='i', ["Ⰻ"]='I', ["ⰻ"]='i',
	["Ⰼ"]='Đ', ["ⰼ"]='đ',
	["Ⰽ"]='K', ["ⰽ"]='k',
	["Ⰾ"]='L', ["ⰾ"]='l', 
	["Ⰿ"]='M', ["ⰿ"]='m', ["Ⱞ"]='M', ["ⱞ"]='m',
	["Ⱀ"]='N', ["ⱀ"]='n',
	["Ⱁ"]='O', ["ⱁ"]='o', ["Ⱉ"]='O', ["ⱉ"]='o',
	["Ⱂ"]='P', ["ⱂ"]='p', 
	["Ⱃ"]='R', ["ⱃ"]='r',
	["Ⱄ"]='S', ["ⱄ"]='s',
	["Ⱅ"]='T', ["ⱅ"]='t',
	["Ⱆ"]='U', ["ⱆ"]='u', 
	["Ⱇ"]='F', ["ⱇ"]='f',
	["Ⱈ"]='X', ["ⱈ"]='x', ["Ⱒ"]='X', ["ⱒ"]='x',
	
	["Ⱌ"]='C', ["ⱌ"]='c',
	["Ⱍ"]='Č', ["ⱍ"]='č', 
	["Ⱎ"]='Š', ["ⱎ"]='š',
	-- Ⱋ/ⱋ are deliberately absent here: export.tr assigns them on every call,
	-- because their value depends on the language code.
	["Ⱏ"]='Ŭ', ["ⱏ"]='ŭ',
	["Ⱐ"]='Ĭ', ["ⱐ"]='ĭ', ["Ⱜ"]='Ĭ', ["ⱜ"]='ĭ',
	["Ⱑ"]='Ě', ["ⱑ"]='ě',
	
	["Ⱓ"]='Ju', ["ⱓ"]='ju', 
	["Ⱔ"]='Ę', ["ⱔ"]='ę',
	["Ⱕ"]='Y̨', ["ⱕ"]='y̨',
	["Ⱗ"]='Ję', ["ⱗ"]='ję',
	["Ⱘ"]='Ǫ', ["ⱘ"]='ǫ', ["Ⱖ"]='Ǫ', ["ⱖ"]='ǫ',
	["Ⱙ"]='Jǫ', ["ⱙ"]='jǫ',
	
	["Ⱚ"]='Θ', ["ⱚ"]='θ',
	["Ⱛ"]='Ü', ["ⱛ"]='ü',
}

-- Two-letter Glagolitic sequences that transliterate as one Latin letter.
-- Keys are patterns that export.tr passes to mw.ustring.gsub before the
-- single-letter pass; values are the replacements. Uppercase Ⱏ matches with
-- any of the three I letters in either case; lowercase ⱏ matches only with
-- their lowercase forms.
digraphs["Glag"] = {
	["Ⱏ[ⰉⰊⰋⰹⰺⰻ]"]="Y", ["ⱏ[ⰹⰺⰻ]"]="y",
}

--- Transliterate Old Cyrillic or Glagolitic text into Latin script.
--
-- @param text  The text to transliterate.
-- @param lang  Language code. "orv" makes Щ/Ⱋ come out as "šč"; any other
--              value (including nil) gives "št".
-- @param sc    Script code. When nil or false it is obtained from
--              findBestScript in Module:scripts, using the language object
--              that Module:languages' getByCode returns for `lang`.
-- @return      The transliterated text. When the script is neither "Cyrs"
--              nor "Glag", only the kamora substitution has been applied.
function export.tr(text, lang, sc)
	if not sc then
		local scripts = require("Module:scripts")
		local lang_obj = require("Module:languages").getByCode(lang)
		sc = scripts.findBestScript(text, lang_obj):getCode()
	end

	-- Щ was pronounced differently in Old East Slavic, so its entries are
	-- rewritten in the letter tables on every call.
	local sht_upper, sht_lower = 'Št', 'št'
	if lang == "orv" then
		sht_upper, sht_lower = 'Šč', 'šč'
	end
	local cyrs, glag = letters["Cyrs"], letters["Glag"]
	cyrs["Щ"], cyrs["щ"] = sht_upper, sht_lower
	glag["Ⱋ"], glag["ⱋ"] = sht_upper, sht_lower

	-- Transliterate the kamora (bytes \210\132, the UTF-8 form of U+0484)
	-- as prime. This runs whatever the script turns out to be.
	local result = string.gsub(text, "\210\132", "ʹ")

	if sc ~= "Cyrs" and sc ~= "Glag" then
		-- Unsupported scripts are passed through instead of rejected; the
		-- error "This module can only transliterate Old Cyrillic (Cyrs) and
		-- Glagolitic (Glag)." is intentionally left disabled.
		return result
	end

	-- Digraphs go first, while both source letters are still present.
	for pattern, replacement in pairs(digraphs[sc]) do
		result = mw.ustring.gsub(result, pattern, replacement)
	end

	-- Each match is one non-ASCII character (a lead byte followed by its
	-- continuation bytes); it is looked up in the letter table and left as
	-- is when the table has no entry for it.
	result = string.gsub(result, '[\194-\244][\128-\191]+', letters[sc])

	return result
end

-- Hand the module table (with export.tr) back to the loader.
return export