Module:Osge-translit

From Linguifex
Jump to navigation Jump to search

This module will transliterate text in the Osage script. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:Osge-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the transliteration fails, returns nil.

local export = {}

local m_str_utils = require("Module:string utilities")
local m_table = require("Module:table")

-- Import libraries
local U = require("Module:string/char")
local gsub = m_str_utils.gsub
local len = m_str_utils.len
local sub = m_str_utils.sub

local decomp = mw.ustring.toNFD
local recomp = mw.ustring.toNFC
local upper = m_str_utils.upper

-- Run gsub(term, foo, bar) over and over, feeding each result back in, and
-- return the string once a pass leaves it unchanged.
-- `foo` is the pattern and `bar` the replacement, both passed to gsub as-is.
local function gsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = gsub(previous, foo, bar)
	until term == previous
	return term
end

-- Report whether `char` is uppercase, i.e. whether upper() leaves it unchanged.
-- Note that any string upper() does not alter (including the empty string)
-- therefore also counts as uppercase.
local function is_upper(char)
	local uppercased = upper(char)
	return uppercased == char
end

-- Osage -> Latin lookup used by export.tr (and, inverted, by export.tr_reverse).
-- The first group of entries maps to capitalised Latin values, the second group
-- to the corresponding lowercase values; values longer than one character are
-- the digraphs that need special handling. The last two entries map combining
-- marks rather than letters.
local letters = {  -- general table
	["𐒰"]="A", ["𐒱"]="Ai", ["𐒲"]="Aį", ["𐒳"]="Ə", ["𐒴"]="Br", ["𐒵"]="Č", ["𐒶"]="Hč", ["𐒷"]="E", ["𐒸"]="Eį", ["𐒹"]="H", ["𐒺"]="Hy",
	["𐒻"]="I", ["𐒼"]="K", ["𐒽"]="Hk", ["𐒾"]="Ky", ["𐒿"]="L", ["𐓀"]="M", ["𐓁"]="N", ["𐓂"]="O", ["𐓃"]="Oį", ["𐓄"]="P", ["𐓅"]="Hp",
	["𐓆"]="S", ["𐓇"]="Š", ["𐓈"]="T", ["𐓉"]="Ht", ["𐓊"]="C", ["𐓋"]="Hc", ["𐓌"]="Ch", ["𐓍"]="Ð", ["𐓎"]="U", ["𐓏"]="W", ["𐓐"]="X",
	["𐓑"]="Ɣ", ["𐓒"]="Z", ["𐓓"]="Ž",
	["𐓘"]="a", ["𐓙"]="ai", ["𐓚"]="aį", ["𐓛"]="ə", ["𐓜"]="br", ["𐓝"]="č", ["𐓞"]="hč", ["𐓟"]="e", ["𐓠"]="eį", ["𐓡"]="h", ["𐓢"]="hy",
	["𐓣"]="i", ["𐓤"]="k", ["𐓥"]="hk", ["𐓦"]="ky", ["𐓧"]="l", ["𐓨"]="m", ["𐓩"]="n", ["𐓪"]="o", ["𐓫"]="oį", ["𐓬"]="p", ["𐓭"]="hp",
	["𐓮"]="s", ["𐓯"]="š", ["𐓰"]="t", ["𐓱"]="ht", ["𐓲"]="c", ["𐓳"]="hc", ["𐓴"]="ch", ["𐓵"]="ð", ["𐓶"]="u", ["𐓷"]="w", ["𐓸"]="x",
	["𐓹"]="ɣ", ["𐓺"]="z", ["𐓻"]="ž",
	[U(0x0358)]=U(0x0328), -- combining dot above right (U+0358) -> combining ogonek (U+0328), marking nasalisation
	[U(0x030B)]=U(0x0304)..U(0x0301) -- combining double acute accent -> combining macron + combining acute accent (long high tone)
}
-- Combining acute (U+0301), macron (U+0304) and double acute (U+030B), joined
-- into one string so it can be dropped inside a pattern character class.
local accents = U(0x0301) .. U(0x0304) .. U(0x030B)
-- Latin -> Osage lookup, obtained by inverting `letters`.
local letters_reversed = m_table.invert(letters)
-- `digraphs`: every key of `letters` that is_upper() accepts and whose value is
-- longer than one character, concatenated for use in a character class.
-- `digraphs_reversed`: every multi-character value of `letters`, regardless of
-- the case of its key. Both are filled in pairs() order, which is unspecified.
local digraphs = ""
local digraphs_reversed = {}
for osage, latin in pairs(letters) do
	if len(latin) > 1 then
		digraphs_reversed[#digraphs_reversed + 1] = latin
		if is_upper(osage) then
			digraphs = digraphs .. osage
		end
	end
end

-- Transliterate Osage-script text into the Latin alphabet.
--   text     string to transliterate
--   lang, sc accepted so the function has the usual transliteration-module
--            signature; neither is read here
-- Returns the transliterated string in NFC form.
function export.tr(text, lang, sc)
	-- Handle vowel digraphs and uppercase digraphs first. The optional accent
	-- following the Osage letter is placed between the two Latin letters, and
	-- an uppercase digraph is written fully in capitals when the character
	-- after it is uppercase too (as in an all-caps word).
	--
	-- The pattern has to capture the following character in order to inspect
	-- it, so a digraph that directly follows another one is swallowed as
	-- `d_next` and skipped by that pass. Repeating the substitution until the
	-- text stops changing picks those up; it terminates because every match
	-- replaces its Osage digraph with Latin letters.
	local digraph_pattern = "([" .. digraphs .. "𐓙𐓚𐓠𐓫])([" .. accents .. "]?)(.?)"
	text = gsub_repeatedly(text, digraph_pattern, function(d, a, d_next)
		local latin = sub(letters[d], 1, 1) .. a .. sub(letters[d], 2)
		-- is_upper() is also true for "", spaces and punctuation, so the case
		-- of the digraph itself must be checked as well; otherwise a lowercase
		-- vowel digraph at the end of a word would come out in capitals.
		if is_upper(d) and is_upper(d_next) then
			latin = upper(latin)
		end
		return latin .. d_next
	end)

	-- move combining dot above right before other diacritics
	text = gsub(text, "([" .. accents .. "])" .. U(0x0358), U(0x0358) .. "%1")

	-- then substitute all other letters (characters without an entry in
	-- `letters` are left as they are)
	return recomp(gsub(text, ".", letters))
end

-- Convert a Latin transliteration back into Osage script.
--   text  Latin-alphabet string produced by (or written in the style of) export.tr
-- Returns the Osage-script string in NFC form.
function export.tr_reverse(text)
	-- decompose letters (excluding letters with caron, which must stay
	-- precomposed to match the keys of letters_reversed)
	text = gsub(text, "([^ČčŠšŽž]+)", function(v) return decomp(v) end)

	-- handle digraphs first
	-- Vowel digraphs: vowel, optional diacritics, i/I, optional ogonek.
	text = gsub(text, "([AEOaeo])([" .. accents .. "]*)([Ii])(" .. U(0x0328) .. "?)", function(v, a, i, n)  -- catch any diacritics in between vowel digraphs
		-- An absent ogonek is captured as "", which is truthy in Lua, so it
		-- has to be compared explicitly to tell nasalised digraphs apart.
		local nasalised = n ~= ""
		local osage = letters_reversed[v .. (nasalised and "į" or "i")]
		if not osage then
			-- No single Osage letter for this pair (e.g. plain "ei"/"oi"):
			-- keep the text as matched so the letters are converted one by one.
			return v .. a .. i .. n
		end
		return osage .. a
	end)
	for _, v in ipairs(digraphs_reversed) do  -- change uppercase second letter to lowercase for remaining digraphs
		local match_pattern = sub(v, 1, 1) .. "[" .. sub(v, 2, 2) .. upper(sub(v, 2, 2)) .. "]"
		text = gsub_repeatedly(text, match_pattern, letters_reversed[v])  -- run multiple times to catch all instances
	end

	-- move combining ogonek after other diacritics, mirroring the order that
	-- export.tr undoes for the Osage nasalisation dot (the final NFC pass
	-- puts the marks in canonical order in any case)
	text = gsub(text, U(0x0328) .. "([" .. accents .. "]+)", "%1" .. U(0x0328))

	-- macron + acute accent -> double acute accent
	text = gsub(text, U(0x0301) .. U(0x0304), U(0x0304) .. U(0x0301))  -- swap to catch both orders
	text = gsub(text, U(0x0304) .. U(0x0301), letters_reversed)

	-- then substitute all other letters
	return recomp(gsub(text, ".", letters_reversed))
end

return export