Module:Osge-translit

Revision as of 12:46, 21 April 2026 by Sware (talk | contribs) (1 revision imported)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

Documentation for this module may be created at Module:Osge-translit/doc

local export = {}

local m_str_utils = require("Module:string utilities")
local m_table = require("Module:table")

-- Import libraries
local U = require("Module:string/char")
local gsub = m_str_utils.gsub
local len = m_str_utils.len
local sub = m_str_utils.sub

local decomp = mw.ustring.toNFD
local recomp = mw.ustring.toNFC
local upper = m_str_utils.upper

-- Keep applying the same substitution until a pass leaves the text unchanged.
-- `foo` is the pattern and `bar` the replacement, both handed to gsub() as-is.
-- Returns the final text only (gsub's other return values are discarded).
local function gsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		-- parentheses keep only the first return value of gsub()
		term = (gsub(previous, foo, bar))
	until term == previous
	return term
end

-- Report whether `char` is unchanged by upper().
-- Note that this also holds for text without case (digits, spaces, the empty string).
local function is_upper(char)
	local uppercased = upper(char)
	return uppercased == char
end

-- Osage -> Latin transliteration table.
-- Every key is a single character (an Osage letter or a combining mark);
-- a value may be longer than one character (e.g. "Br", "Hč", macron + acute).
local letters = {  -- general table
	-- letters whose Latin value is capitalised
	["𐒰"]="A", ["𐒱"]="Ai", ["𐒲"]="Aį", ["𐒳"]="Ə", ["𐒴"]="Br", ["𐒵"]="Č", ["𐒶"]="Hč", ["𐒷"]="E", ["𐒸"]="Eį", ["𐒹"]="H", ["𐒺"]="Hy",
	["𐒻"]="I", ["𐒼"]="K", ["𐒽"]="Hk", ["𐒾"]="Ky", ["𐒿"]="L", ["𐓀"]="M", ["𐓁"]="N", ["𐓂"]="O", ["𐓃"]="Oį", ["𐓄"]="P", ["𐓅"]="Hp",
	["𐓆"]="S", ["𐓇"]="Š", ["𐓈"]="T", ["𐓉"]="Ht", ["𐓊"]="C", ["𐓋"]="Hc", ["𐓌"]="Ch", ["𐓍"]="Ð", ["𐓎"]="U", ["𐓏"]="W", ["𐓐"]="X",
	["𐓑"]="Ɣ", ["𐓒"]="Z", ["𐓓"]="Ž",
	-- letters whose Latin value is lowercase
	["𐓘"]="a", ["𐓙"]="ai", ["𐓚"]="aį", ["𐓛"]="ə", ["𐓜"]="br", ["𐓝"]="č", ["𐓞"]="hč", ["𐓟"]="e", ["𐓠"]="eį", ["𐓡"]="h", ["𐓢"]="hy",
	["𐓣"]="i", ["𐓤"]="k", ["𐓥"]="hk", ["𐓦"]="ky", ["𐓧"]="l", ["𐓨"]="m", ["𐓩"]="n", ["𐓪"]="o", ["𐓫"]="oį", ["𐓬"]="p", ["𐓭"]="hp",
	["𐓮"]="s", ["𐓯"]="š", ["𐓰"]="t", ["𐓱"]="ht", ["𐓲"]="c", ["𐓳"]="hc", ["𐓴"]="ch", ["𐓵"]="ð", ["𐓶"]="u", ["𐓷"]="w", ["𐓸"]="x",
	["𐓹"]="ɣ", ["𐓺"]="z", ["𐓻"]="ž",
	-- combining marks
	[U(0x0358)]=U(0x0328), -- combining dot above right (U+0358) -> combining ogonek (nasalisation)
	[U(0x030B)]=U(0x0304)..U(0x0301) -- combining double acute accent -> combining macron + combining acute accent (long high tone)
}
-- U+0301 acute, U+0304 macron, U+030B double acute; used inside pattern character classes below
local accents = U(0x0301) .. U(0x0304) .. U(0x030B)  -- list of combining diacritics
-- NOTE(review): presumably m_table.invert swaps keys and values (Latin -> Osage); confirm against Module:table
local letters_reversed = m_table.invert(letters)  -- reverse transliteration table
-- Uppercase Osage letters whose transliteration is longer than one character,
-- concatenated into one string so it can be dropped into a pattern character class.
local digraphs = ""
-- Latin transliterations (both cases) that are longer than one character.
local digraphs_reversed = {}
for k, v in pairs(letters) do
	-- The combining double acute also maps to a two-character value
	-- (macron + acute), but it is a diacritic rather than a digraph letter.
	-- Skip any key that is one of the combining accents so it does not end up
	-- in either list. The plain (non-pattern) find is safe on whole UTF-8
	-- characters.
	local is_accent = string.find(accents, k, 1, true) ~= nil
	if len(v) > 1 and not is_accent then
		if is_upper(k) then
			digraphs = digraphs .. k
		end
		table.insert(digraphs_reversed, v)
	end
end

--- Transliterate Osage-script text into Latin script.
-- @param text  the text to transliterate
-- @param lang  not used by this function
-- @param sc    not used by this function
-- @return the transliterated text, passed through NFC recomposition
function export.tr(text, lang, sc)
	-- Handle vowel digraphs and uppercase digraphs first, so that an accent
	-- following the Osage letter ends up between the two Latin characters.
	-- The trailing "(.?)" consumes the character after the digraph, so a single
	-- gsub() pass skips a digraph letter that directly follows another one;
	-- repeat until nothing changes to catch all instances. Each pass turns at
	-- least one matching character into Latin, so the loop terminates.
	text = gsub_repeatedly(text, "([" .. digraphs .. "𐓙𐓚𐓠𐓫])([" .. accents .. "]?)(.?)", function(d, a, d_next)
		local latin = letters[d]
		local translit = sub(latin, 1, 1) .. a .. sub(latin, 2)  -- place diacritics in between
		-- Capitalise the whole digraph only when the Osage letter itself is
		-- uppercase and what follows is not lowercase. Checking `d` matters
		-- because is_upper() is also true for "", spaces and punctuation, which
		-- would otherwise turn a word-final lowercase digraph into capitals.
		-- A lone uppercase digraph letter is still rendered fully capitalised.
		if is_upper(d) and is_upper(d_next) then
			translit = upper(translit)
		end
		return translit .. d_next
	end)

	-- move combining dot above before other diacritics
	text = gsub(text, "([" .. accents .. "])" .. U(0x0358), U(0x0358) .. "%1")

	-- then substitute all other letters (characters without a table entry are kept)
	local result = gsub(text, ".", letters)
	return recomp(result)
end

--- Convert a Latin transliteration back into Osage script.
-- @param text  the Latin-script text
-- @return the converted text, passed through NFC recomposition
function export.tr_reverse(text)
	-- decompose letters (excluding letters with caron, which are looked up in
	-- their precomposed form)
	text = gsub(text, "([^ČčŠšŽž]+)", function(v) return decomp(v) end)

	-- handle vowel digraphs first, catching any diacritics in between the vowels
	text = gsub(text, "([AEOaeo])([" .. accents .. "]*)[Ii](" .. U(0x0328) .. "?)", function(v, a, n)
		-- `n` is "" rather than nil when no ogonek follows, and "" is truthy in
		-- Lua, so it must be compared explicitly to tell nasalised vowels apart.
		local digraph = v .. (n ~= "" and "į" or "i")
		local osage = letters_reversed[digraph]
		if osage then
			return osage .. a
		end
		-- No Osage letter exists for this pair (e.g. "ei", "oi"): returning nil
		-- leaves the match untouched so the vowels are converted one by one below.
		return nil
	end)
	for _, v in ipairs(digraphs_reversed) do  -- also accept an uppercase second letter for remaining digraphs
		local second = sub(v, 2, 2)
		local match_pattern = sub(v, 1, 1) .. "[" .. second .. upper(second) .. "]"
		text = gsub_repeatedly(text, match_pattern, letters_reversed[v])  -- run multiple times to catch all instances
	end

	-- move combining ogonek after other diacritics (it becomes U+0358 in the
	-- final substitution below)
	text = gsub(text, U(0x0328) .. "([" .. accents .. "]+)", "%1" .. U(0x0328))

	-- macron + acute accent -> double acute accent
	text = gsub(text, U(0x0301) .. U(0x0304), U(0x0304) .. U(0x0301))  -- swap to catch both orders
	text = gsub(text, U(0x0304) .. U(0x0301), letters_reversed)  -- whole match is the lookup key

	-- then substitute all other letters (characters without a table entry are kept)
	local result = gsub(text, ".", letters_reversed)
	return recomp(result)
end

return export