Module:Osge-translit

Revision as of 11:05, 17 October 2025 by wikt>AmazingJus (not needed)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

Documentation for this module may be created at Module:Osge-translit/doc

local export = {}

local m_str_utils = require("Module:string utilities")
local m_table = require("Module:table")

-- Import libraries
local U = require("Module:string/char")
local gsub = m_str_utils.gsub
local len = m_str_utils.len
local sub = m_str_utils.sub

local decomp = mw.ustring.toNFD
local recomp = mw.ustring.toNFC
local upper = m_str_utils.upper

-- Keep applying gsub(term, foo, bar) until a pass leaves the string unchanged,
-- then return that fixed point.
--   term: string to operate on
--   foo:  pattern handed to gsub
--   bar:  replacement handed to gsub (string, table or function)
local function gsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		-- parentheses keep only the substituted string and drop gsub's match count
		term = (gsub(previous, foo, bar))
	until term == previous
	return term
end

-- Check if given character is an uppercase letter.
-- upper() leaves caseless input (the empty string, spaces, punctuation,
-- combining marks) unchanged, so comparing against upper() alone would report
-- all of those as uppercase. Requiring that lower() actually changes the
-- character restricts the result to cased, uppercase letters.
--   char: string to test (expected to be a single character)
--   returns: true only if char is unchanged by upper() and changed by lower()
local function is_upper(char)
	return char == upper(char) and char ~= mw.ustring.lower(char)
end

-- Forward transliteration table, used as the gsub() replacement table in
-- export.tr and inverted further down for export.tr_reverse.
-- Keys are single source-script characters; values are their Latin
-- equivalents. The first group of entries carries capitalised values, the
-- second group the corresponding lowercase values. Values longer than one
-- character (e.g. "Ai", "Hk") are the digraphs that get special handling.
-- The last two entries map combining marks rather than letters.
-- NOTE(review): the non-ASCII literals below look mis-encoded in this copy
-- (UTF-8 read through a legacy code page) -- verify against the live module.
local letters = {  -- general table
	["๐’ฐ"]="A", ["๐’ฑ"]="Ai", ["๐’ฒ"]="Aฤฏ", ["๐’ณ"]="ฦ", ["๐’ด"]="Br", ["๐’ต"]="ฤŒ", ["๐’ถ"]="Hฤ", ["๐’ท"]="E", ["๐’ธ"]="Eฤฏ", ["๐’น"]="H", ["๐’บ"]="Hy",
	["๐’ป"]="I", ["๐’ผ"]="K", ["๐’ฝ"]="Hk", ["๐’พ"]="Ky", ["๐’ฟ"]="L", ["๐“€"]="M", ["๐“"]="N", ["๐“‚"]="O", ["๐“ƒ"]="Oฤฏ", ["๐“„"]="P", ["๐“…"]="Hp",
	["๐“†"]="S", ["๐“‡"]="ล ", ["๐“ˆ"]="T", ["๐“‰"]="Ht", ["๐“Š"]="C", ["๐“‹"]="Hc", ["๐“Œ"]="Ch", ["๐“"]="ร", ["๐“Ž"]="U", ["๐“"]="W", ["๐“"]="X",
	["๐“‘"]="ฦ”", ["๐“’"]="Z", ["๐““"]="ลฝ",
	["๐“˜"]="a", ["๐“™"]="ai", ["๐“š"]="aฤฏ", ["๐“›"]="ษ™", ["๐“œ"]="br", ["๐“"]="ฤ", ["๐“ž"]="hฤ", ["๐“Ÿ"]="e", ["๐“ "]="eฤฏ", ["๐“ก"]="h", ["๐“ข"]="hy",
	["๐“ฃ"]="i", ["๐“ค"]="k", ["๐“ฅ"]="hk", ["๐“ฆ"]="ky", ["๐“ง"]="l", ["๐“จ"]="m", ["๐“ฉ"]="n", ["๐“ช"]="o", ["๐“ซ"]="oฤฏ", ["๐“ฌ"]="p", ["๐“ญ"]="hp",
	["๐“ฎ"]="s", ["๐“ฏ"]="ลก", ["๐“ฐ"]="t", ["๐“ฑ"]="ht", ["๐“ฒ"]="c", ["๐“ณ"]="hc", ["๐“ด"]="ch", ["๐“ต"]="รฐ", ["๐“ถ"]="u", ["๐“ท"]="w", ["๐“ธ"]="x",
	["๐“น"]="ษฃ", ["๐“บ"]="z", ["๐“ป"]="ลพ",
	[U(0x0358)]=U(0x0328), -- combining dot above right (U+0358) -> combining ogonek (U+0328), marks nasalisation
	[U(0x030B)]=U(0x0304)..U(0x0301) -- combining double acute accent -> combining macron + combining acute accent (long high tone)
}
-- Combining diacritics treated as accents: acute, macron, double acute.
local accents = U(0x0301) .. U(0x0304) .. U(0x030B)
-- Reverse transliteration table (values of `letters` become keys).
local letters_reversed = m_table.invert(letters)
-- `digraphs`: concatenation of every key of `letters` that is_upper() accepts
-- and whose transliteration is longer than one character; it is spliced into
-- a character class in export.tr.
-- `digraphs_reversed`: list of every multi-character transliteration, in
-- either case; export.tr_reverse iterates over it.
local digraphs
local digraphs_reversed = {}
do
	local upper_keys = {}
	for source_char, latin in pairs(letters) do
		if len(latin) > 1 then
			if is_upper(source_char) then
				upper_keys[#upper_keys + 1] = source_char
			end
			digraphs_reversed[#digraphs_reversed + 1] = latin
		end
	end
	digraphs = table.concat(upper_keys)
end

-- Transliterate text into Latin using the `letters` table.
--   text: string to transliterate
--   lang: not used by this function
--   sc:   not used by this function
--   returns: the transliterated string, passed through NFC recomposition
function export.tr(text, lang, sc)
	-- Digraph pass. Matches an uppercase digraph letter (or one of the four
	-- lowercase vowel digraphs listed literally), an optional accent, and the
	-- character that follows. This is a single gsub pass: a digraph letter
	-- consumed as `d_next` is not revisited here, but is still converted by
	-- the final per-character substitution below.
	text = gsub(text, "([" .. digraphs .. "๐“™๐“š๐“ ๐“ซ])([" .. accents .. "]?)(.?)", function(d, a, d_next)
		local latin = letters[d]
		-- place the diacritic between the two Latin letters of the digraph
		local translit = sub(latin, 1, 1) .. a .. sub(latin, 2)
		-- Capitalise the whole digraph only when a real uppercase letter
		-- follows. upper() leaves "", spaces, punctuation and combining marks
		-- unchanged, so without the extra checks a digraph at the end of the
		-- text or before a space would be capitalised by mistake.
		if d_next ~= "" and is_upper(d_next) and d_next ~= mw.ustring.lower(d_next) then
			translit = upper(translit)
		end
		return translit .. d_next
	end)

	-- move the nasalisation mark (U+0358) in front of a preceding accent
	text = gsub(text, "([" .. accents .. "])" .. U(0x0358), U(0x0358) .. "%1")

	-- then substitute all other letters, one character at a time
	return recomp(gsub(text, ".", letters))
end

-- Convert a Latin transliteration back to the source script using
-- `letters_reversed`.
--   text: Latin-script string
--   returns: the converted string, passed through NFC recomposition
function export.tr_reverse(text)
	local ogonek = U(0x0328)
	local acute, macron = U(0x0301), U(0x0304)

	-- decompose letters (excluding letters with caron, which must stay precomposed
	-- to match the keys of letters_reversed)
	text = gsub(text, "([^ฤŒฤล ลกลฝลพ]+)", function(v) return decomp(v) end)

	-- handle vowel digraphs first, catching any diacritics between the two vowels
	text = gsub(text, "([AEOaeo])([" .. accents .. "]*)[Ii](" .. ogonek .. "?)", function(v, a, n)
		-- `n` is "" (not nil) when no ogonek follows, and "" is truthy in Lua,
		-- so it must be compared explicitly to tell nasal from oral digraphs.
		local key
		if n ~= "" then
			key = v .. "ฤฏ"  -- nasalised digraph
		else
			key = v .. "i"  -- plain digraph
		end
		local replacement = letters_reversed[key]
		if replacement then
			return replacement .. a
		end
		-- No dedicated letter for this vowel pair: returning nil leaves the
		-- match untouched for the per-character substitution at the end.
		return nil
	end)
	for _, v in ipairs(digraphs_reversed) do  -- accept the second letter in either case for remaining digraphs
		local match_pattern = sub(v, 1, 1) .. "[" .. sub(v, 2, 2) .. upper(sub(v, 2, 2)) .. "]"
		text = gsub_repeatedly(text, match_pattern, letters_reversed[v])  -- run multiple times to catch all instances
	end

	-- move combining ogonek after other diacritics (it is still U+0328 here;
	-- the final substitution turns it into U+0358)
	text = gsub(text, ogonek .. "([" .. accents .. "]+)", "%1" .. ogonek)

	-- macron + acute accent -> double acute accent
	text = gsub(text, acute .. macron, macron .. acute)  -- swap to catch both orders
	text = gsub(text, macron .. acute, letters_reversed)  -- table lookup on the whole match

	-- then substitute all other letters
	return recomp(gsub(text, ".", letters_reversed))
end

return export