Module:Osge-translit: Difference between revisions
Jump to navigation
Jump to search
not needed |
m 1 revision imported |
(No difference)
| |
Latest revision as of 12:46, 21 April 2026
Documentation for this module may be created at Module:Osge-translit/doc
local export = {}
local m_str_utils = require("Module:string utilities")
local m_table = require("Module:table")
-- Import libraries
local U = require("Module:string/char")
local gsub = m_str_utils.gsub
local len = m_str_utils.len
local sub = m_str_utils.sub
local decomp = mw.ustring.toNFD
local recomp = mw.ustring.toNFC
local upper = m_str_utils.upper
-- Keep applying gsub() with the same pattern and replacement until a pass
-- leaves the string unchanged, then return that stable string.
--   term: string to work on
--   foo:  pattern passed to gsub()
--   bar:  replacement passed to gsub() (anything gsub() accepts)
local function gsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		-- single-target assignment keeps only gsub()'s first result (the string)
		term = gsub(previous, foo, bar)
	until term == previous
	return term
end
-- Report whether `char` is unchanged by upper().
-- Note that this is also true for input upper() does not alter at all
-- (e.g. the empty string), not only for uppercase letters.
local function is_upper(char)
	local uppercased = upper(char)
	return uppercased == char
end
-- Osage letter -> Latin transliteration. The keys follow the order of the
-- Osage Unicode block: capitals U+104B0..U+104D3, small letters
-- U+104D8..U+104FB. Only letters are listed here; the combining marks are
-- added further down, after the digraph lists have been built, so that a mark
-- whose replacement is two code points long is not mistaken for a digraph.
local letters = { -- general table
	["𐒰"]="A", ["𐒱"]="Ai", ["𐒲"]="Aį", ["𐒳"]="Ə", ["𐒴"]="Br", ["𐒵"]="Č", ["𐒶"]="Hč", ["𐒷"]="E", ["𐒸"]="Eį", ["𐒹"]="H", ["𐒺"]="Hy", -- U+104B0..U+104BA
	["𐒻"]="I", ["𐒼"]="K", ["𐒽"]="Hk", ["𐒾"]="Ky", ["𐒿"]="L", ["𐓀"]="M", ["𐓁"]="N", ["𐓂"]="O", ["𐓃"]="Oį", ["𐓄"]="P", ["𐓅"]="Hp", -- U+104BB..U+104C5
	["𐓆"]="S", ["𐓇"]="Š", ["𐓈"]="T", ["𐓉"]="Ht", ["𐓊"]="C", ["𐓋"]="Hc", ["𐓌"]="Ch", ["𐓍"]="Ð", ["𐓎"]="U", ["𐓏"]="W", ["𐓐"]="X", -- U+104C6..U+104D0
	["𐓑"]="Ɣ", ["𐓒"]="Z", ["𐓓"]="Ž", -- U+104D1..U+104D3
	["𐓘"]="a", ["𐓙"]="ai", ["𐓚"]="aį", ["𐓛"]="ə", ["𐓜"]="br", ["𐓝"]="č", ["𐓞"]="hč", ["𐓟"]="e", ["𐓠"]="eį", ["𐓡"]="h", ["𐓢"]="hy", -- U+104D8..U+104E2
	["𐓣"]="i", ["𐓤"]="k", ["𐓥"]="hk", ["𐓦"]="ky", ["𐓧"]="l", ["𐓨"]="m", ["𐓩"]="n", ["𐓪"]="o", ["𐓫"]="oį", ["𐓬"]="p", ["𐓭"]="hp", -- U+104E3..U+104ED
	["𐓮"]="s", ["𐓯"]="š", ["𐓰"]="t", ["𐓱"]="ht", ["𐓲"]="c", ["𐓳"]="hc", ["𐓴"]="ch", ["𐓵"]="ð", ["𐓶"]="u", ["𐓷"]="w", ["𐓸"]="x", -- U+104EE..U+104F8
	["𐓹"]="ɣ", ["𐓺"]="z", ["𐓻"]="ž", -- U+104F9..U+104FB
}
local accents = U(0x0301) .. U(0x0304) .. U(0x030B) -- list of combining diacritics
local digraphs = "" -- osage letters that represent digraphs (uppercase only)
local digraphs_reversed = {} -- latin letter sequences that represent digraphs
for k, v in pairs(letters) do
	if len(v) > 1 then
		if is_upper(k) then digraphs = digraphs .. k end
		table.insert(digraphs_reversed, v)
	end
end
-- Combining marks. They are added only now: is_upper() is true for a combining
-- mark and the double-acute replacement is two code points long, so adding
-- them before the loop above would put them into both digraph lists.
letters[U(0x0358)] = U(0x0328) -- combining dot above right -> combining ogonek (nasalisation)
letters[U(0x030B)] = U(0x0304) .. U(0x0301) -- combining double acute accent -> combining macron + combining acute accent (long high tone)
local letters_reversed = m_table.invert(letters) -- reverse transliteration table
-- Transliterate Osage-script text into Latin script.
--   text: string to transliterate
--   lang, sc: accepted but not used by this function
-- Returns the transliteration, normalised with toNFC.
function export.tr(text, lang, sc)
	-- Letters that need digraph handling: the uppercase digraph letters from
	-- `digraphs` (with any combining marks removed, so that a mark can never
	-- start a match) plus the four lowercase vowel digraphs ai, aį, eį, oį.
	local digraph_class = "[" .. gsub(digraphs, "[" .. accents .. "]", "") .. "𐓙𐓚𐓠𐓫]"
	-- handle vowel and other uppercase digraphs first
	-- The pattern also consumes the following character (d_next) to look at its
	-- case, so a digraph letter directly after another one is skipped in that
	-- pass; the substitution is therefore repeated until nothing changes.
	text = gsub_repeatedly(text, "(" .. digraph_class .. ")([" .. accents .. "]?)(.?)", function(d, a, d_next)
		local latin = letters[d]
		local result = sub(latin, 1, 1) .. a .. sub(latin, 2) -- place diacritics in between for vowels
		-- Upper-case the whole digraph only when the digraph letter itself is
		-- uppercase and is not followed by a lowercase letter. is_upper() is
		-- also true for "" and for uncased characters, so without the check on
		-- `d` a lowercase digraph at the end of a word would be upper-cased.
		-- NOTE(review): relies on upper() handling Osage letters, as the
		-- construction of `digraphs` already does.
		if is_upper(d) and is_upper(d_next) then
			result = upper(result)
		end
		return result .. d_next
	end)
	-- move combining dot above before other diacritics
	text = gsub(text, "([" .. accents .. "])" .. U(0x0358), U(0x0358) .. "%1")
	-- then substitute all other letters (characters without a table entry are kept)
	local converted = gsub(text, ".", letters)
	return recomp(converted)
end
-- Convert Latin-script text back into Osage script using the inverted
-- letter table.
--   text: Latin-script string
-- Returns the converted string, normalised with toNFC.
function export.tr_reverse(text)
	-- decompose letters (excluding letters with caron, which are matched in
	-- their precomposed form against the letter table)
	text = gsub(text, "([^ČčŠšŽž]+)", function(v) return decomp(v) end)
	-- handle vowel digraphs first
	text = gsub(text, "([AEOaeo])([" .. accents .. "]*)[Ii](" .. U(0x0328) .. "?)", function(v, a, n) -- catch any diacritics in between vowel digraphs
		-- `n` is the empty string when there is no ogonek, and "" is truthy in
		-- Lua, so compare against "" instead of testing `n` directly.
		local osage = letters_reversed[v .. (n ~= "" and "į" or "i")]
		if osage then
			return osage .. a
		end
		-- No single Osage letter for this pair (the table has no plain
		-- "ei"/"oi"): returning nothing keeps the matched text, and its
		-- characters are converted individually at the end.
	end)
	for _, v in ipairs(digraphs_reversed) do -- also accept an uppercase second letter for remaining digraphs
		local second = sub(v, 2, 2)
		local match_pattern = sub(v, 1, 1) .. "[" .. second .. upper(second) .. "]"
		text = gsub_repeatedly(text, match_pattern, letters_reversed[v]) -- run multiple times to catch all instances
	end
	-- move combining ogonek after other diacritics (at this point the text
	-- still contains the ogonek U+0328; it only becomes U+0358 in the final
	-- substitution below, so U+0328 is what has to be matched here)
	text = gsub(text, U(0x0328) .. "([" .. accents .. "]+)", "%1" .. U(0x0328))
	-- macron + acute accent -> double acute accent
	text = gsub(text, U(0x0301) .. U(0x0304), U(0x0304) .. U(0x0301)) -- swap to catch both orders
	text = gsub(text, U(0x0304) .. U(0x0301), letters_reversed)
	-- then substitute all other letters (characters without a table entry are kept)
	local converted = gsub(text, ".", letters_reversed)
	return recomp(converted)
end
return export