Module:mn-translit: Difference between revisions

Created page with "local export = {} local mn = require("Module:mn-common") local gsub = mw.ustring.gsub local preConv = { ["є"] = "ө", ["ѳ"] = "ө", ["Є"] = "Ө", ["Ѳ"] = "Ө" } local oneChar = { ["а"] = "a", ["б"] = "b", ["в"] = "v", ["г"] = "g", ["д"] = "d", ["е"] = "je", ["ё"] = "jo", ["ж"] = "ž", ["з"] = "z", ["и"] = "i", ["й"] = "j", ["к"] = "k", ["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "o", ["ө"] = "ö", ["п"] = "p", ["р"] = "r", ["с"] = "s", ["..."
 
m 1 revision imported
 
(One intermediate revision by one other user not shown)
Line 2: Line 2:
local mn = require("Module:mn-common")
local mn = require("Module:mn-common")
local gsub = mw.ustring.gsub
local gsub = mw.ustring.gsub
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*"


local preConv = {
local preConv = {
["є"] = "ө", ["ѳ"] = "ө",
["є"] = "ө", ["ѳ"] = "ө", ["ї"] = "ү",
["Є"] = "Ө", ["Ѳ"] = "Ө"
["Є"] = "Ө", ["Ѳ"] = "Ө", ["Ї"] = "Ү"
}
}


Line 22: Line 25:
return nil
return nil
end
end
-- Pre-convert any substitute characters.
local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*"
text = text:gsub(UTF8_char, preConv)
-- Decompose (except for "ё" and "й") so that accents can be consistently detected.
-- Decompose (except for "ё" and "й") so that accents can be consistently detected.
text = mw.ustring.toNFD(text)
text = toNFD(text)
text = gsub(text, "[еЕ]̈", mw.ustring.toNFC)
text = gsub(text, "[еЕ]̈", toNFC)
text = gsub(text, "[иИ]̆", mw.ustring.toNFC)
text = gsub(text, "[иИ]̆", toNFC)
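-- Pre-convert substitute characters via the preConv table (e.g. "є" and "ѳ" to "ө").
-- NOTE(review): this runs on NFD text; a precomposed key such as "ї" (NFD: "і" + U+0308) would presumably never match here - confirm.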
text = string.gsub(text, UTF8_char, preConv)
-- Hard sign does nothing if word-final (extremely rare, but attested in borrowings of affected Russian proper nouns like "Коммерсантъ").
-- Hard sign does nothing if word-final (extremely rare, but attested in borrowings of affected Russian proper nouns like "Коммерсантъ").
text = gsub(text, "[Ъъ]([^а-яёөү])", "%1")
text = gsub(text, "[Ъъ]([^а-яёөү])", "%1")
Line 63: Line 64:
-- Do primary substitutions. If still present, Cyrillic "е" becomes "je" and "ю" becomes "ju".
-- Do primary substitutions. If still present, Cyrillic "е" becomes "je" and "ю" becomes "ju".
for digraph, replacement in pairs(twoChars) do
for digraph, replacement in pairs(twoChars) do
text[i].substring = string.gsub(text[i].substring, digraph, replacement)
text[i].substring = text[i].substring:gsub(digraph, replacement)
end
end
text[i].substring = string.gsub(text[i].substring, UTF8_char, oneChar)
text[i].substring = text[i].substring:gsub(UTF8_char, oneChar)
table.insert(text.translit, text[i].substring)
table.insert(text.translit, text[i].substring)
end
end
return mw.ustring.toNFC(table.concat(text.translit, ""))
return toNFC(table.concat(text.translit, ""))
end
end


return export
return export