Module:mn-translit: Difference between revisions
Created page with "local export = {} local mn = require("Module:mn-common") local gsub = mw.ustring.gsub local preConv = { ["є"] = "ө", ["ѳ"] = "ө", ["Є"] = "Ө", ["Ѳ"] = "Ө" } local oneChar = { ["а"] = "a", ["б"] = "b", ["в"] = "v", ["г"] = "g", ["д"] = "d", ["е"] = "je", ["ё"] = "jo", ["ж"] = "ž", ["з"] = "z", ["и"] = "i", ["й"] = "j", ["к"] = "k", ["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "o", ["ө"] = "ö", ["п"] = "p", ["р"] = "r", ["с"] = "s", ["..." |
m Protected "Module:mn-translit": (bot) automatically protect highly visible templates/modules (reference score: 2000+ >= 1000) ([Edit=Allow only autoconfirmed users] (indefinite) [Move=Allow only autoconfirmed users] (indefinite)) |
||
| Line 2: | Line 2: | ||
local mn = require("Module:mn-common") | local mn = require("Module:mn-common") | ||
local gsub = mw.ustring.gsub | local gsub = mw.ustring.gsub | ||
local toNFC = mw.ustring.toNFC | |||
local toNFD = mw.ustring.toNFD | |||
local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*" | |||
local preConv = { | local preConv = { | ||
["є"] = "ө", ["ѳ"] = "ө", | ["є"] = "ө", ["ѳ"] = "ө", ["ї"] = "ү", | ||
["Є"] = "Ө", ["Ѳ"] = "Ө" | ["Є"] = "Ө", ["Ѳ"] = "Ө", ["Ї"] = "Ү" | ||
} | } | ||
| Line 22: | Line 25: | ||
return nil | return nil | ||
end | end | ||
-- Pre-convert any substitute characters. | |||
text = text:gsub(UTF8_char, preConv) | |||
-- Decompose (except for "ё" and "й") so that accents can be consistently detected. | -- Decompose (except for "ё" and "й") so that accents can be consistently detected. | ||
text = | text = toNFD(text) | ||
text = gsub(text, "[еЕ]̈", | text = gsub(text, "[еЕ]̈", toNFC) | ||
text = gsub(text, "[иИ]̆", | text = gsub(text, "[иИ]̆", toNFC) | ||
-- Hard sign does nothing if word-final (extremely rare, but attested in borrowings of affected Russian proper nouns like "Коммерсантъ"). | -- Hard sign does nothing if word-final (extremely rare, but attested in borrowings of affected Russian proper nouns like "Коммерсантъ"). | ||
text = gsub(text, "[Ъъ]([^а-яёөү])", "%1") | text = gsub(text, "[Ъъ]([^а-яёөү])", "%1") | ||
| Line 63: | Line 64: | ||
-- Do primary substitutions. If still present, Cyrillic "е" becomes "je" and "ю" becomes "ju". | -- Do primary substitutions. If still present, Cyrillic "е" becomes "je" and "ю" becomes "ju". | ||
for digraph, replacement in pairs(twoChars) do | for digraph, replacement in pairs(twoChars) do | ||
text[i].substring = | text[i].substring = text[i].substring:gsub(digraph, replacement) | ||
end | end | ||
text[i].substring = | text[i].substring = text[i].substring:gsub(UTF8_char, oneChar) | ||
table.insert(text.translit, text[i].substring) | table.insert(text.translit, text[i].substring) | ||
end | end | ||
return | return toNFC(table.concat(text.translit, "")) | ||
end | end | ||
return export | return export | ||