Module:Cyrs-translit: Difference between revisions

Created page with "local export = {} local numbers = mw.loadData("Module:Cyrs-translit/numbers") local ugsub = mw.ustring.gsub local toNFC = mw.ustring.toNFC local toNFD = mw.ustring.toNFD local U = mw.ustring.char local umatch = mw.ustring.match local usub = mw.ustring.sub local ulower = mw.ustring.lower local acute = U(0x301) local grave = U(0x300) local circumflex = U(0x302) local palatalization = U(0x0484) local titlo = U(0x0483) local dasia = U(0x0485) local psili = U(0x0486) local..."
 
m 1 revision imported
 
(One intermediate revision by one other user not shown)
Line 14: Line 14:
local grave = U(0x300)
local grave = U(0x300)
local circumflex = U(0x302)
local circumflex = U(0x302)
local kamora = U(0x0311)
local palatalization = U(0x0484)
local palatalization = U(0x0484)
local titlo = U(0x0483)
local titlo = U(0x0483)
Line 22: Line 23:
local breathing = psili .. dasia
local breathing = psili .. dasia
local accent = "[" .. acute .. grave .. circumflex .. breathing .. "]*"
local accent = "[" .. acute .. grave .. circumflex .. breathing .. "]*"
local vowels = "aAeEiIoOuUyY"
local vowels = "aAæÆeEiIoOœŒꝏꝎuUyY"
local vowel_or_soft = "[" .. vowels .. "ʹ]"
local vowel_or_soft = "[" .. vowels .. "ʹ]"


Line 38: Line 39:
["І"] = 'I', ["і"] = 'i', -- Contrastive with "И".
["І"] = 'I', ["і"] = 'i', -- Contrastive with "И".
["Й"] = 'J', ["й"] = 'j',
["Й"] = 'J', ["й"] = 'j',
["Ꙉ"] = 'Đ', ["ꙉ"] = 'đ',
["Ꙉ"] = 'Ǵ', ["ꙉ"] = 'ǵ',
["К"] = 'K', ["к"] = 'k',
["К"] = 'K', ["к"] = 'k',
["Л"] = 'L', ["л"] = 'l',
["Л"] = 'L', ["л"] = 'l',
Line 44: Line 45:
["Н"] = 'N', ["н"] = 'n',
["Н"] = 'N', ["н"] = 'n',
["О"] = 'O', ["о"] = 'o',
["О"] = 'O', ["о"] = 'o',
["Ꚙ"] = 'Ꝏ', ["ꚙ"] = 'ꝏ',
["П"] = 'P', ["п"] = 'p',
["П"] = 'P', ["п"] = 'p',
["Р"] = 'R', ["р"] = 'r',
["Р"] = 'R', ["р"] = 'r',
["С"] = 'S', ["с"] = 's',
["С"] = 'S', ["с"] = 's',
["Т"] = 'T', ["т"] = 't',
["Т"] = 'T', ["т"] = 't',
["У"] = 'U', ["у"] = 'u',
["Ꙋ"] = 'U', ["ꙋ"] = 'u',
["Ꙋ"] = 'U', ["ꙋ"] = 'u',
["У"] = 'U', ["у"] = 'u',
["Ф"] = 'F', ["ф"] = 'f',
["Ф"] = 'F', ["ф"] = 'f',
["Х"] = 'X', ["х"] = 'x',
["Х"] = 'X', ["х"] = 'x',
Line 58: Line 60:
["Ꙡ"] = 'Ć', ["ꙡ"] = 'ć', -- From a merger of "Ц" and "Ч" in Old Novgorodian.
["Ꙡ"] = 'Ć', ["ꙡ"] = 'ć', -- From a merger of "Ц" and "Ч" in Old Novgorodian.
["Ч"] = 'Č', ["ч"] = 'č',
["Ч"] = 'Č', ["ч"] = 'č',
["Џ"] = 'Dž', ["џ"] = 'dž',
["Ш"] = 'Š', ["ш"] = 'š',
["Ш"] = 'Š', ["ш"] = 'š',
["Щ"] = 'Št', ["щ"] = 'št',
["Щ"] = 'Št', ["щ"] = 'št',
Line 65: Line 68:
["Ѣ"] = 'Ě', ["ѣ"] = 'ě',
["Ѣ"] = 'Ě', ["ѣ"] = 'ě',
["Ꙓ"] = 'Jě', ["ꙓ"] = 'jě',
["Ꙓ"] = 'Jě', ["ꙓ"] = 'jě',
["Ю"] = 'Ju', ["ю"] = 'ju',
["Ꙗ"] = 'Ja', ["ꙗ"] = 'ja',
["Ꙗ"] = 'Ja', ["ꙗ"] = 'ja',
["Ѥ"] = 'Je', ["ѥ"] = 'je',
["Ѥ"] = 'Je', ["ѥ"] = 'je',
["Ю"] = 'Ju', ["ю"] = 'ju',
["Ѧ"] = 'Ę', ["ѧ"] = 'ę',
["Ѫ"] = 'Ǫ', ["ѫ"] = 'ǫ',
["Ѫ"] = 'Ǫ', ["ѫ"] = 'ǫ',
["Ѩ"] = 'Ję', ["ѩ"] = 'ję',
["Ѭ"] = 'Jǫ', ["ѭ"] = 'jǫ',
["Ѭ"] = 'Jǫ', ["ѭ"] = 'jǫ',
["Ѧ"] = 'Ę', ["ѧ"] = 'ę',
[""] = 'Œ', [""] = 'œ', -- Becomes "œ̨".
["Ѩ"] = 'Ję', ["ѩ"] = 'ję',
["Ѯ"] = 'Ks', ["ѯ"] = 'ks',
["Ѯ"] = 'Ks', ["ѯ"] = 'ks',
["Ѱ"] = 'Ps', ["ѱ"] = 'ps',
["Ѱ"] = 'Ps', ["ѱ"] = 'ps',
Line 78: Line 82:
["Ѷ"] = 'Ü', ["ѷ"] = 'ü', -- Contrastive with "Ѵ".
["Ѷ"] = 'Ü', ["ѷ"] = 'ü', -- Contrastive with "Ѵ".
["Ҁ"] = 'Q', ["ҁ"] = 'q',
["Ҁ"] = 'Q', ["ҁ"] = 'q',
[psili] = '',
[kamora] = circumflex,
}
}


Line 83: Line 89:
["ᲀ"] = 'в',
["ᲀ"] = 'в',
["Ґ"] = 'Г', ["ґ"] = 'г',
["Ґ"] = 'Г', ["ґ"] = 'г',
-- ["Ђ"] = 'Ꙉ', ["ђ"] = 'ꙉ',
["ᲁ"] = 'д',
["ᲁ"] = 'д',
["Ꙣ"] = 'Д' .. palatalization, ["ꙣ"] = 'д' .. palatalization,
["Ꙣ"] = 'Д' .. palatalization, ["ꙣ"] = 'д' .. palatalization,
Line 93: Line 100:
["Ї"] = 'І', ["ї"] = 'і',
["Ї"] = 'І', ["ї"] = 'і',
["Ꙇ"] = 'І', ["ꙇ"] = 'і',
["Ꙇ"] = 'І', ["ꙇ"] = 'і',
-- ["Ћ"] = 'Ꙉ', ["ћ"] = 'ꙉ',
["Ꙥ"] = 'Л' .. palatalization, ["ꙥ"] = 'л' .. palatalization,
["Ꙥ"] = 'Л' .. palatalization, ["ꙥ"] = 'л' .. palatalization,
["Ꙧ"] = 'М' .. palatalization, ["ꙧ"] = 'м' .. palatalization,
["Ꙧ"] = 'М' .. palatalization, ["ꙧ"] = 'м' .. palatalization,
Line 100: Line 108:
["Ꙫ"] = 'О', ["ꙫ"] = 'о',
["Ꙫ"] = 'О', ["ꙫ"] = 'о',
["Ꚛ"] = 'О', ["ꚛ"] = 'о',
["Ꚛ"] = 'О', ["ꚛ"] = 'о',
["Ꚙ"] = 'О', ["ꚙ"] = 'о',
["Ꙭ"] = '', ["ꙭ"] = '',
["Ꙭ"] = 'О', ["ꙭ"] = 'о',
["ꙮ"] = 'о',
["ꙮ"] = 'о',
["ᲂ"] = 'о',
["ᲂ"] = 'о',
Line 114: Line 121:
["Я"] = 'Ꙗ', ["я"] = 'ꙗ',
["Я"] = 'Ꙗ', ["я"] = 'ꙗ',
["Ꙕ"] = 'Ю', ["ꙕ"] = 'ю',
["Ꙕ"] = 'Ю', ["ꙕ"] = 'ю',
["Ꙛ"] = 'Ѫ', ["ꙛ"] = 'ѫ',
["Ꙙ"] = 'Ѧ', ["ꙙ"] = 'ѧ',
["Ꙙ"] = 'Ѧ', ["ꙙ"] = 'ѧ',
["Ꙝ"] = 'Ѩ', ["ꙝ"] = 'ѩ',
["Ꙝ"] = 'Ѩ', ["ꙝ"] = 'ѩ',
}
-- A second round of substitutions, e.g. if the final output isn't a precomposed character, but needs to behave like one during processing.
local final_substitutions = {
["Œ"] = "Œ̨", ["œ"] = "œ̨",
[titlo] = ":", [vzmet] = ":"
}
}


Line 149: Line 161:
-- Old Novgorodian
-- Old Novgorodian
lang_letters["zle-ono"] = setmetatable({
lang_letters["zle-ono"] = setmetatable({
["Ц"] = 'Ć', ["ц"] = 'ć',
["Ц"] = '', ["ц"] = '',
["Ч"] = 'Ć', ["ч"] = 'ć',
["Ч"] = '', ["ч"] = '',
["Щ"] = 'Ść', ["щ"] = 'ść',
["Щ"] = 'Sʹcʹ', ["щ"] = 'sʹcʹ',
}, {__index = common_letters})
}, {__index = common_letters})
Line 159: Line 171:
-- Old Pskovian
-- Old Pskovian
lang_letters["zle-ops"] = setmetatable({ -- In addition to zle-ono above.
lang_letters["zle-ops"] = setmetatable({ -- In addition to zle-ono above.
["Ж"] = 'Ź', ["ж"] = 'ź',
["Ж"] = '', ["ж"] = '',
["Ѕ"] = '', ["ѕ"] = '',
["Ѕ"] = 'Dzʹ', ["ѕ"] = 'dzʹ',
["З"] = 'Ź', ["з"] = 'ź',
["З"] = '', ["з"] = '',
["С"] = 'Ś', ["с"] = 'ś',
["С"] = '', ["с"] = 'sʹ',
["Ш"] = 'Ś', ["ш"] = 'ś',
["Џ"] = 'Dzʹ', ["џ"] = 'dzʹ',
["Щ"] = 'Šk', ["щ"] = 'šk',
["Ш"] = '', ["ш"] = '',
["Щ"] = 'Sʹk', ["щ"] = 'sʹk',
["Ѣ"] = 'Æ', ["ѣ"] = 'æ',
["Ꙓ"] = 'Jæ', ["ꙓ"] = 'jæ',
}, {__index = lang_letters["zle-ono"]})
}, {__index = lang_letters["zle-ono"]})
Line 185: Line 200:
end
end


local function handle_breathing(vowel, br)
local function handle_rough_breathing(base1, base2, diacritics)
-- Don't mark smooth breathing.
-- Mark rough breathing with "h".
if br == psili then
local base2_lower = ulower(base2)
return vowel
if not vowels:match(base2_lower) then
return base1 .. base2 .. diacritics .. "h"
end
end
-- Mark rough breathing with "h".
local base1_lower = ulower(base1)
local vowel_lower = ulower(vowel)
if not vowels:match(base1_lower) then
return (vowel_lower == vowel and "h" or "H") .. vowel_lower
return base1 .. (base2_lower == base2 and "h" or "H") .. base2_lower .. diacritics
end
return (base1_lower == base1 and "h" or "H") .. base1 .. base2 .. diacritics
end
end


Line 268: Line 286:
-- In some languages, treat "уо" ("uo") as "у" ("u").
-- In some languages, treat "уо" ("uo") as "у" ("u").
if uo_is_u[lang] then
if uo_is_u[lang] then
-- Not "ꚙ", which is an orthographically doubled "о".
text = ugsub(text, "([уУѵѴѷѶ]" .. accent .. ")[оО]", "%1")
text = ugsub(text, "([уУѵѴѷѶ]" .. accent .. ")[оО]", "%1")
end
end
-- Treat "оу" ("ou") as "у" ("u").
-- Treat "оу" ("ou") as "у" ("u") (but not "ꚙ").
text = ugsub(text, "([оО])(" .. accent .. ")[уУѵѴѷѶ]", handle_ou)
text = ugsub(text, "([оО])(" .. accent .. ")[уУѵѴѷѶ]", handle_ou)
Line 282: Line 301:
text = text:gsub(".[\128-\191]*", letters)
text = text:gsub(".[\128-\191]*", letters)
-- Handle any breathing marks.
-- Handle any rough breathing marks.
text = ugsub(toNFD(text), "([" .. vowels .. "][" .. vowels .. "%W]-)([" .. breathing .. "])", handle_breathing)
-- FIXME: this can't handle various edge cases.
text = ugsub(toNFD(text), "(%w)(%w?)([^%w%s]*)[" .. dasia .. "]", handle_rough_breathing)
if umatch(text, "[" .. breathing .. "]") then
if umatch(text, "[" .. breathing .. "]") then
Line 289: Line 309:
end
end
-- Transliterate the titlo and vzmet as colon.
-- Final substitutions.
text = ugsub(text, "[" .. titlo .. vzmet .. "]", ":")
text = text:gsub(".[\128-\191]*", final_substitutions)


return toNFC(text)
return toNFC(text)