47,697
edits
| We're back! Sorry — a bad combination of sickness, a funeral and a month-long trip abroad kept the site down. The site is back now. |
No edit summary |
No edit summary |
||
Line 9: | Line 9: | ||
-- UTF-8 encoded strings for some commonly-used diacritics. | -- UTF-8 encoded strings for some commonly-used diacritics. | ||
local c = { | local c = { | ||
prime = u(0x02B9), | |||
grave = u(0x0300), | grave = u(0x0300), | ||
acute = u(0x0301), | acute = u(0x0301), | ||
Line 46: | Line 47: | ||
small_o = u(0x0366), | small_o = u(0x0366), | ||
small_u = u(0x0367), | small_u = u(0x0367), | ||
keraia = u(0x0374), | |||
lowerkeraia = u(0x0375), | |||
tonos = u(0x0384), | |||
kamora = u(0x0484), | kamora = u(0x0484), | ||
dasiapneumata = u(0x0485), | dasiapneumata = u(0x0485), | ||
Line 60: | Line 64: | ||
hamzaabove = u(0x0654), | hamzaabove = u(0x0654), | ||
nunghunna = u(0x0658), | nunghunna = u(0x0658), | ||
zwarakay = u(0x0659), | |||
smallv = u(0x065A), | smallv = u(0x065A), | ||
superalef = u(0x0670), | superalef = u(0x0670), | ||
udatta = u(0x0951), | |||
anudatta = u(0x0952), | |||
coronis = u(0x1FBD), | |||
psili = u(0x1FBF), | |||
dasia = u(0x1FEF), | |||
ZWNJ = u(0x200C), -- zero width non-joiner | ZWNJ = u(0x200C), -- zero width non-joiner | ||
ZWJ = u(0x200D), -- zero width joiner | ZWJ = u(0x200D), -- zero width joiner | ||
Line 99: | Line 107: | ||
s["cau-Cyrl-displaytext"] = { | s["cau-Cyrl-displaytext"] = { | ||
from = {"[ | from = {"[IlΙІӀ]", "ᴴ"}, | ||
to = {"ӏ", "ᵸ"} | to = {"ӏ", "ᵸ"} | ||
} | } | ||
Line 122: | Line 130: | ||
"г" .. p[1], "д" .. p[1], "е", "ж" .. p[1], "з", "и" .. p[1], "и" .. p[2], "о", "у", "х" .. p[1], "ы", "ь" .. p[1], "ь" .. p[2], "ь" .. p[3], "ю", "я", "я" .. p[1], "я" .. p[2], "я" .. p[3], "я" .. p[4], "я" .. p[5], "я" .. p[6], "я" .. p[7], "я" .. p[8], "я" .. p[9] | "г" .. p[1], "д" .. p[1], "е", "ж" .. p[1], "з", "и" .. p[1], "и" .. p[2], "о", "у", "х" .. p[1], "ы", "ь" .. p[1], "ь" .. p[2], "ь" .. p[3], "ю", "я", "я" .. p[1], "я" .. p[2], "я" .. p[3], "я" .. p[4], "я" .. p[5], "я" .. p[6], "я" .. p[7], "я" .. p[8], "я" .. p[9] | ||
}, | }, | ||
} | |||
s["Grek-displaytext"] = { | |||
from = {"Þ", "þ", "['" .. c.RSQuo .. c.prime .. c.keraia .. c.coronis .. c.psili .. "]"}, -- Not tonos, used as the numeral sign in entries. | |||
to = {"Ϸ", "ϸ", c.RSQuo} | |||
} | |||
s["Grek-entryname"] = { | |||
remove_diacritics = c.caron .. c.diaerbelow .. c.brevebelow, | |||
from = s["Grek-displaytext"].from, | |||
to = {"Ϸ", "ϸ", "'"} | |||
} | } | ||
s["Grek-sortkey"] = { | s["Grek-sortkey"] = { | ||
remove_diacritics = c.grave .. c.acute .. c.diaer .. c.caron .. c.commaabove .. c.revcommaabove .. c.macron .. c.breve .. c.diaerbelow .. c.brevebelow .. c.perispomeni .. c.ypogegrammeni, | remove_diacritics = "';·`¨´῀" .. c.grave .. c.acute .. c.diaer .. c.caron .. c.commaabove .. c.revcommaabove .. c.macron .. c.breve .. c.diaerbelow .. c.brevebelow .. c.perispomeni .. c.ypogegrammeni .. c.RSQuo .. c.prime .. c.keraia .. c.lowerkeraia .. c.tonos .. c.coronis .. c.psili .. c.dasia, | ||
from = {"ϝ", "ͷ", "ϛ", "ͱ", " | from = {"ϝ", "ͷ", "ϛ", "ͱ", "ͺ", "ϳ", "ϻ", "[ϟϙ]", "[ςϲ]", "ͳ"}, | ||
to = {"ε" .. p[1], "ε" .. p[2], "ε" .. p[3], "ζ" .. p[1], " | to = {"ε" .. p[1], "ε" .. p[2], "ε" .. p[3], "ζ" .. p[1], "ι", "ι" .. p[1], "π" .. p[1], "π" .. p[2], "σ", "ϡ"} | ||
} | |||
s["itc-Latn-displaytext"] = { | |||
from = {c.caron}, | |||
to = {c.breve}, | |||
} | |||
s["itc-Latn-entryname"] = {remove_diacritics = c.macron .. c.breve .. c.diaer .. c.caron .. c.dinvbreve} | |||
s["itc-Latn-sortkey"] = { | |||
remove_diacritics = c.circ .. c.tilde .. c.macron .. c.breve .. c.diaer .. c.caron .. c.zigzag .. c.dmacron .. c.dtilde .. c.dinvbreve .. c.small_a .. c.small_e .. c.small_i .. c.small_o .. c.small_u, -- Chiefly medieval abbreviations. | |||
from = {"ᵃ", "æ", "[đꝱꟈ]", "ᵉ", "ⁱ", "ꝁ", "[ƚꝉꝲ]", "ꝳ", "ꝴ", "[ꝋᵒ]", "œ", "[ꝑꝓꝕ]", "[ꝗꝙ]", "[ꝛꝵꝶꝝ]", "[ꟊˢ]", "[ꝷᵗ]", "ᵘ", "ꝟ", "⁊"}, | |||
to = {"a", "ae", "d", "e", "i", "k", "l", "m", "n", "o", "oe", "p", "q", "r", "s", "t", "u", "v", "&"} | |||
} | } | ||
Line 186: | Line 218: | ||
s["Mong-entryname"] = s["Mong-displaytext"] | s["Mong-entryname"] = s["Mong-displaytext"] | ||
s["Polyt-displaytext"] = s["Grek-displaytext"] | |||
s["Polyt-entryname"] = { | s["Polyt-entryname"] = { | ||
remove_diacritics = c.macron .. c.breve .. c.dbrevebelow, | remove_diacritics = c.macron .. c.breve .. c.dbrevebelow, | ||
from = | from = s["Grek-entryname"].from, | ||
to = | to = s["Grek-entryname"].to | ||
} | } | ||
s["Polyt-sortkey"] = s["Grek-sortkey"] | |||
s["roa-oil-sortkey"] = { | s["roa-oil-sortkey"] = { | ||
remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.ringabove .. c.cedilla .. "'", | remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.ringabove .. c.cedilla .. "'", | ||
from = {"æ", "œ"}, | from = {"æ", "œ", "·"}, | ||
to = {"ae", "oe"} | to = {"ae", "oe", " "} | ||
} | } | ||
Line 222: | Line 258: | ||
-- This override is provided for languages which use formatting between strings of text which might need to interact with each other (e.g. Korean 값이 transliterates as "gaps-i", but [[값]] has the formatting '''값'''[[-이]]. The normal process would split the text at the second '''.) | -- This override is provided for languages which use formatting between strings of text which might need to interact with each other (e.g. Korean 값이 transliterates as "gaps-i", but [[값]] has the formatting '''값'''[[-이]]. The normal process would split the text at the second '''.) | ||
export.contiguous_substitution = { | export.contiguous_substitution = { | ||
["gmy"] = "tr", | |||
["ja"] = "tr", | ["ja"] = "tr", | ||
["jje"] = "tr", | ["jje"] = "tr", | ||
Line 235: | Line 272: | ||
-- being deprecated, so should not be added to on a permanent basis. Temporary additions are permitted under reasonable | -- being deprecated, so should not be added to on a permanent basis. Temporary additions are permitted under reasonable | ||
-- circumstances (e.g. to facilitate changing a language's code). When an alias is no longer used, it should be removed. | -- circumstances (e.g. to facilitate changing a language's code). When an alias is no longer used, it should be removed. | ||
-- Aliases in this table are tracked at [[Wiktionary:Tracking/languages/LANG]]; see e.g. | |||
-- [[Special:WhatLinksHere/Wiktionary:Tracking/languages/RL.]] for the `RL.` alias. | |||
export.aliases = { | export.aliases = { | ||
["CL."] = "la-cla", | ["CL."] = "la-cla", | ||
Line 244: | Line 283: | ||
["VL."] = "la-vul", | ["VL."] = "la-vul", | ||
["prv"] = "oc-pro", | ["prv"] = "oc-pro", | ||
["nan-hnm"] = "hnm", | |||
["nan-luh"] = "luh", | |||
} | } | ||
-- Codes | -- Codes which are tracked. Note that all aliases listed above are also tracked, so should not be duplicated here. | ||
-- Tracking uses the same mechanism described above in the comment above `export.aliases`. | |||
export.track = { | export.track = { | ||
-- Codes duplicated | -- Codes duplicated between full and etymology-only languages. | ||
["lzh-lit"] = true, | ["lzh-lit"] = true, | ||
-- | -- Languages actively being converted to families. | ||
["nan"] = true, | ["bh"] = true, -- inc-bih | ||
["nan"] = true, -- zhx-nan | |||
} | } | ||
return export | return export |