Module:languages/data: Difference between revisions

Jump to navigation Jump to search
no edit summary
No edit summary
No edit summary
 
Line 19: Line 19:
dotabove = u(0x0307),
dotabove = u(0x0307),
diaer = u(0x0308),
diaer = u(0x0308),
ringabove = u(0x030A),
hook            = u(0x0309),
hook            = u(0x0309),
ringabove = u(0x030A),
dacute = u(0x030B),
dacute = u(0x030B),
caron = u(0x030C),
caron = u(0x030C),
Line 33: Line 33:
cedilla = u(0x0327),
cedilla = u(0x0327),
ogonek = u(0x0328),
ogonek = u(0x0328),
tildebelow      = u(0x0330),
brevebelow = u(0x032E),
brevebelow = u(0x032E),
tildebelow      = u(0x0330),
macronbelow = u(0x0331),
macronbelow = u(0x0331),
perispomeni = u(0x0342),
perispomeni = u(0x0342),
Line 52: Line 52:
lowerkeraia = u(0x0375),
lowerkeraia = u(0x0375),
tonos = u(0x0384),
tonos = u(0x0384),
kamora          = u(0x0484),
palatalization = u(0x0484),
dasiapneumata   = u(0x0485),
dasiapneumata = u(0x0485),
psilipneumata   = u(0x0486),
psilipneumata = u(0x0486),
kashida = u(0x0640),
kashida = u(0x0640),
fathatan = u(0x064B),
fathatan = u(0x064B),
Line 66: Line 66:
hamzaabove = u(0x0654),
hamzaabove = u(0x0654),
nunghunna = u(0x0658),
nunghunna = u(0x0658),
zwarakay       = u(0x0659),
zwarakay = u(0x0659),
smallv = u(0x065A),
smallv = u(0x065A),
superalef = u(0x0670),
superalef = u(0x0670),
udatta = u(0x0951),
udatta = u(0x0951),
anudatta = u(0x0952),
anudatta = u(0x0952),
dottedgrave = u(0x1DC0),
dottedacute = u(0x1DC1),
coronis = u(0x1FBD),
coronis = u(0x1FBD),
psili = u(0x1FBF),
psili = u(0x1FBF),
Line 77: Line 79:
ZWJ = u(0x200D), -- zero width joiner
ZWJ = u(0x200D), -- zero width joiner
RSQuo = u(0x2019), -- right single quote
RSQuo = u(0x2019), -- right single quote
kavyka = u(0xA67C),
VS01 = u(0xFE00), -- variation selector 1
VS01 = u(0xFE00), -- variation selector 1
-- Punctuation for the standardChars field.
-- Punctuation for the standardChars field.
Line 108: Line 111:
-- These values are placed here to make it possible to synchronise a group of languages without the need for a dedicated function module.
-- These values are placed here to make it possible to synchronise a group of languages without the need for a dedicated function module.


s["cau-Cyrl-displaytext"] = {
-- cau
from = {"[IlΙІӀ]", "ᴴ"},
do
to = {"ӏ", "ᵸ"}
local cau_remove_diacritics = c.grave .. c.acute .. c.macron
}
local cau_from = {"[IlΙІӀᴴ]"}
local cau_to = {{
["l"] = "ӏ",
["Ι"] = "ӏ",
["І"] = "ӏ",
["Ӏ"] = "ӏ",
["ᴴ"] = "ᵸ",
}}
 
s["cau-Cyrl-displaytext"] = {
from = cau_from,
to = cau_to,
}
 
s["cau-Cyrl-entryname"] = {
remove_diacritics = cau_remove_diacritics,
from = cau_from,
to = cau_to,
}


s["cau-Cyrl-entryname"] = {
s["cau-Latn-entryname"] = {remove_diacritics = cau_remove_diacritics}
remove_diacritics = c.grave .. c.acute .. c.macron,
end
from = s["cau-Cyrl-displaytext"].from,
to = s["cau-Cyrl-displaytext"].to
}


s["cau-Latn-entryname"] = {remove_diacritics = c.grave .. c.acute .. c.macron}
-- Cyrs
do
local Cyrs_remove_diacritics = c.grave .. c.acute .. c.dotabove .. c.diaer .. c.invbreve .. c.palatalization .. c.dasiapneumata .. c.psilipneumata .. c.dottedgrave .. c.dottedacute .. c.kavyka


s["Cyrs-entryname"] = {remove_diacritics = c.grave .. c.acute ..  c.diaer .. c.kamora .. c.dasiapneumata .. c.psilipneumata}
s["Cyrs-entryname"] = {remove_diacritics = Cyrs_remove_diacritics}


s["Cyrs-sortkey"] = {
s["Cyrs-sortkey"] = {
from = {
remove_diacritics = Cyrs_remove_diacritics,
"ї", "оу", -- 2 chars
from = {
"ґ", "ꙣ", "є", "[ѕꙃꙅ]", "ꙁ", "[іꙇ]", "[ђꙉ]", "[ѻꙩꙫꙭꙮꚙꚛ]", "ꙋ", "[ѡѿꙍѽ]", "ꙑ", "ѣ", "ꙗ", "ѥ", "ꙕ", "[ѧꙙ]", "[ѩꙝ]", "ꙛ", "ѫ", "ѭ", "ѯ", "ѱ", "ѳ", "ѵ", "ҁ" -- 1 char
"ї", "оу", -- 2 chars
},
"[ґꙣєѕꙃꙅꙁіꙇђꙉѻꙩꙫꙭꙮꚙꚛꙋѡѿꙍѽꙑѣꙗѥꙕѧꙙѩꙝꙛѫѭѯѱѳѵҁ]"
to = {
},
"и" .. p[1], "у",
to = {
"г" .. p[1], "д" .. p[1], "е", "ж" .. p[1], "з", "и" .. p[1], "и" .. p[2], "о", "у", "х" .. p[1], "ы", "ь" .. p[1], "ь" .. p[2], "ь" .. p[3], "ю", "я", "я" .. p[1], "я" .. p[2], "я" .. p[3], "я" .. p[4], "я" .. p[5], "я" .. p[6], "я" .. p[7], "я" .. p[8], "я" .. p[9]
"и" .. p[1], "у", {
},
["ґ"] = "г" .. p[1], ["ꙣ"] = "д" .. p[1], ["є"] = "е", ["ѕ"] = "ж" .. p[1], ["ꙃ"] = "ж" .. p[1],
}
["ꙅ"] = "ж" .. p[1], ["ꙁ"] = "з", ["і"] = "и" .. p[1], ["ꙇ"] = "и" .. p[1], ["ђ"] = "и" .. p[2],
["ꙉ"] = "и" .. p[2], ["ѻ"] = "о", ["ꙩ"] = "о", ["ꙫ"] = "о", ["ꙭ"] = "о",
["ꙮ"] = "о", ["ꚙ"] = "о", ["ꚛ"] = "о", ["ꙋ"] = "у", ["ѡ"] = "х" .. p[1],
["ѿ"] = "х" .. p[1], ["ꙍ"] = "х" .. p[1], ["ѽ"] = "х" .. p[1], ["ꙑ"] = "ы", ["ѣ"] = "ь" .. p[1],
["ꙗ"] = "ь" .. p[2], ["ѥ"] = "ь" .. p[3], ["ꙕ"] = "ю", ["ѧ"] = "я", ["ꙙ"] = "я",
["ѩ"] = "я" .. p[1], ["ꙝ"] = "я" .. p[1], ["ꙛ"] = "я" .. p[2], ["ѫ"] = "я" .. p[3], ["ѭ"] = "я" .. p[4],
["ѯ"] = "я" .. p[5], ["ѱ"] = "я" .. p[6], ["ѳ"] = "я" .. p[7], ["ѵ"] = "я" .. p[8], ["ҁ"] = "я" .. p[9],
}
},
}
end


s["Grek-displaytext"] = {
s["Grek-displaytext"] = {
Line 230: Line 263:


s["Polyt-sortkey"] = s["Grek-sortkey"]
s["Polyt-sortkey"] = s["Grek-sortkey"]
-- Samr
do
s["Samr-entryname"] = {
remove_diacritics = c.CGJ .. u(0x0816) .. "-" .. u(0x082D),
}
s["Samr-sortkey"] = s["Samr-entryname"]
end


s["roa-oil-sortkey"] = {
s["roa-oil-sortkey"] = {
remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.ringabove .. c.cedilla .. "'",
remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.diaer .. c.ringabove .. c.cedilla .. "'",
from = {"æ", "œ", "·"},
from = {"æ", "œ", "·"},
to = {"ae", "oe", " "}
to = {"ae", "oe", " "}
Line 245: Line 287:


s["wen-sortkey"] = {
s["wen-sortkey"] = {
from = {
from = {"ch", "[lłßꞩẜ]", "dz[" .. c.caron .. c.acute .. "]", "[bcefmnoprswz][" .. c.caron .. c.acute .. c.dotabove .. "]"},
"l", -- Ensure "l" comes after "ł".
"b́", "č", "ć", "dź", "ě", "f́", "ch", "ł", "ḿ", "ń", "ó", "ṕ", "ř", "ŕ", "š", "ś", "ẃ", "ž", "ż", "ź"
},
to = {
to = {
"l" .. p[1],
"h" .. p[1],
"b" .. p[1], "c" .. p[1], "c" .. p[2], "d" .. p[1], "e" .. p[1], "f" .. p[1], "h" .. p[1], "l", "m" .. p[1], "n" .. p[1], "o" .. p[1], "p" .. p[1], "r" .. p[1], "r" .. p[2], "s" .. p[1], "s" .. p[2], "w" .. p[1], "z" .. p[1], "z" .. p[2], "z" .. p[3]
{
["l"] = "l" .. p[1], ["ł"] = "l", ["ß"] = "s", ["ꞩ"] = "š", ["ẜ"] = "š",
},
{
["dz" .. c.caron] = "d" .. p[1], ["dz" .. c.acute] = "d" .. p[2]
},
{
["b" .. c.acute] = "b" .. p[1],
["c" .. c.caron] = "c" .. p[1], ["c" .. c.acute] = "c" .. p[2],
["e" .. c.caron] = "e" .. p[1], ["e" .. c.dotabove] = "e" .. p[1],
["f" .. c.acute] = "f" .. p[1],
["m" .. c.acute] = "m" .. p[1],
["n" .. c.acute] = "n" .. p[1],
["o" .. c.acute] = "o" .. p[1],
["p" .. c.acute] = "p" .. p[1],
["r" .. c.caron] = "r" .. p[1], ["r" .. c.acute] = "r" .. p[2],
["s" .. c.caron] = "s" .. p[1], ["s" .. c.acute] = "s" .. p[2],
["w" .. c.acute] = "w" .. p[1],
["z" .. c.caron] = "z" .. p[1], ["z" .. c.acute] = "z" .. p[2],
}
}
}
}
}
Line 257: Line 315:
export.shared = s
export.shared = s


-- Short-term solution to override the standard substitution process, by forcing the module to substitute the entire text in one pass. This results in any PUA characters that are used as stand-ins for formatting being handled by the language-specific substitution process, which is usually undesirable.
-- Short-term solution to override the standard substitution process, by forcing the module to substitute the entire text in one pass, if "cont" is given. This results in any PUA characters that are used as stand-ins for formatting being handled by the language-specific substitution process, which is usually undesirable. If the value is "none" then the formatting tags do not get turned into PUA characters in the first place.
-- This override is provided for languages which use formatting between strings of text which might need to interact with each other (e.g. Korean 값이 transliterates as "gaps-i", but [[값]] has the formatting '''값'''[[-이]]. The normal process would split the text at the second '''.)
-- This override is provided for languages which use formatting between strings of text which might need to interact with each other (e.g. Korean 값이 transliterates as "gaps-i", but [[값]] has the formatting '''값'''[[-이]]. The normal process would split the text at the second '''.)
export.contiguous_substitution = {
export.substitution = {
["gmy"] = "tr",
["gmy"] = "none",
["ja"] = "tr",
["ja"] = "cont",
["jje"] = "tr",
["jje"] = "cont",
["ko"] = "tr",
["ko"] = "cont",
["ko-ear"] = "tr",
["ko-ear"] = "cont",
["ru"] = "tr",
["ru"] = "cont",
["th-new"] = "tr",
["th-new"] = "cont",
["sa"] = "tr",
["sa"] = "cont",
["zkt"] = "tr",
["zkt"] = "cont",
}
}


Line 277: Line 335:
-- [[Special:WhatLinksHere/Wiktionary:Tracking/languages/RL.]] for the `RL.` alias.
-- [[Special:WhatLinksHere/Wiktionary:Tracking/languages/RL.]] for the `RL.` alias.
export.aliases = {
export.aliases = {
["CL."] = "la-cla",
["EL."] = "la-ecc",
["EL."] = "la-ecc",
["LL."] = "la-lat",
["LL."] = "la-lat",
["ML."] = "la-med",
["ML."] = "la-med",
["NL."] = "la-new",
["NL."] = "la-new",
["RL."] = "la-ren",
["VL."] = "la-vul",
["VL."] = "la-vul",
["prv"] = "oc-pro",
["nds-DE"] = "nds-de",
["nan-hnm"] = "hnm",
["nds-NL"] = "nds-nl",
["nan-luh"] = "luh",
["roa-oan"] = "roa-ona",
}
}


Navigation menu