45,647
edits
(Created page with "local u = mw.ustring.char local export = {} --[=[ Here is a list of the language fields by order of frequency according to User:Erutuon/language_stuff. If the order changes, change the order here for potentially greater efficiency. local fields = { "canonical_name", "wikidata_item", "family", "scripts", "other_names", "ancestors", "type", "translit", "entry_name", "sort_key", "override_translit", "wikimedia_codes", "standard_chars", "wikipedia_article...") |
No edit summary |
||
Line 1: | Line 1: | ||
local u = | local table = table | ||
local insert = table.insert | |||
local u = require("Module:string/char") | |||
local export = {} | local export = {} | ||
-- UTF-8 encoded strings for some commonly-used diacritics. | -- UTF-8 encoded strings for some commonly-used diacritics. | ||
local c = { | local c = { | ||
Line 63: | Line 36: | ||
dbrevebelow = u(0x035C), | dbrevebelow = u(0x035C), | ||
dinvbreve = u(0x0361), | dinvbreve = u(0x0361), | ||
small_e = u(0x0364), | |||
kamora = u(0x0484), | kamora = u(0x0484), | ||
dasiapneumata = u(0x0485), | dasiapneumata = u(0x0485), | ||
Line 91: | Line 65: | ||
local braille = {} | local braille = {} | ||
for i = 0x2800, 0x28FF do | for i = 0x2800, 0x28FF do | ||
insert(braille, u(i)) | |||
end | end | ||
c.braille = table.concat(braille) | c.braille = table.concat(braille) | ||
Line 135: | Line 109: | ||
s["Grek-sortkey"] = { | s["Grek-sortkey"] = { | ||
remove_diacritics = c.grave .. c.acute .. c.diaer .. c.caron .. c.commaabove .. c.revcommaabove .. c.diaerbelow .. c.brevebelow .. c.perispomeni .. c.ypogegrammeni, | remove_diacritics = c.grave .. c.acute .. c.diaer .. c.caron .. c.commaabove .. c.revcommaabove .. c.diaerbelow .. c.brevebelow .. c.perispomeni .. c.ypogegrammeni, | ||
from = {"ς"}, | from = {"ϝ", "ͷ", "ϛ", "ͱ", "ϻ", "ϟ", "ϙ", "ς", "ϡ", "ͳ"}, | ||
to = {"σ"} | to = {"ε" .. p[1], "ε" .. p[2], "ε" .. p[3], "ζ" .. p[1], "π" .. p[1], "π" .. p[2], "π" .. p[2], "σ", "ω" .. p[1], "ω" .. p[1]} | ||
} | |||
s["Jpan-sortkey"] = { | |||
Jpan = "Jpan-sortkey", | |||
Hani = "Hani-sortkey", | |||
Hrkt = "Hira-sortkey", -- sort general kana by normalizing to Hira | |||
Hira = "Hira-sortkey", | |||
Kana = "Kana-sortkey" | |||
} | |||
s["Jpan-translit"] = { | |||
Hrkt = "Hrkt-translit", | |||
Hira = "Hrkt-translit", | |||
Kana = "Hrkt-translit" | |||
} | } | ||
local HaniChars = require("Module:scripts").getByCode("Hani"):getCharacters() | local HaniChars = require("Module:scripts").getByCode("Hani"):getCharacters() | ||
-- `漢字(한자)`→`漢字` | |||
-- `가-나-다`→`가나다` | |||
-- `온돌(溫突/溫堗)`→`온돌` ([[ondol]]) | |||
s["Kore-entryname"] = { | s["Kore-entryname"] = { | ||
remove_diacritics = u(0x302E) .. u(0x302F), | remove_diacritics = u(0x302E) .. u(0x302F), | ||
from = {"([" .. HaniChars .. "])%(.-%)", "(.)%-(.)", "%([" .. HaniChars .. "]+%)"}, | from = {"([" .. HaniChars .. "])%(.-%)", "(.)%-(.)", "%([" .. HaniChars .. "/]+%)"}, | ||
to = {"%1", "%1%2"} | to = {"%1", "%1%2"} | ||
} | |||
s["Lisu-sortkey"] = { | |||
from = {"𑾰"}, | |||
to = {"ꓬ" .. p[1]} | |||
} | } | ||
s["Mong-displaytext"] = { | s["Mong-displaytext"] = { | ||
from = {"([ᠨ-ᡂᡸ])ᠶ([ᠨ-ᡂᡸ])", "([ᠠ-ᡂᡸ])ᠸ([^ | from = {"([ᠨ-ᡂᡸ])ᠶ([ᠨ-ᡂᡸ])", "([ᠠ-ᡂᡸ])ᠸ([^᠋ᠠ-ᠧ])", "([ᠠ-ᡂᡸ])ᠸ$"}, | ||
to = {"%1ᠢ%2", "%1ᠧ%2", "%1ᠧ"} | to = {"%1ᠢ%2", "%1ᠧ%2", "%1ᠧ"} | ||
} | } | ||
s["Mong-entryname"] = s["Mong-displaytext"] | s["Mong-entryname"] = s["Mong-displaytext"] | ||
s["Polyt-entryname"] = { | |||
remove_diacritics = c.macron .. c.breve .. c.dbrevebelow, | |||
from = {"[" .. c.RSQuo .. c.psili .. c.coronis .. "]"}, | |||
to = {"'"} | |||
} | |||
s["roa-oil-sortkey"] = { | s["roa-oil-sortkey"] = { | ||
Line 185: | Line 187: | ||
["jje"] = "tr", | ["jje"] = "tr", | ||
["ko"] = "tr", | ["ko"] = "tr", | ||
["ko-ear"] = "tr", | |||
["ru"] = "tr", | ["ru"] = "tr", | ||
["th-new"] = "tr", | |||
["sa"] = "tr", | |||
["zkt"] = "tr", | |||
} | |||
-- Code aliases. The left side is the alias and the right side is the canonical code. NOTE: These are gradually | |||
-- being deprecated, so should not be added to on a permanent basis. Temporary additions are permitted under reasonable | |||
-- circumstances (e.g. to facilitate changing a language's code). When an alias is no longer used, it should be removed. | |||
export.aliases = { | |||
["CL."] = "la-cla", | |||
["EL."] = "la-ecc", | |||
["LL."] = "la-lat", | |||
["ML."] = "la-med", | |||
["NL."] = "la-new", | |||
["RL."] = "la-ren", | |||
["VL."] = "la-vul", | |||
["prv"] = "oc-pro", | |||
} | |||
-- Codes which are tracked. Note that all aliases listed above are also tracked, so should not be duplicated here. | |||
export.track = { | |||
-- Codes duplicated between full and etymology-only languages | |||
["lzh-lit"] = true, | |||
-- Codes actively being converted to families | |||
["nan"] = true, | |||
} | } | ||
return export | return export |