Module:languages/data: Difference between revisions

Module:languages/data (view source)

Revision as of 21:28, 1 July 2024

1,116 bytes added , 1 July

no edit summary

Sware

Bureaucrats, Administrators

45,647

edits

@@ Line 1: / Line 1: @@
-local u = mw.ustring.char
+local table = table
+local insert = table.insert
+local u = require("Module:string/char")
 local export = {}
---[=[
-Here is a list of the language fields by order of frequency according to [[User:Erutuon/language_stuff]].
-If the order changes, change the order here for potentially greater efficiency.
-local fields = {
-	"canonical_name",
-	"wikidata_item",
-	"family",
-	"scripts",
-	"other_names",
-	"ancestors",
-	"type",
-	"translit",
-	"entry_name",
-	"sort_key",
-	"override_translit",
-	"wikimedia_codes",
-	"standard_chars",
-	"wikipedia_article",
-	"link_tr",
-}
---[=[
-Insert the fields into the table with their values as their frequency ranking.
-{export.most_common_field = 1, export.second_most_common_field = 2, ... }
-for i, field in ipairs(fields) do
-	export[field] = i
-end
-]=]
 -- UTF-8 encoded strings for some commonly-used diacritics.
 local c = {
@@ Line 63: / Line 36: @@
 	dbrevebelow		= u(0x035C),
 	dinvbreve		= u(0x0361),
+	small_e			= u(0x0364),
 	kamora          = u(0x0484),
 	dasiapneumata   = u(0x0485),
@@ Line 91: / Line 65: @@
 local braille = {}
 for i = 0x2800, 0x28FF do
-	table.insert(braille, u(i))
+	insert(braille, u(i))
 end
 c.braille = table.concat(braille)
@@ Line 135: / Line 109: @@
 s["Grek-sortkey"] = {
 	remove_diacritics = c.grave .. c.acute .. c.diaer .. c.caron .. c.commaabove .. c.revcommaabove .. c.diaerbelow .. c.brevebelow .. c.perispomeni .. c.ypogegrammeni,
-	from = {"ς"},
+	from = {"ϝ", "ͷ", "ϛ", "ͱ", "ϻ", "ϟ", "ϙ", "ς", "ϡ", "ͳ"},
-	to = {"σ"}
+	to = {"ε" .. p[1], "ε" .. p[2], "ε" .. p[3], "ζ" .. p[1], "π" .. p[1], "π" .. p[2], "π" .. p[2], "σ", "ω" .. p[1], "ω" .. p[1]}
+}
+s["Jpan-sortkey"] = {
+	Jpan = "Jpan-sortkey",
+	Hani = "Hani-sortkey",
+	Hrkt = "Hira-sortkey", -- sort general kana by normalizing to Hira
+	Hira = "Hira-sortkey",
+	Kana = "Kana-sortkey"
+}
+s["Jpan-translit"] = {
+	Hrkt = "Hrkt-translit",
+	Hira = "Hrkt-translit",
+	Kana = "Hrkt-translit"
 }
 local HaniChars = require("Module:scripts").getByCode("Hani"):getCharacters()
+-- `漢字(한자)`→`漢字`
+-- `가-나-다`→`가나다`
+-- `온돌(溫突/溫堗)`→`온돌` ([[ondol]])
 s["Kore-entryname"] = {
 	remove_diacritics = u(0x302E) .. u(0x302F),
-	from = {"([" .. HaniChars .. "])%(.-%)", "(.)%-(.)", "%([" .. HaniChars .. "]+%)"},
+	from = {"([" .. HaniChars .. "])%(.-%)", "(.)%-(.)", "%([" .. HaniChars .. "/]+%)"},
 	to = {"%1", "%1%2"}
+}
+s["Lisu-sortkey"] = {
+	from = {"𑾰"},
+	to = {"ꓬ" .. p[1]}
 }
 s["Mong-displaytext"] = {
-	from = {"([ᠨ-ᡂᡸ])ᠶ([ᠨ-ᡂᡸ])", "([ᠠ-ᡂᡸ])ᠸ([^ᠠ-ᠧ])", "([ᠠ-ᡂᡸ])ᠸ$"},
+	from = {"([ᠨ-ᡂᡸ])ᠶ([ᠨ-ᡂᡸ])", "([ᠠ-ᡂᡸ])ᠸ([^᠋ᠠ-ᠧ])", "([ᠠ-ᡂᡸ])ᠸ$"},
 	to = {"%1ᠢ%2", "%1ᠧ%2", "%1ᠧ"}
 }
 s["Mong-entryname"] = s["Mong-displaytext"]
+s["Polyt-entryname"] = {
+	remove_diacritics = c.macron .. c.breve .. c.dbrevebelow,
+	from = {"[" .. c.RSQuo .. c.psili .. c.coronis .. "]"},
+	to = {"'"}
+}
 s["roa-oil-sortkey"] = {
@@ Line 185: / Line 187: @@
 	["jje"] = "tr",
 	["ko"] = "tr",
+	["ko-ear"] = "tr",
 	["ru"] = "tr",
+	["th-new"] = "tr",
+	["sa"] = "tr",
+	["zkt"] = "tr",
+}
+-- Code aliases. The left side is the alias and the right side is the canonical code. NOTE: These are gradually
+-- being deprecated, so should not be added to on a permanent basis. Temporary additions are permitted under reasonable
+-- circumstances (e.g. to facilitate changing a language's code). When an alias is no longer used, it should be removed.
+export.aliases = {
+	["CL."] = "la-cla",
+	["EL."] = "la-ecc",
+	["LL."] = "la-lat",
+	["ML."] = "la-med",
+	["NL."] = "la-new",
+	["RL."] = "la-ren",
+	["VL."] = "la-vul",
+	["prv"] = "oc-pro",
+}
+-- Codes which are tracked. Note that all aliases listed above are also tracked, so they should not be duplicated here.
+export.track = {
+	-- Codes duplicated between full and etymology-only languages
+	["lzh-lit"] = true,
+	-- Codes actively being converted to families
+	["nan"] = true,
 }
 return export