Module:languages/data/2: Difference between revisions

m 1 revision imported
No edit summary
Tag: Manual revert
 
Line 20: Line 20:
s["de-Latn-standardchars"] = "AaÄäBbCcDdEeFfGgHhIiJjKkLlMmNnOoÖöPpQqRrSsẞßTtUuÜüVvWwXxYyZz"
s["de-Latn-standardchars"] = "AaÄäBbCcDdEeFfGgHhIiJjKkLlMmNnOoÖöPpQqRrSsẞßTtUuÜüVvWwXxYyZz"


s["ka-stripdiacritics"] = {remove_diacritics = c.circ}
s["ka-entryname"] = {remove_diacritics = c.circ}


s["no-sortkey"] = {
s["no-sortkey"] = {
Line 31: Line 31:
s["no-standardchars"] = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÆæØøÅå" .. c.punc
s["no-standardchars"] = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÆæØøÅå" .. c.punc


s["sa-Deva-stripdiacritics"] = { -- Don't use remove_diacritics for accent marks, as १ and ३ should also be removed if (and only if) they carry any.
s["tg-entryname"] = {remove_diacritics = c.grave .. c.acute}
from = {"[१३]?[" .. c.anudatta .. c.udatta .. c.dsvarita .. c.tsvarita .. "]+"},
to = {""},
}


s["tg-stripdiacritics"] = {remove_diacritics = c.grave .. c.acute}
s["tk-entryname"] = {remove_diacritics = c.macron}
 
s["tk-stripdiacritics"] = {remove_diacritics = c.macron}


local m = {}
local m = {}
Line 47: Line 42:
"cus-eas",
"cus-eas",
"Latn, Ethi",
"Latn, Ethi",
strip_diacritics = {
entry_name = {
Latn = {remove_diacritics = c.acute},
Latn = {remove_diacritics = c.acute},
},
},
Line 59: Line 54:
translit = {
translit = {
Cyrl = "ab-translit",
Cyrl = "ab-translit",
-- Geor translit in [[Module:scripts/data]]
Geor = "Geor-translit",
},
},
override_translit = true,
override_translit = true,
Line 65: Line 60:
Cyrl = s["cau-Cyrl-displaytext"]
Cyrl = s["cau-Cyrl-displaytext"]
},
},
strip_diacritics = {
entry_name = {
Cyrl = {
Cyrl = {
remove_diacritics = c.acute,
remove_diacritics = c.acute,
Line 71: Line 66:
to = {"а"},
to = {"а"},
},
},
Latn = s["cau-Latn-stripdiacritics"],
Latn = s["cau-Latn-entryname"],
},
},
sort_key = {
sort_key = {
Line 95: Line 90:
29572,
29572,
"ira-cen",
"ira-cen",
"Avst, Gujr, Deva",
"Avst, Gujr",
translit = {
translit = {
Avst = "Avst-translit"
Avst = "Avst-translit"
Line 142: Line 137:
13955,
13955,
"sem-arb",
"sem-arb",
"Arab, Hebr, Syrc, Brai, Nbat",
"Arab, Hebr, Syrc, Brai",
translit = {
translit = {
Arab = "ar-translit"
Arab = "ar-translit"
},
},
strip_diacritics = {
display_text = {
Arab = "ar-stripdiacritics",
Hebr = "Hebr-common",
},
entry_name = {
Arab = "ar-entryname",
Hebr = "Hebr-common",
},
sort_key = {
Hebr = "Hebr-common",
},
},
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
}
}


Line 175: Line 176:
Cyrl = s["cau-Cyrl-displaytext"],
Cyrl = s["cau-Cyrl-displaytext"],
},
},
strip_diacritics = {
entry_name = {
Cyrl = s["cau-Cyrl-stripdiacritics"],
Cyrl = s["cau-Cyrl-entryname"],
Latn = s["cau-Latn-stripdiacritics"],
Latn = s["cau-Latn-entryname"],
},
},
sort_key = {
sort_key = {
Line 201: Line 202:
ancestors = "trk-oat",
ancestors = "trk-oat",
dotted_dotless_i = true,
dotted_dotless_i = true,
strip_diacritics = {
entry_name = {
Latn = {
Latn = {
from = {"ʼ"},
from = {"ʼ"},
Line 207: Line 208:
},
},
["fa-Arab"] = {
["fa-Arab"] = {
module = "ar-stripdiacritics",
module = "ar-entryname",
["from"] = {
["from"] = {
"ۆ",
"ۆ",
Line 266: Line 267:
"zle",
"zle",
"Cyrl, Latn",
"Cyrl, Latn",
ancestors = "zle-mbe",
ancestors = "zle-obe",
translit = {
translit = {
Cyrl = "be-translit",
Cyrl = "be-translit",
},
},
strip_diacritics = {
entry_name = {
Cyrl = {
Cyrl = {
remove_diacritics = c.grave .. c.acute,
remove_diacritics = c.grave .. c.acute,
Line 292: Line 293:
},
},
},
},
standard_chars = {
standardChars = {
Cyrl = "АаБбВвГгДдЕеЁёЖжЗзІіЙйКкЛлМмНнОоПпРрСсТтУуЎўФфХхЦцЧчШшЫыЬьЭэЮюЯя",
Cyrl = "АаБбВвГгДдЕеЁёЖжЗзІіЙйКкЛлМмНнОоПпРрСсТтУуЎўФфХхЦцЧчШшЫыЬьЭэЮюЯя",
Latn = "AaBbCcĆćČčDdEeFfGgHhIiJjKkLlŁłMmNnŃńOoPpRrSsŚśŠšTtUuŬŭVvYyZzŹźŽž",
Latn = "AaBbCcĆćČčDdEeFfGgHhIiJjKkLlŁłMmNnŃńOoPpRrSsŚśŠšTtUuŬŭVvYyZzŹźŽž",
Line 306: Line 307:
ancestors = "cu-bgm",
ancestors = "cu-bgm",
translit = "bg-translit",
translit = "bg-translit",
strip_diacritics = {
entry_name = {
remove_diacritics = c.grave .. c.acute,
remove_diacritics = c.grave .. c.acute,
remove_exceptions = {"%f[^%z%s]ѝ%f[%z%s]"},
remove_exceptions = {"%f[^%z%s]ѝ%f[%z%s]"},
Line 314: Line 315:
remove_exceptions = {"%f[^%z%s]ѝ%f[%z%s]"},
remove_exceptions = {"%f[^%z%s]ѝ%f[%z%s]"},
},
},
standard_chars = "АаБбВвГгДдЕеЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЬьЮюЯя" .. c.punc,
standardChars = "АаБбВвГгДдЕеЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЬьЮюЯя" .. c.punc,
}
}


Line 362: Line 363:
"Tibt", -- sometimes Deva?
"Tibt", -- sometimes Deva?
ancestors = "xct",
ancestors = "xct",
translit = "Tibt-translit",
override_translit = true,
override_translit = true,
-- Tibt translit, display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
display_text = s["Tibt-displaytext"],
entry_name = s["Tibt-entryname"],
sort_key = "Tibt-sortkey",
}
}


Line 385: Line 389:
ancestors = "roa-oca",
ancestors = "roa-oca",
sort_key = {remove_diacritics = c.grave .. c.acute .. c.diaer .. c.cedilla .. "·"},
sort_key = {remove_diacritics = c.grave .. c.acute .. c.diaer .. c.cedilla .. "·"},
standard_chars = "AaÀàBbCcÇçDdEeÉéÈèFfGgHhIiÍíÏïJjLlMmNnOoÓóÒòPpQqRrSsTtUuÚúÜüVvXxYyZz·" .. c.punc,
standardChars = "AaÀàBbCcÇçDdEeÉéÈèFfGgHhIiÍíÏïJjLlMmNnOoÓóÒòPpQqRrSsTtUuÚúÜüVvXxYyZz·" .. c.punc,
}
}


Line 401: Line 405:
Cyrl = s["cau-Cyrl-displaytext"]
Cyrl = s["cau-Cyrl-displaytext"]
},
},
strip_diacritics = {
entry_name = {
Cyrl = s["cau-Cyrl-stripdiacritics"],
Cyrl = s["cau-Cyrl-entryname"],
Latn = s["cau-Latn-stripdiacritics"],
Latn = s["cau-Latn-entryname"],
},
},
sort_key = {
sort_key = {
Line 434: Line 438:
to = {"c" .. p[1], "g" .. p[1], "s" .. p[1], "s" .. p[2]}
to = {"c" .. p[1], "g" .. p[1], "s" .. p[1], "s" .. p[2]}
},
},
standard_chars = "AaÀàBbCcDdEeÈèFfGgHhIiÌìÏïJjLlMmNnOoÒòPpQqRrSsTtUuÙùÜüVvZz" .. c.punc,
standardChars = "AaÀàBbCcDdEeÈèFfGgHhIiÌìÏïJjLlMmNnOoÒòPpQqRrSsTtUuÙùÜüVvZz" .. c.punc,
}
}


Line 457: Line 461:
to = {"a" .. p[1], "c" .. p[1], "d" .. p[1], "e" .. p[1], "e" .. p[2], "h" .. p[1], "i" .. p[1], "n" .. p[1], "o" .. p[1], "r" .. p[1], "s" .. p[1], "t" .. p[1], "u" .. p[1], "u" .. p[2], "y" .. p[1], "z" .. p[1]}
to = {"a" .. p[1], "c" .. p[1], "d" .. p[1], "e" .. p[1], "e" .. p[2], "h" .. p[1], "i" .. p[1], "n" .. p[1], "o" .. p[1], "r" .. p[1], "s" .. p[1], "t" .. p[1], "u" .. p[1], "u" .. p[2], "y" .. p[1], "z" .. p[1]}
},
},
standard_chars = "AaÁáBbCcČčDdĎďEeÉéĚěFfGgHhIiÍíJjKkLlMmNnŇňOoÓóPpRrŘřSsŠšTtŤťUuÚúŮůVvYyÝýZzŽž" .. c.punc,
standardChars = "AaÁáBbCcČčDdĎďEeÉéĚěFfGgHhIiÍíJjKkLlMmNnŇňOoÓóPpRrŘřSsŠšTtŤťUuÚúŮůVvYyÝýZzŽž" .. c.punc,
}
}


Line 464: Line 468:
35499,
35499,
"zls",
"zls",
"Cyrs, Glag, Zname",
"Cyrs, Glag",
translit = {
translit = {
Cyrs = "Cyrs-translit",
Cyrs = "Cyrs-translit",
Glag = "Glag-translit"
Glag = "Glag-translit"
},
},
-- Cyrs strip_diacritics, sort_key in [[Module:scripts/data]]
entry_name = {
Cyrs = s["Cyrs-entryname"]
},
sort_key = {
Cyrs = s["Cyrs-sortkey"]
},
}
}


Line 497: Line 506:
to = {"c" .. p[1], "d" .. p[1], "f" .. p[1], "g" .. p[1], "l" .. p[1], "p" .. p[1], "r" .. p[1], "t" .. p[1]}
to = {"c" .. p[1], "d" .. p[1], "f" .. p[1], "g" .. p[1], "l" .. p[1], "p" .. p[1], "r" .. p[1], "t" .. p[1]}
},
},
standard_chars = "ÂâAaBbCcDdEeÊêFfGgHhIiÎîLlMmNnOoÔôPpRrSsTtUuÛûWwŴŵYyŶŷ" .. c.punc,
standardChars = "ÂâAaBbCcDdEeÊêFfGgHhIiÎîLlMmNnOoÔôPpRrSsTtUuÛûWwŴŵYyŶŷ" .. c.punc,
}
}


Line 512: Line 521:
to = {"z" .. p[1], "z" .. p[2], "z" .. p[3]}
to = {"z" .. p[1], "z" .. p[2], "z" .. p[3]}
},
},
standard_chars = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÆæØøÅå" .. c.punc,
standardChars = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÆæØøÅå" .. c.punc,
}
}


Line 520: Line 529:
"gmw-hgm",
"gmw-hgm",
"Latn, Latf, Brai",
"Latn, Latf, Brai",
ancestors = "de-ear",
ancestors = "gmh",
sort_key = {
sort_key = {
Latn = s["de-Latn-sortkey"],
Latn = s["de-Latn-sortkey"],
Latf = s["de-Latn-sortkey"],
Latf = s["de-Latn-sortkey"],
},
},
standard_chars = {
standardChars = {
Latn = s["de-Latn-standardchars"],
Latn = s["de-Latn-standardchars"],
Latf = s["de-Latn-standardchars"],
Latf = s["de-Latn-standardchars"],
Line 542: Line 551:
Diak = "Diak-translit",
Diak = "Diak-translit",
},
},
    ancestors = "dv-old",
override_translit = true,
override_translit = true,
}
}
Line 552: Line 560:
"Tibt",
"Tibt",
ancestors = "xct",
ancestors = "xct",
translit = "Tibt-translit",
override_translit = true,
override_translit = true,
-- Tibt translit, display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
display_text = s["Tibt-displaytext"],
entry_name = s["Tibt-entryname"],
sort_key = "Tibt-sortkey",
}
}


Line 574: Line 585:
"Grek, Polyt, Brai",
"Grek, Polyt, Brai",
ancestors = "el-kth",
ancestors = "el-kth",
translit = "el-translit",
translit = {
Grek = "el-translit",
Polyt = "grc-translit",
},
override_translit = true,
override_translit = true,
-- Grek and Polyt display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
display_text = {
standard_chars = {
Grek = s["Grek-displaytext"],
Polyt = s["Polyt-displaytext"],
},
entry_name = {
Grek = s["Grek-entryname"],
Polyt = s["Polyt-entryname"],
},
sort_key = {
Grek = s["Grek-sortkey"],
Polyt = s["Polyt-sortkey"],
},
standardChars = {
Grek = "΅·ͺ΄ΑαΆάΒβΓγΔδΕεέΈΖζΗηΉήΘθΙιΊίΪϊΐΚκΛλΜμΝνΞξΟοΌόΠπΡρΣσςΤτΥυΎύΫϋΰΦφΧχΨψΩωΏώ",
Grek = "΅·ͺ΄ΑαΆάΒβΓγΔδΕεέΈΖζΗηΉήΘθΙιΊίΪϊΐΚκΛλΜμΝνΞξΟοΌόΠπΡρΣσςΤτΥυΎύΫϋΰΦφΧχΨψΩωΏώ",
Brai = c.braille,
Brai = c.braille,
Line 595: Line 620:
-- Many of these are needed for sorting language names.
-- Many of these are needed for sorting language names.
remove_diacritics = "'\"%-%.,%s·ʻʼ" .. c.diacritics,
remove_diacritics = "'\"%-%.,%s·ʻʼ" .. c.diacritics,
-- These are found in pagenames.
-- These are found in entry names.
from = {"[ɒæ🅱¢©ᴄðđəǝɜɡħʜıɨłŋɲøɔœꝑꝓꝕßʋ]"},
from = {"[ɒæ🅱¢©ᴄðđəǝɜɡħʜıɨłŋɲøɔœꝑꝓꝕßʋ]"},
to = {{
to = {{
Line 607: Line 632:
},
},
},
},
standard_chars = {
standardChars = {
Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz",
Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz",
Brai = c.braille,
Brai = c.braille,
Line 624: Line 649:
to = {"c" .. p[1], "g" .. p[1], "h" .. p[1], "j" .. p[1], "s" .. p[1], "u" .. p[1]}
to = {"c" .. p[1], "g" .. p[1], "h" .. p[1], "j" .. p[1], "s" .. p[1], "u" .. p[1]}
},
},
standard_chars = "AaBbCcĈĉDdEeFfGgĜĝHhĤĥIiJjĴĵKkLlMmNnOoPpRrSsŜŝTtUuŬŭVvZz" .. c.punc,
standardChars = "AaBbCcĈĉDdEeFfGgĜĝHhĤĥIiJjĴĵKkLlMmNnOoPpRrSsŜŝTtUuŬŭVvZz" .. c.punc,
}
}


Line 641: Line 666:
},
},
},
},
standard_chars = {
standardChars = {
Latn = "AaÁáBbCcDdEeÉéFfGgHhIiÍíJjLlMmNnÑñOoÓóPpQqRrSsTtUuÚúÜüVvXxYyZz",
Latn = "AaÁáBbCcDdEeÉéFfGgHhIiÍíJjLlMmNnÑñOoÓóPpQqRrSsTtUuÚúÜüVvXxYyZz",
Brai = c.braille,
Brai = c.braille,
Line 663: Line 688:
}
}
},
},
standard_chars = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvÕõÄäÖöÜü" .. c.punc,
standardChars = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvÕõÄäÖöÜü" .. c.punc,
}
}


Line 675: Line 700:
to = {"c" .. p[1], "n" .. p[1]}
to = {"c" .. p[1], "n" .. p[1]}
},
},
standard_chars = "AaBbDdEeFfGgHhIiJjKkLlMmNnÑñOoPpRrSsTtUuXxZz" .. c.punc,
standardChars = "AaBbDdEeFfGgHhIiJjKkLlMmNnÑñOoPpRrSsTtUuXxZz" .. c.punc,
}
}


Line 684: Line 709:
"fa-Arab, Hebr",
"fa-Arab, Hebr",
ancestors = "fa-cls",
ancestors = "fa-cls",
strip_diacritics = {
display_text = {
Hebr = "Hebr-common",
},
entry_name = {
["fa-Arab"] = {
["fa-Arab"] = {
-- character "ۂ" code U+06C2 to "ه" and "هٔ" (U+0647 + U+0654) to "ه"; hamzatu l-waṣli to a regular alif
-- character "ۂ" code U+06C2 to "ه" and "هٔ"(U+0647 + U+0654) to "ه"; hamzatu l-waṣli to a regular alif
from = {"هٔ", "ٱ"}, -- character "ۂ" code U+06C2 to "ه"; hamzatu l-waṣli to a regular alif
from = {"هٔ", "ٱ"}, -- character "ۂ" code U+06C2 to "ه"; hamzatu l-waṣli to a regular alif
to = {"ه", "ا"},
to = {"ه", "ا"},
remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.superalef,
remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.superalef,
},
},
Hebr = "Hebr-common",
},
sort_key = {
Hebr = "Hebr-common",
},
},
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
}
}


Line 711: Line 742:
to = {"’"}
to = {"’"}
},
},
strip_diacritics = { -- used to indicate gemination of the next consonant
entry_name = { -- used to indicate gemination of the next consonant
remove_diacritics = "ˣ",
remove_diacritics = "ˣ",
from = {"’"},
from = {"’"},
Line 717: Line 748:
},
},
sort_key = { -- [[Appendix:Finnish alphabet#Collation]] + "aͤ" and "oͤ" as historical variants of "ä" and "ö".
sort_key = { -- [[Appendix:Finnish alphabet#Collation]] + "aͤ" and "oͤ" as historical variants of "ä" and "ö".
remove_diacritics = "':" .. c.diacritics,
remove_diacritics = "':" .. c.diacritics,
remove_exceptions = {
remove_exceptions = {
"a[" .. c.ringabove .. c.diaer .. c.small_e .. "]", -- åäaͤ
"a[" .. c.ringabove .. c.diaer .. c.small_e .. "]", -- åäaͤ
Line 726: Line 757:
to = {"ae", "d", "l", "n", "oe", "ss", "th", "y", "z" .. p[1], "ä", "ö", "ö", "%1"}
to = {"ae", "d", "l", "n", "oe", "ss", "th", "y", "z" .. p[1], "ä", "ö", "ö", "%1"}
},
},
standard_chars = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÄäÖö" .. c.punc,
standardChars = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÄäÖö" .. c.punc,
}
}


Line 745: Line 776:
to = {"a" .. p[1], "d" .. p[1], "i" .. p[1], "o" .. p[1], "u" .. p[1], "y" .. p[1], "z" .. p[1], "z" .. p[2]}
to = {"a" .. p[1], "d" .. p[1], "i" .. p[1], "o" .. p[1], "u" .. p[1], "y" .. p[1], "z" .. p[1], "z" .. p[2]}
},
},
standard_chars = "AaÁáBbDdÐðEeFfGgHhIiÍíJjKkLlMmNnOoÓóPpRrSsTtUuÚúVvYyÝýÆæØø" .. c.punc,
standardChars = "AaÁáBbDdÐðEeFfGgHhIiÍíJjKkLlMmNnOoÓóPpRrSsTtUuÚúVvYyÝýÆæØø" .. c.punc,
}
}


Line 753: Line 784:
"roa-oil",
"roa-oil",
"Latn, Brai",
"Latn, Brai",
display_text = {
Latn = {
from = {"'"},
to = {"’"}
},
},
entry_name = {
Latn = {
from = {"’"},
to = {"'"},
},
},
ancestors = "frm",
ancestors = "frm",
sort_key = {
sort_key = {
Latn = s["roa-oil-sortkey"]
Latn = s["roa-oil-sortkey"]
},
},
standard_chars = {
standardChars = {
Latn = "AaÀàÂâBbCcÇçDdEeÉéÈèÊêËëFfGgHhIiÎîÏïJjLlMmNnOoÔôŒœPpQqRrSsTtUuÙùÛûÜüVvXxYyZz",
Latn = "AaÀàÂâBbCcÇçDdEeÉéÈèÊêËëFfGgHhIiÎîÏïJjLlMmNnOoÔôŒœPpQqRrSsTtUuÙùÛûÜüVvXxYyZz",
Brai = c.braille,
Brai = c.braille,
Line 774: Line 817:
to = {"i"}
to = {"i"}
},
},
standard_chars = "AaâäàÆæBbCcDdEeéêëèFfGgHhIiïìYyỳJjKkLlMmNnOoôöòPpRrSsTtUuúûüùVvWwZz" .. c.punc,
standardChars = "AaâäàÆæBbCcDdEeéêëèFfGgHhIiïìYyỳJjKkLlMmNnOoôöòPpRrSsTtUuúûüùVvWwZz" .. c.punc,
}
}


Line 788: Line 831:
to = {"bh", "ch", "dh", "fh", "gh", "mh", "ph", "sh", "th"}
to = {"bh", "ch", "dh", "fh", "gh", "mh", "ph", "sh", "th"}
},
},
standard_chars = "AaÁáBbCcDdEeÉéFfGgHhIiÍíLlMmNnOoÓóPpRrSsTtUuÚúVv" .. c.punc,
standardChars = "AaÁáBbCcDdEeÉéFfGgHhIiÍíLlMmNnOoÓóPpRrSsTtUuÚúVv" .. c.punc,
}
}


Line 798: Line 841:
ancestors = "mga",
ancestors = "mga",
sort_key = {remove_diacritics = c.grave .. c.acute},
sort_key = {remove_diacritics = c.grave .. c.acute},
standard_chars = "AaÀàBbCcDdEeÈèFfGgHhIiÌìLlMmNnOoÒòPpRrSsTtUuÙù" .. c.punc,
standardChars = "AaÀàBbCcDdEeÈèFfGgHhIiÌìLlMmNnOoÒòPpRrSsTtUuÙù" .. c.punc,
}
}


Line 811: Line 854:
to = {"n" .. p[1]}
to = {"n" .. p[1]}
},
},
standard_chars = "AaÁáBbCcDdEeÉéFfGgHhIiÍíÏïLlMmNnÑñOoÓóPpQqRrSsTtUuÚúÜüVvXxZz" .. c.punc,
standardChars = "AaÁáBbCcDdEeÉéFfGgHhIiÍíÏïLlMmNnÑñOoÓóPpQqRrSsTtUuÚúÜüVvXxZz" .. c.punc,
}
 
-- Data entry registered under the key "gn" in the table `m`.
-- It carries only four positional fields and none of the optional named fields
-- (ancestors, translit, sort_key, ...) that other entries in this table set.
-- NOTE(review): the positional layout appears to match the neighbouring entries
-- (display name, numeric ID, family code, comma-separated script codes), but the
-- code that consumes these fields is not visible here — confirm before relying on it.
m["gn"] = {
"Guaraní", -- presumably the display name of the language
35876, -- numeric identifier; NOTE(review): looks like an external item ID — confirm
"tup-gua", -- presumably the family code
"Latn", -- presumably the script code(s); a single script is listed
}
}


Line 823: Line 873:
Gujr = "gu-translit",
Gujr = "gu-translit",
},
},
strip_diacritics = {
entry_name = {
Arab = {remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.kasra .. c.shadda .. c.sukun},
Arab = {remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.kasra .. c.shadda .. c.sukun},
Gujr = {remove_diacritics = "઼"},
Gujr = {remove_diacritics = "઼"},
Line 836: Line 886:
ancestors = "mga",
ancestors = "mga",
sort_key = {remove_diacritics = c.cedilla .. "-"},
sort_key = {remove_diacritics = c.cedilla .. "-"},
standard_chars = "AaBbCcÇçDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwYy" .. c.punc,
standardChars = "AaBbCcÇçDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwYy" .. c.punc,
}
}


Line 844: Line 894:
"cdc-wst",
"cdc-wst",
"Latn, Arab",
"Latn, Arab",
strip_diacritics = {
entry_name = {
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.macron}
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.macron}
},
},
Line 861: Line 911:
"Hebr, Phnx, Brai, Samr",
"Hebr, Phnx, Brai, Samr",
ancestors = "he-med",
ancestors = "he-med",
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
display_text = {
-- Samr strip_diacritics, sort_key in [[Module:scripts/data]]
Hebr = "Hebr-common",
-- Phnx translit in [[Module:scripts/data]] (NOTE: not present before, presumably an accidental omission)
},
entry_name = {
Hebr = "Hebr-common",
Samr = s["Samr-entryname"],
},
sort_key = {
Hebr = "Hebr-common",
Samr = s["Samr-sortkey"],
},
}
}


Line 874: Line 932:
Deva = "hi-translit"
Deva = "hi-translit"
},
},
standard_chars = {
standardChars = {
Deva = "अआइईउऊएऐओऔकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसहत्रज्ञक्षक़ख़ग़ज़झ़ड़ढ़फ़काखागाघाङाचाछाजाझाञाटाठाडाढाणाताथादाधानापाफाबाभामायारालावाशाषासाहात्राज्ञाक्षाक़ाख़ाग़ाज़ाझ़ाड़ाढ़ाफ़ाकिखिगिघिङिचिछिजिझिञिटिठिडिढिणितिथिदिधिनिपिफिबिभिमियिरिलिविशिषिसिहित्रिज्ञिक्षिक़िख़िग़िज़िझ़िड़िढ़िफ़िकीखीगीघीङीचीछीजीझीञीटीठीडीढीणीतीथीदीधीनीपीफीबीभीमीयीरीलीवीशीषीसीहीत्रीज्ञीक्षीक़ीख़ीग़ीज़ीझ़ीड़ीढ़ीफ़ीकुखुगुघुङुचुछुजुझुञुटुठुडुढुणुतुथुदुधुनुपुफुबुभुमुयुरुलुवुशुषुसुहुत्रुज्ञुक्षुक़ुख़ुग़ुज़ुझ़ुड़ुढ़ुफ़ुकूखूगूघूङूचूछूजूझूञूटूठूडूढूणूतूथूदूधूनूपूफूबूभूमूयूरूलूवूशूषूसूहूत्रूज्ञूक्षूक़ूख़ूग़ूज़ूझ़ूड़ूढ़ूफ़ूकेखेगेघेङेचेछेजेझेञेटेठेडेढेणेतेथेदेधेनेपेफेबेभेमेयेरेलेवेशेषेसेहेत्रेज्ञेक्षेक़ेख़ेग़ेज़ेझ़ेड़ेढ़ेफ़ेकैखैगैघैङैचैछैजैझैञैटैठैडैढैणैतैथैदैधैनैपैफैबैभैमैयैरैलैवैशैषैसैहैत्रैज्ञैक्षैक़ैख़ैग़ैज़ैझ़ैड़ैढ़ैफ़ैकोखोगोघोङोचोछोजोझोञोटोठोडोढोणोतोथोदोधोनोपोफोबोभोमोयोरोलोवोशोषोसोहोत्रोज्ञोक्षोक़ोख़ोग़ोज़ोझ़ोड़ोढ़ोफ़ोकौखौगौघौङौचौछौजौझौञौटौठौडौढौणौतौथौदौधौनौपौफौबौभौमौयौरौलौवौशौषौसौहौत्रौज्ञौक्षौक़ौख़ौग़ौज़ौझ़ौड़ौढ़ौफ़ौक्ख्ग्घ्ङ्च्छ्ज्झ्ञ्ट्ठ्ड्ढ्ण्त्थ्द्ध्न्प्फ्ब्भ्म्य्र्ल्व्श्ष्स्ह्त्र्ज्ञ्क्ष्क़्ख़्ग़्ज़्झ़्ड़्ढ़्फ़्।॥०१२३४५६७८९॰",
Deva = "अआइईउऊएऐओऔकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसहत्रज्ञक्षक़ख़ग़ज़झ़ड़ढ़फ़काखागाघाङाचाछाजाझाञाटाठाडाढाणाताथादाधानापाफाबाभामायारालावाशाषासाहात्राज्ञाक्षाक़ाख़ाग़ाज़ाझ़ाड़ाढ़ाफ़ाकिखिगिघिङिचिछिजिझिञिटिठिडिढिणितिथिदिधिनिपिफिबिभिमियिरिलिविशिषिसिहित्रिज्ञिक्षिक़िख़िग़िज़िझ़िड़िढ़िफ़िकीखीगीघीङीचीछीजीझीञीटीठीडीढीणीतीथीदीधीनीपीफीबीभीमीयीरीलीवीशीषीसीहीत्रीज्ञीक्षीक़ीख़ीग़ीज़ीझ़ीड़ीढ़ीफ़ीकुखुगुघुङुचुछुजुझुञुटुठुडुढुणुतुथुदुधुनुपुफुबुभुमुयुरुलुवुशुषुसुहुत्रुज्ञुक्षुक़ुख़ुग़ुज़ुझ़ुड़ुढ़ुफ़ुकूखूगूघूङूचूछूजूझूञूटूठूडूढूणूतूथूदूधूनूपूफूबूभूमूयूरूलूवूशूषूसूहूत्रूज्ञूक्षूक़ूख़ूग़ूज़ूझ़ूड़ूढ़ूफ़ूकेखेगेघेङेचेछेजेझेञेटेठेडेढेणेतेथेदेधेनेपेफेबेभेमेयेरेलेवेशेषेसेहेत्रेज्ञेक्षेक़ेख़ेग़ेज़ेझ़ेड़ेढ़ेफ़ेकैखैगैघैङैचैछैजैझैञैटैठैडैढैणैतैथैदैधैनैपैफैबैभैमैयैरैलैवैशैषैसैहैत्रैज्ञैक्षैक़ैख़ैग़ैज़ैझ़ैड़ैढ़ैफ़ैकोखोगोघोङोचोछोजोझोञोटोठोडोढोणोतोथोदोधोनोपोफोबोभोमोयोरोलोवोशोषोसोहोत्रोज्ञोक्षोक़ोख़ोग़ोज़ोझ़ोड़ोढ़ोफ़ोकौखौगौघौङौचौछौजौझौञौटौठौडौढौणौतौथौदौधौनौपौफौबौभौमौयौरौलौवौशौषौसौहौत्रौज्ञौक्षौक़ौख़ौग़ौज़ौझ़ौड़ौढ़ौफ़ौक्ख्ग्घ्ङ्च्छ्ज्झ्ञ्ट्ठ्ड्ढ्ण्त्थ्द्ध्न्प्फ्ब्भ्म्य्र्ल्व्श्ष्स्ह्त्र्ज्ञ्क्ष्क़्ख़्ग़्ज़्झ़्ड़्ढ़्फ़्।॥०१२३४५६७८९॰",
c.punc
c.punc
Line 924: Line 982:
},
},
},
},
standard_chars = {
standardChars = {
Latn = "AaÁáBbCcDdEeÉéFfGgHhIiÍíJjKkLlMmNnOoÓóÖöŐőPpQqRrSsTtUuÚúÜüŰűVvWwXxYyZz",
Latn = "AaÁáBbCcDdEeÉéFfGgHhIiÍíJjKkLlMmNnOoÓóÖöŐőPpQqRrSsTtUuÚúÜüŰűVvWwXxYyZz",
c.punc
c.punc
Line 936: Line 994:
"Armn, Brai",
"Armn, Brai",
ancestors = "axm",
ancestors = "axm",
-- Armn translit in [[Module:scripts/data]]
translit = {
Armn = "Armn-translit"
},
override_translit = true,
override_translit = true,
strip_diacritics = {
entry_name = {
Armn = {
Armn = {
remove_diacritics = "՛՜՞՟",
remove_diacritics = "՛՜՞՟",
Line 979: Line 1,039:
"Latn",
"Latn",
ancestors = "ms",
ancestors = "ms",
standard_chars = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz" .. c.punc,
standardChars = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz" .. c.punc,
}
}


Line 988: Line 1,048:
"Latn",
"Latn",
type = "appendix-constructed",
type = "appendix-constructed",
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.circ},
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ},
}
}


Line 996: Line 1,056:
"alv-igb",
"alv-igb",
"Latn",
"Latn",
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.macron},
entry_name = {remove_diacritics = c.grave .. c.acute .. c.macron},
sort_key = {
sort_key = {
from = {"gb", "gh", "gw", "ị", "kp", "kw", "ṅ", "nw", "ny", "ọ", "sh", "ụ"},
from = {"gb", "gh", "gw", "ị", "kp", "kw", "ṅ", "nw", "ny", "ọ", "sh", "ụ"},
Line 1,044: Line 1,104:
to = {"a" .. p[1], "d" .. p[1], "e" .. p[1], "i" .. p[1], "o" .. p[1], "u" .. p[1], "y" .. p[1], "z" .. p[1], "z" .. p[2], "z" .. p[3]}
to = {"a" .. p[1], "d" .. p[1], "e" .. p[1], "i" .. p[1], "o" .. p[1], "u" .. p[1], "y" .. p[1], "z" .. p[1], "z" .. p[2], "z" .. p[3]}
},
},
standard_chars = "AaÁáBbDdÐðEeÉéFfGgHhIiÍíJjKkLlMmNnOoÓóPpRrSsTtUuÚúVvXxYyÝýÞþÆæÖö" .. c.punc,
standardChars = "AaÁáBbDdÐðEeÉéFfGgHhIiÍíJjKkLlMmNnOoÓóPpRrSsTtUuÚúVvXxYyÝýÞþÆæÖö" .. c.punc,
}
}


Line 1,054: Line 1,114:
ancestors = "roa-oit",
ancestors = "roa-oit",
sort_key = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.ringabove},
sort_key = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.ringabove},
standard_chars = "AaÀàBbCcDdEeÈèÉéFfGgHhIiÌìLlMmNnOoÒòPpQqRrSsTtUuÙùVvZz" .. c.punc,
standardChars = "AaÀàBbCcDdEeÈèÉéFfGgHhIiÌìLlMmNnOoÒòPpQqRrSsTtUuÙùVvZz" .. c.punc,
}
}


Line 1,077: Line 1,137:
link_tr = true,
link_tr = true,
display_text = s["jpx-displaytext"],
display_text = s["jpx-displaytext"],
strip_diacritics = s["jpx-stripdiacritics"],
entry_name = s["jpx-entryname"],
sort_key = s["jpx-sortkey"],
sort_key = s["jpx-sortkey"],
}
}
Line 1,085: Line 1,145:
33549,
33549,
"poz",
"poz",
"Latn, Java, Arab",
"Latn, Java",
ancestors = "kaw",
ancestors = "kaw",
translit = {
translit = {
Line 1,091: Line 1,151:
},
},
link_tr = true,
link_tr = true,
strip_diacritics = {
entry_name = {
Latn = {remove_diacritics = c.circ} -- Modern jv don't use ê
Latn = {remove_diacritics = c.circ} -- Modern jv don't use ê
},
},
Line 1,108: Line 1,168:
"Geor, Geok, Hebr", -- Hebr is used to write Judeo-Georgian
"Geor, Geok, Hebr", -- Hebr is used to write Judeo-Georgian
ancestors = "ka-mid",
ancestors = "ka-mid",
-- Geor, Geok translit in [[Module:scripts/data]]
translit = {
Geor = "Geor-translit",
Geok = "Geok-translit",
},
override_translit = true,
override_translit = true,
strip_diacritics = {
display_text = {
Geor = s["ka-stripdiacritics"],
Hebr = "Hebr-common",
Geok = s["ka-stripdiacritics"],
},
entry_name = {
Geor = s["ka-entryname"],
Geok = s["ka-entryname"],
Hebr = "Hebr-common",
},
},
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
sort_key = {
Hebr = "Hebr-common",
}
}
}


Line 1,162: Line 1,231:
},
},
},
},
standard_chars = {
standardChars = {
Cyrl = "АаӘәБбВвГгҒғДдЕеЁёЖжЗзИиЙйКкҚқЛлМмНнҢңОоӨөПпРрСсТтУуҰұҮүФфХхҺһЦцЧчШшЩщЪъЫыІіЬьЭэЮюЯя",
Cyrl = "АаӘәБбВвГгҒғДдЕеЁёЖжЗзИиЙйКкҚқЛлМмНнҢңОоӨөПпРрСсТтУуҰұҮүФфХхҺһЦцЧчШшЩщЪъЫыІіЬьЭэЮюЯя",
c.punc
c.punc
Line 1,194: Line 1,263:
"Knda, Tutg",
"Knda, Tutg",
ancestors = "dra-mkn",
ancestors = "dra-mkn",
-- Knda translit in [[Module:scripts/data]]
translit = {
Knda = "kn-translit",
},
}
}


Line 1,206: Line 1,277:
Kore = "ko-translit",
Kore = "ko-translit",
},
},
-- Kore strip_diacritics in [[Module:scripts/data]]
entry_name = {
Kore = s["Kore-entryname"],
},
}
}


Line 1,214: Line 1,287:
"ssa-sah",
"ssa-sah",
"Latn, Arab",
"Latn, Arab",
-- the sortkey and strip_diacritics are only for standard Kanuri; when dialectal entries get added, someone will have to work out how the dialects should be represented orthographically
-- the sortkey and entry_name are only for standard Kanuri; when dialectal entries get added, someone will have to work out how the dialects should be represented orthographically
strip_diacritics = {
entry_name = {
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.breve}
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.breve}
},
},
Line 1,234: Line 1,307:
["ks-Arab"] = "ks-Arab-translit",
["ks-Arab"] = "ks-Arab-translit",
Deva = "ks-Deva-translit",
Deva = "ks-Deva-translit",
-- Shrd translit in [[Module:scripts/data]]
Shrd = "Shrd-translit",
},
},
}
}


-- "kv" is treated as "koi", "kpv", see [[WT:LT]]
-- "kv" IS TREATED AS "koi", "kpv", SEE WT:LT


m["kw"] = {
m["kw"] = {
Line 1,273: Line 1,346:
397,
397,
"itc-laf",
"itc-laf",
"Latn, Ital",
"Latn",
ancestors = "itc-ola",
ancestors = "itc-ola",
-- Ital translit in [[Module:scripts/data]] (NOTE: formerly not present, probably an accidental omission)
display_text = {
display_text = {
Latn = s["itc-Latn-displaytext"]
Latn = s["itc-Latn-displaytext"]
},
},
strip_diacritics = {
entry_name = {
Latn = s["itc-Latn-stripdiacritics"]
Latn = s["itc-Latn-entryname"]
},
},
sort_key = {
sort_key = {
Latn = s["itc-Latn-sortkey"]
Latn = s["itc-Latn-sortkey"]
},
},
standard_chars = {
standardChars = {
Latn = "AaBbCcDdEeFfGgHhIiLlMmNnOoPpQqRrSsTtUuVvXx",
Latn = "AaBbCcDdEeFfGgHhIiLlMmNnOoPpQqRrSsTtUuVvXx",
c.punc
c.punc
Line 1,310: Line 1,382:
"bnt-nyg",
"bnt-nyg",
"Latn",
"Latn",
strip_diacritics = {remove_diacritics = c.acute .. c.circ},
entry_name = {remove_diacritics = c.acute .. c.circ},
sort_key = {
sort_key = {
from = {"ŋ"},
from = {"ŋ"},
Line 1,341: Line 1,413:
9211,
9211,
"tai-swe",
"tai-swe",
"Laoo", -- also Tai Noi/Lao Buhan script
"Laoo",
translit = "lo-translit",
translit = "lo-translit",
sort_key = "Laoo-sortkey",
sort_key = "Laoo-sortkey",
standard_chars = "0-9ກຂຄງຈຊຍດຕຖທນບປຜຝພຟມຢຣລວສຫອຮຯ-ໝ" .. c.punc,
standardChars = "0-9ກຂຄງຈຊຍດຕຖທນບປຜຝພຟມຢຣລວສຫອຮຯ-ໝ" .. c.punc,
}
}


Line 1,354: Line 1,426:
ancestors = "olt",
ancestors = "olt",
display_text = "lt-common",
display_text = "lt-common",
strip_diacritics = "lt-common",
entry_name = "lt-common",
sort_key = "lt-common",
sort_key = "lt-common",
standard_chars = "AaĄąBbCcČčDdEeĘęĖėFfGgHhIiĮįYyJjKkLlMmNnOoPpRrSsŠšTtUuŲųŪūVvZzŽž" .. c.punc,
standardChars = "AaĄąBbCcČčDdEeĘęĖėFfGgHhIiĮįYyJjKkLlMmNnOoPpRrSsŠšTtUuŲųŪūVvZzŽž" .. c.punc,
}
}


Line 1,371: Line 1,443:
"bat-eas",
"bat-eas",
"Latn",
"Latn",
strip_diacritics = {
entry_name = {
-- This attempts to convert vowels with tone marks to vowels either with or without macrons. Specifically, there should be no macrons if the vowel is part of a diphthong (including resonant diphthongs such as pìrksts -> pirksts, not #pīrksts). What we do is first convert the vowel + tone mark to a vowel + tilde in a decomposed fashion, then remove the tilde in diphthongs, then convert the remaining vowel + tilde sequences to macroned vowels, then delete any other tilde. We leave already-macroned vowels alone: both ar and ār, for example, occur before consonants. FIXME: This still might not be sufficient.
-- This attempts to convert vowels with tone marks to vowels either with or without macrons. Specifically, there should be no macrons if the vowel is part of a diphthong (including resonant diphthongs such as pìrksts -> pirksts, not #pīrksts). What we do is first convert the vowel + tone mark to a vowel + tilde in a decomposed fashion, then remove the tilde in diphthongs, then convert the remaining vowel + tilde sequences to macroned vowels, then delete any other tilde. We leave already-macroned vowels alone: both ar and ār, for example, occur before consonants. FIXME: This still might not be sufficient.
from = {"([Ee])" .. c.cedilla, "[" .. c.grave .. c.circ .. c.tilde .."]", "([aAeEiIoOuU])" .. c.tilde .."?([lrnmuiLRNMUI])" .. c.tilde .. "?([^aAeEiIoOuU])", "([aAeEiIoOuU])" .. c.tilde .."?([lrnmuiLRNMUI])" .. c.tilde .."?$", "([iI])" .. c.tilde .. "?([eE])" .. c.tilde .. "?", "([aAeEiIuU])" .. c.tilde, c.tilde},
from = {"([Ee])" .. c.cedilla, "[" .. c.grave .. c.circ .. c.tilde .."]", "([aAeEiIoOuU])" .. c.tilde .."?([lrnmuiLRNMUI])" .. c.tilde .. "?([^aAeEiIoOuU])", "([aAeEiIoOuU])" .. c.tilde .."?([lrnmuiLRNMUI])" .. c.tilde .."?$", "([iI])" .. c.tilde .. "?([eE])" .. c.tilde .. "?", "([aAeEiIuU])" .. c.tilde, c.tilde},
Line 1,380: Line 1,452:
to = {"a" .. p[1], "c" .. p[1], "e" .. p[1], "g" .. p[1], "i" .. p[1], "k" .. p[1], "l" .. p[1], "n" .. p[1], "s" .. p[1], "u" .. p[1], "z" .. p[1]}
to = {"a" .. p[1], "c" .. p[1], "e" .. p[1], "g" .. p[1], "i" .. p[1], "k" .. p[1], "l" .. p[1], "n" .. p[1], "s" .. p[1], "u" .. p[1], "z" .. p[1]}
},
},
standard_chars = "AaĀāBbCcČčDdEeĒēFfGgĢģHhIiĪīJjKkĶķLlĻļMmNnŅņOoPpRrSsŠšTtUuŪūVvZzŽž" .. c.punc,
standardChars = "AaĀāBbCcČčDdEeĒēFfGgĢģHhIiĪīJjKkĶķLlĻļMmNnŅņOoPpRrSsŠšTtUuŪūVvZzŽž" .. c.punc,
}
}


Line 1,402: Line 1,474:


m["mi"] = {
m["mi"] = {
"Māori",
"Maori",
36451,
36451,
"poz-pep",
"poz-pep",
Line 1,409: Line 1,481:
remove_diacritics = c.macron,
remove_diacritics = c.macron,
from = {"ng", "wh"},
from = {"ng", "wh"},
to = {"n" .. p[1], "w" .. p[1]}
to = {"z" .. p[1], "z" .. p[2]}
},
},
}
}
Line 1,420: Line 1,492:
ancestors = "cu",
ancestors = "cu",
translit = {
translit = {
Cyrl = "mk-translit",
Cyrl = "mk-translit"
-- FIXME: formerly no translit specified for Polyt; unclear if the default [[Module:grc-translit]] is
},
-- acceptable, so we disable it for now
display_text = {
Polyt = false,
Polyt = s["Polyt-displaytext"]
},
},
strip_diacritics = {
entry_name = {
Cyrl = {
Cyrl = {
remove_diacritics = c.acute,
remove_diacritics = c.acute,
remove_exceptions = {"Ѓ", "ѓ", "Ќ", "ќ"}
remove_exceptions = {"Ѓ", "ѓ", "Ќ", "ќ"}
},
},
Polyt = s["Polyt-entryname"],
},
},
sort_key = {
sort_key = {
Line 1,438: Line 1,511:
to = {"д" .. p[1], "з" .. p[1], "и" .. p[1], "л" .. p[1], "н" .. p[1], "т" .. p[1], "ч" .. p[1]}
to = {"д" .. p[1], "з" .. p[1], "и" .. p[1], "л" .. p[1], "н" .. p[1], "т" .. p[1], "ч" .. p[1]}
},
},
Polyt = s["Polyt-sortkey"],
},
},
-- Polyt display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
standardChars = {
standard_chars = {
Cyrl = "АаБбВвГгДдЃѓЕеЖжЗзЅѕИиЈјКкЛлЉљМмНнЊњОоПпРрСсТтЌќУуФфХхЦцЧчЏџШш",
Cyrl = "АаБбВвГгДдЃѓЕеЖжЗзЅѕИиЈјКкЛлЉљМмНнЊњОоПпРрСсТтЌќУуФфХхЦцЧчЏџШш",
c.punc
c.punc
Line 1,451: Line 1,524:
"dra-mal",
"dra-mal",
"Mlym",
"Mlym",
translit = "ml-translit",
override_translit = true,
override_translit = true,
-- Mlym translit in [[Module:scripts/data]]
}
}


Line 1,463: Line 1,536:
translit = {
translit = {
Cyrl = "mn-translit",
Cyrl = "mn-translit",
-- Mong translit in [[Module:scripts/data]]
Mong = "Mong-translit",
},
},
override_translit = true,
override_translit = true,
-- Mong display_text and strip_diacritics in [[Module:scripts/data]]
display_text = {
strip_diacritics = {
Mong = s["Mong-displaytext"]
},
entry_name = {
Cyrl = {remove_diacritics = c.grave .. c.acute},
Cyrl = {remove_diacritics = c.grave .. c.acute},
Mong = s["Mong-entryname"],
},
},
sort_key = {
sort_key = {
Line 1,477: Line 1,553:
},
},
},
},
standard_chars = {
standardChars = {
Cyrl = "АаБбВвГгДдЕеЁёЖжЗзИиЙйЛлМмНнОоӨөРрСсТтУуҮүХхЦцЧчШшЫыЬьЭэЮюЯя—",
Cyrl = "АаБбВвГгДдЕеЁёЖжЗзИиЙйЛлМмНнОоӨөРрСсТтУуҮүХхЦцЧчШшЫыЬьЭэЮюЯя—",
Brai = c.braille,
Brai = c.braille,
Line 1,484: Line 1,560:
}
}


-- "mo" is treated as "ro", see [[WT:LT]]
-- "mo" IS TREATED AS "ro", SEE WT:LT


m["mr"] = {
m["mr"] = {
Line 1,496: Line 1,572:
Modi = "mr-Modi-translit",
Modi = "mr-Modi-translit",
},
},
strip_diacritics = {
entry_name = {
Deva = {
Deva = {
from = {"च़", "ज़", "झ़"},
from = {"च़", "ज़", "झ़"},
Line 1,510: Line 1,586:
"Latn, ms-Arab",
"Latn, ms-Arab",
ancestors = "ms-cla",
ancestors = "ms-cla",
standard_chars = {
standardChars = {
Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz",
Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz",
c.punc
c.punc
Line 1,525: Line 1,601:
to = {"’"}
to = {"’"}
},
},
strip_diacritics = {
entry_name = {
from = {"’"},
from = {"’"},
to = {"'"},
to = {"'"},
Line 1,575: Line 1,651:
ancestors = "gmq-mno, da",  -- da as an (but not the) ancestor of nb was agreed on - do not change without discussion
ancestors = "gmq-mno, da",  -- da as an (but not the) ancestor of nb was agreed on - do not change without discussion
sort_key = s["no-sortkey"],
sort_key = s["no-sortkey"],
standard_chars = s["no-standardchars"],
standardChars = s["no-standardchars"],
}
}


Line 1,583: Line 1,659:
"bnt-ngu",
"bnt-ngu",
"Latn",
"Latn",
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
}
}


Line 1,612: Line 1,688:
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.diaer .. c.ringabove .. c.cedilla .. "'"},
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.diaer .. c.ringabove .. c.cedilla .. "'"},
},
},
standard_chars = {
standardChars = {
Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZzÄäËëÏïÖöÜü",
Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz",
Brai = c.braille,
Brai = c.braille,
c.punc
c.punc
Line 1,625: Line 1,701:
"Latn",
"Latn",
ancestors = "gmq-mno",
ancestors = "gmq-mno",
strip_diacritics = {
entry_name = {
remove_diacritics = c.grave .. c.acute,
remove_diacritics = c.grave .. c.acute,
},
},
sort_key = s["no-sortkey"],
sort_key = s["no-sortkey"],
standard_chars = s["no-standardchars"],
standardChars = s["no-standardchars"],
}
}


Line 1,639: Line 1,715:
ancestors = "gmq-mno",
ancestors = "gmq-mno",
sort_key = s["no-sortkey"],
sort_key = s["no-sortkey"],
standard_chars = s["no-standardchars"],
standardChars = s["no-standardchars"],
}
}


Line 1,647: Line 1,723:
"bnt-ngu",
"bnt-ngu",
"Latn",
"Latn",
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
}
}


Line 1,675: Line 1,751:
"bnt-nys",
"bnt-nys",
"Latn",
"Latn",
strip_diacritics = {remove_diacritics = c.acute .. c.circ},
entry_name = {remove_diacritics = c.acute .. c.circ},
sort_key = {
sort_key = {
from = {"ng'"},
from = {"ng'"},
Line 1,688: Line 1,764:
"Latn, Hebr",
"Latn, Hebr",
ancestors = "pro",
ancestors = "pro",
display_text = {
Hebr = "Hebr-common",
},
entry_name = {
Hebr = "Hebr-common",
},
sort_key = {
sort_key = {
Latn = {
Latn = {
Line 1,694: Line 1,776:
to = {"%1h"}
to = {"%1h"}
},
},
Hebr = "Hebr-common",
},
},
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
}
}


Line 1,735: Line 1,817:
translit = {
translit = {
Cyrl = "os-translit",
Cyrl = "os-translit",
-- Geor translit in [[Module:scripts/data]]
Geor = "Geor-translit",
},
},
override_translit = true,
override_translit = true,
Line 1,748: Line 1,830:
},
},
},
},
strip_diacritics = {
entry_name = {
Cyrl = {
Cyrl = {
remove_diacritics = c.grave .. c.acute,
remove_diacritics = c.grave .. c.acute,
Line 1,772: Line 1,854:
"inc-pan",
"inc-pan",
"Guru, pa-Arab",
"Guru, pa-Arab",
ancestors = "inc-opa",
translit = {
translit = {
Guru = "Guru-translit",
Guru = "Guru-translit",
["pa-Arab"] = "pa-Arab-translit",
["pa-Arab"] = "pa-Arab-translit",
},
},
strip_diacritics = {
entry_name = {
["pa-Arab"] = {
["pa-Arab"] = {
remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.nunghunna,
remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.nunghunna,
Line 1,792: Line 1,875:
ancestors = "sa",
ancestors = "sa",
translit = {
translit = {
-- Brah translit in [[Module:scripts/data]]
Brah = "Brah-translit",
Deva = "sa-translit",
Deva = "sa-translit",
Beng = "pi-translit",
Beng = "pi-translit",
Line 1,803: Line 1,886:
Cakm = "Cakm-translit",
Cakm = "Cakm-translit",
},
},
strip_diacritics = {
entry_name = {
Thai = {
Thai = {
from = {"ึ", u(0xF700), u(0xF70F)}, -- FIXME: Not clear what's going on with the PUA characters here.
from = {"ึ", u(0xF700), u(0xF70F)}, -- FIXME: Not clear what's going on with the PUA characters here.
to = {"ิํ", "ฐ", "ญ"}
to = {"ิํ", "ฐ", "ญ"}
},
},
Mymr = {
remove_diacritics = c.VS01
remove_diacritics = c.VS01,
},
},
},
sort_key = { -- FIXME: This needs to be converted into the current standardized format.
sort_key = { -- FIXME: This needs to be converted into the current standardized format.
Line 1,828: Line 1,909:
to = {"a" .. p[1], "c" .. p[1], "e" .. p[1], "l" .. p[1], "n" .. p[1], "o" .. p[1], "s" .. p[1], "z" .. p[1], "z" .. p[2]}
to = {"a" .. p[1], "c" .. p[1], "e" .. p[1], "l" .. p[1], "n" .. p[1], "o" .. p[1], "s" .. p[1], "z" .. p[1], "z" .. p[2]}
},
},
standard_chars = "AaĄąBbCcĆćDdEeĘęFfGgHhIiJjKkLlŁłMmNnŃńOoÓóPpRrSsŚśTtUuWwYyZzŹźŻż" .. c.punc,
standardChars = "AaĄąBbCcĆćDdEeĘęFfGgHhIiJjKkLlŁłMmNnŃńOoÓóPpRrSsŚśTtUuWwYyZzŹźŻż" .. c.punc,
}
}


Line 1,836: Line 1,917:
"ira-pat",
"ira-pat",
"ps-Arab",
"ps-Arab",
strip_diacritics = {remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.zwarakay .. c.superalef},
entry_name = {remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.zwarakay .. c.superalef},
}
}


Line 1,851: Line 1,932:
},
},
},
},
standard_chars = {
standardChars = {
Latn = "AaÁáÂâÃãBbCcÇçDdEeÉéÊêFfGgHhIiÍíJjLlMmNnOoÓóÔôÕõPpQqRrSsTtUuÚúVvXxZz",
Latn = "AaÁáÂâÃãBbCcÇçDdEeÉéÊêFfGgHhIiÍíJjLlMmNnOoÓóÔôÕõPpQqRrSsTtUuÚúVvXxZz",
Brai = c.braille,
Brai = c.braille,
Line 1,866: Line 1,947:


m["rm"] = {
m["rm"] = {
"Romansh",
"Romansch",
13199,
13199,
"roa-rhe",
"roa-rhe",
ancestors = "rm-old",
"Latn",
"Latn",
sort_key = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.small_e},
sort_key = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.small_e},
Line 1,893: Line 1,973:
},
},
},
},
-- Cyrs strip_diacritics, sort_key in [[Module:scripts/data]]; presumably not present
standardChars = {
standard_chars = {
Latn = "AaĂăÂâBbCcDdEeFfGgHhIiÎîJjLlMmNnOoPpRrSsȘșTtȚțUuVvXxZz",
Latn = "AaĂăÂâBbCcDdEeFfGgHhIiÎîJjLlMmNnOoPpRrSsȘșTtȚțUuVvXxZz",
Cyrl = "АаБбВвГгДдЕеЖжӁӂЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЫыЬьЭэЮюЯя",
Cyrl = "АаБбВвГгДдЕеЖжӁӂЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЫыЬьЭэЮюЯя",
Line 1,916: Line 1,995:
},
},
},
},
strip_diacritics = {
entry_name = {
Cyrl = {
Cyrl = {
remove_diacritics = c.grave .. c.acute .. c.diaer,
remove_diacritics = c.grave .. c.acute .. c.diaer,
Line 1,927: Line 2,006:
Cyrl = {
Cyrl = {
remove_diacritics = c.grave .. c.acute .. c.diaer,
remove_diacritics = c.grave .. c.acute .. c.diaer,
remove_exceptions = {"ё", "ѣ̈", "я̈"},
from = {
from = {
"і", "ѣ", "ѳ", "ѵ"
"ё", "ѣ̈", "я̈", -- 2 chars
"і", "ѣ", "ѳ", "ѵ" -- 1 char
},
},
to = {
to = {
"е" .. p[1], "ь" .. p[2], "я" .. p[1],
"и" .. p[1], "ь" .. p[1], "я" .. p[2], "я" .. p[3]
"и" .. p[1], "ь" .. p[1], "я" .. p[2], "я" .. p[3]
}
}
},
},
},
},
standard_chars = {
standardChars = {
Cyrl = "АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя—",
Cyrl = "АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя—",
Brai = c.braille,
Brai = c.braille,
Line 1,947: Line 2,029:
"bnt-glb",
"bnt-glb",
"Latn",
"Latn",
strip_diacritics = {remove_diacritics = c.acute .. c.circ .. c.macron .. c.caron},
entry_name = {remove_diacritics = c.acute .. c.circ .. c.macron .. c.caron},
}
}


Line 1,958: Line 2,040:
Beng = "sa-Beng-translit",
Beng = "sa-Beng-translit",
["as-Beng"] = "sa-Beng-translit",
["as-Beng"] = "sa-Beng-translit",
-- Brah translit in [[Module:scripts/data]]
Brah = "Brah-translit",
Deva = "sa-translit",
Deva = "sa-translit",
Gujr = "sa-Gujr-translit",
Gujr = "sa-Gujr-translit",
Line 1,970: Line 2,052:
Mlym = "sa-Mlym-translit",
Mlym = "sa-Mlym-translit",
Modi = "sa-Modi-translit",
Modi = "sa-Modi-translit",
-- Mong, mnc-Mong, xwo-Mong translit in [[Module:scripts/data]]
Mong = "Mong-translit",
-- NOTE: Formerly used xal-translit for transliterating xwo-Mong but that only handles Cyrillic; it has
["mnc-Mong"] = "mnc-translit",
-- code to transliterate xwo-Mong but it's broken so I've replaced it with the default xwo-translit.
["xwo-Mong"] = "xal-translit",
Mymr = "pi-translit",
Mymr = "pi-translit",
Orya = "sa-Orya-translit",
Orya = "sa-Orya-translit",
-- Shrd translit in [[Module:scripts/data]]
Shrd = "Shrd-translit",
-- Sidd translit in [[Module:scripts/data]]
Sidd = "Sidd-translit",
Sinh = "si-translit",
Sinh = "si-translit",
Taml = "sa-Taml-translit",
Taml = "sa-Taml-translit",
Telu = "sa-Telu-translit",
Telu = "sa-Telu-translit",
Thai = "pi-translit",
Thai = "pi-translit",
-- Tibt translit in [[Module:scripts/data]]
Tibt = "Tibt-translit",
},
display_text = {
Mong = s["Mong-displaytext"],
Tibt = s["Tibt-displaytext"],
},
},
-- Mong display_text and strip_diacritics in [[Module:scripts/data]]
entry_name = {
-- Tibt display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
Mong = s["Mong-entryname"],
strip_diacritics = {
Tibt = s["Tibt-entryname"],
Deva = s["sa-Deva-stripdiacritics"],
Mymr = {
remove_diacritics = c.VS01,
},
Thai = {
Thai = {
from = {"ึ", u(0xF700), u(0xF70F)}, -- FIXME: Not clear what's going on with the PUA characters here.
from = {"ึ", u(0xF700), u(0xF70F)}, -- FIXME: Not clear what's going on with the PUA characters here.
to = {"ิํ", "ฐ", "ญ"}
to = {"ิํ", "ฐ", "ญ"}
},
},
remove_diacritics = c.VS01 .. c.udatta .. c.anudatta
},
},
sort_key = {
sort_key = {
Deva = s["sa-Deva-stripdiacritics"], -- until we have a proper Sanskrit sorting algorithm.
Tibt = "Tibt-sortkey",
Lana = { -- Tai Tham
{ -- FIXME: This needs to be converted into the current standardized format.
from = {"", "", "", "", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", ""},
from = {"ā", "ī", "ū", "", "", "", "m[" .. c.dotabove .. c.dotbelow .. "]", "", "ñ", "", "", "", "ś", "", "", "([เโไ])([ก-ฮ])", "([ເໂໄ])([ກ-ຮ])", "", "", "", "", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "", u(0xFE00), u(0x200D)},
to = {"ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", ""},
to = {"a~", "i~", "u~", "d~", "l~", "l~~", "m~", "n~", "n~~", "n~~~", "r~", "r~~", "s~", "s~~", "t~", "%2%1", "%2%1", "ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ"},
},
Laoo = "Laoo-sortkey",
Latn = {
from = {"ā", "ī", "ū", "ḍ", "ḷ", "ḹ", "m[" .. c.dotabove .. c.dotbelow .. "]", "", "ñ", "", "", "", "ś", "", "ṭ"},
to = {"a~", "i~", "u~", "d~", "l~", "l~~", "m~", "n~", "n~~", "n~~~", "r~", "r~~", "s~", "s~~", "t~"},
},
Mymr = {
remove_diacritics = c.VS01,
},
},
Thai = "Thai-sortkey",
-- FIXME: The previous sort key which mixed all scripts removed ZWJ; I don't know which script(s) this was
-- intended for and there are no other languages which remove it in the sort key AFAIK. If it needs to be
-- removed, specify the script(s) it needs to be removed under or add handling for the "all" script that applies
-- regardless of script.
--all = {
-- remove_diacritics = c.ZWJ,
--},
},
},
}
}
Line 2,025: Line 2,092:
"roa-sou",
"roa-sou",
"Latn",
"Latn",
ancestors = "sc-old",
}
}


Line 2,034: Line 2,100:
"sd-Arab, Deva, Sind, Khoj",
"sd-Arab, Deva, Sind, Khoj",
translit = {
translit = {
Sind = "Sind-translit",
Sind = "Sind-translit"
["sd-Arab"] = "sd-Arab-translit"
},
},
strip_diacritics = {
entry_name = {
["sd-Arab"] = {
["sd-Arab"] = {
remove_diacritics = c.kashida .. c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.superalef,
remove_diacritics = c.kashida .. c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.superalef,
Line 2,055: Line 2,120:
to = {"ˈ"}
to = {"ˈ"}
},
},
strip_diacritics = {remove_diacritics = c.macron .. c.dotbelow .. "'ˈ"},
entry_name = {remove_diacritics = c.macron .. c.dotbelow .. "'ˈ"},
sort_key = {
sort_key = {
from = {"á", "č", "đ", "ŋ", "š", "ŧ", "ž"},
from = {"á", "č", "đ", "ŋ", "š", "ŧ", "ž"},
to = {"a" .. p[1], "c" .. p[1], "d" .. p[1], "n" .. p[1], "s" .. p[1], "t" .. p[1], "z" .. p[1]}
to = {"a" .. p[1], "c" .. p[1], "d" .. p[1], "n" .. p[1], "s" .. p[1], "t" .. p[1], "z" .. p[1]}
},
},
standard_chars = "AaÁáBbCcČčDdĐđEeFfGgHhIiJjKkLlMmNnŊŋOoPpRrSsŠšTtŦŧUuVvZzŽž" .. c.punc,
standardChars = "AaÁáBbCcČčDdĐđEeFfGgHhIiJjKkLlMmNnŊŋOoPpRrSsŠšTtŦŧUuVvZzŽž" .. c.punc,
}
}


Line 2,075: Line 2,140:
9301,
9301,
"zls",
"zls",
"Latn, Cyrl, Glag, Arab",
"Latn, Cyrl, Glag",
ietf_subtag = "hbs", -- ISO 639-3 code, since "sh" is deprecated from ISO 639-1
ietf_subtag = "hbs", -- ISO 639-3 code, since "sh" is deprecated from ISO 639-1
wikimedia_codes = "sh, bs, hr, sr",
wikimedia_codes = "sh, bs, hr, sr",
strip_diacritics = {
entry_name = {
Latn = {
Latn = {
remove_diacritics = c.grave .. c.acute .. c.tilde .. c.macron .. c.dgrave .. c.invbreve,
remove_diacritics = c.grave .. c.acute .. c.tilde .. c.macron .. c.dgrave .. c.invbreve,
Line 2,102: Line 2,167:
},
},
},
},
standard_chars = {
standardChars = {
Latn = "AaBbCcČčĆćDdĐđEeFfGgHhIiJjKkLlMmNnOoPpRrSsŠšTtUuVvZzŽž",
Latn = "AaBbCcČčĆćDdĐđEeFfGgHhIiJjKkLlMmNnOoPpRrSsŠšTtUuVvZzŽž",
Cyrl = "АаБбВвГгДдЂђЕеЖжЗзИиЈјКкЛлЉљМмНнЊњОоПпРрСсТтЋћУуФфХхЦцЧчЏџШш",
Cyrl = "АаБбВвГгДдЂђЕеЖжЗзИиЈјКкЛлЉљМмНнЊњОоПпРрСсТтЋћУуФфХхЦцЧчЏџШш",
Line 2,125: Line 2,190:
ancestors = "zlw-osk",
ancestors = "zlw-osk",
sort_key = {remove_diacritics = c.acute .. c.circ .. c.diaer .. c.caron},
sort_key = {remove_diacritics = c.acute .. c.circ .. c.diaer .. c.caron},
standard_chars = "AaÁáÄäBbCcČčDdĎďEeÉéFfGgHhIiÍíJjKkLlĹ弾MmNnŇňOoÓóÔôPpRrŔŕSsŠšTtŤťUuÚúVvYyÝýZzŽž" .. c.punc,
standardChars = "AaÁáÄäBbCcČčDdĎďEeÉéFfGgHhIiÍíJjKkLlĹ弾MmNnŇňOoÓóÔôPpRrŔŕSsŠšTtŤťUuÚúVvYyÝýZzŽž" .. c.punc,
}
}


Line 2,133: Line 2,198:
"zls",
"zls",
"Latn",
"Latn",
strip_diacritics = {
entry_name = {
remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.dgrave .. c.invbreve .. c.dotbelow,
remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.dgrave .. c.invbreve .. c.dotbelow,
remove_exceptions = {"Ć", "ć", "Ǵ", "ǵ", "Ś", "ś", "Ź", "ź"},
remove_exceptions = {"Ć", "ć", "Ǵ", "ǵ", "Ś", "ś", "Ź", "ź"},
Line 2,145: Line 2,210:
to = {"a" .. p[1], "c" .. p[1], "c" .. p[2], "d" .. p[1], "e", "e" .. p[1], "g" .. p[1], "g" .. p[2], "i" .. p[1], "l", "o" .. p[1], "s" .. p[1], "s" .. p[2], "u" .. p[1], "z" .. p[1], "z" .. p[2]},
to = {"a" .. p[1], "c" .. p[1], "c" .. p[2], "d" .. p[1], "e", "e" .. p[1], "g" .. p[1], "g" .. p[2], "i" .. p[1], "l", "o" .. p[1], "s" .. p[1], "s" .. p[2], "u" .. p[1], "z" .. p[1], "z" .. p[2]},
},
},
standard_chars = "AaBbCcČčDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsŠšTtUuVvZzŽž" .. c.punc,
standardChars = "AaBbCcČčDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsŠšTtUuVvZzŽž" .. c.punc,
}
}


Line 2,160: Line 2,225:
"bnt-sho",
"bnt-sho",
"Latn",
"Latn",
strip_diacritics = {remove_diacritics = c.acute},
entry_name = {remove_diacritics = c.acute},
}
}


Line 2,168: Line 2,233:
"cus-som",
"cus-som",
"Latn, Arab, Osma",
"Latn, Arab, Osma",
strip_diacritics = {
entry_name = {
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ}
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ}
},
},
Line 2,180: Line 2,245:
translit = {
translit = {
Elba = "Elba-translit",
Elba = "Elba-translit",
Vith = "Vith-translit",
},
},
-- Grek display_text, sort_key in [[Module:scripts/data]]
display_text = {
strip_diacritics = {
Grek = s["Grek-displaytext"],
},
entry_name = {
Latn = {
Latn = {
remove_diacritics = c.acute .. c.circ .. c.macron,
remove_diacritics = c.acute,
from = {'^[ie] (%w)', '^të (%w)'}, to = {'%1', '%1'},
from = {'^[ie] (%w)', '^të (%w)'}, to = {'%1', '%1'},
},
},
Grek = { -- Diacritic removal from Grek-stripdiacritics excluded.
Grek = { -- Diacritic removal from Grek-entryname excluded.
from = m_langdata.chars_substitutions["Grek-stripdiacritics"].from,
from = s["Grek-entryname"].from,
to = m_langdata.chars_substitutions["Grek-stripdiacritics"].to,
to = s["Grek-entryname"].to,
},
},
},
},
sort_key = {
sort_key = {
Latn = {
Latn = {
remove_diacritics = c.acute .. c.circ .. c.macron .. c.tilde .. c.breve .. c.caron,
remove_diacritics = c.acute .. c.circ .. c.tilde .. c.breve .. c.caron,
from = {'^[ie] (%w)', '^të (%w)', 'ç', 'dh', 'ë', 'gj', 'll', 'nj', 'rr', 'sh', 'th', 'xh', 'zh'},
from = {'^[ie] (%w)', '^të (%w)', 'ç', 'dh', 'ë', 'gj', 'll', 'nj', 'rr', 'sh', 'th', 'xh', 'zh'},
to = {'%1', '%1', 'c'..p[1], 'd'..p[1], 'e'..p[1], 'g'..p[1], 'l'..p[1], 'n'..p[1], 'r'..p[1], 's'..p[1], 't'..p[1], 'x'..p[1], 'z'..p[1]},
to = {'%1', '%1', 'c'..p[1], 'd'..p[1], 'e'..p[1], 'g'..p[1], 'l'..p[1], 'n'..p[1], 'r'..p[1], 's'..p[1], 't'..p[1], 'x'..p[1], 'z'..p[1]},
}
}
-- TODO: Grek if the default sort key is unsuitable
-- TODO: Grek
},
},
standard_chars = {
standardChars = {
Latn = "AaBbCcÇçDdEeËëFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvXxYyZz",
Latn = "AaBbCcÇçDdEeËëFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvXxYyZz",
c.punc
c.punc
Line 2,212: Line 2,278:
"bnt-ngu",
"bnt-ngu",
"Latn",
"Latn",
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
}
}


Line 2,220: Line 2,286:
"bnt-sts",
"bnt-sts",
"Latn",
"Latn",
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
}
}


Line 2,230: Line 2,296:
ancestors = "osn",
ancestors = "osn",
translit = {
translit = {
Sund = "Sund-translit"
Sund = "su-translit"
},
},
}
}
Line 2,246: Line 2,312:
to = {"o", "ae", "oe", "ss", "z" .. p[1], "ä", "ö"}
to = {"o", "ae", "oe", "ss", "z" .. p[1], "ä", "ö"}
},
},
standard_chars = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvXxYyÅåÄäÖö" .. c.punc,
standardChars = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvXxYyÅåÄäÖö" .. c.punc,
}
}


Line 2,291: Line 2,357:
},
},
override_translit = true,
override_translit = true,
strip_diacritics = {
entry_name = {
Cyrl = s["tg-stripdiacritics"],
Cyrl = s["tg-entryname"],
Latn = s["tg-stripdiacritics"],
Latn = s["tg-entryname"],
},
},
sort_key = {
sort_key = {
Line 2,329: Line 2,395:
"trk-ogz",
"trk-ogz",
"Latn, Cyrl, Arab",
"Latn, Cyrl, Arab",
strip_diacritics = {
entry_name = {
Latn = s["tk-stripdiacritics"],
Latn = s["tk-entryname"],
Cyrl = s["tk-stripdiacritics"],
Cyrl = s["tk-entryname"],
},
},
sort_key = {
sort_key = {
Line 2,343: Line 2,409:
},
},
},
},
ancestors = "trk-eog",
}
}


Line 2,355: Line 2,420:
},
},
override_translit = true,
override_translit = true,
strip_diacritics = {
entry_name = {
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ}
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ}
},
},
standard_chars = {
standardChars = {
Latn = "AaBbKkDdEeGgHhIiLlMmNnOoPpRrSsTtUuWwYy",
Latn = "AaBbKkDdEeGgHhIiLlMmNnOoPpRrSsTtUuWwYy",
c.punc
c.punc
Line 2,379: Line 2,444:
"poz-ton",
"poz-ton",
"Latn",
"Latn",
strip_diacritics = {remove_diacritics = c.acute},
entry_name = {remove_diacritics = c.acute},
sort_key = {remove_diacritics = c.macron},
sort_key = {remove_diacritics = c.macron},
}
}
Line 2,403: Line 2,468:
}
}
},
},
standard_chars = "AaÂâBbCcÇçDdEeFfGgĞğHhIıİiÎîJjKkLlMmNnOoÖöPpRrSsŞşTtUuÛûÜüVvYyZz" .. c.punc,
standardChars = "AaÂâBbCcÇçDdEeFfGgĞğHhIıİiÎîJjKkLlMmNnOoÖöPpRrSsŞşTtUuÛûÜüVvYyZz" .. c.punc,
}
}


Line 2,419: Line 2,484:
"Cyrl, Latn, tt-Arab",
"Cyrl, Latn, tt-Arab",
translit = {
translit = {
Cyrl = "tt-translit",
Cyrl = "tt-translit"
["tt-Arab"] = "tt-translit"
},
},
--override_translit = true, -- enable override until Module code can detect Russian loans such as [[аэропорт]]
override_translit = true,
dotted_dotless_i = true,
dotted_dotless_i = true,
sort_key = {
sort_key = {
Line 2,442: Line 2,506:
}
}


-- "tw" is treated as "ak", see [[WT:LT]]
-- "tw" IS TREATED AS "ak", SEE WT:LT


m["ty"] = {
m["ty"] = {
Line 2,469: Line 2,533:
"zle",
"zle",
"Cyrl",
"Cyrl",
ancestors = "zle-muk",
ancestors = "zle-ouk",
translit = "uk-translit",
translit = "uk-translit",
strip_diacritics = {remove_diacritics = c.grave .. c.acute},
entry_name = {remove_diacritics = c.grave .. c.acute},
sort_key = {
sort_key = {
remove_diacritics = c.grave .. c.acute,
remove_diacritics = c.grave .. c.acute,
Line 2,483: Line 2,547:
}
}
},
},
standard_chars = "АаБбВвГгДдЕеЄєЖжЗзИиІіЇїЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЬьЮюЯя" .. c.punc:gsub("'", ""),  -- Exclude apostrophe.
standardChars = "АаБбВвГгДдЕеЄєЖжЗзИиІіЇїЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЬьЮюЯя" .. c.punc:gsub("'", ""),  -- Exclude apostrophe.
}
}


Line 2,494: Line 2,558:
["ur-Arab"] = "ur-translit"
["ur-Arab"] = "ur-translit"
},
},
strip_diacritics = {
display_text = {
Hebr = "Hebr-common",
},
entry_name = {
["ur-Arab"] = {
["ur-Arab"] = {
-- character "ۂ" code U+06C2 to "ه" and "هٔ" (U+0647 + U+0654) to "ه"; hamzatu l-waṣli to a regular alif
-- character "ۂ" code U+06C2 to "ه" and "هٔ"(U+0647 + U+0654) to "ه"; hamzatu l-waṣli to a regular alif
from = {"هٔ", "ۂ", "ٱ"},
from = {"هٔ", "ۂ", "ٱ"},
to = {"ہ", "ہ", "ا"},
to = {"ہ", "ہ", "ا"},
remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.nunghunna .. c.superalef
remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.nunghunna .. c.superalef
},
},
Hebr = "Hebr-common",
},
sort_key = {
Hebr = "Hebr-common",
},
},
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
standardChars = {
standard_chars = {
["ur-Arab"] = "ایببپتثجچحخدذرزژسشصضطظعغفقکگلࣇڷمنݨوؤہھئٹڈڑآے",
["ur-Arab"] = "ایببپتثجچحخدذرزژسشصضطظعغفقکگلࣇڷمنݨوؤہھئٹڈڑآے",
c.punc,
c.punc,
Line 2,527: Line 2,597:
to = {"е" .. p[1], "я" .. p[1], "я" .. p[2], "я" .. p[3], "я" .. p[4]}
to = {"е" .. p[1], "я" .. p[1], "я" .. p[2], "я" .. p[3], "я" .. p[4]}
},
},
},
strip_diacritics = {
["fa-Arab"] = "ar-stripdiacritics",
},
},
}
}
Line 2,579: Line 2,646:
"bnt-ngu",
"bnt-ngu",
"Latn",
"Latn",
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
}
}


Line 2,591: Line 2,658:
Hebr = "yi-translit",
Hebr = "yi-translit",
},
},
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
display_text = {
Hebr = "Hebr-common",
},
entry_name = {
Hebr = "Hebr-common",
},
sort_key = {
Hebr = "Hebr-common",
},
}
}


Line 2,599: Line 2,674:
"alv-yor",
"alv-yor",
"Latn, Arab",
"Latn, Arab",
strip_diacritics = {
entry_name = {
Latn = {remove_diacritics = c.grave .. c.acute .. c.macron}
Latn = {remove_diacritics = c.grave .. c.acute .. c.macron}
},
},
Line 2,642: Line 2,717:
"bnt-ngu",
"bnt-ngu",
"Latn",
"Latn",
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
}
}


return require("Module:languages").finalizeData(m, "language")
return require("Module:languages").finalizeData(m, "language")