Module:languages/data/2: Difference between revisions

No edit summary
Undid revision 89976079 by Chuck Entz (talk): it had absolutely no effect, so might as well revert to be on the safe side
Tag: Reverted
Line 20: Line 20:
s["de-Latn-standardchars"] = "AaÄäBbCcDdEeFfGgHhIiJjKkLlMmNnOoÖöPpQqRrSsẞßTtUuÜüVvWwXxYyZz"
s["de-Latn-standardchars"] = "AaÄäBbCcDdEeFfGgHhIiJjKkLlMmNnOoÖöPpQqRrSsẞßTtUuÜüVvWwXxYyZz"


s["ka-entryname"] = {remove_diacritics = c.circ}
s["ka-stripdiacritics"] = {remove_diacritics = c.circ}


s["no-sortkey"] = {
s["no-sortkey"] = {
Line 31: Line 31:
s["no-standardchars"] = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÆæØøÅå" .. c.punc
s["no-standardchars"] = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÆæØøÅå" .. c.punc


s["tg-entryname"] = {remove_diacritics = c.grave .. c.acute}
-- Substitution rule registered under "sa-Deva-stripdiacritics": deletes every
-- run of accent marks (c.anudatta, c.udatta, c.dsvarita, c.tsvarita).
-- This is expressed with from/to rather than remove_diacritics because a १ or ३
-- standing immediately before such a run must be deleted together with it,
-- whereas a १ or ३ that carries no accent mark must be left in place.
do
	-- Character-class contents shared by the pattern below.
	local accent_marks = c.anudatta .. c.udatta .. c.dsvarita .. c.tsvarita
	s["sa-Deva-stripdiacritics"] = {
		-- Optional १/३, then one or more accent marks; the whole match is replaced by "".
		from = {"[१३]?[" .. accent_marks .. "]+"},
		to = {""},
	}
end


s["tk-entryname"] = {remove_diacritics = c.macron}
s["tg-stripdiacritics"] = {remove_diacritics = c.grave .. c.acute}
 
s["tk-stripdiacritics"] = {remove_diacritics = c.macron}


local m = {}
local m = {}
Line 42: Line 47:
"cus-eas",
"cus-eas",
"Latn, Ethi",
"Latn, Ethi",
entry_name = {
strip_diacritics = {
Latn = {remove_diacritics = c.acute},
Latn = {remove_diacritics = c.acute},
},
},
Line 54: Line 59:
translit = {
translit = {
Cyrl = "ab-translit",
Cyrl = "ab-translit",
Geor = "Geor-translit",
-- Geor translit in [[Module:scripts/data]]
},
},
override_translit = true,
override_translit = true,
Line 60: Line 65:
Cyrl = s["cau-Cyrl-displaytext"]
Cyrl = s["cau-Cyrl-displaytext"]
},
},
entry_name = {
strip_diacritics = {
Cyrl = {
Cyrl = {
remove_diacritics = c.acute,
remove_diacritics = c.acute,
Line 66: Line 71:
to = {"а"},
to = {"а"},
},
},
Latn = s["cau-Latn-entryname"],
Latn = s["cau-Latn-stripdiacritics"],
},
},
sort_key = {
sort_key = {
Line 90: Line 95:
29572,
29572,
"ira-cen",
"ira-cen",
"Avst, Gujr",
"Avst, Gujr, Deva",
translit = {
translit = {
Avst = "Avst-translit"
Avst = "Avst-translit"
Line 137: Line 142:
13955,
13955,
"sem-arb",
"sem-arb",
"Arab, Hebr, Syrc, Brai",
"Arab, Hebr, Syrc, Brai, Nbat",
translit = {
translit = {
Arab = "ar-translit"
Arab = "ar-translit"
},
},
display_text = {
strip_diacritics = {
Hebr = "Hebr-common",
Arab = "ar-stripdiacritics",
},
entry_name = {
Arab = "ar-entryname",
Hebr = "Hebr-common",
},
sort_key = {
Hebr = "Hebr-common",
},
},
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
}
}


Line 176: Line 175:
Cyrl = s["cau-Cyrl-displaytext"],
Cyrl = s["cau-Cyrl-displaytext"],
},
},
entry_name = {
strip_diacritics = {
Cyrl = s["cau-Cyrl-entryname"],
Cyrl = s["cau-Cyrl-stripdiacritics"],
Latn = s["cau-Latn-entryname"],
Latn = s["cau-Latn-stripdiacritics"],
},
},
sort_key = {
sort_key = {
Line 202: Line 201:
ancestors = "trk-oat",
ancestors = "trk-oat",
dotted_dotless_i = true,
dotted_dotless_i = true,
entry_name = {
strip_diacritics = {
Latn = {
Latn = {
from = {"ʼ"},
from = {"ʼ"},
Line 208: Line 207:
},
},
["fa-Arab"] = {
["fa-Arab"] = {
module = "ar-entryname",
module = "ar-stripdiacritics",
["from"] = {
["from"] = {
"ۆ",
"ۆ",
Line 267: Line 266:
"zle",
"zle",
"Cyrl, Latn",
"Cyrl, Latn",
ancestors = "zle-obe",
ancestors = "zle-mbe",
translit = {
translit = {
Cyrl = "be-translit",
Cyrl = "be-translit",
},
},
entry_name = {
strip_diacritics = {
Cyrl = {
Cyrl = {
remove_diacritics = c.grave .. c.acute,
remove_diacritics = c.grave .. c.acute,
Line 293: Line 292:
},
},
},
},
standardChars = {
standard_chars = {
Cyrl = "АаБбВвГгДдЕеЁёЖжЗзІіЙйКкЛлМмНнОоПпРрСсТтУуЎўФфХхЦцЧчШшЫыЬьЭэЮюЯя",
Cyrl = "АаБбВвГгДдЕеЁёЖжЗзІіЙйКкЛлМмНнОоПпРрСсТтУуЎўФфХхЦцЧчШшЫыЬьЭэЮюЯя",
Latn = "AaBbCcĆćČčDdEeFfGgHhIiJjKkLlŁłMmNnŃńOoPpRrSsŚśŠšTtUuŬŭVvYyZzŹźŽž",
Latn = "AaBbCcĆćČčDdEeFfGgHhIiJjKkLlŁłMmNnŃńOoPpRrSsŚśŠšTtUuŬŭVvYyZzŹźŽž",
Line 307: Line 306:
ancestors = "cu-bgm",
ancestors = "cu-bgm",
translit = "bg-translit",
translit = "bg-translit",
entry_name = {
strip_diacritics = {
remove_diacritics = c.grave .. c.acute,
remove_diacritics = c.grave .. c.acute,
remove_exceptions = {"%f[^%z%s]ѝ%f[%z%s]"},
remove_exceptions = {"%f[^%z%s]ѝ%f[%z%s]"},
Line 315: Line 314:
remove_exceptions = {"%f[^%z%s]ѝ%f[%z%s]"},
remove_exceptions = {"%f[^%z%s]ѝ%f[%z%s]"},
},
},
standardChars = "АаБбВвГгДдЕеЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЬьЮюЯя" .. c.punc,
standard_chars = "АаБбВвГгДдЕеЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЬьЮюЯя" .. c.punc,
}
}


Line 363: Line 362:
"Tibt", -- sometimes Deva?
"Tibt", -- sometimes Deva?
ancestors = "xct",
ancestors = "xct",
translit = "Tibt-translit",
override_translit = true,
override_translit = true,
display_text = s["Tibt-displaytext"],
-- Tibt translit, display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
entry_name = s["Tibt-entryname"],
sort_key = "Tibt-sortkey",
}
}


Line 389: Line 385:
ancestors = "roa-oca",
ancestors = "roa-oca",
sort_key = {remove_diacritics = c.grave .. c.acute .. c.diaer .. c.cedilla .. "·"},
sort_key = {remove_diacritics = c.grave .. c.acute .. c.diaer .. c.cedilla .. "·"},
standardChars = "AaÀàBbCcÇçDdEeÉéÈèFfGgHhIiÍíÏïJjLlMmNnOoÓóÒòPpQqRrSsTtUuÚúÜüVvXxYyZz·" .. c.punc,
standard_chars = "AaÀàBbCcÇçDdEeÉéÈèFfGgHhIiÍíÏïJjLlMmNnOoÓóÒòPpQqRrSsTtUuÚúÜüVvXxYyZz·" .. c.punc,
}
}


Line 405: Line 401:
Cyrl = s["cau-Cyrl-displaytext"]
Cyrl = s["cau-Cyrl-displaytext"]
},
},
entry_name = {
strip_diacritics = {
Cyrl = s["cau-Cyrl-entryname"],
Cyrl = s["cau-Cyrl-stripdiacritics"],
Latn = s["cau-Latn-entryname"],
Latn = s["cau-Latn-stripdiacritics"],
},
},
sort_key = {
sort_key = {
Line 438: Line 434:
to = {"c" .. p[1], "g" .. p[1], "s" .. p[1], "s" .. p[2]}
to = {"c" .. p[1], "g" .. p[1], "s" .. p[1], "s" .. p[2]}
},
},
standardChars = "AaÀàBbCcDdEeÈèFfGgHhIiÌìÏïJjLlMmNnOoÒòPpQqRrSsTtUuÙùÜüVvZz" .. c.punc,
standard_chars = "AaÀàBbCcDdEeÈèFfGgHhIiÌìÏïJjLlMmNnOoÒòPpQqRrSsTtUuÙùÜüVvZz" .. c.punc,
}
}


Line 461: Line 457:
to = {"a" .. p[1], "c" .. p[1], "d" .. p[1], "e" .. p[1], "e" .. p[2], "h" .. p[1], "i" .. p[1], "n" .. p[1], "o" .. p[1], "r" .. p[1], "s" .. p[1], "t" .. p[1], "u" .. p[1], "u" .. p[2], "y" .. p[1], "z" .. p[1]}
to = {"a" .. p[1], "c" .. p[1], "d" .. p[1], "e" .. p[1], "e" .. p[2], "h" .. p[1], "i" .. p[1], "n" .. p[1], "o" .. p[1], "r" .. p[1], "s" .. p[1], "t" .. p[1], "u" .. p[1], "u" .. p[2], "y" .. p[1], "z" .. p[1]}
},
},
standardChars = "AaÁáBbCcČčDdĎďEeÉéĚěFfGgHhIiÍíJjKkLlMmNnŇňOoÓóPpRrŘřSsŠšTtŤťUuÚúŮůVvYyÝýZzŽž" .. c.punc,
standard_chars = "AaÁáBbCcČčDdĎďEeÉéĚěFfGgHhIiÍíJjKkLlMmNnŇňOoÓóPpRrŘřSsŠšTtŤťUuÚúŮůVvYyÝýZzŽž" .. c.punc,
}
}


Line 468: Line 464:
35499,
35499,
"zls",
"zls",
"Cyrs, Glag",
"Cyrs, Glag, Zname",
translit = {
translit = {
Cyrs = "Cyrs-translit",
Cyrs = "Cyrs-translit",
Glag = "Glag-translit"
Glag = "Glag-translit"
},
},
entry_name = {
-- Cyrs strip_diacritics, sort_key in [[Module:scripts/data]]
Cyrs = s["Cyrs-entryname"]
},
sort_key = {
Cyrs = s["Cyrs-sortkey"]
},
}
}


Line 506: Line 497:
to = {"c" .. p[1], "d" .. p[1], "f" .. p[1], "g" .. p[1], "l" .. p[1], "p" .. p[1], "r" .. p[1], "t" .. p[1]}
to = {"c" .. p[1], "d" .. p[1], "f" .. p[1], "g" .. p[1], "l" .. p[1], "p" .. p[1], "r" .. p[1], "t" .. p[1]}
},
},
standardChars = "ÂâAaBbCcDdEeÊêFfGgHhIiÎîLlMmNnOoÔôPpRrSsTtUuÛûWwŴŵYyŶŷ" .. c.punc,
standard_chars = "ÂâAaBbCcDdEeÊêFfGgHhIiÎîLlMmNnOoÔôPpRrSsTtUuÛûWwŴŵYyŶŷ" .. c.punc,
}
}


Line 521: Line 512:
to = {"z" .. p[1], "z" .. p[2], "z" .. p[3]}
to = {"z" .. p[1], "z" .. p[2], "z" .. p[3]}
},
},
standardChars = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÆæØøÅå" .. c.punc,
standard_chars = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÆæØøÅå" .. c.punc,
}
}


Line 529: Line 520:
"gmw-hgm",
"gmw-hgm",
"Latn, Latf, Brai",
"Latn, Latf, Brai",
ancestors = "gmh",
ancestors = "de-ear",
sort_key = {
sort_key = {
Latn = s["de-Latn-sortkey"],
Latn = s["de-Latn-sortkey"],
Latf = s["de-Latn-sortkey"],
Latf = s["de-Latn-sortkey"],
},
},
standardChars = {
standard_chars = {
Latn = s["de-Latn-standardchars"],
Latn = s["de-Latn-standardchars"],
Latf = s["de-Latn-standardchars"],
Latf = s["de-Latn-standardchars"],
Line 551: Line 542:
Diak = "Diak-translit",
Diak = "Diak-translit",
},
},
    ancestors = "dv-old",
override_translit = true,
override_translit = true,
}
}
Line 560: Line 552:
"Tibt",
"Tibt",
ancestors = "xct",
ancestors = "xct",
translit = "Tibt-translit",
override_translit = true,
override_translit = true,
display_text = s["Tibt-displaytext"],
-- Tibt translit, display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
entry_name = s["Tibt-entryname"],
sort_key = "Tibt-sortkey",
}
}


Line 585: Line 574:
"Grek, Polyt, Brai",
"Grek, Polyt, Brai",
ancestors = "el-kth",
ancestors = "el-kth",
translit = {
translit = "el-translit",
Grek = "el-translit",
Polyt = "grc-translit",
},
override_translit = true,
override_translit = true,
display_text = {
-- Grek and Polyt display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
Grek = s["Grek-displaytext"],
standard_chars = {
Polyt = s["Polyt-displaytext"],
},
entry_name = {
Grek = s["Grek-entryname"],
Polyt = s["Polyt-entryname"],
},
sort_key = {
Grek = s["Grek-sortkey"],
Polyt = s["Polyt-sortkey"],
},
standardChars = {
Grek = "΅·ͺ΄ΑαΆάΒβΓγΔδΕεέΈΖζΗηΉήΘθΙιΊίΪϊΐΚκΛλΜμΝνΞξΟοΌόΠπΡρΣσςΤτΥυΎύΫϋΰΦφΧχΨψΩωΏώ",
Grek = "΅·ͺ΄ΑαΆάΒβΓγΔδΕεέΈΖζΗηΉήΘθΙιΊίΪϊΐΚκΛλΜμΝνΞξΟοΌόΠπΡρΣσςΤτΥυΎύΫϋΰΦφΧχΨψΩωΏώ",
Brai = c.braille,
Brai = c.braille,
Line 620: Line 595:
-- Many of these are needed for sorting language names.
-- Many of these are needed for sorting language names.
remove_diacritics = "'\"%-%.,%s·ʻʼ" .. c.diacritics,
remove_diacritics = "'\"%-%.,%s·ʻʼ" .. c.diacritics,
-- These are found in entry names.
-- These are found in pagenames.
from = {"[ɒæ🅱¢©ᴄðđəǝɜɡħʜıɨłŋɲøɔœꝑꝓꝕßʋ]"},
from = {"[ɒæ🅱¢©ᴄðđəǝɜɡħʜıɨłŋɲøɔœꝑꝓꝕßʋ]"},
to = {{
to = {{
Line 632: Line 607:
},
},
},
},
standardChars = {
standard_chars = {
Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz",
Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz",
Brai = c.braille,
Brai = c.braille,
Line 649: Line 624:
to = {"c" .. p[1], "g" .. p[1], "h" .. p[1], "j" .. p[1], "s" .. p[1], "u" .. p[1]}
to = {"c" .. p[1], "g" .. p[1], "h" .. p[1], "j" .. p[1], "s" .. p[1], "u" .. p[1]}
},
},
standardChars = "AaBbCcĈĉDdEeFfGgĜĝHhĤĥIiJjĴĵKkLlMmNnOoPpRrSsŜŝTtUuŬŭVvZz" .. c.punc,
standard_chars = "AaBbCcĈĉDdEeFfGgĜĝHhĤĥIiJjĴĵKkLlMmNnOoPpRrSsŜŝTtUuŬŭVvZz" .. c.punc,
}
}


Line 666: Line 641:
},
},
},
},
standardChars = {
standard_chars = {
Latn = "AaÁáBbCcDdEeÉéFfGgHhIiÍíJjLlMmNnÑñOoÓóPpQqRrSsTtUuÚúÜüVvXxYyZz",
Latn = "AaÁáBbCcDdEeÉéFfGgHhIiÍíJjLlMmNnÑñOoÓóPpQqRrSsTtUuÚúÜüVvXxYyZz",
Brai = c.braille,
Brai = c.braille,
Line 688: Line 663:
}
}
},
},
standardChars = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvÕõÄäÖöÜü" .. c.punc,
standard_chars = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvÕõÄäÖöÜü" .. c.punc,
}
}


Line 700: Line 675:
to = {"c" .. p[1], "n" .. p[1]}
to = {"c" .. p[1], "n" .. p[1]}
},
},
standardChars = "AaBbDdEeFfGgHhIiJjKkLlMmNnÑñOoPpRrSsTtUuXxZz" .. c.punc,
standard_chars = "AaBbDdEeFfGgHhIiJjKkLlMmNnÑñOoPpRrSsTtUuXxZz" .. c.punc,
}
}


Line 709: Line 684:
"fa-Arab, Hebr",
"fa-Arab, Hebr",
ancestors = "fa-cls",
ancestors = "fa-cls",
display_text = {
strip_diacritics = {
Hebr = "Hebr-common",
},
entry_name = {
["fa-Arab"] = {
["fa-Arab"] = {
-- character "ۂ" code U+06C2 to "ه" and "هٔ"(U+0647 + U+0654) to "ه"; hamzatu l-waṣli to a regular alif
-- character "ۂ" code U+06C2 to "ه" and "هٔ" (U+0647 + U+0654) to "ه"; hamzatu l-waṣli to a regular alif
from = {"هٔ", "ٱ"}, -- character "ۂ" code U+06C2 to "ه"; hamzatu l-waṣli to a regular alif
from = {"هٔ", "ٱ"}, -- character "ۂ" code U+06C2 to "ه"; hamzatu l-waṣli to a regular alif
to = {"ه", "ا"},
to = {"ه", "ا"},
remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.superalef,
remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.superalef,
},
},
Hebr = "Hebr-common",
},
sort_key = {
Hebr = "Hebr-common",
},
},
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
}
}


Line 742: Line 711:
to = {"’"}
to = {"’"}
},
},
entry_name = { -- used to indicate gemination of the next consonant
strip_diacritics = { -- used to indicate gemination of the next consonant
remove_diacritics = "ˣ",
remove_diacritics = "ˣ",
from = {"’"},
from = {"’"},
Line 748: Line 717:
},
},
sort_key = { -- [[Appendix:Finnish alphabet#Collation]] + "aͤ" and "oͤ" as historical variants of "ä" and "ö".
sort_key = { -- [[Appendix:Finnish alphabet#Collation]] + "aͤ" and "oͤ" as historical variants of "ä" and "ö".
remove_diacritics = "':" .. c.diacritics,
remove_diacritics = "':" .. c.diacritics,
remove_exceptions = {
remove_exceptions = {
"a[" .. c.ringabove .. c.diaer .. c.small_e .. "]", -- åäaͤ
"a[" .. c.ringabove .. c.diaer .. c.small_e .. "]", -- åäaͤ
Line 757: Line 726:
to = {"ae", "d", "l", "n", "oe", "ss", "th", "y", "z" .. p[1], "ä", "ö", "ö", "%1"}
to = {"ae", "d", "l", "n", "oe", "ss", "th", "y", "z" .. p[1], "ä", "ö", "ö", "%1"}
},
},
standardChars = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÄäÖö" .. c.punc,
standard_chars = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÄäÖö" .. c.punc,
}
}


Line 776: Line 745:
to = {"a" .. p[1], "d" .. p[1], "i" .. p[1], "o" .. p[1], "u" .. p[1], "y" .. p[1], "z" .. p[1], "z" .. p[2]}
to = {"a" .. p[1], "d" .. p[1], "i" .. p[1], "o" .. p[1], "u" .. p[1], "y" .. p[1], "z" .. p[1], "z" .. p[2]}
},
},
standardChars = "AaÁáBbDdÐðEeFfGgHhIiÍíJjKkLlMmNnOoÓóPpRrSsTtUuÚúVvYyÝýÆæØø" .. c.punc,
standard_chars = "AaÁáBbDdÐðEeFfGgHhIiÍíJjKkLlMmNnOoÓóPpRrSsTtUuÚúVvYyÝýÆæØø" .. c.punc,
}
}


Line 784: Line 753:
"roa-oil",
"roa-oil",
"Latn, Brai",
"Latn, Brai",
display_text = {
Latn = {
from = {"'"},
to = {"’"}
},
},
entry_name = {
Latn = {
from = {"’"},
to = {"'"},
},
},
ancestors = "frm",
ancestors = "frm",
sort_key = {
sort_key = {
Latn = s["roa-oil-sortkey"]
Latn = s["roa-oil-sortkey"]
},
},
standardChars = {
standard_chars = {
Latn = "AaÀàÂâBbCcÇçDdEeÉéÈèÊêËëFfGgHhIiÎîÏïJjLlMmNnOoÔôŒœPpQqRrSsTtUuÙùÛûÜüVvXxYyZz",
Latn = "AaÀàÂâBbCcÇçDdEeÉéÈèÊêËëFfGgHhIiÎîÏïJjLlMmNnOoÔôŒœPpQqRrSsTtUuÙùÛûÜüVvXxYyZz",
Brai = c.braille,
Brai = c.braille,
Line 817: Line 774:
to = {"i"}
to = {"i"}
},
},
standardChars = "AaâäàÆæBbCcDdEeéêëèFfGgHhIiïìYyỳJjKkLlMmNnOoôöòPpRrSsTtUuúûüùVvWwZz" .. c.punc,
standard_chars = "AaâäàÆæBbCcDdEeéêëèFfGgHhIiïìYyỳJjKkLlMmNnOoôöòPpRrSsTtUuúûüùVvWwZz" .. c.punc,
}
}


Line 831: Line 788:
to = {"bh", "ch", "dh", "fh", "gh", "mh", "ph", "sh", "th"}
to = {"bh", "ch", "dh", "fh", "gh", "mh", "ph", "sh", "th"}
},
},
standardChars = "AaÁáBbCcDdEeÉéFfGgHhIiÍíLlMmNnOoÓóPpRrSsTtUuÚúVv" .. c.punc,
standard_chars = "AaÁáBbCcDdEeÉéFfGgHhIiÍíLlMmNnOoÓóPpRrSsTtUuÚúVv" .. c.punc,
}
}


Line 841: Line 798:
ancestors = "mga",
ancestors = "mga",
sort_key = {remove_diacritics = c.grave .. c.acute},
sort_key = {remove_diacritics = c.grave .. c.acute},
standardChars = "AaÀàBbCcDdEeÈèFfGgHhIiÌìLlMmNnOoÒòPpRrSsTtUuÙù" .. c.punc,
standard_chars = "AaÀàBbCcDdEeÈèFfGgHhIiÌìLlMmNnOoÒòPpRrSsTtUuÙù" .. c.punc,
}
}


Line 854: Line 811:
to = {"n" .. p[1]}
to = {"n" .. p[1]}
},
},
standardChars = "AaÁáBbCcDdEeÉéFfGgHhIiÍíÏïLlMmNnÑñOoÓóPpQqRrSsTtUuÚúÜüVvXxZz" .. c.punc,
standard_chars = "AaÁáBbCcDdEeÉéFfGgHhIiÍíÏïLlMmNnÑñOoÓóPpQqRrSsTtUuÚúÜüVvXxZz" .. c.punc,
}
}


m["gn"] = {
m["gu"] = {
"Guaraní",
35876,
"tup-gua",
"Latn",
}
 
m["gu"] = {
"Gujarati",
"Gujarati",
5137,
5137,
Line 873: Line 823:
Gujr = "gu-translit",
Gujr = "gu-translit",
},
},
entry_name = {
strip_diacritics = {
Arab = {remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.kasra .. c.shadda .. c.sukun},
Arab = {remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.kasra .. c.shadda .. c.sukun},
Gujr = {remove_diacritics = "઼"},
Gujr = {remove_diacritics = "઼"},
Line 886: Line 836:
ancestors = "mga",
ancestors = "mga",
sort_key = {remove_diacritics = c.cedilla .. "-"},
sort_key = {remove_diacritics = c.cedilla .. "-"},
standardChars = "AaBbCcÇçDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwYy" .. c.punc,
standard_chars = "AaBbCcÇçDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwYy" .. c.punc,
}
}


Line 894: Line 844:
"cdc-wst",
"cdc-wst",
"Latn, Arab",
"Latn, Arab",
entry_name = {
strip_diacritics = {
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.macron}
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.macron}
},
},
Line 911: Line 861:
"Hebr, Phnx, Brai, Samr",
"Hebr, Phnx, Brai, Samr",
ancestors = "he-med",
ancestors = "he-med",
display_text = {
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
Hebr = "Hebr-common",
-- Samr strip_diacritics, sort_key in [[Module:scripts/data]]
},
-- Phnx translit in [[Module:scripts/data]] (NOTE: not present before, presumably an accidental omission)
entry_name = {
Hebr = "Hebr-common",
Samr = s["Samr-entryname"],
},
sort_key = {
Hebr = "Hebr-common",
Samr = s["Samr-sortkey"],
},
}
}


Line 932: Line 874:
Deva = "hi-translit"
Deva = "hi-translit"
},
},
standardChars = {
standard_chars = {
Deva = "अआइईउऊएऐओऔकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसहत्रज्ञक्षक़ख़ग़ज़झ़ड़ढ़फ़काखागाघाङाचाछाजाझाञाटाठाडाढाणाताथादाधानापाफाबाभामायारालावाशाषासाहात्राज्ञाक्षाक़ाख़ाग़ाज़ाझ़ाड़ाढ़ाफ़ाकिखिगिघिङिचिछिजिझिञिटिठिडिढिणितिथिदिधिनिपिफिबिभिमियिरिलिविशिषिसिहित्रिज्ञिक्षिक़िख़िग़िज़िझ़िड़िढ़िफ़िकीखीगीघीङीचीछीजीझीञीटीठीडीढीणीतीथीदीधीनीपीफीबीभीमीयीरीलीवीशीषीसीहीत्रीज्ञीक्षीक़ीख़ीग़ीज़ीझ़ीड़ीढ़ीफ़ीकुखुगुघुङुचुछुजुझुञुटुठुडुढुणुतुथुदुधुनुपुफुबुभुमुयुरुलुवुशुषुसुहुत्रुज्ञुक्षुक़ुख़ुग़ुज़ुझ़ुड़ुढ़ुफ़ुकूखूगूघूङूचूछूजूझूञूटूठूडूढूणूतूथूदूधूनूपूफूबूभूमूयूरूलूवूशूषूसूहूत्रूज्ञूक्षूक़ूख़ूग़ूज़ूझ़ूड़ूढ़ूफ़ूकेखेगेघेङेचेछेजेझेञेटेठेडेढेणेतेथेदेधेनेपेफेबेभेमेयेरेलेवेशेषेसेहेत्रेज्ञेक्षेक़ेख़ेग़ेज़ेझ़ेड़ेढ़ेफ़ेकैखैगैघैङैचैछैजैझैञैटैठैडैढैणैतैथैदैधैनैपैफैबैभैमैयैरैलैवैशैषैसैहैत्रैज्ञैक्षैक़ैख़ैग़ैज़ैझ़ैड़ैढ़ैफ़ैकोखोगोघोङोचोछोजोझोञोटोठोडोढोणोतोथोदोधोनोपोफोबोभोमोयोरोलोवोशोषोसोहोत्रोज्ञोक्षोक़ोख़ोग़ोज़ोझ़ोड़ोढ़ोफ़ोकौखौगौघौङौचौछौजौझौञौटौठौडौढौणौतौथौदौधौनौपौफौबौभौमौयौरौलौवौशौषौसौहौत्रौज्ञौक्षौक़ौख़ौग़ौज़ौझ़ौड़ौढ़ौफ़ौक्ख्ग्घ्ङ्च्छ्ज्झ्ञ्ट्ठ्ड्ढ्ण्त्थ्द्ध्न्प्फ्ब्भ्म्य्र्ल्व्श्ष्स्ह्त्र्ज्ञ्क्ष्क़्ख़्ग़्ज़्झ़्ड़्ढ़्फ़्।॥०१२३४५६७८९॰",
Deva = "अआइईउऊएऐओऔकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसहत्रज्ञक्षक़ख़ग़ज़झ़ड़ढ़फ़काखागाघाङाचाछाजाझाञाटाठाडाढाणाताथादाधानापाफाबाभामायारालावाशाषासाहात्राज्ञाक्षाक़ाख़ाग़ाज़ाझ़ाड़ाढ़ाफ़ाकिखिगिघिङिचिछिजिझिञिटिठिडिढिणितिथिदिधिनिपिफिबिभिमियिरिलिविशिषिसिहित्रिज्ञिक्षिक़िख़िग़िज़िझ़िड़िढ़िफ़िकीखीगीघीङीचीछीजीझीञीटीठीडीढीणीतीथीदीधीनीपीफीबीभीमीयीरीलीवीशीषीसीहीत्रीज्ञीक्षीक़ीख़ीग़ीज़ीझ़ीड़ीढ़ीफ़ीकुखुगुघुङुचुछुजुझुञुटुठुडुढुणुतुथुदुधुनुपुफुबुभुमुयुरुलुवुशुषुसुहुत्रुज्ञुक्षुक़ुख़ुग़ुज़ुझ़ुड़ुढ़ुफ़ुकूखूगूघूङूचूछूजूझूञूटूठूडूढूणूतूथूदूधूनूपूफूबूभूमूयूरूलूवूशूषूसूहूत्रूज्ञूक्षूक़ूख़ूग़ूज़ूझ़ूड़ूढ़ूफ़ूकेखेगेघेङेचेछेजेझेञेटेठेडेढेणेतेथेदेधेनेपेफेबेभेमेयेरेलेवेशेषेसेहेत्रेज्ञेक्षेक़ेख़ेग़ेज़ेझ़ेड़ेढ़ेफ़ेकैखैगैघैङैचैछैजैझैञैटैठैडैढैणैतैथैदैधैनैपैफैबैभैमैयैरैलैवैशैषैसैहैत्रैज्ञैक्षैक़ैख़ैग़ैज़ैझ़ैड़ैढ़ैफ़ैकोखोगोघोङोचोछोजोझोञोटोठोडोढोणोतोथोदोधोनोपोफोबोभोमोयोरोलोवोशोषोसोहोत्रोज्ञोक्षोक़ोख़ोग़ोज़ोझ़ोड़ोढ़ोफ़ोकौखौगौघौङौचौछौजौझौञौटौठौडौढौणौतौथौदौधौनौपौफौबौभौमौयौरौलौवौशौषौसौहौत्रौज्ञौक्षौक़ौख़ौग़ौज़ौझ़ौड़ौढ़ौफ़ौक्ख्ग्घ्ङ्च्छ्ज्झ्ञ्ट्ठ्ड्ढ्ण्त्थ्द्ध्न्प्फ्ब्भ्म्य्र्ल्व्श्ष्स्ह्त्र्ज्ञ्क्ष्क़्ख़्ग़्ज़्झ़्ड़्ढ़्फ़्।॥०१२३४५६७८९॰",
c.punc
c.punc
Line 982: Line 924:
},
},
},
},
standardChars = {
standard_chars = {
Latn = "AaÁáBbCcDdEeÉéFfGgHhIiÍíJjKkLlMmNnOoÓóÖöŐőPpQqRrSsTtUuÚúÜüŰűVvWwXxYyZz",
Latn = "AaÁáBbCcDdEeÉéFfGgHhIiÍíJjKkLlMmNnOoÓóÖöŐőPpQqRrSsTtUuÚúÜüŰűVvWwXxYyZz",
c.punc
c.punc
Line 994: Line 936:
"Armn, Brai",
"Armn, Brai",
ancestors = "axm",
ancestors = "axm",
translit = {
-- Armn translit in [[Module:scripts/data]]
Armn = "Armn-translit"
},
override_translit = true,
override_translit = true,
entry_name = {
strip_diacritics = {
Armn = {
Armn = {
remove_diacritics = "՛՜՞՟",
remove_diacritics = "՛՜՞՟",
Line 1,039: Line 979:
"Latn",
"Latn",
ancestors = "ms",
ancestors = "ms",
standardChars = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz" .. c.punc,
standard_chars = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz" .. c.punc,
}
}


Line 1,048: Line 988:
"Latn",
"Latn",
type = "appendix-constructed",
type = "appendix-constructed",
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ},
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.circ},
}
}


Line 1,056: Line 996:
"alv-igb",
"alv-igb",
"Latn",
"Latn",
entry_name = {remove_diacritics = c.grave .. c.acute .. c.macron},
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.macron},
sort_key = {
sort_key = {
from = {"gb", "gh", "gw", "ị", "kp", "kw", "ṅ", "nw", "ny", "ọ", "sh", "ụ"},
from = {"gb", "gh", "gw", "ị", "kp", "kw", "ṅ", "nw", "ny", "ọ", "sh", "ụ"},
Line 1,104: Line 1,044:
to = {"a" .. p[1], "d" .. p[1], "e" .. p[1], "i" .. p[1], "o" .. p[1], "u" .. p[1], "y" .. p[1], "z" .. p[1], "z" .. p[2], "z" .. p[3]}
to = {"a" .. p[1], "d" .. p[1], "e" .. p[1], "i" .. p[1], "o" .. p[1], "u" .. p[1], "y" .. p[1], "z" .. p[1], "z" .. p[2], "z" .. p[3]}
},
},
standardChars = "AaÁáBbDdÐðEeÉéFfGgHhIiÍíJjKkLlMmNnOoÓóPpRrSsTtUuÚúVvXxYyÝýÞþÆæÖö" .. c.punc,
standard_chars = "AaÁáBbDdÐðEeÉéFfGgHhIiÍíJjKkLlMmNnOoÓóPpRrSsTtUuÚúVvXxYyÝýÞþÆæÖö" .. c.punc,
}
}


Line 1,114: Line 1,054:
ancestors = "roa-oit",
ancestors = "roa-oit",
sort_key = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.ringabove},
sort_key = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.ringabove},
standardChars = "AaÀàBbCcDdEeÈèÉéFfGgHhIiÌìLlMmNnOoÒòPpQqRrSsTtUuÙùVvZz" .. c.punc,
standard_chars = "AaÀàBbCcDdEeÈèÉéFfGgHhIiÌìLlMmNnOoÒòPpQqRrSsTtUuÙùVvZz" .. c.punc,
}
}


Line 1,137: Line 1,077:
link_tr = true,
link_tr = true,
display_text = s["jpx-displaytext"],
display_text = s["jpx-displaytext"],
entry_name = s["jpx-entryname"],
strip_diacritics = s["jpx-stripdiacritics"],
sort_key = s["jpx-sortkey"],
sort_key = s["jpx-sortkey"],
}
}
Line 1,145: Line 1,085:
33549,
33549,
"poz",
"poz",
"Latn, Java",
"Latn, Java, Arab",
ancestors = "kaw",
ancestors = "kaw",
translit = {
translit = {
Line 1,151: Line 1,091:
},
},
link_tr = true,
link_tr = true,
entry_name = {
strip_diacritics = {
Latn = {remove_diacritics = c.circ} -- Modern jv don't use ê
Latn = {remove_diacritics = c.circ} -- Modern jv don't use ê
},
},
Line 1,168: Line 1,108:
"Geor, Geok, Hebr", -- Hebr is used to write Judeo-Georgian
"Geor, Geok, Hebr", -- Hebr is used to write Judeo-Georgian
ancestors = "ka-mid",
ancestors = "ka-mid",
translit = {
-- Geor, Geok translit in [[Module:scripts/data]]
Geor = "Geor-translit",
Geok = "Geok-translit",
},
override_translit = true,
override_translit = true,
display_text = {
strip_diacritics = {
Hebr = "Hebr-common",
Geor = s["ka-stripdiacritics"],
Geok = s["ka-stripdiacritics"],
},
},
entry_name = {
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
Geor = s["ka-entryname"],
Geok = s["ka-entryname"],
Hebr = "Hebr-common",
},
sort_key = {
Hebr = "Hebr-common",
}
}
}


Line 1,231: Line 1,162:
},
},
},
},
standardChars = {
standard_chars = {
Cyrl = "АаӘәБбВвГгҒғДдЕеЁёЖжЗзИиЙйКкҚқЛлМмНнҢңОоӨөПпРрСсТтУуҰұҮүФфХхҺһЦцЧчШшЩщЪъЫыІіЬьЭэЮюЯя",
Cyrl = "АаӘәБбВвГгҒғДдЕеЁёЖжЗзИиЙйКкҚқЛлМмНнҢңОоӨөПпРрСсТтУуҰұҮүФфХхҺһЦцЧчШшЩщЪъЫыІіЬьЭэЮюЯя",
c.punc
c.punc
Line 1,263: Line 1,194:
"Knda, Tutg",
"Knda, Tutg",
ancestors = "dra-mkn",
ancestors = "dra-mkn",
translit = {
-- Knda translit in [[Module:scripts/data]]
Knda = "kn-translit",
},
}
}


Line 1,277: Line 1,206:
Kore = "ko-translit",
Kore = "ko-translit",
},
},
entry_name = {
-- Kore strip_diacritics in [[Module:scripts/data]]
Kore = s["Kore-entryname"],
},
}
}


Line 1,287: Line 1,214:
"ssa-sah",
"ssa-sah",
"Latn, Arab",
"Latn, Arab",
-- the sortkey and entry_name are only for standard Kanuri; when dialectal entries get added, someone will have to work out how the dialects should be represented orthographically
-- the sortkey and strip_diacritics are only for standard Kanuri; when dialectal entries get added, someone will have to work out how the dialects should be represented orthographically
entry_name = {
strip_diacritics = {
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.breve}
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.breve}
},
},
Line 1,307: Line 1,234:
["ks-Arab"] = "ks-Arab-translit",
["ks-Arab"] = "ks-Arab-translit",
Deva = "ks-Deva-translit",
Deva = "ks-Deva-translit",
Shrd = "Shrd-translit",
-- Shrd translit in [[Module:scripts/data]]
},
},
}
}


-- "kv" IS TREATED AS "koi", "kpv", SEE WT:LT
-- "kv" is treated as "koi", "kpv", see [[WT:LT]]


m["kw"] = {
m["kw"] = {
Line 1,346: Line 1,273:
397,
397,
"itc-laf",
"itc-laf",
"Latn",
"Latn, Ital",
ancestors = "itc-ola",
ancestors = "itc-ola",
-- Ital translit in [[Module:scripts/data]] (NOTE: formerly not present, probably an accidental omission)
display_text = {
display_text = {
Latn = s["itc-Latn-displaytext"]
Latn = s["itc-Latn-displaytext"]
},
},
entry_name = {
strip_diacritics = {
Latn = s["itc-Latn-entryname"]
Latn = s["itc-Latn-stripdiacritics"]
},
},
sort_key = {
sort_key = {
Latn = s["itc-Latn-sortkey"]
Latn = s["itc-Latn-sortkey"]
},
},
standardChars = {
standard_chars = {
Latn = "AaBbCcDdEeFfGgHhIiLlMmNnOoPpQqRrSsTtUuVvXx",
Latn = "AaBbCcDdEeFfGgHhIiLlMmNnOoPpQqRrSsTtUuVvXx",
c.punc
c.punc
Line 1,382: Line 1,310:
"bnt-nyg",
"bnt-nyg",
"Latn",
"Latn",
entry_name = {remove_diacritics = c.acute .. c.circ},
strip_diacritics = {remove_diacritics = c.acute .. c.circ},
sort_key = {
sort_key = {
from = {"ŋ"},
from = {"ŋ"},
Line 1,413: Line 1,341:
9211,
9211,
"tai-swe",
"tai-swe",
"Laoo",
"Laoo", -- also Tai Noi/Lao Buhan script
translit = "lo-translit",
translit = "lo-translit",
sort_key = "Laoo-sortkey",
sort_key = "Laoo-sortkey",
standardChars = "0-9ກຂຄງຈຊຍດຕຖທນບປຜຝພຟມຢຣລວສຫອຮຯ-ໝ" .. c.punc,
standard_chars = "0-9ກຂຄງຈຊຍດຕຖທນບປຜຝພຟມຢຣລວສຫອຮຯ-ໝ" .. c.punc,
}
}


Line 1,426: Line 1,354:
ancestors = "olt",
ancestors = "olt",
display_text = "lt-common",
display_text = "lt-common",
entry_name = "lt-common",
strip_diacritics = "lt-common",
sort_key = "lt-common",
sort_key = "lt-common",
standardChars = "AaĄąBbCcČčDdEeĘęĖėFfGgHhIiĮįYyJjKkLlMmNnOoPpRrSsŠšTtUuŲųŪūVvZzŽž" .. c.punc,
standard_chars = "AaĄąBbCcČčDdEeĘęĖėFfGgHhIiĮįYyJjKkLlMmNnOoPpRrSsŠšTtUuŲųŪūVvZzŽž" .. c.punc,
}
}


Line 1,443: Line 1,371:
"bat-eas",
"bat-eas",
"Latn",
"Latn",
entry_name = {
strip_diacritics = {
-- This attempts to convert vowels with tone marks to vowels either with or without macrons. Specifically, there should be no macrons if the vowel is part of a diphthong (including resonant diphthongs such as pìrksts -> pirksts, not #pīrksts). What we do is first convert the vowel + tone mark to a vowel + tilde in a decomposed fashion, then remove the tilde in diphthongs, then convert the remaining vowel + tilde sequences to macroned vowels, then delete any other tilde. We leave already-macroned vowels alone, since e.g. both ar and ār occur before consonants. FIXME: This still might not be sufficient.
-- This attempts to convert vowels with tone marks to vowels either with or without macrons. Specifically, there should be no macrons if the vowel is part of a diphthong (including resonant diphthongs such as pìrksts -> pirksts, not #pīrksts). What we do is first convert the vowel + tone mark to a vowel + tilde in a decomposed fashion, then remove the tilde in diphthongs, then convert the remaining vowel + tilde sequences to macroned vowels, then delete any other tilde. We leave already-macroned vowels alone, since e.g. both ar and ār occur before consonants. FIXME: This still might not be sufficient.
from = {"([Ee])" .. c.cedilla, "[" .. c.grave .. c.circ .. c.tilde .."]", "([aAeEiIoOuU])" .. c.tilde .."?([lrnmuiLRNMUI])" .. c.tilde .. "?([^aAeEiIoOuU])", "([aAeEiIoOuU])" .. c.tilde .."?([lrnmuiLRNMUI])" .. c.tilde .."?$", "([iI])" .. c.tilde .. "?([eE])" .. c.tilde .. "?", "([aAeEiIuU])" .. c.tilde, c.tilde},
from = {"([Ee])" .. c.cedilla, "[" .. c.grave .. c.circ .. c.tilde .."]", "([aAeEiIoOuU])" .. c.tilde .."?([lrnmuiLRNMUI])" .. c.tilde .. "?([^aAeEiIoOuU])", "([aAeEiIoOuU])" .. c.tilde .."?([lrnmuiLRNMUI])" .. c.tilde .."?$", "([iI])" .. c.tilde .. "?([eE])" .. c.tilde .. "?", "([aAeEiIuU])" .. c.tilde, c.tilde},
Line 1,452: Line 1,380:
to = {"a" .. p[1], "c" .. p[1], "e" .. p[1], "g" .. p[1], "i" .. p[1], "k" .. p[1], "l" .. p[1], "n" .. p[1], "s" .. p[1], "u" .. p[1], "z" .. p[1]}
to = {"a" .. p[1], "c" .. p[1], "e" .. p[1], "g" .. p[1], "i" .. p[1], "k" .. p[1], "l" .. p[1], "n" .. p[1], "s" .. p[1], "u" .. p[1], "z" .. p[1]}
},
},
standardChars = "AaĀāBbCcČčDdEeĒēFfGgĢģHhIiĪīJjKkĶķLlĻļMmNnŅņOoPpRrSsŠšTtUuŪūVvZzŽž" .. c.punc,
standard_chars = "AaĀāBbCcČčDdEeĒēFfGgĢģHhIiĪīJjKkĶķLlĻļMmNnŅņOoPpRrSsŠšTtUuŪūVvZzŽž" .. c.punc,
}
}


Line 1,474: Line 1,402:


m["mi"] = {
m["mi"] = {
"Maori",
"Māori",
36451,
36451,
"poz-pep",
"poz-pep",
Line 1,481: Line 1,409:
remove_diacritics = c.macron,
remove_diacritics = c.macron,
from = {"ng", "wh"},
from = {"ng", "wh"},
to = {"z" .. p[1], "z" .. p[2]}
to = {"n" .. p[1], "w" .. p[1]}
},
},
}
}
Line 1,492: Line 1,420:
ancestors = "cu",
ancestors = "cu",
translit = {
translit = {
Cyrl = "mk-translit"
Cyrl = "mk-translit",
},
-- FIXME: formerly no translit specified for Polyt; unclear if the default [[Module:grc-translit]] is
display_text = {
-- acceptable, so we disable it for now
Polyt = s["Polyt-displaytext"]
Polyt = false,
},
},
entry_name = {
strip_diacritics = {
Cyrl = {
Cyrl = {
remove_diacritics = c.acute,
remove_diacritics = c.acute,
remove_exceptions = {"Ѓ", "ѓ", "Ќ", "ќ"}
remove_exceptions = {"Ѓ", "ѓ", "Ќ", "ќ"}
},
},
Polyt = s["Polyt-entryname"],
},
},
sort_key = {
sort_key = {
Line 1,511: Line 1,438:
to = {"д" .. p[1], "з" .. p[1], "и" .. p[1], "л" .. p[1], "н" .. p[1], "т" .. p[1], "ч" .. p[1]}
to = {"д" .. p[1], "з" .. p[1], "и" .. p[1], "л" .. p[1], "н" .. p[1], "т" .. p[1], "ч" .. p[1]}
},
},
Polyt = s["Polyt-sortkey"],
},
},
standardChars = {
-- Polyt display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
standard_chars = {
Cyrl = "АаБбВвГгДдЃѓЕеЖжЗзЅѕИиЈјКкЛлЉљМмНнЊњОоПпРрСсТтЌќУуФфХхЦцЧчЏџШш",
Cyrl = "АаБбВвГгДдЃѓЕеЖжЗзЅѕИиЈјКкЛлЉљМмНнЊњОоПпРрСсТтЌќУуФфХхЦцЧчЏџШш",
c.punc
c.punc
Line 1,524: Line 1,451:
"dra-mal",
"dra-mal",
"Mlym",
"Mlym",
translit = "ml-translit",
override_translit = true,
override_translit = true,
-- Mlym translit in [[Module:scripts/data]]
}
}


Line 1,536: Line 1,463:
translit = {
translit = {
Cyrl = "mn-translit",
Cyrl = "mn-translit",
Mong = "Mong-translit",
-- Mong translit in [[Module:scripts/data]]
},
},
override_translit = true,
override_translit = true,
display_text = {
-- Mong display_text and strip_diacritics in [[Module:scripts/data]]
Mong = s["Mong-displaytext"]
strip_diacritics = {
},
entry_name = {
Cyrl = {remove_diacritics = c.grave .. c.acute},
Cyrl = {remove_diacritics = c.grave .. c.acute},
Mong = s["Mong-entryname"],
},
},
sort_key = {
sort_key = {
Line 1,553: Line 1,477:
},
},
},
},
standardChars = {
standard_chars = {
Cyrl = "АаБбВвГгДдЕеЁёЖжЗзИиЙйЛлМмНнОоӨөРрСсТтУуҮүХхЦцЧчШшЫыЬьЭэЮюЯя—",
Cyrl = "АаБбВвГгДдЕеЁёЖжЗзИиЙйЛлМмНнОоӨөРрСсТтУуҮүХхЦцЧчШшЫыЬьЭэЮюЯя—",
Brai = c.braille,
Brai = c.braille,
Line 1,560: Line 1,484:
}
}


-- "mo" IS TREATED AS "ro", SEE WT:LT
-- "mo" is treated as "ro", see [[WT:LT]]


m["mr"] = {
m["mr"] = {
Line 1,572: Line 1,496:
Modi = "mr-Modi-translit",
Modi = "mr-Modi-translit",
},
},
entry_name = {
strip_diacritics = {
Deva = {
Deva = {
from = {"च़", "ज़", "झ़"},
from = {"च़", "ज़", "झ़"},
Line 1,586: Line 1,510:
"Latn, ms-Arab",
"Latn, ms-Arab",
ancestors = "ms-cla",
ancestors = "ms-cla",
standardChars = {
standard_chars = {
Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz",
Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz",
c.punc
c.punc
Line 1,601: Line 1,525:
to = {"’"}
to = {"’"}
},
},
entry_name = {
strip_diacritics = {
from = {"’"},
from = {"’"},
to = {"'"},
to = {"'"},
Line 1,651: Line 1,575:
ancestors = "gmq-mno, da",  -- da as an (but not the) ancestor of nb was agreed on - do not change without discussion
ancestors = "gmq-mno, da",  -- da as an (but not the) ancestor of nb was agreed on - do not change without discussion
sort_key = s["no-sortkey"],
sort_key = s["no-sortkey"],
standardChars = s["no-standardchars"],
standard_chars = s["no-standardchars"],
}
}


Line 1,659: Line 1,583:
"bnt-ngu",
"bnt-ngu",
"Latn",
"Latn",
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
}
}


Line 1,688: Line 1,612:
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.diaer .. c.ringabove .. c.cedilla .. "'"},
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.diaer .. c.ringabove .. c.cedilla .. "'"},
},
},
standardChars = {
standard_chars = {
Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz",
Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZzÄäËëÏïÖöÜü",
Brai = c.braille,
Brai = c.braille,
c.punc
c.punc
Line 1,701: Line 1,625:
"Latn",
"Latn",
ancestors = "gmq-mno",
ancestors = "gmq-mno",
entry_name = {
strip_diacritics = {
remove_diacritics = c.grave .. c.acute,
remove_diacritics = c.grave .. c.acute,
},
},
sort_key = s["no-sortkey"],
sort_key = s["no-sortkey"],
standardChars = s["no-standardchars"],
standard_chars = s["no-standardchars"],
}
}


Line 1,715: Line 1,639:
ancestors = "gmq-mno",
ancestors = "gmq-mno",
sort_key = s["no-sortkey"],
sort_key = s["no-sortkey"],
standardChars = s["no-standardchars"],
standard_chars = s["no-standardchars"],
}
}


Line 1,723: Line 1,647:
"bnt-ngu",
"bnt-ngu",
"Latn",
"Latn",
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
}
}


Line 1,751: Line 1,675:
"bnt-nys",
"bnt-nys",
"Latn",
"Latn",
entry_name = {remove_diacritics = c.acute .. c.circ},
strip_diacritics = {remove_diacritics = c.acute .. c.circ},
sort_key = {
sort_key = {
from = {"ng'"},
from = {"ng'"},
Line 1,764: Line 1,688:
"Latn, Hebr",
"Latn, Hebr",
ancestors = "pro",
ancestors = "pro",
display_text = {
Hebr = "Hebr-common",
},
entry_name = {
Hebr = "Hebr-common",
},
sort_key = {
sort_key = {
Latn = {
Latn = {
Line 1,776: Line 1,694:
to = {"%1h"}
to = {"%1h"}
},
},
Hebr = "Hebr-common",
},
},
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
}
}


Line 1,817: Line 1,735:
translit = {
translit = {
Cyrl = "os-translit",
Cyrl = "os-translit",
Geor = "Geor-translit",
-- Geor translit in [[Module:scripts/data]]
},
},
override_translit = true,
override_translit = true,
Line 1,830: Line 1,748:
},
},
},
},
entry_name = {
strip_diacritics = {
Cyrl = {
Cyrl = {
remove_diacritics = c.grave .. c.acute,
remove_diacritics = c.grave .. c.acute,
Line 1,854: Line 1,772:
"inc-pan",
"inc-pan",
"Guru, pa-Arab",
"Guru, pa-Arab",
ancestors = "inc-opa",
translit = {
translit = {
Guru = "Guru-translit",
Guru = "Guru-translit",
["pa-Arab"] = "pa-Arab-translit",
["pa-Arab"] = "pa-Arab-translit",
},
},
entry_name = {
strip_diacritics = {
["pa-Arab"] = {
["pa-Arab"] = {
remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.nunghunna,
remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.nunghunna,
Line 1,875: Line 1,792:
ancestors = "sa",
ancestors = "sa",
translit = {
translit = {
Brah = "Brah-translit",
-- Brah translit in [[Module:scripts/data]]
Deva = "sa-translit",
Deva = "sa-translit",
Beng = "pi-translit",
Beng = "pi-translit",
Line 1,886: Line 1,803:
Cakm = "Cakm-translit",
Cakm = "Cakm-translit",
},
},
entry_name = {
strip_diacritics = {
Thai = {
Thai = {
from = {"ึ", u(0xF700), u(0xF70F)}, -- FIXME: Not clear what's going on with the PUA characters here.
from = {"ึ", u(0xF700), u(0xF70F)}, -- FIXME: Not clear what's going on with the PUA characters here.
to = {"ิํ", "ฐ", "ญ"}
to = {"ิํ", "ฐ", "ญ"}
},
},
remove_diacritics = c.VS01
Mymr = {
remove_diacritics = c.VS01,
},
},
},
sort_key = { -- FIXME: This needs to be converted into the current standardized format.
sort_key = { -- FIXME: This needs to be converted into the current standardized format.
Line 1,909: Line 1,828:
to = {"a" .. p[1], "c" .. p[1], "e" .. p[1], "l" .. p[1], "n" .. p[1], "o" .. p[1], "s" .. p[1], "z" .. p[1], "z" .. p[2]}
to = {"a" .. p[1], "c" .. p[1], "e" .. p[1], "l" .. p[1], "n" .. p[1], "o" .. p[1], "s" .. p[1], "z" .. p[1], "z" .. p[2]}
},
},
standardChars = "AaĄąBbCcĆćDdEeĘęFfGgHhIiJjKkLlŁłMmNnŃńOoÓóPpRrSsŚśTtUuWwYyZzŹźŻż" .. c.punc,
standard_chars = "AaĄąBbCcĆćDdEeĘęFfGgHhIiJjKkLlŁłMmNnŃńOoÓóPpRrSsŚśTtUuWwYyZzŹźŻż" .. c.punc,
}
}


Line 1,917: Line 1,836:
"ira-pat",
"ira-pat",
"ps-Arab",
"ps-Arab",
entry_name = {remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.zwarakay .. c.superalef},
strip_diacritics = {remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.zwarakay .. c.superalef},
}
}


Line 1,932: Line 1,851:
},
},
},
},
standardChars = {
standard_chars = {
Latn = "AaÁáÂâÃãBbCcÇçDdEeÉéÊêFfGgHhIiÍíJjLlMmNnOoÓóÔôÕõPpQqRrSsTtUuÚúVvXxZz",
Latn = "AaÁáÂâÃãBbCcÇçDdEeÉéÊêFfGgHhIiÍíJjLlMmNnOoÓóÔôÕõPpQqRrSsTtUuÚúVvXxZz",
Brai = c.braille,
Brai = c.braille,
Line 1,947: Line 1,866:


m["rm"] = {
m["rm"] = {
"Romansch",
"Romansh",
13199,
13199,
"roa-rhe",
"roa-rhe",
ancestors = "rm-old",
"Latn",
"Latn",
sort_key = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.small_e},
sort_key = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.small_e},
Line 1,973: Line 1,893:
},
},
},
},
standardChars = {
-- Cyrs strip_diacritics, sort_key in [[Module:scripts/data]]; presumably not present
standard_chars = {
Latn = "AaĂăÂâBbCcDdEeFfGgHhIiÎîJjLlMmNnOoPpRrSsȘșTtȚțUuVvXxZz",
Latn = "AaĂăÂâBbCcDdEeFfGgHhIiÎîJjLlMmNnOoPpRrSsȘșTtȚțUuVvXxZz",
Cyrl = "АаБбВвГгДдЕеЖжӁӂЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЫыЬьЭэЮюЯя",
Cyrl = "АаБбВвГгДдЕеЖжӁӂЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЫыЬьЭэЮюЯя",
Line 1,995: Line 1,916:
},
},
},
},
entry_name = {
strip_diacritics = {
Cyrl = {
Cyrl = {
remove_diacritics = c.grave .. c.acute .. c.diaer,
remove_diacritics = c.grave .. c.acute .. c.diaer,
Line 2,006: Line 1,927:
Cyrl = {
Cyrl = {
remove_diacritics = c.grave .. c.acute .. c.diaer,
remove_diacritics = c.grave .. c.acute .. c.diaer,
remove_exceptions = {"ё", "ѣ̈", "я̈"},
from = {
from = {
"ё", "ѣ̈", "я̈", -- 2 chars
"і", "ѣ", "ѳ", "ѵ"
"і", "ѣ", "ѳ", "ѵ" -- 1 char
},
},
to = {
to = {
"е" .. p[1], "ь" .. p[2], "я" .. p[1],
"и" .. p[1], "ь" .. p[1], "я" .. p[2], "я" .. p[3]
"и" .. p[1], "ь" .. p[1], "я" .. p[2], "я" .. p[3]
}
}
},
},
},
},
standardChars = {
standard_chars = {
Cyrl = "АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя—",
Cyrl = "АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя—",
Brai = c.braille,
Brai = c.braille,
Line 2,029: Line 1,947:
"bnt-glb",
"bnt-glb",
"Latn",
"Latn",
entry_name = {remove_diacritics = c.acute .. c.circ .. c.macron .. c.caron},
strip_diacritics = {remove_diacritics = c.acute .. c.circ .. c.macron .. c.caron},
}
}


Line 2,040: Line 1,958:
Beng = "sa-Beng-translit",
Beng = "sa-Beng-translit",
["as-Beng"] = "sa-Beng-translit",
["as-Beng"] = "sa-Beng-translit",
Brah = "Brah-translit",
-- Brah translit in [[Module:scripts/data]]
Deva = "sa-translit",
Deva = "sa-translit",
Gujr = "sa-Gujr-translit",
Gujr = "sa-Gujr-translit",
Line 2,052: Line 1,970:
Mlym = "sa-Mlym-translit",
Mlym = "sa-Mlym-translit",
Modi = "sa-Modi-translit",
Modi = "sa-Modi-translit",
Mong = "Mong-translit",
-- Mong, mnc-Mong, xwo-Mong translit in [[Module:scripts/data]]
["mnc-Mong"] = "mnc-translit",
-- NOTE: Formerly used xal-translit for transliterating xwo-Mong but that only handles Cyrillic; it has
["xwo-Mong"] = "xal-translit",
-- code to transliterate xwo-Mong but it's broken so I've replaced it with the default xwo-translit.
Mymr = "pi-translit",
Mymr = "pi-translit",
Orya = "sa-Orya-translit",
Orya = "sa-Orya-translit",
Shrd = "Shrd-translit",
-- Shrd translit in [[Module:scripts/data]]
Sidd = "Sidd-translit",
-- Sidd translit in [[Module:scripts/data]]
Sinh = "si-translit",
Sinh = "si-translit",
Taml = "sa-Taml-translit",
Taml = "sa-Taml-translit",
Telu = "sa-Telu-translit",
Telu = "sa-Telu-translit",
Thai = "pi-translit",
Thai = "pi-translit",
Tibt = "Tibt-translit",
-- Tibt translit in [[Module:scripts/data]]
},
},
display_text = {
-- Mong display_text and strip_diacritics in [[Module:scripts/data]]
Mong = s["Mong-displaytext"],
-- Tibt display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
Tibt = s["Tibt-displaytext"],
strip_diacritics = {
},
Deva = s["sa-Deva-stripdiacritics"],
entry_name = {
Mymr = {
Mong = s["Mong-entryname"],
remove_diacritics = c.VS01,
Tibt = s["Tibt-entryname"],
},
Thai = {
Thai = {
from = {"ึ", u(0xF700), u(0xF70F)}, -- FIXME: Not clear what's going on with the PUA characters here.
from = {"ึ", u(0xF700), u(0xF70F)}, -- FIXME: Not clear what's going on with the PUA characters here.
to = {"ิํ", "ฐ", "ญ"}
to = {"ิํ", "ฐ", "ญ"}
},
},
remove_diacritics = c.VS01 .. c.udatta .. c.anudatta
},
},
sort_key = {
sort_key = {
Tibt = "Tibt-sortkey",
Deva = s["sa-Deva-stripdiacritics"], -- until we have a proper Sanskrit sorting algorithm.
{ -- FIXME: This needs to be converted into the current standardized format.
Lana = { -- Tai Tham
from = {"ā", "ī", "ū", "ḍ", "ḷ", "ḹ", "m[" .. c.dotabove .. c.dotbelow .. "]", "ṅ", "ñ", "ṇ", "ṛ", "ṝ", "ś", "ṣ", "ṭ", "([เโไ])([ก-ฮ])", "([ເໂໄ])([ກ-ຮ])", "ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ", u(0xFE00), u(0x200D)},
from = {"ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ"},
to = {"a~", "i~", "u~", "d~", "l~", "l~~", "m~", "n~", "n~~", "n~~~", "r~", "r~~", "s~", "s~~", "t~", "%2%1", "%2%1", "ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ"},
to = {"ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ"},
},
Laoo = "Laoo-sortkey",
Latn = {
from = {"ā", "ī", "ū", "ḍ", "ḷ", "ḹ", "m[" .. c.dotabove .. c.dotbelow .. "]", "ṅ", "ñ", "ṇ", "ṛ", "ṝ", "ś", "ṣ", "ṭ"},
to = {"a~", "i~", "u~", "d~", "l~", "l~~", "m~", "n~", "n~~", "n~~~", "r~", "r~~", "s~", "s~~", "t~"},
},
Mymr = {
remove_diacritics = c.VS01,
},
},
Thai = "Thai-sortkey",
-- FIXME: The previous sort key which mixed all scripts removed ZWJ; I don't know which script(s) this was
-- intended for and there are no other languages which remove it in the sort key AFAIK. If it needs to be
-- removed, specify the script(s) it needs to be removed under or add handling for the "all" script that applies
-- regardless of script.
--all = {
-- remove_diacritics = c.ZWJ,
--},
},
},
}
}
Line 2,092: Line 2,025:
"roa-sou",
"roa-sou",
"Latn",
"Latn",
ancestors = "sc-old",
}
}


Line 2,100: Line 2,034:
"sd-Arab, Deva, Sind, Khoj",
"sd-Arab, Deva, Sind, Khoj",
translit = {
translit = {
Sind = "Sind-translit"
Sind = "Sind-translit",
["sd-Arab"] = "sd-Arab-translit"
},
},
entry_name = {
strip_diacritics = {
["sd-Arab"] = {
["sd-Arab"] = {
remove_diacritics = c.kashida .. c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.superalef,
remove_diacritics = c.kashida .. c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.superalef,
Line 2,120: Line 2,055:
to = {"ˈ"}
to = {"ˈ"}
},
},
entry_name = {remove_diacritics = c.macron .. c.dotbelow .. "'ˈ"},
strip_diacritics = {remove_diacritics = c.macron .. c.dotbelow .. "'ˈ"},
sort_key = {
sort_key = {
from = {"á", "č", "đ", "ŋ", "š", "ŧ", "ž"},
from = {"á", "č", "đ", "ŋ", "š", "ŧ", "ž"},
to = {"a" .. p[1], "c" .. p[1], "d" .. p[1], "n" .. p[1], "s" .. p[1], "t" .. p[1], "z" .. p[1]}
to = {"a" .. p[1], "c" .. p[1], "d" .. p[1], "n" .. p[1], "s" .. p[1], "t" .. p[1], "z" .. p[1]}
},
},
standardChars = "AaÁáBbCcČčDdĐđEeFfGgHhIiJjKkLlMmNnŊŋOoPpRrSsŠšTtŦŧUuVvZzŽž" .. c.punc,
standard_chars = "AaÁáBbCcČčDdĐđEeFfGgHhIiJjKkLlMmNnŊŋOoPpRrSsŠšTtŦŧUuVvZzŽž" .. c.punc,
}
}


Line 2,140: Line 2,075:
9301,
9301,
"zls",
"zls",
"Latn, Cyrl, Glag",
"Latn, Cyrl, Glag, Arab",
ietf_subtag = "hbs", -- ISO 639-3 code, since "sh" is deprecated from ISO 639-1
ietf_subtag = "hbs", -- ISO 639-3 code, since "sh" is deprecated from ISO 639-1
wikimedia_codes = "sh, bs, hr, sr",
wikimedia_codes = "sh, bs, hr, sr",
entry_name = {
strip_diacritics = {
Latn = {
Latn = {
remove_diacritics = c.grave .. c.acute .. c.tilde .. c.macron .. c.dgrave .. c.invbreve,
remove_diacritics = c.grave .. c.acute .. c.tilde .. c.macron .. c.dgrave .. c.invbreve,
Line 2,167: Line 2,102:
},
},
},
},
standardChars = {
standard_chars = {
Latn = "AaBbCcČčĆćDdĐđEeFfGgHhIiJjKkLlMmNnOoPpRrSsŠšTtUuVvZzŽž",
Latn = "AaBbCcČčĆćDdĐđEeFfGgHhIiJjKkLlMmNnOoPpRrSsŠšTtUuVvZzŽž",
Cyrl = "АаБбВвГгДдЂђЕеЖжЗзИиЈјКкЛлЉљМмНнЊњОоПпРрСсТтЋћУуФфХхЦцЧчЏџШш",
Cyrl = "АаБбВвГгДдЂђЕеЖжЗзИиЈјКкЛлЉљМмНнЊњОоПпРрСсТтЋћУуФфХхЦцЧчЏџШш",
Line 2,190: Line 2,125:
ancestors = "zlw-osk",
ancestors = "zlw-osk",
sort_key = {remove_diacritics = c.acute .. c.circ .. c.diaer .. c.caron},
sort_key = {remove_diacritics = c.acute .. c.circ .. c.diaer .. c.caron},
standardChars = "AaÁáÄäBbCcČčDdĎďEeÉéFfGgHhIiÍíJjKkLlĹ弾MmNnŇňOoÓóÔôPpRrŔŕSsŠšTtŤťUuÚúVvYyÝýZzŽž" .. c.punc,
standard_chars = "AaÁáÄäBbCcČčDdĎďEeÉéFfGgHhIiÍíJjKkLlĹ弾MmNnŇňOoÓóÔôPpRrŔŕSsŠšTtŤťUuÚúVvYyÝýZzŽž" .. c.punc,
}
}


Line 2,198: Line 2,133:
"zls",
"zls",
"Latn",
"Latn",
entry_name = {
strip_diacritics = {
remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.dgrave .. c.invbreve .. c.dotbelow,
remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.dgrave .. c.invbreve .. c.dotbelow,
remove_exceptions = {"Ć", "ć", "Ǵ", "ǵ", "Ś", "ś", "Ź", "ź"},
remove_exceptions = {"Ć", "ć", "Ǵ", "ǵ", "Ś", "ś", "Ź", "ź"},
Line 2,210: Line 2,145:
to = {"a" .. p[1], "c" .. p[1], "c" .. p[2], "d" .. p[1], "e", "e" .. p[1], "g" .. p[1], "g" .. p[2], "i" .. p[1], "l", "o" .. p[1], "s" .. p[1], "s" .. p[2], "u" .. p[1], "z" .. p[1], "z" .. p[2]},
to = {"a" .. p[1], "c" .. p[1], "c" .. p[2], "d" .. p[1], "e", "e" .. p[1], "g" .. p[1], "g" .. p[2], "i" .. p[1], "l", "o" .. p[1], "s" .. p[1], "s" .. p[2], "u" .. p[1], "z" .. p[1], "z" .. p[2]},
},
},
standardChars = "AaBbCcČčDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsŠšTtUuVvZzŽž" .. c.punc,
standard_chars = "AaBbCcČčDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsŠšTtUuVvZzŽž" .. c.punc,
}
}


Line 2,225: Line 2,160:
"bnt-sho",
"bnt-sho",
"Latn",
"Latn",
entry_name = {remove_diacritics = c.acute},
strip_diacritics = {remove_diacritics = c.acute},
}
}


Line 2,233: Line 2,168:
"cus-som",
"cus-som",
"Latn, Arab, Osma",
"Latn, Arab, Osma",
entry_name = {
strip_diacritics = {
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ}
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ}
},
},
Line 2,245: Line 2,180:
translit = {
translit = {
Elba = "Elba-translit",
Elba = "Elba-translit",
Vith = "Vith-translit",
},
},
display_text = {
-- Grek display_text, sort_key in [[Module:scripts/data]]
Grek = s["Grek-displaytext"],
strip_diacritics = {
},
entry_name = {
Latn = {
Latn = {
remove_diacritics = c.acute,
remove_diacritics = c.acute .. c.circ .. c.macron,
from = {'^[ie] (%w)', '^të (%w)'}, to = {'%1', '%1'},
from = {'^[ie] (%w)', '^të (%w)'}, to = {'%1', '%1'},
},
},
Grek = { -- Diacritic removal from Grek-entryname excluded.
Grek = { -- Diacritic removal from Grek-stripdiacritics excluded.
from = s["Grek-entryname"].from,
from = m_langdata.chars_substitutions["Grek-stripdiacritics"].from,
to = s["Grek-entryname"].to,
to = m_langdata.chars_substitutions["Grek-stripdiacritics"].to,
},
},
},
},
sort_key = {
sort_key = {
Latn = {
Latn = {
remove_diacritics = c.acute .. c.circ .. c.tilde .. c.breve .. c.caron,
remove_diacritics = c.acute .. c.circ .. c.macron .. c.tilde .. c.breve .. c.caron,
from = {'^[ie] (%w)', '^të (%w)', 'ç', 'dh', 'ë', 'gj', 'll', 'nj', 'rr', 'sh', 'th', 'xh', 'zh'},
from = {'^[ie] (%w)', '^të (%w)', 'ç', 'dh', 'ë', 'gj', 'll', 'nj', 'rr', 'sh', 'th', 'xh', 'zh'},
to = {'%1', '%1', 'c'..p[1], 'd'..p[1], 'e'..p[1], 'g'..p[1], 'l'..p[1], 'n'..p[1], 'r'..p[1], 's'..p[1], 't'..p[1], 'x'..p[1], 'z'..p[1]},
to = {'%1', '%1', 'c'..p[1], 'd'..p[1], 'e'..p[1], 'g'..p[1], 'l'..p[1], 'n'..p[1], 'r'..p[1], 's'..p[1], 't'..p[1], 'x'..p[1], 'z'..p[1]},
}
}
-- TODO: Grek
-- TODO: Grek if the default sort key is unsuitable
},
},
standardChars = {
standard_chars = {
Latn = "AaBbCcÇçDdEeËëFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvXxYyZz",
Latn = "AaBbCcÇçDdEeËëFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvXxYyZz",
c.punc
c.punc
Line 2,278: Line 2,212:
"bnt-ngu",
"bnt-ngu",
"Latn",
"Latn",
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
}
}


Line 2,286: Line 2,220:
"bnt-sts",
"bnt-sts",
"Latn",
"Latn",
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
}
}


Line 2,296: Line 2,230:
ancestors = "osn",
ancestors = "osn",
translit = {
translit = {
Sund = "su-translit"
Sund = "Sund-translit"
},
},
}
}
Line 2,312: Line 2,246:
to = {"o", "ae", "oe", "ss", "z" .. p[1], "ä", "ö"}
to = {"o", "ae", "oe", "ss", "z" .. p[1], "ä", "ö"}
},
},
standardChars = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvXxYyÅåÄäÖö" .. c.punc,
standard_chars = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvXxYyÅåÄäÖö" .. c.punc,
}
}


Line 2,357: Line 2,291:
},
},
override_translit = true,
override_translit = true,
entry_name = {
strip_diacritics = {
Cyrl = s["tg-entryname"],
Cyrl = s["tg-stripdiacritics"],
Latn = s["tg-entryname"],
Latn = s["tg-stripdiacritics"],
},
},
sort_key = {
sort_key = {
Line 2,395: Line 2,329:
"trk-ogz",
"trk-ogz",
"Latn, Cyrl, Arab",
"Latn, Cyrl, Arab",
entry_name = {
strip_diacritics = {
Latn = s["tk-entryname"],
Latn = s["tk-stripdiacritics"],
Cyrl = s["tk-entryname"],
Cyrl = s["tk-stripdiacritics"],
},
},
sort_key = {
sort_key = {
Line 2,409: Line 2,343:
},
},
},
},
ancestors = "trk-eog",
}
}


Line 2,420: Line 2,355:
},
},
override_translit = true,
override_translit = true,
entry_name = {
strip_diacritics = {
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ}
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ}
},
},
standardChars = {
standard_chars = {
Latn = "AaBbKkDdEeGgHhIiLlMmNnOoPpRrSsTtUuWwYy",
Latn = "AaBbKkDdEeGgHhIiLlMmNnOoPpRrSsTtUuWwYy",
c.punc
c.punc
Line 2,444: Line 2,379:
"poz-ton",
"poz-ton",
"Latn",
"Latn",
entry_name = {remove_diacritics = c.acute},
strip_diacritics = {remove_diacritics = c.acute},
sort_key = {remove_diacritics = c.macron},
sort_key = {remove_diacritics = c.macron},
}
}
Line 2,468: Line 2,403:
}
}
},
},
standardChars = "AaÂâBbCcÇçDdEeFfGgĞğHhIıİiÎîJjKkLlMmNnOoÖöPpRrSsŞşTtUuÛûÜüVvYyZz" .. c.punc,
standard_chars = "AaÂâBbCcÇçDdEeFfGgĞğHhIıİiÎîJjKkLlMmNnOoÖöPpRrSsŞşTtUuÛûÜüVvYyZz" .. c.punc,
}
}


Line 2,484: Line 2,419:
"Cyrl, Latn, tt-Arab",
"Cyrl, Latn, tt-Arab",
translit = {
translit = {
Cyrl = "tt-translit"
Cyrl = "tt-translit",
["tt-Arab"] = "tt-translit"
},
},
override_translit = true,
--override_translit = true, -- enable override until Module code can detect Russian loans such as [[аэропорт]]
dotted_dotless_i = true,
dotted_dotless_i = true,
sort_key = {
sort_key = {
Line 2,506: Line 2,442:
}
}


-- "tw" IS TREATED AS "ak", SEE WT:LT
-- "tw" is treated as "ak", see [[WT:LT]]


m["ty"] = {
m["ty"] = {
Line 2,533: Line 2,469:
"zle",
"zle",
"Cyrl",
"Cyrl",
ancestors = "zle-ouk",
ancestors = "zle-muk",
translit = "uk-translit",
translit = "uk-translit",
entry_name = {remove_diacritics = c.grave .. c.acute},
strip_diacritics = {remove_diacritics = c.grave .. c.acute},
sort_key = {
sort_key = {
remove_diacritics = c.grave .. c.acute,
remove_diacritics = c.grave .. c.acute,
Line 2,547: Line 2,483:
}
}
},
},
standardChars = "АаБбВвГгДдЕеЄєЖжЗзИиІіЇїЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЬьЮюЯя" .. c.punc:gsub("'", ""),  -- Exclude apostrophe.
standard_chars = "АаБбВвГгДдЕеЄєЖжЗзИиІіЇїЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЬьЮюЯя" .. c.punc:gsub("'", ""),  -- Exclude apostrophe.
}
}


Line 2,558: Line 2,494:
["ur-Arab"] = "ur-translit"
["ur-Arab"] = "ur-translit"
},
},
display_text = {
strip_diacritics = {
Hebr = "Hebr-common",
},
entry_name = {
["ur-Arab"] = {
["ur-Arab"] = {
-- character "ۂ" code U+06C2 to "ه" and "هٔ"(U+0647 + U+0654) to "ه"; hamzatu l-waṣli to a regular alif
-- character "ۂ" code U+06C2 to "ه" and "هٔ" (U+0647 + U+0654) to "ه"; hamzatu l-waṣli to a regular alif
from = {"هٔ", "ۂ", "ٱ"},
from = {"هٔ", "ۂ", "ٱ"},
to = {"ہ", "ہ", "ا"},
to = {"ہ", "ہ", "ا"},
remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.nunghunna .. c.superalef
remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.nunghunna .. c.superalef
},
},
Hebr = "Hebr-common",
},
sort_key = {
Hebr = "Hebr-common",
},
},
standardChars = {
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
standard_chars = {
["ur-Arab"] = "ایببپتثجچحخدذرزژسشصضطظعغفقکگلࣇڷمنݨوؤہھئٹڈڑآے",
["ur-Arab"] = "ایببپتثجچحخدذرزژسشصضطظعغفقکگلࣇڷمنݨوؤہھئٹڈڑآے",
c.punc,
c.punc,
Line 2,597: Line 2,527:
to = {"е" .. p[1], "я" .. p[1], "я" .. p[2], "я" .. p[3], "я" .. p[4]}
to = {"е" .. p[1], "я" .. p[1], "я" .. p[2], "я" .. p[3], "я" .. p[4]}
},
},
},
strip_diacritics = {
["fa-Arab"] = "ar-stripdiacritics",
},
},
}
}
Line 2,646: Line 2,579:
"bnt-ngu",
"bnt-ngu",
"Latn",
"Latn",
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
}
}


Line 2,658: Line 2,591:
Hebr = "yi-translit",
Hebr = "yi-translit",
},
},
display_text = {
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]]
Hebr = "Hebr-common",
},
entry_name = {
Hebr = "Hebr-common",
},
sort_key = {
Hebr = "Hebr-common",
},
}
}


Line 2,674: Line 2,599:
"alv-yor",
"alv-yor",
"Latn, Arab",
"Latn, Arab",
entry_name = {
strip_diacritics = {
Latn = {remove_diacritics = c.grave .. c.acute .. c.macron}
Latn = {remove_diacritics = c.grave .. c.acute .. c.macron}
},
},
Line 2,717: Line 2,642:
"bnt-ngu",
"bnt-ngu",
"Latn",
"Latn",
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron},
}
}


return require("Module:languages").finalizeData(m, "language")
return require("Module:languages").finalizeData(m, "language")