Module:languages/data/2: Difference between revisions
No edit summary |
Undid revision 89976079 by Chuck Entz (talk) had absolutely no effect- might as well revert to be on the safe side Tag: Reverted |
||
| Line 20: | Line 20: | ||
s["de-Latn-standardchars"] = "AaÄäBbCcDdEeFfGgHhIiJjKkLlMmNnOoÖöPpQqRrSsẞßTtUuÜüVvWwXxYyZz" | s["de-Latn-standardchars"] = "AaÄäBbCcDdEeFfGgHhIiJjKkLlMmNnOoÖöPpQqRrSsẞßTtUuÜüVvWwXxYyZz" | ||
s["ka- | s["ka-stripdiacritics"] = {remove_diacritics = c.circ} | ||
s["no-sortkey"] = { | s["no-sortkey"] = { | ||
| Line 31: | Line 31: | ||
s["no-standardchars"] = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÆæØøÅå" .. c.punc | s["no-standardchars"] = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÆæØøÅå" .. c.punc | ||
s[" | s["sa-Deva-stripdiacritics"] = { -- Don't use remove_diacritics for accent marks, as १ and ३ should also be removed if (and only if) they carry any. | ||
from = {"[१३]?[" .. c.anudatta .. c.udatta .. c.dsvarita .. c.tsvarita .. "]+"}, | |||
to = {""}, | |||
} | |||
s["tk- | s["tg-stripdiacritics"] = {remove_diacritics = c.grave .. c.acute} | ||
s["tk-stripdiacritics"] = {remove_diacritics = c.macron} | |||
local m = {} | local m = {} | ||
| Line 42: | Line 47: | ||
"cus-eas", | "cus-eas", | ||
"Latn, Ethi", | "Latn, Ethi", | ||
strip_diacritics = { | |||
Latn = {remove_diacritics = c.acute}, | Latn = {remove_diacritics = c.acute}, | ||
}, | }, | ||
| Line 54: | Line 59: | ||
translit = { | translit = { | ||
Cyrl = "ab-translit", | Cyrl = "ab-translit", | ||
Geor | -- Geor translit in [[Module:scripts/data]] | ||
}, | }, | ||
override_translit = true, | override_translit = true, | ||
| Line 60: | Line 65: | ||
Cyrl = s["cau-Cyrl-displaytext"] | Cyrl = s["cau-Cyrl-displaytext"] | ||
}, | }, | ||
strip_diacritics = { | |||
Cyrl = { | Cyrl = { | ||
remove_diacritics = c.acute, | remove_diacritics = c.acute, | ||
| Line 66: | Line 71: | ||
to = {"а"}, | to = {"а"}, | ||
}, | }, | ||
Latn = s["cau-Latn- | Latn = s["cau-Latn-stripdiacritics"], | ||
}, | }, | ||
sort_key = { | sort_key = { | ||
| Line 90: | Line 95: | ||
29572, | 29572, | ||
"ira-cen", | "ira-cen", | ||
"Avst, Gujr", | "Avst, Gujr, Deva", | ||
translit = { | translit = { | ||
Avst = "Avst-translit" | Avst = "Avst-translit" | ||
| Line 137: | Line 142: | ||
13955, | 13955, | ||
"sem-arb", | "sem-arb", | ||
"Arab, Hebr, Syrc, Brai", | "Arab, Hebr, Syrc, Brai, Nbat", | ||
translit = { | translit = { | ||
Arab = "ar-translit" | Arab = "ar-translit" | ||
}, | }, | ||
strip_diacritics = { | |||
Arab = "ar-stripdiacritics", | |||
Arab = "ar- | |||
}, | }, | ||
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]] | |||
} | } | ||
| Line 176: | Line 175: | ||
Cyrl = s["cau-Cyrl-displaytext"], | Cyrl = s["cau-Cyrl-displaytext"], | ||
}, | }, | ||
strip_diacritics = { | |||
Cyrl = s["cau-Cyrl- | Cyrl = s["cau-Cyrl-stripdiacritics"], | ||
Latn = s["cau-Latn- | Latn = s["cau-Latn-stripdiacritics"], | ||
}, | }, | ||
sort_key = { | sort_key = { | ||
| Line 202: | Line 201: | ||
ancestors = "trk-oat", | ancestors = "trk-oat", | ||
dotted_dotless_i = true, | dotted_dotless_i = true, | ||
strip_diacritics = { | |||
Latn = { | Latn = { | ||
from = {"ʼ"}, | from = {"ʼ"}, | ||
| Line 208: | Line 207: | ||
}, | }, | ||
["fa-Arab"] = { | ["fa-Arab"] = { | ||
module = "ar- | module = "ar-stripdiacritics", | ||
["from"] = { | ["from"] = { | ||
"ۆ", | "ۆ", | ||
| Line 267: | Line 266: | ||
"zle", | "zle", | ||
"Cyrl, Latn", | "Cyrl, Latn", | ||
ancestors = "zle- | ancestors = "zle-mbe", | ||
translit = { | translit = { | ||
Cyrl = "be-translit", | Cyrl = "be-translit", | ||
}, | }, | ||
strip_diacritics = { | |||
Cyrl = { | Cyrl = { | ||
remove_diacritics = c.grave .. c.acute, | remove_diacritics = c.grave .. c.acute, | ||
| Line 293: | Line 292: | ||
}, | }, | ||
}, | }, | ||
standard_chars = { | |||
Cyrl = "АаБбВвГгДдЕеЁёЖжЗзІіЙйКкЛлМмНнОоПпРрСсТтУуЎўФфХхЦцЧчШшЫыЬьЭэЮюЯя", | Cyrl = "АаБбВвГгДдЕеЁёЖжЗзІіЙйКкЛлМмНнОоПпРрСсТтУуЎўФфХхЦцЧчШшЫыЬьЭэЮюЯя", | ||
Latn = "AaBbCcĆćČčDdEeFfGgHhIiJjKkLlŁłMmNnŃńOoPpRrSsŚśŠšTtUuŬŭVvYyZzŹźŽž", | Latn = "AaBbCcĆćČčDdEeFfGgHhIiJjKkLlŁłMmNnŃńOoPpRrSsŚśŠšTtUuŬŭVvYyZzŹźŽž", | ||
| Line 307: | Line 306: | ||
ancestors = "cu-bgm", | ancestors = "cu-bgm", | ||
translit = "bg-translit", | translit = "bg-translit", | ||
strip_diacritics = { | |||
remove_diacritics = c.grave .. c.acute, | remove_diacritics = c.grave .. c.acute, | ||
remove_exceptions = {"%f[^%z%s]ѝ%f[%z%s]"}, | remove_exceptions = {"%f[^%z%s]ѝ%f[%z%s]"}, | ||
| Line 315: | Line 314: | ||
remove_exceptions = {"%f[^%z%s]ѝ%f[%z%s]"}, | remove_exceptions = {"%f[^%z%s]ѝ%f[%z%s]"}, | ||
}, | }, | ||
standard_chars = "АаБбВвГгДдЕеЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЬьЮюЯя" .. c.punc, | |||
} | } | ||
| Line 363: | Line 362: | ||
"Tibt", -- sometimes Deva? | "Tibt", -- sometimes Deva? | ||
ancestors = "xct", | ancestors = "xct", | ||
override_translit = true, | override_translit = true, | ||
display_text | -- Tibt translit, display_text, strip_diacritics, sort_key in [[Module:scripts/data]] | ||
} | } | ||
| Line 389: | Line 385: | ||
ancestors = "roa-oca", | ancestors = "roa-oca", | ||
sort_key = {remove_diacritics = c.grave .. c.acute .. c.diaer .. c.cedilla .. "·"}, | sort_key = {remove_diacritics = c.grave .. c.acute .. c.diaer .. c.cedilla .. "·"}, | ||
standard_chars = "AaÀàBbCcÇçDdEeÉéÈèFfGgHhIiÍíÏïJjLlMmNnOoÓóÒòPpQqRrSsTtUuÚúÜüVvXxYyZz·" .. c.punc, | |||
} | } | ||
| Line 405: | Line 401: | ||
Cyrl = s["cau-Cyrl-displaytext"] | Cyrl = s["cau-Cyrl-displaytext"] | ||
}, | }, | ||
strip_diacritics = { | |||
Cyrl = s["cau-Cyrl- | Cyrl = s["cau-Cyrl-stripdiacritics"], | ||
Latn = s["cau-Latn- | Latn = s["cau-Latn-stripdiacritics"], | ||
}, | }, | ||
sort_key = { | sort_key = { | ||
| Line 438: | Line 434: | ||
to = {"c" .. p[1], "g" .. p[1], "s" .. p[1], "s" .. p[2]} | to = {"c" .. p[1], "g" .. p[1], "s" .. p[1], "s" .. p[2]} | ||
}, | }, | ||
standard_chars = "AaÀàBbCcDdEeÈèFfGgHhIiÌìÏïJjLlMmNnOoÒòPpQqRrSsTtUuÙùÜüVvZz" .. c.punc, | |||
} | } | ||
| Line 461: | Line 457: | ||
to = {"a" .. p[1], "c" .. p[1], "d" .. p[1], "e" .. p[1], "e" .. p[2], "h" .. p[1], "i" .. p[1], "n" .. p[1], "o" .. p[1], "r" .. p[1], "s" .. p[1], "t" .. p[1], "u" .. p[1], "u" .. p[2], "y" .. p[1], "z" .. p[1]} | to = {"a" .. p[1], "c" .. p[1], "d" .. p[1], "e" .. p[1], "e" .. p[2], "h" .. p[1], "i" .. p[1], "n" .. p[1], "o" .. p[1], "r" .. p[1], "s" .. p[1], "t" .. p[1], "u" .. p[1], "u" .. p[2], "y" .. p[1], "z" .. p[1]} | ||
}, | }, | ||
standard_chars = "AaÁáBbCcČčDdĎďEeÉéĚěFfGgHhIiÍíJjKkLlMmNnŇňOoÓóPpRrŘřSsŠšTtŤťUuÚúŮůVvYyÝýZzŽž" .. c.punc, | |||
} | } | ||
| Line 468: | Line 464: | ||
35499, | 35499, | ||
"zls", | "zls", | ||
"Cyrs, Glag", | "Cyrs, Glag, Zname", | ||
translit = { | translit = { | ||
Cyrs = "Cyrs-translit", | Cyrs = "Cyrs-translit", | ||
Glag = "Glag-translit" | Glag = "Glag-translit" | ||
}, | }, | ||
-- Cyrs strip_diacritics, sort_key in [[Module:scripts/data]] | |||
} | } | ||
| Line 506: | Line 497: | ||
to = {"c" .. p[1], "d" .. p[1], "f" .. p[1], "g" .. p[1], "l" .. p[1], "p" .. p[1], "r" .. p[1], "t" .. p[1]} | to = {"c" .. p[1], "d" .. p[1], "f" .. p[1], "g" .. p[1], "l" .. p[1], "p" .. p[1], "r" .. p[1], "t" .. p[1]} | ||
}, | }, | ||
standard_chars = "ÂâAaBbCcDdEeÊêFfGgHhIiÎîLlMmNnOoÔôPpRrSsTtUuÛûWwŴŵYyŶŷ" .. c.punc, | |||
} | } | ||
| Line 521: | Line 512: | ||
to = {"z" .. p[1], "z" .. p[2], "z" .. p[3]} | to = {"z" .. p[1], "z" .. p[2], "z" .. p[3]} | ||
}, | }, | ||
standard_chars = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÆæØøÅå" .. c.punc, | |||
} | } | ||
| Line 529: | Line 520: | ||
"gmw-hgm", | "gmw-hgm", | ||
"Latn, Latf, Brai", | "Latn, Latf, Brai", | ||
ancestors = " | ancestors = "de-ear", | ||
sort_key = { | sort_key = { | ||
Latn = s["de-Latn-sortkey"], | Latn = s["de-Latn-sortkey"], | ||
Latf = s["de-Latn-sortkey"], | Latf = s["de-Latn-sortkey"], | ||
}, | }, | ||
standard_chars = { | |||
Latn = s["de-Latn-standardchars"], | Latn = s["de-Latn-standardchars"], | ||
Latf = s["de-Latn-standardchars"], | Latf = s["de-Latn-standardchars"], | ||
| Line 551: | Line 542: | ||
Diak = "Diak-translit", | Diak = "Diak-translit", | ||
}, | }, | ||
ancestors = "dv-old", | |||
override_translit = true, | override_translit = true, | ||
} | } | ||
| Line 560: | Line 552: | ||
"Tibt", | "Tibt", | ||
ancestors = "xct", | ancestors = "xct", | ||
override_translit = true, | override_translit = true, | ||
display_text | -- Tibt translit, display_text, strip_diacritics, sort_key in [[Module:scripts/data]] | ||
} | } | ||
| Line 585: | Line 574: | ||
"Grek, Polyt, Brai", | "Grek, Polyt, Brai", | ||
ancestors = "el-kth", | ancestors = "el-kth", | ||
translit | translit = "el-translit", | ||
override_translit = true, | override_translit = true, | ||
-- Grek and Polyt display_text, strip_diacritics, sort_key in [[Module:scripts/data]] | |||
standard_chars = { | |||
Grek = "΅·ͺ΄ΑαΆάΒβΓγΔδΕεέΈΖζΗηΉήΘθΙιΊίΪϊΐΚκΛλΜμΝνΞξΟοΌόΠπΡρΣσςΤτΥυΎύΫϋΰΦφΧχΨψΩωΏώ", | Grek = "΅·ͺ΄ΑαΆάΒβΓγΔδΕεέΈΖζΗηΉήΘθΙιΊίΪϊΐΚκΛλΜμΝνΞξΟοΌόΠπΡρΣσςΤτΥυΎύΫϋΰΦφΧχΨψΩωΏώ", | ||
Brai = c.braille, | Brai = c.braille, | ||
| Line 620: | Line 595: | ||
-- Many of these are needed for sorting language names. | -- Many of these are needed for sorting language names. | ||
remove_diacritics = "'\"%-%.,%s·ʻʼ" .. c.diacritics, | remove_diacritics = "'\"%-%.,%s·ʻʼ" .. c.diacritics, | ||
-- These are found in | -- These are found in pagenames. | ||
from = {"[ɒæ🅱¢©ᴄðđəǝɜɡħʜıɨłŋɲøɔœꝑꝓꝕßʋ]"}, | from = {"[ɒæ🅱¢©ᴄðđəǝɜɡħʜıɨłŋɲøɔœꝑꝓꝕßʋ]"}, | ||
to = {{ | to = {{ | ||
| Line 632: | Line 607: | ||
}, | }, | ||
}, | }, | ||
standard_chars = { | |||
Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz", | Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz", | ||
Brai = c.braille, | Brai = c.braille, | ||
| Line 649: | Line 624: | ||
to = {"c" .. p[1], "g" .. p[1], "h" .. p[1], "j" .. p[1], "s" .. p[1], "u" .. p[1]} | to = {"c" .. p[1], "g" .. p[1], "h" .. p[1], "j" .. p[1], "s" .. p[1], "u" .. p[1]} | ||
}, | }, | ||
standard_chars = "AaBbCcĈĉDdEeFfGgĜĝHhĤĥIiJjĴĵKkLlMmNnOoPpRrSsŜŝTtUuŬŭVvZz" .. c.punc, | |||
} | } | ||
| Line 666: | Line 641: | ||
}, | }, | ||
}, | }, | ||
standard_chars = { | |||
Latn = "AaÁáBbCcDdEeÉéFfGgHhIiÍíJjLlMmNnÑñOoÓóPpQqRrSsTtUuÚúÜüVvXxYyZz", | Latn = "AaÁáBbCcDdEeÉéFfGgHhIiÍíJjLlMmNnÑñOoÓóPpQqRrSsTtUuÚúÜüVvXxYyZz", | ||
Brai = c.braille, | Brai = c.braille, | ||
| Line 688: | Line 663: | ||
} | } | ||
}, | }, | ||
standard_chars = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvÕõÄäÖöÜü" .. c.punc, | |||
} | } | ||
| Line 700: | Line 675: | ||
to = {"c" .. p[1], "n" .. p[1]} | to = {"c" .. p[1], "n" .. p[1]} | ||
}, | }, | ||
standard_chars = "AaBbDdEeFfGgHhIiJjKkLlMmNnÑñOoPpRrSsTtUuXxZz" .. c.punc, | |||
} | } | ||
| Line 709: | Line 684: | ||
"fa-Arab, Hebr", | "fa-Arab, Hebr", | ||
ancestors = "fa-cls", | ancestors = "fa-cls", | ||
strip_diacritics = { | |||
["fa-Arab"] = { | ["fa-Arab"] = { | ||
-- character "ۂ" code U+06C2 to "ه" and "هٔ" | -- character "ۂ" code U+06C2 to "ه" and "هٔ" (U+0647 + U+0654) to "ه"; hamzatu l-waṣli to a regular alif | ||
from = {"هٔ", "ٱ"}, -- character "ۂ" code U+06C2 to "ه"; hamzatu l-waṣli to a regular alif | from = {"هٔ", "ٱ"}, -- character "ۂ" code U+06C2 to "ه"; hamzatu l-waṣli to a regular alif | ||
to = {"ه", "ا"}, | to = {"ه", "ا"}, | ||
remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.superalef, | remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.superalef, | ||
}, | }, | ||
}, | }, | ||
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]] | |||
} | } | ||
| Line 742: | Line 711: | ||
to = {"’"} | to = {"’"} | ||
}, | }, | ||
strip_diacritics = { -- used to indicate gemination of the next consonant | |||
remove_diacritics = "ˣ", | remove_diacritics = "ˣ", | ||
from = {"’"}, | from = {"’"}, | ||
| Line 748: | Line 717: | ||
}, | }, | ||
sort_key = { -- [[Appendix:Finnish alphabet#Collation]] + "aͤ" and "oͤ" as historical variants of "ä" and "ö". | sort_key = { -- [[Appendix:Finnish alphabet#Collation]] + "aͤ" and "oͤ" as historical variants of "ä" and "ö". | ||
remove_diacritics = "':" .. c.diacritics, | remove_diacritics = "'’:" .. c.diacritics, | ||
remove_exceptions = { | remove_exceptions = { | ||
"a[" .. c.ringabove .. c.diaer .. c.small_e .. "]", -- åäaͤ | "a[" .. c.ringabove .. c.diaer .. c.small_e .. "]", -- åäaͤ | ||
| Line 757: | Line 726: | ||
to = {"ae", "d", "l", "n", "oe", "ss", "th", "y", "z" .. p[1], "ä", "ö", "ö", "%1"} | to = {"ae", "d", "l", "n", "oe", "ss", "th", "y", "z" .. p[1], "ä", "ö", "ö", "%1"} | ||
}, | }, | ||
standard_chars = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÄäÖö" .. c.punc, | |||
} | } | ||
| Line 776: | Line 745: | ||
to = {"a" .. p[1], "d" .. p[1], "i" .. p[1], "o" .. p[1], "u" .. p[1], "y" .. p[1], "z" .. p[1], "z" .. p[2]} | to = {"a" .. p[1], "d" .. p[1], "i" .. p[1], "o" .. p[1], "u" .. p[1], "y" .. p[1], "z" .. p[1], "z" .. p[2]} | ||
}, | }, | ||
standard_chars = "AaÁáBbDdÐðEeFfGgHhIiÍíJjKkLlMmNnOoÓóPpRrSsTtUuÚúVvYyÝýÆæØø" .. c.punc, | |||
} | } | ||
| Line 784: | Line 753: | ||
"roa-oil", | "roa-oil", | ||
"Latn, Brai", | "Latn, Brai", | ||
ancestors = "frm", | ancestors = "frm", | ||
sort_key = { | sort_key = { | ||
Latn = s["roa-oil-sortkey"] | Latn = s["roa-oil-sortkey"] | ||
}, | }, | ||
standard_chars = { | |||
Latn = "AaÀàÂâBbCcÇçDdEeÉéÈèÊêËëFfGgHhIiÎîÏïJjLlMmNnOoÔôŒœPpQqRrSsTtUuÙùÛûÜüVvXxYyZz", | Latn = "AaÀàÂâBbCcÇçDdEeÉéÈèÊêËëFfGgHhIiÎîÏïJjLlMmNnOoÔôŒœPpQqRrSsTtUuÙùÛûÜüVvXxYyZz", | ||
Brai = c.braille, | Brai = c.braille, | ||
| Line 817: | Line 774: | ||
to = {"i"} | to = {"i"} | ||
}, | }, | ||
standard_chars = "AaâäàÆæBbCcDdEeéêëèFfGgHhIiïìYyỳJjKkLlMmNnOoôöòPpRrSsTtUuúûüùVvWwZz" .. c.punc, | |||
} | } | ||
| Line 831: | Line 788: | ||
to = {"bh", "ch", "dh", "fh", "gh", "mh", "ph", "sh", "th"} | to = {"bh", "ch", "dh", "fh", "gh", "mh", "ph", "sh", "th"} | ||
}, | }, | ||
standard_chars = "AaÁáBbCcDdEeÉéFfGgHhIiÍíLlMmNnOoÓóPpRrSsTtUuÚúVv" .. c.punc, | |||
} | } | ||
| Line 841: | Line 798: | ||
ancestors = "mga", | ancestors = "mga", | ||
sort_key = {remove_diacritics = c.grave .. c.acute}, | sort_key = {remove_diacritics = c.grave .. c.acute}, | ||
standard_chars = "AaÀàBbCcDdEeÈèFfGgHhIiÌìLlMmNnOoÒòPpRrSsTtUuÙù" .. c.punc, | |||
} | } | ||
| Line 854: | Line 811: | ||
to = {"n" .. p[1]} | to = {"n" .. p[1]} | ||
}, | }, | ||
standard_chars = "AaÁáBbCcDdEeÉéFfGgHhIiÍíÏïLlMmNnÑñOoÓóPpQqRrSsTtUuÚúÜüVvXxZz" .. c.punc, | |||
} | } | ||
m["gu"] = { | |||
m["gu"] = { | |||
"Gujarati", | "Gujarati", | ||
5137, | 5137, | ||
| Line 873: | Line 823: | ||
Gujr = "gu-translit", | Gujr = "gu-translit", | ||
}, | }, | ||
strip_diacritics = { | |||
Arab = {remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.kasra .. c.shadda .. c.sukun}, | Arab = {remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.kasra .. c.shadda .. c.sukun}, | ||
Gujr = {remove_diacritics = "઼"}, | Gujr = {remove_diacritics = "઼"}, | ||
| Line 886: | Line 836: | ||
ancestors = "mga", | ancestors = "mga", | ||
sort_key = {remove_diacritics = c.cedilla .. "-"}, | sort_key = {remove_diacritics = c.cedilla .. "-"}, | ||
standard_chars = "AaBbCcÇçDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwYy" .. c.punc, | |||
} | } | ||
| Line 894: | Line 844: | ||
"cdc-wst", | "cdc-wst", | ||
"Latn, Arab", | "Latn, Arab", | ||
strip_diacritics = { | |||
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.macron} | Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.macron} | ||
}, | }, | ||
| Line 911: | Line 861: | ||
"Hebr, Phnx, Brai, Samr", | "Hebr, Phnx, Brai, Samr", | ||
ancestors = "he-med", | ancestors = "he-med", | ||
display_text | -- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]] | ||
-- Samr strip_diacritics, sort_key in [[Module:scripts/data]] | |||
-- Phnx translit in [[Module:scripts/data]] (NOTE: not present before, presumably an accidental omission) | |||
} | } | ||
| Line 932: | Line 874: | ||
Deva = "hi-translit" | Deva = "hi-translit" | ||
}, | }, | ||
standard_chars = { | |||
Deva = "अआइईउऊएऐओऔकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसहत्रज्ञक्षक़ख़ग़ज़झ़ड़ढ़फ़काखागाघाङाचाछाजाझाञाटाठाडाढाणाताथादाधानापाफाबाभामायारालावाशाषासाहात्राज्ञाक्षाक़ाख़ाग़ाज़ाझ़ाड़ाढ़ाफ़ाकिखिगिघिङिचिछिजिझिञिटिठिडिढिणितिथिदिधिनिपिफिबिभिमियिरिलिविशिषिसिहित्रिज्ञिक्षिक़िख़िग़िज़िझ़िड़िढ़िफ़िकीखीगीघीङीचीछीजीझीञीटीठीडीढीणीतीथीदीधीनीपीफीबीभीमीयीरीलीवीशीषीसीहीत्रीज्ञीक्षीक़ीख़ीग़ीज़ीझ़ीड़ीढ़ीफ़ीकुखुगुघुङुचुछुजुझुञुटुठुडुढुणुतुथुदुधुनुपुफुबुभुमुयुरुलुवुशुषुसुहुत्रुज्ञुक्षुक़ुख़ुग़ुज़ुझ़ुड़ुढ़ुफ़ुकूखूगूघूङूचूछूजूझूञूटूठूडूढूणूतूथूदूधूनूपूफूबूभूमूयूरूलूवूशूषूसूहूत्रूज्ञूक्षूक़ूख़ूग़ूज़ूझ़ूड़ूढ़ूफ़ूकेखेगेघेङेचेछेजेझेञेटेठेडेढेणेतेथेदेधेनेपेफेबेभेमेयेरेलेवेशेषेसेहेत्रेज्ञेक्षेक़ेख़ेग़ेज़ेझ़ेड़ेढ़ेफ़ेकैखैगैघैङैचैछैजैझैञैटैठैडैढैणैतैथैदैधैनैपैफैबैभैमैयैरैलैवैशैषैसैहैत्रैज्ञैक्षैक़ैख़ैग़ैज़ैझ़ैड़ैढ़ैफ़ैकोखोगोघोङोचोछोजोझोञोटोठोडोढोणोतोथोदोधोनोपोफोबोभोमोयोरोलोवोशोषोसोहोत्रोज्ञोक्षोक़ोख़ोग़ोज़ोझ़ोड़ोढ़ोफ़ोकौखौगौघौङौचौछौजौझौञौटौठौडौढौणौतौथौदौधौनौपौफौबौभौमौयौरौलौवौशौषौसौहौत्रौज्ञौक्षौक़ौख़ौग़ौज़ौझ़ौड़ौढ़ौफ़ौक्ख्ग्घ्ङ्च्छ्ज्झ्ञ्ट्ठ्ड्ढ्ण्त्थ्द्ध्न्प्फ्ब्भ्म्य्र्ल्व्श्ष्स्ह्त्र्ज्ञ्क्ष्क़्ख़्ग़्ज़्झ़्ड़्ढ़्फ़्।॥०१२३४५६७८९॰", | Deva = "अआइईउऊएऐओऔकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसहत्रज्ञक्षक़ख़ग़ज़झ़ड़ढ़फ़काखागाघाङाचाछाजाझाञाटाठाडाढाणाताथादाधानापाफाबाभामायारालावाशाषासाहात्राज्ञाक्षाक़ाख़ाग़ाज़ाझ़ाड़ाढ़ाफ़ाकिखिगिघिङिचिछिजिझिञिटिठिडिढिणितिथिदिधिनिपिफिबिभिमियिरिलिविशिषिसिहित्रिज्ञिक्षिक़िख़िग़िज़िझ़िड़िढ़िफ़िकीखीगीघीङीचीछीजीझीञीटीठीडीढीणीतीथीदीधीनीपीफीबीभीमीयीरीलीवीशीषीसीहीत्रीज्ञीक्षीक़ीख़ीग़ीज़ीझ़ीड़ीढ़ीफ़ीकुखुगुघुङुचुछुजुझुञुटुठुडुढुणुतुथुदुधुनुपुफुबुभुमुयुरुलुवुशुषुसुहुत्रुज्ञुक्षुक़ुख़ुग़ुज़ुझ़ुड़ुढ़ुफ़ुकूखूगूघूङूचूछूजूझूञूटूठूडूढूणूतूथूदूधूनूपूफूबूभूमूयूरूलूवूशूषूसूहूत्रूज्ञूक्षूक़ूख़ूग़ूज़ूझ़ूड़ूढ़ूफ़ूकेखेगेघेङेचेछेजेझेञेटेठेडेढेणेतेथेदेधेनेपेफेबेभेमेयेरेलेवेशेषेसेहेत्रेज्ञेक्षेक़ेख़ेग़ेज़ेझ़ेड़ेढ़ेफ़ेकैखैगैघैङैचैछैजैझैञैटैठैडैढैणैतैथैदैधैनैपैफैबैभैमैयैरैलैवैशैषैसैहैत्रैज्ञैक्षैक़ैख़ैग़ैज़ैझ़ैड़ैढ़ैफ़ैकोखोगोघोङोचोछोजोझोञोटोठोडोढोणोतोथोदोधोनोपोफोबोभोमोयोरोलोवोशोषोसोहोत्रोज्ञोक्षोक़ोख़ोग़ोज़ोझ़ोड़ोढ़ोफ़ोकौखौगौघौङौचौछौजौझौञौटौठौडौढौणौतौथौदौधौनौपौफौबौभौमौयौरौलौवौशौषौसौहौत्रौज्ञौक्षौक़ौख़ौग़ौज़ौझ़ौड़ौढ़ौफ़ौक्ख्ग्घ्ङ्च्छ्ज्झ्ञ्ट्ठ्ड्ढ्ण्त्थ्द्ध्न्प्फ्ब्भ्म्य्र्ल्व्श्ष्स्ह्त्र्ज्ञ्क्ष्क़्ख़्ग़्ज़्झ़्ड़्ढ़्फ़्।॥०१२३४५६७८९॰", | ||
c.punc | c.punc | ||
| Line 982: | Line 924: | ||
}, | }, | ||
}, | }, | ||
standard_chars = { | |||
Latn = "AaÁáBbCcDdEeÉéFfGgHhIiÍíJjKkLlMmNnOoÓóÖöŐőPpQqRrSsTtUuÚúÜüŰűVvWwXxYyZz", | Latn = "AaÁáBbCcDdEeÉéFfGgHhIiÍíJjKkLlMmNnOoÓóÖöŐőPpQqRrSsTtUuÚúÜüŰűVvWwXxYyZz", | ||
c.punc | c.punc | ||
| Line 994: | Line 936: | ||
"Armn, Brai", | "Armn, Brai", | ||
ancestors = "axm", | ancestors = "axm", | ||
-- Armn translit in [[Module:scripts/data]] | |||
override_translit = true, | override_translit = true, | ||
strip_diacritics = { | |||
Armn = { | Armn = { | ||
remove_diacritics = "՛՜՞՟", | remove_diacritics = "՛՜՞՟", | ||
| Line 1,039: | Line 979: | ||
"Latn", | "Latn", | ||
ancestors = "ms", | ancestors = "ms", | ||
standard_chars = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz" .. c.punc, | |||
} | } | ||
| Line 1,048: | Line 988: | ||
"Latn", | "Latn", | ||
type = "appendix-constructed", | type = "appendix-constructed", | ||
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.circ}, | |||
} | } | ||
| Line 1,056: | Line 996: | ||
"alv-igb", | "alv-igb", | ||
"Latn", | "Latn", | ||
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.macron}, | |||
sort_key = { | sort_key = { | ||
from = {"gb", "gh", "gw", "ị", "kp", "kw", "ṅ", "nw", "ny", "ọ", "sh", "ụ"}, | from = {"gb", "gh", "gw", "ị", "kp", "kw", "ṅ", "nw", "ny", "ọ", "sh", "ụ"}, | ||
| Line 1,104: | Line 1,044: | ||
to = {"a" .. p[1], "d" .. p[1], "e" .. p[1], "i" .. p[1], "o" .. p[1], "u" .. p[1], "y" .. p[1], "z" .. p[1], "z" .. p[2], "z" .. p[3]} | to = {"a" .. p[1], "d" .. p[1], "e" .. p[1], "i" .. p[1], "o" .. p[1], "u" .. p[1], "y" .. p[1], "z" .. p[1], "z" .. p[2], "z" .. p[3]} | ||
}, | }, | ||
standard_chars = "AaÁáBbDdÐðEeÉéFfGgHhIiÍíJjKkLlMmNnOoÓóPpRrSsTtUuÚúVvXxYyÝýÞþÆæÖö" .. c.punc, | |||
} | } | ||
| Line 1,114: | Line 1,054: | ||
ancestors = "roa-oit", | ancestors = "roa-oit", | ||
sort_key = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.ringabove}, | sort_key = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.ringabove}, | ||
standard_chars = "AaÀàBbCcDdEeÈèÉéFfGgHhIiÌìLlMmNnOoÒòPpQqRrSsTtUuÙùVvZz" .. c.punc, | |||
} | } | ||
| Line 1,137: | Line 1,077: | ||
link_tr = true, | link_tr = true, | ||
display_text = s["jpx-displaytext"], | display_text = s["jpx-displaytext"], | ||
strip_diacritics = s["jpx-stripdiacritics"], | |||
sort_key = s["jpx-sortkey"], | sort_key = s["jpx-sortkey"], | ||
} | } | ||
| Line 1,145: | Line 1,085: | ||
33549, | 33549, | ||
"poz", | "poz", | ||
"Latn, Java", | "Latn, Java, Arab", | ||
ancestors = "kaw", | ancestors = "kaw", | ||
translit = { | translit = { | ||
| Line 1,151: | Line 1,091: | ||
}, | }, | ||
link_tr = true, | link_tr = true, | ||
strip_diacritics = { | |||
Latn = {remove_diacritics = c.circ} -- Modern jv don't use ê | Latn = {remove_diacritics = c.circ} -- Modern jv don't use ê | ||
}, | }, | ||
| Line 1,168: | Line 1,108: | ||
"Geor, Geok, Hebr", -- Hebr is used to write Judeo-Georgian | "Geor, Geok, Hebr", -- Hebr is used to write Judeo-Georgian | ||
ancestors = "ka-mid", | ancestors = "ka-mid", | ||
-- Geor, Geok translit in [[Module:scripts/data]] | |||
override_translit = true, | override_translit = true, | ||
strip_diacritics = { | |||
Geor = s["ka-stripdiacritics"], | |||
Geok = s["ka-stripdiacritics"], | |||
}, | }, | ||
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]] | |||
} | } | ||
| Line 1,231: | Line 1,162: | ||
}, | }, | ||
}, | }, | ||
standard_chars = { | |||
Cyrl = "АаӘәБбВвГгҒғДдЕеЁёЖжЗзИиЙйКкҚқЛлМмНнҢңОоӨөПпРрСсТтУуҰұҮүФфХхҺһЦцЧчШшЩщЪъЫыІіЬьЭэЮюЯя", | Cyrl = "АаӘәБбВвГгҒғДдЕеЁёЖжЗзИиЙйКкҚқЛлМмНнҢңОоӨөПпРрСсТтУуҰұҮүФфХхҺһЦцЧчШшЩщЪъЫыІіЬьЭэЮюЯя", | ||
c.punc | c.punc | ||
| Line 1,263: | Line 1,194: | ||
"Knda, Tutg", | "Knda, Tutg", | ||
ancestors = "dra-mkn", | ancestors = "dra-mkn", | ||
-- Knda translit in [[Module:scripts/data]] | |||
} | } | ||
| Line 1,277: | Line 1,206: | ||
Kore = "ko-translit", | Kore = "ko-translit", | ||
}, | }, | ||
-- Kore strip_diacritics in [[Module:scripts/data]] | |||
} | } | ||
| Line 1,287: | Line 1,214: | ||
"ssa-sah", | "ssa-sah", | ||
"Latn, Arab", | "Latn, Arab", | ||
-- the sortkey and | -- the sortkey and strip_diacritics are only for standard Kanuri; when dialectal entries get added, someone will have to work out how the dialects should be represented orthographically | ||
strip_diacritics = { | |||
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.breve} | Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.breve} | ||
}, | }, | ||
| Line 1,307: | Line 1,234: | ||
["ks-Arab"] = "ks-Arab-translit", | ["ks-Arab"] = "ks-Arab-translit", | ||
Deva = "ks-Deva-translit", | Deva = "ks-Deva-translit", | ||
Shrd | -- Shrd translit in [[Module:scripts/data]] | ||
}, | }, | ||
} | } | ||
-- "kv" | -- "kv" is treated as "koi", "kpv", see [[WT:LT]] | ||
m["kw"] = { | m["kw"] = { | ||
| Line 1,346: | Line 1,273: | ||
397, | 397, | ||
"itc-laf", | "itc-laf", | ||
"Latn", | "Latn, Ital", | ||
ancestors = "itc-ola", | ancestors = "itc-ola", | ||
-- Ital translit in [[Module:scripts/data]] (NOTE: formerly not present, probably an accidental omission) | |||
display_text = { | display_text = { | ||
Latn = s["itc-Latn-displaytext"] | Latn = s["itc-Latn-displaytext"] | ||
}, | }, | ||
strip_diacritics = { | |||
Latn = s["itc-Latn- | Latn = s["itc-Latn-stripdiacritics"] | ||
}, | }, | ||
sort_key = { | sort_key = { | ||
Latn = s["itc-Latn-sortkey"] | Latn = s["itc-Latn-sortkey"] | ||
}, | }, | ||
standard_chars = { | |||
Latn = "AaBbCcDdEeFfGgHhIiLlMmNnOoPpQqRrSsTtUuVvXx", | Latn = "AaBbCcDdEeFfGgHhIiLlMmNnOoPpQqRrSsTtUuVvXx", | ||
c.punc | c.punc | ||
| Line 1,382: | Line 1,310: | ||
"bnt-nyg", | "bnt-nyg", | ||
"Latn", | "Latn", | ||
strip_diacritics = {remove_diacritics = c.acute .. c.circ}, | |||
sort_key = { | sort_key = { | ||
from = {"ŋ"}, | from = {"ŋ"}, | ||
| Line 1,413: | Line 1,341: | ||
9211, | 9211, | ||
"tai-swe", | "tai-swe", | ||
"Laoo", | "Laoo", -- also Tai Noi/Lao Buhan script | ||
translit = "lo-translit", | translit = "lo-translit", | ||
sort_key = "Laoo-sortkey", | sort_key = "Laoo-sortkey", | ||
standard_chars = "0-9ກຂຄງຈຊຍດຕຖທນບປຜຝພຟມຢຣລວສຫອຮຯ-ໝ" .. c.punc, | |||
} | } | ||
| Line 1,426: | Line 1,354: | ||
ancestors = "olt", | ancestors = "olt", | ||
display_text = "lt-common", | display_text = "lt-common", | ||
strip_diacritics = "lt-common", | |||
sort_key = "lt-common", | sort_key = "lt-common", | ||
standard_chars = "AaĄąBbCcČčDdEeĘęĖėFfGgHhIiĮįYyJjKkLlMmNnOoPpRrSsŠšTtUuŲųŪūVvZzŽž" .. c.punc, | |||
} | } | ||
| Line 1,443: | Line 1,371: | ||
"bat-eas", | "bat-eas", | ||
"Latn", | "Latn", | ||
strip_diacritics = { | |||
-- This attempts to convert vowels with tone marks to vowels either with or without macrons. Specifically, there should be no macrons if the vowel is part of a diphthong (including resonant diphthongs such pìrksts -> pirksts not #pīrksts). What we do is first convert the vowel + tone mark to a vowel + tilde in a decomposed fashion, then remove the tilde in diphthongs, then convert the remaining vowel + tilde sequences to macroned vowels, then delete any other tilde. We leave already-macroned vowels alone: Both e.g. ar and ār occur before consonants. FIXME: This still might not be sufficient. | -- This attempts to convert vowels with tone marks to vowels either with or without macrons. Specifically, there should be no macrons if the vowel is part of a diphthong (including resonant diphthongs such pìrksts -> pirksts not #pīrksts). What we do is first convert the vowel + tone mark to a vowel + tilde in a decomposed fashion, then remove the tilde in diphthongs, then convert the remaining vowel + tilde sequences to macroned vowels, then delete any other tilde. We leave already-macroned vowels alone: Both e.g. ar and ār occur before consonants. FIXME: This still might not be sufficient. | ||
from = {"([Ee])" .. c.cedilla, "[" .. c.grave .. c.circ .. c.tilde .."]", "([aAeEiIoOuU])" .. c.tilde .."?([lrnmuiLRNMUI])" .. c.tilde .. "?([^aAeEiIoOuU])", "([aAeEiIoOuU])" .. c.tilde .."?([lrnmuiLRNMUI])" .. c.tilde .."?$", "([iI])" .. c.tilde .. "?([eE])" .. c.tilde .. "?", "([aAeEiIuU])" .. c.tilde, c.tilde}, | from = {"([Ee])" .. c.cedilla, "[" .. c.grave .. c.circ .. c.tilde .."]", "([aAeEiIoOuU])" .. c.tilde .."?([lrnmuiLRNMUI])" .. c.tilde .. "?([^aAeEiIoOuU])", "([aAeEiIoOuU])" .. c.tilde .."?([lrnmuiLRNMUI])" .. c.tilde .."?$", "([iI])" .. c.tilde .. "?([eE])" .. c.tilde .. "?", "([aAeEiIuU])" .. c.tilde, c.tilde}, | ||
| Line 1,452: | Line 1,380: | ||
to = {"a" .. p[1], "c" .. p[1], "e" .. p[1], "g" .. p[1], "i" .. p[1], "k" .. p[1], "l" .. p[1], "n" .. p[1], "s" .. p[1], "u" .. p[1], "z" .. p[1]} | to = {"a" .. p[1], "c" .. p[1], "e" .. p[1], "g" .. p[1], "i" .. p[1], "k" .. p[1], "l" .. p[1], "n" .. p[1], "s" .. p[1], "u" .. p[1], "z" .. p[1]} | ||
}, | }, | ||
standard_chars = "AaĀāBbCcČčDdEeĒēFfGgĢģHhIiĪīJjKkĶķLlĻļMmNnŅņOoPpRrSsŠšTtUuŪūVvZzŽž" .. c.punc, | |||
} | } | ||
| Line 1,474: | Line 1,402: | ||
m["mi"] = { | m["mi"] = { | ||
" | "Māori", | ||
36451, | 36451, | ||
"poz-pep", | "poz-pep", | ||
| Line 1,481: | Line 1,409: | ||
remove_diacritics = c.macron, | remove_diacritics = c.macron, | ||
from = {"ng", "wh"}, | from = {"ng", "wh"}, | ||
to = {" | to = {"n" .. p[1], "w" .. p[1]} | ||
}, | }, | ||
} | } | ||
| Line 1,492: | Line 1,420: | ||
ancestors = "cu", | ancestors = "cu", | ||
translit = { | translit = { | ||
Cyrl = "mk-translit" | Cyrl = "mk-translit", | ||
-- FIXME: formerly no translit specified for Polyt; unclear if the default [[Module:grc-translit]] is | |||
-- acceptable, so we disable it for now | |||
Polyt | Polyt = false, | ||
}, | }, | ||
strip_diacritics = { | |||
Cyrl = { | Cyrl = { | ||
remove_diacritics = c.acute, | remove_diacritics = c.acute, | ||
remove_exceptions = {"Ѓ", "ѓ", "Ќ", "ќ"} | remove_exceptions = {"Ѓ", "ѓ", "Ќ", "ќ"} | ||
}, | }, | ||
}, | }, | ||
sort_key = { | sort_key = { | ||
| Line 1,511: | Line 1,438: | ||
to = {"д" .. p[1], "з" .. p[1], "и" .. p[1], "л" .. p[1], "н" .. p[1], "т" .. p[1], "ч" .. p[1]} | to = {"д" .. p[1], "з" .. p[1], "и" .. p[1], "л" .. p[1], "н" .. p[1], "т" .. p[1], "ч" .. p[1]} | ||
}, | }, | ||
}, | }, | ||
-- Polyt display_text, strip_diacritics, sort_key in [[Module:scripts/data]] | |||
standard_chars = { | |||
Cyrl = "АаБбВвГгДдЃѓЕеЖжЗзЅѕИиЈјКкЛлЉљМмНнЊњОоПпРрСсТтЌќУуФфХхЦцЧчЏџШш", | Cyrl = "АаБбВвГгДдЃѓЕеЖжЗзЅѕИиЈјКкЛлЉљМмНнЊњОоПпРрСсТтЌќУуФфХхЦцЧчЏџШш", | ||
c.punc | c.punc | ||
| Line 1,524: | Line 1,451: | ||
"dra-mal", | "dra-mal", | ||
"Mlym", | "Mlym", | ||
override_translit = true, | override_translit = true, | ||
-- Mlym translit in [[Module:scripts/data]] | |||
} | } | ||
| Line 1,536: | Line 1,463: | ||
translit = { | translit = { | ||
Cyrl = "mn-translit", | Cyrl = "mn-translit", | ||
Mong | -- Mong translit in [[Module:scripts/data]] | ||
}, | }, | ||
override_translit = true, | override_translit = true, | ||
display_text | -- Mong display_text and strip_diacritics in [[Module:scripts/data]] | ||
strip_diacritics = { | |||
Cyrl = {remove_diacritics = c.grave .. c.acute}, | Cyrl = {remove_diacritics = c.grave .. c.acute}, | ||
}, | }, | ||
sort_key = { | sort_key = { | ||
| Line 1,553: | Line 1,477: | ||
}, | }, | ||
}, | }, | ||
standard_chars = { | |||
Cyrl = "АаБбВвГгДдЕеЁёЖжЗзИиЙйЛлМмНнОоӨөРрСсТтУуҮүХхЦцЧчШшЫыЬьЭэЮюЯя—", | Cyrl = "АаБбВвГгДдЕеЁёЖжЗзИиЙйЛлМмНнОоӨөРрСсТтУуҮүХхЦцЧчШшЫыЬьЭэЮюЯя—", | ||
Brai = c.braille, | Brai = c.braille, | ||
| Line 1,560: | Line 1,484: | ||
} | } | ||
-- "mo" | -- "mo" is treated as "ro", see [[WT:LT]] | ||
m["mr"] = { | m["mr"] = { | ||
| Line 1,572: | Line 1,496: | ||
Modi = "mr-Modi-translit", | Modi = "mr-Modi-translit", | ||
}, | }, | ||
strip_diacritics = { | |||
Deva = { | Deva = { | ||
from = {"च़", "ज़", "झ़"}, | from = {"च़", "ज़", "झ़"}, | ||
| Line 1,586: | Line 1,510: | ||
"Latn, ms-Arab", | "Latn, ms-Arab", | ||
ancestors = "ms-cla", | ancestors = "ms-cla", | ||
standard_chars = { | |||
Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz", | Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz", | ||
c.punc | c.punc | ||
| Line 1,601: | Line 1,525: | ||
to = {"’"} | to = {"’"} | ||
}, | }, | ||
strip_diacritics = { | |||
from = {"’"}, | from = {"’"}, | ||
to = {"'"}, | to = {"'"}, | ||
| Line 1,651: | Line 1,575: | ||
ancestors = "gmq-mno, da", -- da as an (but not the) ancestor of nb was agreed on - do not change without discussion | ancestors = "gmq-mno, da", -- da as an (but not the) ancestor of nb was agreed on - do not change without discussion | ||
sort_key = s["no-sortkey"], | sort_key = s["no-sortkey"], | ||
standard_chars = s["no-standardchars"], | |||
} | } | ||
| Line 1,659: | Line 1,583: | ||
"bnt-ngu", | "bnt-ngu", | ||
"Latn", | "Latn", | ||
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron}, | |||
} | } | ||
| Line 1,688: | Line 1,612: | ||
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.diaer .. c.ringabove .. c.cedilla .. "'"}, | Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.diaer .. c.ringabove .. c.cedilla .. "'"}, | ||
}, | }, | ||
standard_chars = { | |||
Latn = " | Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZzÄäËëÏïÖöÜü", | ||
Brai = c.braille, | Brai = c.braille, | ||
c.punc | c.punc | ||
| Line 1,701: | Line 1,625: | ||
"Latn", | "Latn", | ||
ancestors = "gmq-mno", | ancestors = "gmq-mno", | ||
strip_diacritics = { | |||
remove_diacritics = c.grave .. c.acute, | remove_diacritics = c.grave .. c.acute, | ||
}, | }, | ||
sort_key = s["no-sortkey"], | sort_key = s["no-sortkey"], | ||
standard_chars = s["no-standardchars"], | |||
} | } | ||
| Line 1,715: | Line 1,639: | ||
ancestors = "gmq-mno", | ancestors = "gmq-mno", | ||
sort_key = s["no-sortkey"], | sort_key = s["no-sortkey"], | ||
standard_chars = s["no-standardchars"], | |||
} | } | ||
| Line 1,723: | Line 1,647: | ||
"bnt-ngu", | "bnt-ngu", | ||
"Latn", | "Latn", | ||
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron}, | |||
} | } | ||
| Line 1,751: | Line 1,675: | ||
"bnt-nys", | "bnt-nys", | ||
"Latn", | "Latn", | ||
strip_diacritics = {remove_diacritics = c.acute .. c.circ}, | |||
sort_key = { | sort_key = { | ||
from = {"ng'"}, | from = {"ng'"}, | ||
| Line 1,764: | Line 1,688: | ||
"Latn, Hebr", | "Latn, Hebr", | ||
ancestors = "pro", | ancestors = "pro", | ||
sort_key = { | sort_key = { | ||
Latn = { | Latn = { | ||
| Line 1,776: | Line 1,694: | ||
to = {"%1h"} | to = {"%1h"} | ||
}, | }, | ||
}, | }, | ||
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]] | |||
} | } | ||
| Line 1,817: | Line 1,735: | ||
translit = { | translit = { | ||
Cyrl = "os-translit", | Cyrl = "os-translit", | ||
Geor | -- Geor translit in [[Module:scripts/data]] | ||
}, | }, | ||
override_translit = true, | override_translit = true, | ||
| Line 1,830: | Line 1,748: | ||
}, | }, | ||
}, | }, | ||
strip_diacritics = { | |||
Cyrl = { | Cyrl = { | ||
remove_diacritics = c.grave .. c.acute, | remove_diacritics = c.grave .. c.acute, | ||
| Line 1,854: | Line 1,772: | ||
"inc-pan", | "inc-pan", | ||
"Guru, pa-Arab", | "Guru, pa-Arab", | ||
translit = { | translit = { | ||
Guru = "Guru-translit", | Guru = "Guru-translit", | ||
["pa-Arab"] = "pa-Arab-translit", | ["pa-Arab"] = "pa-Arab-translit", | ||
}, | }, | ||
strip_diacritics = { | |||
["pa-Arab"] = { | ["pa-Arab"] = { | ||
remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.nunghunna, | remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.nunghunna, | ||
| Line 1,875: | Line 1,792: | ||
ancestors = "sa", | ancestors = "sa", | ||
translit = { | translit = { | ||
Brah | -- Brah translit in [[Module:scripts/data]] | ||
Deva = "sa-translit", | Deva = "sa-translit", | ||
Beng = "pi-translit", | Beng = "pi-translit", | ||
| Line 1,886: | Line 1,803: | ||
Cakm = "Cakm-translit", | Cakm = "Cakm-translit", | ||
}, | }, | ||
strip_diacritics = { | |||
Thai = { | Thai = { | ||
from = {"ึ", u(0xF700), u(0xF70F)}, -- FIXME: Not clear what's going on with the PUA characters here. | from = {"ึ", u(0xF700), u(0xF70F)}, -- FIXME: Not clear what's going on with the PUA characters here. | ||
to = {"ิํ", "ฐ", "ญ"} | to = {"ิํ", "ฐ", "ญ"} | ||
}, | }, | ||
remove_diacritics = c.VS01 | Mymr = { | ||
remove_diacritics = c.VS01, | |||
}, | |||
}, | }, | ||
sort_key = { -- FIXME: This needs to be converted into the current standardized format. | sort_key = { -- FIXME: This needs to be converted into the current standardized format. | ||
| Line 1,909: | Line 1,828: | ||
to = {"a" .. p[1], "c" .. p[1], "e" .. p[1], "l" .. p[1], "n" .. p[1], "o" .. p[1], "s" .. p[1], "z" .. p[1], "z" .. p[2]} | to = {"a" .. p[1], "c" .. p[1], "e" .. p[1], "l" .. p[1], "n" .. p[1], "o" .. p[1], "s" .. p[1], "z" .. p[1], "z" .. p[2]} | ||
}, | }, | ||
standard_chars = "AaĄąBbCcĆćDdEeĘęFfGgHhIiJjKkLlŁłMmNnŃńOoÓóPpRrSsŚśTtUuWwYyZzŹźŻż" .. c.punc, | |||
} | } | ||
| Line 1,917: | Line 1,836: | ||
"ira-pat", | "ira-pat", | ||
"ps-Arab", | "ps-Arab", | ||
strip_diacritics = {remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.zwarakay .. c.superalef}, | |||
} | } | ||
| Line 1,932: | Line 1,851: | ||
}, | }, | ||
}, | }, | ||
standard_chars = { | |||
Latn = "AaÁáÂâÃãBbCcÇçDdEeÉéÊêFfGgHhIiÍíJjLlMmNnOoÓóÔôÕõPpQqRrSsTtUuÚúVvXxZz", | Latn = "AaÁáÂâÃãBbCcÇçDdEeÉéÊêFfGgHhIiÍíJjLlMmNnOoÓóÔôÕõPpQqRrSsTtUuÚúVvXxZz", | ||
Brai = c.braille, | Brai = c.braille, | ||
| Line 1,947: | Line 1,866: | ||
m["rm"] = { | m["rm"] = { | ||
" | "Romansh", | ||
13199, | 13199, | ||
"roa-rhe", | "roa-rhe", | ||
ancestors = "rm-old", | |||
"Latn", | "Latn", | ||
sort_key = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.small_e}, | sort_key = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.small_e}, | ||
| Line 1,973: | Line 1,893: | ||
}, | }, | ||
}, | }, | ||
-- Cyrs strip_diacritics, sort_key in [[Module:scripts/data]]; presumably not present | |||
standard_chars = { | |||
Latn = "AaĂăÂâBbCcDdEeFfGgHhIiÎîJjLlMmNnOoPpRrSsȘșTtȚțUuVvXxZz", | Latn = "AaĂăÂâBbCcDdEeFfGgHhIiÎîJjLlMmNnOoPpRrSsȘșTtȚțUuVvXxZz", | ||
Cyrl = "АаБбВвГгДдЕеЖжӁӂЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЫыЬьЭэЮюЯя", | Cyrl = "АаБбВвГгДдЕеЖжӁӂЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЫыЬьЭэЮюЯя", | ||
| Line 1,995: | Line 1,916: | ||
}, | }, | ||
}, | }, | ||
strip_diacritics = { | |||
Cyrl = { | Cyrl = { | ||
remove_diacritics = c.grave .. c.acute .. c.diaer, | remove_diacritics = c.grave .. c.acute .. c.diaer, | ||
| Line 2,006: | Line 1,927: | ||
Cyrl = { | Cyrl = { | ||
remove_diacritics = c.grave .. c.acute .. c.diaer, | remove_diacritics = c.grave .. c.acute .. c.diaer, | ||
from = { | from = { | ||
"і", "ѣ", "ѳ", "ѵ" | |||
"і", "ѣ", "ѳ", "ѵ" | |||
}, | }, | ||
to = { | to = { | ||
"и" .. p[1], "ь" .. p[1], "я" .. p[2], "я" .. p[3] | "и" .. p[1], "ь" .. p[1], "я" .. p[2], "я" .. p[3] | ||
} | } | ||
}, | }, | ||
}, | }, | ||
standard_chars = { | |||
Cyrl = "АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя—", | Cyrl = "АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя—", | ||
Brai = c.braille, | Brai = c.braille, | ||
| Line 2,029: | Line 1,947: | ||
"bnt-glb", | "bnt-glb", | ||
"Latn", | "Latn", | ||
strip_diacritics = {remove_diacritics = c.acute .. c.circ .. c.macron .. c.caron}, | |||
} | } | ||
| Line 2,040: | Line 1,958: | ||
Beng = "sa-Beng-translit", | Beng = "sa-Beng-translit", | ||
["as-Beng"] = "sa-Beng-translit", | ["as-Beng"] = "sa-Beng-translit", | ||
Brah | -- Brah translit in [[Module:scripts/data]] | ||
Deva = "sa-translit", | Deva = "sa-translit", | ||
Gujr = "sa-Gujr-translit", | Gujr = "sa-Gujr-translit", | ||
| Line 2,052: | Line 1,970: | ||
Mlym = "sa-Mlym-translit", | Mlym = "sa-Mlym-translit", | ||
Modi = "sa-Modi-translit", | Modi = "sa-Modi-translit", | ||
Mong | -- Mong, mnc-Mong, xwo-Mong translit in [[Module:scripts/data]] | ||
-- NOTE: Formerly used xal-translit for transliterating xwo-Mong but that only handles Cyrillic; it has | |||
-- code to transliterate xwo-Mong but it's broken so I've replaced it with the default xwo-translit. | |||
Mymr = "pi-translit", | Mymr = "pi-translit", | ||
Orya = "sa-Orya-translit", | Orya = "sa-Orya-translit", | ||
Shrd | -- Shrd translit in [[Module:scripts/data]] | ||
Sidd | -- Sidd translit in [[Module:scripts/data]] | ||
Sinh = "si-translit", | Sinh = "si-translit", | ||
Taml = "sa-Taml-translit", | Taml = "sa-Taml-translit", | ||
Telu = "sa-Telu-translit", | Telu = "sa-Telu-translit", | ||
Thai = "pi-translit", | Thai = "pi-translit", | ||
Tibt | -- Tibt translit in [[Module:scripts/data]] | ||
}, | }, | ||
display_text | -- Mong display_text and strip_diacritics in [[Module:scripts/data]] | ||
-- Tibt display_text, strip_diacritics, sort_key in [[Module:scripts/data]] | |||
strip_diacritics = { | |||
Deva = s["sa-Deva-stripdiacritics"], | |||
Mymr = { | |||
remove_diacritics = c.VS01, | |||
}, | |||
Thai = { | Thai = { | ||
from = {"ึ", u(0xF700), u(0xF70F)}, -- FIXME: Not clear what's going on with the PUA characters here. | from = {"ึ", u(0xF700), u(0xF70F)}, -- FIXME: Not clear what's going on with the PUA characters here. | ||
to = {"ิํ", "ฐ", "ญ"} | to = {"ิํ", "ฐ", "ญ"} | ||
}, | }, | ||
}, | }, | ||
sort_key = { | sort_key = { | ||
Deva = s["sa-Deva-stripdiacritics"], -- until we have a proper Sanskrit sorting algorithm. | |||
{ -- | Lana = { -- Tai Tham | ||
from = {"ā", "ī", "ū", "ḍ", "ḷ", "ḹ", "m[" .. c.dotabove .. c.dotbelow .. "]", "ṅ", "ñ", "ṇ", "ṛ", "ṝ", "ś", "ṣ", "ṭ" | from = {"ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ"}, | ||
to = {"a~", "i~", "u~", "d~", "l~", "l~~", "m~", "n~", "n~~", "n~~~", "r~", "r~~", "s~", "s~~", "t~", | to = {"ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ"}, | ||
}, | |||
Laoo = "Laoo-sortkey", | |||
Latn = { | |||
from = {"ā", "ī", "ū", "ḍ", "ḷ", "ḹ", "m[" .. c.dotabove .. c.dotbelow .. "]", "ṅ", "ñ", "ṇ", "ṛ", "ṝ", "ś", "ṣ", "ṭ"}, | |||
to = {"a~", "i~", "u~", "d~", "l~", "l~~", "m~", "n~", "n~~", "n~~~", "r~", "r~~", "s~", "s~~", "t~"}, | |||
}, | |||
Mymr = { | |||
remove_diacritics = c.VS01, | |||
}, | }, | ||
Thai = "Thai-sortkey", | |||
-- FIXME: The previous sort key which mixed all scripts removed ZWJ; I don't know which script(s) this was | |||
-- intended for and there are no other languages which remove it in the sort key AFAIK. If it needs to be | |||
-- removed, specify the script(s) it needs to be removed under or add handling for the "all" script that applies | |||
-- regardless of script. | |||
--all = { | |||
-- remove_diacritics = c.ZWJ, | |||
--}, | |||
}, | }, | ||
} | } | ||
| Line 2,092: | Line 2,025: | ||
"roa-sou", | "roa-sou", | ||
"Latn", | "Latn", | ||
ancestors = "sc-old", | |||
} | } | ||
| Line 2,100: | Line 2,034: | ||
"sd-Arab, Deva, Sind, Khoj", | "sd-Arab, Deva, Sind, Khoj", | ||
translit = { | translit = { | ||
Sind = "Sind-translit" | Sind = "Sind-translit", | ||
["sd-Arab"] = "sd-Arab-translit" | |||
}, | }, | ||
strip_diacritics = { | |||
["sd-Arab"] = { | ["sd-Arab"] = { | ||
remove_diacritics = c.kashida .. c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.superalef, | remove_diacritics = c.kashida .. c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.superalef, | ||
| Line 2,120: | Line 2,055: | ||
to = {"ˈ"} | to = {"ˈ"} | ||
}, | }, | ||
strip_diacritics = {remove_diacritics = c.macron .. c.dotbelow .. "'ˈ"}, | |||
sort_key = { | sort_key = { | ||
from = {"á", "č", "đ", "ŋ", "š", "ŧ", "ž"}, | from = {"á", "č", "đ", "ŋ", "š", "ŧ", "ž"}, | ||
to = {"a" .. p[1], "c" .. p[1], "d" .. p[1], "n" .. p[1], "s" .. p[1], "t" .. p[1], "z" .. p[1]} | to = {"a" .. p[1], "c" .. p[1], "d" .. p[1], "n" .. p[1], "s" .. p[1], "t" .. p[1], "z" .. p[1]} | ||
}, | }, | ||
standard_chars = "AaÁáBbCcČčDdĐđEeFfGgHhIiJjKkLlMmNnŊŋOoPpRrSsŠšTtŦŧUuVvZzŽž" .. c.punc, | |||
} | } | ||
| Line 2,140: | Line 2,075: | ||
9301, | 9301, | ||
"zls", | "zls", | ||
"Latn, Cyrl, Glag", | "Latn, Cyrl, Glag, Arab", | ||
ietf_subtag = "hbs", -- ISO 639-3 code, since "sh" is deprecated from ISO 639-1 | ietf_subtag = "hbs", -- ISO 639-3 code, since "sh" is deprecated from ISO 639-1 | ||
wikimedia_codes = "sh, bs, hr, sr", | wikimedia_codes = "sh, bs, hr, sr", | ||
strip_diacritics = { | |||
Latn = { | Latn = { | ||
remove_diacritics = c.grave .. c.acute .. c.tilde .. c.macron .. c.dgrave .. c.invbreve, | remove_diacritics = c.grave .. c.acute .. c.tilde .. c.macron .. c.dgrave .. c.invbreve, | ||
| Line 2,167: | Line 2,102: | ||
}, | }, | ||
}, | }, | ||
standard_chars = { | |||
Latn = "AaBbCcČčĆćDdĐđEeFfGgHhIiJjKkLlMmNnOoPpRrSsŠšTtUuVvZzŽž", | Latn = "AaBbCcČčĆćDdĐđEeFfGgHhIiJjKkLlMmNnOoPpRrSsŠšTtUuVvZzŽž", | ||
Cyrl = "АаБбВвГгДдЂђЕеЖжЗзИиЈјКкЛлЉљМмНнЊњОоПпРрСсТтЋћУуФфХхЦцЧчЏџШш", | Cyrl = "АаБбВвГгДдЂђЕеЖжЗзИиЈјКкЛлЉљМмНнЊњОоПпРрСсТтЋћУуФфХхЦцЧчЏџШш", | ||
| Line 2,190: | Line 2,125: | ||
ancestors = "zlw-osk", | ancestors = "zlw-osk", | ||
sort_key = {remove_diacritics = c.acute .. c.circ .. c.diaer .. c.caron}, | sort_key = {remove_diacritics = c.acute .. c.circ .. c.diaer .. c.caron}, | ||
standard_chars = "AaÁáÄäBbCcČčDdĎďEeÉéFfGgHhIiÍíJjKkLlĹ弾MmNnŇňOoÓóÔôPpRrŔŕSsŠšTtŤťUuÚúVvYyÝýZzŽž" .. c.punc, | |||
} | } | ||
| Line 2,198: | Line 2,133: | ||
"zls", | "zls", | ||
"Latn", | "Latn", | ||
strip_diacritics = { | |||
remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.dgrave .. c.invbreve .. c.dotbelow, | remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.dgrave .. c.invbreve .. c.dotbelow, | ||
remove_exceptions = {"Ć", "ć", "Ǵ", "ǵ", "Ś", "ś", "Ź", "ź"}, | remove_exceptions = {"Ć", "ć", "Ǵ", "ǵ", "Ś", "ś", "Ź", "ź"}, | ||
| Line 2,210: | Line 2,145: | ||
to = {"a" .. p[1], "c" .. p[1], "c" .. p[2], "d" .. p[1], "e", "e" .. p[1], "g" .. p[1], "g" .. p[2], "i" .. p[1], "l", "o" .. p[1], "s" .. p[1], "s" .. p[2], "u" .. p[1], "z" .. p[1], "z" .. p[2]}, | to = {"a" .. p[1], "c" .. p[1], "c" .. p[2], "d" .. p[1], "e", "e" .. p[1], "g" .. p[1], "g" .. p[2], "i" .. p[1], "l", "o" .. p[1], "s" .. p[1], "s" .. p[2], "u" .. p[1], "z" .. p[1], "z" .. p[2]}, | ||
}, | }, | ||
standard_chars = "AaBbCcČčDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsŠšTtUuVvZzŽž" .. c.punc, | |||
} | } | ||
| Line 2,225: | Line 2,160: | ||
"bnt-sho", | "bnt-sho", | ||
"Latn", | "Latn", | ||
strip_diacritics = {remove_diacritics = c.acute}, | |||
} | } | ||
| Line 2,233: | Line 2,168: | ||
"cus-som", | "cus-som", | ||
"Latn, Arab, Osma", | "Latn, Arab, Osma", | ||
strip_diacritics = { | |||
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ} | Latn = {remove_diacritics = c.grave .. c.acute .. c.circ} | ||
}, | }, | ||
| Line 2,245: | Line 2,180: | ||
translit = { | translit = { | ||
Elba = "Elba-translit", | Elba = "Elba-translit", | ||
Vith = "Vith-translit", | |||
}, | }, | ||
display_text | -- Grek display_text, sort_key in [[Module:scripts/data]] | ||
strip_diacritics = { | |||
Latn = { | Latn = { | ||
remove_diacritics = c.acute, | remove_diacritics = c.acute .. c.circ .. c.macron, | ||
from = {'^[ie] (%w)', '^të (%w)'}, to = {'%1', '%1'}, | from = {'^[ie] (%w)', '^të (%w)'}, to = {'%1', '%1'}, | ||
}, | }, | ||
Grek = { -- Diacritic removal from Grek- | Grek = { -- Diacritic removal from Grek-stripdiacritics excluded. | ||
from = | from = m_langdata.chars_substitutions["Grek-stripdiacritics"].from, | ||
to = | to = m_langdata.chars_substitutions["Grek-stripdiacritics"].to, | ||
}, | }, | ||
}, | }, | ||
sort_key = { | sort_key = { | ||
Latn = { | Latn = { | ||
remove_diacritics = c.acute .. c.circ .. c.tilde .. c.breve .. c.caron, | remove_diacritics = c.acute .. c.circ .. c.macron .. c.tilde .. c.breve .. c.caron, | ||
from = {'^[ie] (%w)', '^të (%w)', 'ç', 'dh', 'ë', 'gj', 'll', 'nj', 'rr', 'sh', 'th', 'xh', 'zh'}, | from = {'^[ie] (%w)', '^të (%w)', 'ç', 'dh', 'ë', 'gj', 'll', 'nj', 'rr', 'sh', 'th', 'xh', 'zh'}, | ||
to = {'%1', '%1', 'c'..p[1], 'd'..p[1], 'e'..p[1], 'g'..p[1], 'l'..p[1], 'n'..p[1], 'r'..p[1], 's'..p[1], 't'..p[1], 'x'..p[1], 'z'..p[1]}, | to = {'%1', '%1', 'c'..p[1], 'd'..p[1], 'e'..p[1], 'g'..p[1], 'l'..p[1], 'n'..p[1], 'r'..p[1], 's'..p[1], 't'..p[1], 'x'..p[1], 'z'..p[1]}, | ||
} | } | ||
-- TODO: Grek | -- TODO: Grek if the default sort key is unsuitable | ||
}, | }, | ||
standard_chars = { | |||
Latn = "AaBbCcÇçDdEeËëFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvXxYyZz", | Latn = "AaBbCcÇçDdEeËëFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvXxYyZz", | ||
c.punc | c.punc | ||
| Line 2,278: | Line 2,212: | ||
"bnt-ngu", | "bnt-ngu", | ||
"Latn", | "Latn", | ||
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron}, | |||
} | } | ||
| Line 2,286: | Line 2,220: | ||
"bnt-sts", | "bnt-sts", | ||
"Latn", | "Latn", | ||
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron}, | |||
} | } | ||
| Line 2,296: | Line 2,230: | ||
ancestors = "osn", | ancestors = "osn", | ||
translit = { | translit = { | ||
Sund = " | Sund = "Sund-translit" | ||
}, | }, | ||
} | } | ||
| Line 2,312: | Line 2,246: | ||
to = {"o", "ae", "oe", "ss", "z" .. p[1], "ä", "ö"} | to = {"o", "ae", "oe", "ss", "z" .. p[1], "ä", "ö"} | ||
}, | }, | ||
standard_chars = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvXxYyÅåÄäÖö" .. c.punc, | |||
} | } | ||
| Line 2,357: | Line 2,291: | ||
}, | }, | ||
override_translit = true, | override_translit = true, | ||
strip_diacritics = { | |||
Cyrl = s["tg- | Cyrl = s["tg-stripdiacritics"], | ||
Latn = s["tg- | Latn = s["tg-stripdiacritics"], | ||
}, | }, | ||
sort_key = { | sort_key = { | ||
| Line 2,395: | Line 2,329: | ||
"trk-ogz", | "trk-ogz", | ||
"Latn, Cyrl, Arab", | "Latn, Cyrl, Arab", | ||
strip_diacritics = { | |||
Latn = s["tk- | Latn = s["tk-stripdiacritics"], | ||
Cyrl = s["tk- | Cyrl = s["tk-stripdiacritics"], | ||
}, | }, | ||
sort_key = { | sort_key = { | ||
| Line 2,409: | Line 2,343: | ||
}, | }, | ||
}, | }, | ||
ancestors = "trk-eog", | |||
} | } | ||
| Line 2,420: | Line 2,355: | ||
}, | }, | ||
override_translit = true, | override_translit = true, | ||
strip_diacritics = { | |||
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ} | Latn = {remove_diacritics = c.grave .. c.acute .. c.circ} | ||
}, | }, | ||
standard_chars = { | |||
Latn = "AaBbKkDdEeGgHhIiLlMmNnOoPpRrSsTtUuWwYy", | Latn = "AaBbKkDdEeGgHhIiLlMmNnOoPpRrSsTtUuWwYy", | ||
c.punc | c.punc | ||
| Line 2,444: | Line 2,379: | ||
"poz-ton", | "poz-ton", | ||
"Latn", | "Latn", | ||
strip_diacritics = {remove_diacritics = c.acute}, | |||
sort_key = {remove_diacritics = c.macron}, | sort_key = {remove_diacritics = c.macron}, | ||
} | } | ||
| Line 2,468: | Line 2,403: | ||
} | } | ||
}, | }, | ||
standard_chars = "AaÂâBbCcÇçDdEeFfGgĞğHhIıİiÎîJjKkLlMmNnOoÖöPpRrSsŞşTtUuÛûÜüVvYyZz" .. c.punc, | |||
} | } | ||
| Line 2,484: | Line 2,419: | ||
"Cyrl, Latn, tt-Arab", | "Cyrl, Latn, tt-Arab", | ||
translit = { | translit = { | ||
Cyrl = "tt-translit" | Cyrl = "tt-translit", | ||
["tt-Arab"] = "tt-translit" | |||
}, | }, | ||
override_translit = true, | --override_translit = true, -- enable override until Module code can detect Russian loans such as [[аэропорт]] | ||
dotted_dotless_i = true, | dotted_dotless_i = true, | ||
sort_key = { | sort_key = { | ||
| Line 2,506: | Line 2,442: | ||
} | } | ||
-- "tw" | -- "tw" is treated as "ak", see [[WT:LT]] | ||
m["ty"] = { | m["ty"] = { | ||
| Line 2,533: | Line 2,469: | ||
"zle", | "zle", | ||
"Cyrl", | "Cyrl", | ||
ancestors = "zle- | ancestors = "zle-muk", | ||
translit = "uk-translit", | translit = "uk-translit", | ||
strip_diacritics = {remove_diacritics = c.grave .. c.acute}, | |||
sort_key = { | sort_key = { | ||
remove_diacritics = c.grave .. c.acute, | remove_diacritics = c.grave .. c.acute, | ||
| Line 2,547: | Line 2,483: | ||
} | } | ||
}, | }, | ||
standard_chars = "АаБбВвГгДдЕеЄєЖжЗзИиІіЇїЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЬьЮюЯя" .. c.punc:gsub("'", ""), -- Exclude apostrophe. | |||
} | } | ||
| Line 2,558: | Line 2,494: | ||
["ur-Arab"] = "ur-translit" | ["ur-Arab"] = "ur-translit" | ||
}, | }, | ||
strip_diacritics = { | |||
["ur-Arab"] = { | ["ur-Arab"] = { | ||
-- character "ۂ" code U+06C2 to "ه" and "هٔ" | -- character "ۂ" code U+06C2 to "ه" and "هٔ" (U+0647 + U+0654) to "ه"; hamzatu l-waṣli to a regular alif | ||
from = {"هٔ", "ۂ", "ٱ"}, | from = {"هٔ", "ۂ", "ٱ"}, | ||
to = {"ہ", "ہ", "ا"}, | to = {"ہ", "ہ", "ا"}, | ||
remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.nunghunna .. c.superalef | remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.nunghunna .. c.superalef | ||
}, | }, | ||
}, | }, | ||
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]] | |||
standard_chars = { | |||
["ur-Arab"] = "ایببپتثجچحخدذرزژسشصضطظعغفقکگلࣇڷمنݨوؤہھئٹڈڑآے", | ["ur-Arab"] = "ایببپتثجچحخدذرزژسشصضطظعغفقکگلࣇڷمنݨوؤہھئٹڈڑآے", | ||
c.punc, | c.punc, | ||
| Line 2,597: | Line 2,527: | ||
to = {"е" .. p[1], "я" .. p[1], "я" .. p[2], "я" .. p[3], "я" .. p[4]} | to = {"е" .. p[1], "я" .. p[1], "я" .. p[2], "я" .. p[3], "я" .. p[4]} | ||
}, | }, | ||
}, | |||
strip_diacritics = { | |||
["fa-Arab"] = "ar-stripdiacritics", | |||
}, | }, | ||
} | } | ||
| Line 2,646: | Line 2,579: | ||
"bnt-ngu", | "bnt-ngu", | ||
"Latn", | "Latn", | ||
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron}, | |||
} | } | ||
| Line 2,658: | Line 2,591: | ||
Hebr = "yi-translit", | Hebr = "yi-translit", | ||
}, | }, | ||
-- Hebr display_text, strip_diacritics, sort_key in [[Module:scripts/data]] | |||
} | } | ||
| Line 2,674: | Line 2,599: | ||
"alv-yor", | "alv-yor", | ||
"Latn, Arab", | "Latn, Arab", | ||
strip_diacritics = { | |||
Latn = {remove_diacritics = c.grave .. c.acute .. c.macron} | Latn = {remove_diacritics = c.grave .. c.acute .. c.macron} | ||
}, | }, | ||
| Line 2,717: | Line 2,642: | ||
"bnt-ngu", | "bnt-ngu", | ||
"Latn", | "Latn", | ||
strip_diacritics = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron}, | |||
} | } | ||
return require("Module:languages").finalizeData(m, "language") | return require("Module:languages").finalizeData(m, "language") | ||