Module:languages/data/2: Difference between revisions
m 1 revision imported |
No edit summary Tag: Manual revert |
||
| Line 20: | Line 20: | ||
s["de-Latn-standardchars"] = "AaÄäBbCcDdEeFfGgHhIiJjKkLlMmNnOoÖöPpQqRrSsẞßTtUuÜüVvWwXxYyZz" | s["de-Latn-standardchars"] = "AaÄäBbCcDdEeFfGgHhIiJjKkLlMmNnOoÖöPpQqRrSsẞßTtUuÜüVvWwXxYyZz" | ||
s["ka- | s["ka-entryname"] = {remove_diacritics = c.circ} | ||
s["no-sortkey"] = { | s["no-sortkey"] = { | ||
| Line 31: | Line 31: | ||
s["no-standardchars"] = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÆæØøÅå" .. c.punc | s["no-standardchars"] = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÆæØøÅå" .. c.punc | ||
s[" | s["tg-entryname"] = {remove_diacritics = c.grave .. c.acute} | ||
} | |||
s["tk-entryname"] = {remove_diacritics = c.macron} | |||
s["tk- | |||
local m = {} | local m = {} | ||
| Line 47: | Line 42: | ||
"cus-eas", | "cus-eas", | ||
"Latn, Ethi", | "Latn, Ethi", | ||
entry_name = { | |||
Latn = {remove_diacritics = c.acute}, | Latn = {remove_diacritics = c.acute}, | ||
}, | }, | ||
| Line 59: | Line 54: | ||
translit = { | translit = { | ||
Cyrl = "ab-translit", | Cyrl = "ab-translit", | ||
- | Geor = "Geor-translit", | ||
}, | }, | ||
override_translit = true, | override_translit = true, | ||
| Line 65: | Line 60: | ||
Cyrl = s["cau-Cyrl-displaytext"] | Cyrl = s["cau-Cyrl-displaytext"] | ||
}, | }, | ||
entry_name = { | |||
Cyrl = { | Cyrl = { | ||
remove_diacritics = c.acute, | remove_diacritics = c.acute, | ||
| Line 71: | Line 66: | ||
to = {"а"}, | to = {"а"}, | ||
}, | }, | ||
Latn = s["cau-Latn- | Latn = s["cau-Latn-entryname"], | ||
}, | }, | ||
sort_key = { | sort_key = { | ||
| Line 95: | Line 90: | ||
29572, | 29572, | ||
"ira-cen", | "ira-cen", | ||
"Avst, Gujr | "Avst, Gujr", | ||
translit = { | translit = { | ||
Avst = "Avst-translit" | Avst = "Avst-translit" | ||
| Line 142: | Line 137: | ||
13955, | 13955, | ||
"sem-arb", | "sem-arb", | ||
"Arab, Hebr, Syrc, Brai | "Arab, Hebr, Syrc, Brai", | ||
translit = { | translit = { | ||
Arab = "ar-translit" | Arab = "ar-translit" | ||
}, | }, | ||
display_text = { | |||
Arab = "ar- | Hebr = "Hebr-common", | ||
}, | |||
entry_name = { | |||
Arab = "ar-entryname", | |||
Hebr = "Hebr-common", | |||
}, | |||
sort_key = { | |||
Hebr = "Hebr-common", | |||
}, | }, | ||
} | } | ||
| Line 175: | Line 176: | ||
Cyrl = s["cau-Cyrl-displaytext"], | Cyrl = s["cau-Cyrl-displaytext"], | ||
}, | }, | ||
entry_name = { | |||
Cyrl = s["cau-Cyrl- | Cyrl = s["cau-Cyrl-entryname"], | ||
Latn = s["cau-Latn- | Latn = s["cau-Latn-entryname"], | ||
}, | }, | ||
sort_key = { | sort_key = { | ||
| Line 201: | Line 202: | ||
ancestors = "trk-oat", | ancestors = "trk-oat", | ||
dotted_dotless_i = true, | dotted_dotless_i = true, | ||
entry_name = { | |||
Latn = { | Latn = { | ||
from = {"ʼ"}, | from = {"ʼ"}, | ||
| Line 207: | Line 208: | ||
}, | }, | ||
["fa-Arab"] = { | ["fa-Arab"] = { | ||
module = "ar- | module = "ar-entryname", | ||
["from"] = { | ["from"] = { | ||
"ۆ", | "ۆ", | ||
| Line 266: | Line 267: | ||
"zle", | "zle", | ||
"Cyrl, Latn", | "Cyrl, Latn", | ||
ancestors = "zle- | ancestors = "zle-obe", | ||
translit = { | translit = { | ||
Cyrl = "be-translit", | Cyrl = "be-translit", | ||
}, | }, | ||
entry_name = { | |||
Cyrl = { | Cyrl = { | ||
remove_diacritics = c.grave .. c.acute, | remove_diacritics = c.grave .. c.acute, | ||
| Line 292: | Line 293: | ||
}, | }, | ||
}, | }, | ||
standardChars = { | |||
Cyrl = "АаБбВвГгДдЕеЁёЖжЗзІіЙйКкЛлМмНнОоПпРрСсТтУуЎўФфХхЦцЧчШшЫыЬьЭэЮюЯя", | Cyrl = "АаБбВвГгДдЕеЁёЖжЗзІіЙйКкЛлМмНнОоПпРрСсТтУуЎўФфХхЦцЧчШшЫыЬьЭэЮюЯя", | ||
Latn = "AaBbCcĆćČčDdEeFfGgHhIiJjKkLlŁłMmNnŃńOoPpRrSsŚśŠšTtUuŬŭVvYyZzŹźŽž", | Latn = "AaBbCcĆćČčDdEeFfGgHhIiJjKkLlŁłMmNnŃńOoPpRrSsŚśŠšTtUuŬŭVvYyZzŹźŽž", | ||
| Line 306: | Line 307: | ||
ancestors = "cu-bgm", | ancestors = "cu-bgm", | ||
translit = "bg-translit", | translit = "bg-translit", | ||
entry_name = { | |||
remove_diacritics = c.grave .. c.acute, | remove_diacritics = c.grave .. c.acute, | ||
remove_exceptions = {"%f[^%z%s]ѝ%f[%z%s]"}, | remove_exceptions = {"%f[^%z%s]ѝ%f[%z%s]"}, | ||
| Line 314: | Line 315: | ||
remove_exceptions = {"%f[^%z%s]ѝ%f[%z%s]"}, | remove_exceptions = {"%f[^%z%s]ѝ%f[%z%s]"}, | ||
}, | }, | ||
standardChars = "АаБбВвГгДдЕеЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЬьЮюЯя" .. c.punc, | |||
} | } | ||
| Line 362: | Line 363: | ||
"Tibt", -- sometimes Deva? | "Tibt", -- sometimes Deva? | ||
ancestors = "xct", | ancestors = "xct", | ||
translit = "Tibt-translit", | |||
override_translit = true, | override_translit = true, | ||
-- Tibt | display_text = s["Tibt-displaytext"], | ||
entry_name = s["Tibt-entryname"], | |||
sort_key = "Tibt-sortkey", | |||
} | } | ||
| Line 385: | Line 389: | ||
ancestors = "roa-oca", | ancestors = "roa-oca", | ||
sort_key = {remove_diacritics = c.grave .. c.acute .. c.diaer .. c.cedilla .. "·"}, | sort_key = {remove_diacritics = c.grave .. c.acute .. c.diaer .. c.cedilla .. "·"}, | ||
standardChars = "AaÀàBbCcÇçDdEeÉéÈèFfGgHhIiÍíÏïJjLlMmNnOoÓóÒòPpQqRrSsTtUuÚúÜüVvXxYyZz·" .. c.punc, | |||
} | } | ||
| Line 401: | Line 405: | ||
Cyrl = s["cau-Cyrl-displaytext"] | Cyrl = s["cau-Cyrl-displaytext"] | ||
}, | }, | ||
entry_name = { | |||
Cyrl = s["cau-Cyrl- | Cyrl = s["cau-Cyrl-entryname"], | ||
Latn = s["cau-Latn- | Latn = s["cau-Latn-entryname"], | ||
}, | }, | ||
sort_key = { | sort_key = { | ||
| Line 434: | Line 438: | ||
to = {"c" .. p[1], "g" .. p[1], "s" .. p[1], "s" .. p[2]} | to = {"c" .. p[1], "g" .. p[1], "s" .. p[1], "s" .. p[2]} | ||
}, | }, | ||
standardChars = "AaÀàBbCcDdEeÈèFfGgHhIiÌìÏïJjLlMmNnOoÒòPpQqRrSsTtUuÙùÜüVvZz" .. c.punc, | |||
} | } | ||
| Line 457: | Line 461: | ||
to = {"a" .. p[1], "c" .. p[1], "d" .. p[1], "e" .. p[1], "e" .. p[2], "h" .. p[1], "i" .. p[1], "n" .. p[1], "o" .. p[1], "r" .. p[1], "s" .. p[1], "t" .. p[1], "u" .. p[1], "u" .. p[2], "y" .. p[1], "z" .. p[1]} | to = {"a" .. p[1], "c" .. p[1], "d" .. p[1], "e" .. p[1], "e" .. p[2], "h" .. p[1], "i" .. p[1], "n" .. p[1], "o" .. p[1], "r" .. p[1], "s" .. p[1], "t" .. p[1], "u" .. p[1], "u" .. p[2], "y" .. p[1], "z" .. p[1]} | ||
}, | }, | ||
standardChars = "AaÁáBbCcČčDdĎďEeÉéĚěFfGgHhIiÍíJjKkLlMmNnŇňOoÓóPpRrŘřSsŠšTtŤťUuÚúŮůVvYyÝýZzŽž" .. c.punc, | |||
} | } | ||
| Line 464: | Line 468: | ||
35499, | 35499, | ||
"zls", | "zls", | ||
"Cyrs, Glag | "Cyrs, Glag", | ||
translit = { | translit = { | ||
Cyrs = "Cyrs-translit", | Cyrs = "Cyrs-translit", | ||
Glag = "Glag-translit" | Glag = "Glag-translit" | ||
}, | }, | ||
- | entry_name = { | ||
Cyrs = s["Cyrs-entryname"] | |||
}, | |||
sort_key = { | |||
Cyrs = s["Cyrs-sortkey"] | |||
}, | |||
} | } | ||
| Line 497: | Line 506: | ||
to = {"c" .. p[1], "d" .. p[1], "f" .. p[1], "g" .. p[1], "l" .. p[1], "p" .. p[1], "r" .. p[1], "t" .. p[1]} | to = {"c" .. p[1], "d" .. p[1], "f" .. p[1], "g" .. p[1], "l" .. p[1], "p" .. p[1], "r" .. p[1], "t" .. p[1]} | ||
}, | }, | ||
standardChars = "ÂâAaBbCcDdEeÊêFfGgHhIiÎîLlMmNnOoÔôPpRrSsTtUuÛûWwŴŵYyŶŷ" .. c.punc, | |||
} | } | ||
| Line 512: | Line 521: | ||
to = {"z" .. p[1], "z" .. p[2], "z" .. p[3]} | to = {"z" .. p[1], "z" .. p[2], "z" .. p[3]} | ||
}, | }, | ||
standardChars = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÆæØøÅå" .. c.punc, | |||
} | } | ||
| Line 520: | Line 529: | ||
"gmw-hgm", | "gmw-hgm", | ||
"Latn, Latf, Brai", | "Latn, Latf, Brai", | ||
ancestors = " | ancestors = "gmh", | ||
sort_key = { | sort_key = { | ||
Latn = s["de-Latn-sortkey"], | Latn = s["de-Latn-sortkey"], | ||
Latf = s["de-Latn-sortkey"], | Latf = s["de-Latn-sortkey"], | ||
}, | }, | ||
standardChars = { | |||
Latn = s["de-Latn-standardchars"], | Latn = s["de-Latn-standardchars"], | ||
Latf = s["de-Latn-standardchars"], | Latf = s["de-Latn-standardchars"], | ||
| Line 542: | Line 551: | ||
Diak = "Diak-translit", | Diak = "Diak-translit", | ||
}, | }, | ||
override_translit = true, | override_translit = true, | ||
} | } | ||
| Line 552: | Line 560: | ||
"Tibt", | "Tibt", | ||
ancestors = "xct", | ancestors = "xct", | ||
translit = "Tibt-translit", | |||
override_translit = true, | override_translit = true, | ||
-- Tibt | display_text = s["Tibt-displaytext"], | ||
entry_name = s["Tibt-entryname"], | |||
sort_key = "Tibt-sortkey", | |||
} | } | ||
| Line 574: | Line 585: | ||
"Grek, Polyt, Brai", | "Grek, Polyt, Brai", | ||
ancestors = "el-kth", | ancestors = "el-kth", | ||
translit = "el-translit", | translit = { | ||
Grek = "el-translit", | |||
Polyt = "grc-translit", | |||
}, | |||
override_translit = true, | override_translit = true, | ||
-- Grek | display_text = { | ||
Grek = s["Grek-displaytext"], | |||
Polyt = s["Polyt-displaytext"], | |||
}, | |||
entry_name = { | |||
Grek = s["Grek-entryname"], | |||
Polyt = s["Polyt-entryname"], | |||
}, | |||
sort_key = { | |||
Grek = s["Grek-sortkey"], | |||
Polyt = s["Polyt-sortkey"], | |||
}, | |||
standardChars = { | |||
Grek = "΅·ͺ΄ΑαΆάΒβΓγΔδΕεέΈΖζΗηΉήΘθΙιΊίΪϊΐΚκΛλΜμΝνΞξΟοΌόΠπΡρΣσςΤτΥυΎύΫϋΰΦφΧχΨψΩωΏώ", | Grek = "΅·ͺ΄ΑαΆάΒβΓγΔδΕεέΈΖζΗηΉήΘθΙιΊίΪϊΐΚκΛλΜμΝνΞξΟοΌόΠπΡρΣσςΤτΥυΎύΫϋΰΦφΧχΨψΩωΏώ", | ||
Brai = c.braille, | Brai = c.braille, | ||
| Line 595: | Line 620: | ||
-- Many of these are needed for sorting language names. | -- Many of these are needed for sorting language names. | ||
remove_diacritics = "'\"%-%.,%s·ʻʼ" .. c.diacritics, | remove_diacritics = "'\"%-%.,%s·ʻʼ" .. c.diacritics, | ||
-- These are found in | -- These are found in entry names. | ||
from = {"[ɒæ🅱¢©ᴄðđəǝɜɡħʜıɨłŋɲøɔœꝑꝓꝕßʋ]"}, | from = {"[ɒæ🅱¢©ᴄðđəǝɜɡħʜıɨłŋɲøɔœꝑꝓꝕßʋ]"}, | ||
to = {{ | to = {{ | ||
| Line 607: | Line 632: | ||
}, | }, | ||
}, | }, | ||
standardChars = { | |||
Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz", | Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz", | ||
Brai = c.braille, | Brai = c.braille, | ||
| Line 624: | Line 649: | ||
to = {"c" .. p[1], "g" .. p[1], "h" .. p[1], "j" .. p[1], "s" .. p[1], "u" .. p[1]} | to = {"c" .. p[1], "g" .. p[1], "h" .. p[1], "j" .. p[1], "s" .. p[1], "u" .. p[1]} | ||
}, | }, | ||
standardChars = "AaBbCcĈĉDdEeFfGgĜĝHhĤĥIiJjĴĵKkLlMmNnOoPpRrSsŜŝTtUuŬŭVvZz" .. c.punc, | |||
} | } | ||
| Line 641: | Line 666: | ||
}, | }, | ||
}, | }, | ||
standardChars = { | |||
Latn = "AaÁáBbCcDdEeÉéFfGgHhIiÍíJjLlMmNnÑñOoÓóPpQqRrSsTtUuÚúÜüVvXxYyZz", | Latn = "AaÁáBbCcDdEeÉéFfGgHhIiÍíJjLlMmNnÑñOoÓóPpQqRrSsTtUuÚúÜüVvXxYyZz", | ||
Brai = c.braille, | Brai = c.braille, | ||
| Line 663: | Line 688: | ||
} | } | ||
}, | }, | ||
standardChars = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvÕõÄäÖöÜü" .. c.punc, | |||
} | } | ||
| Line 675: | Line 700: | ||
to = {"c" .. p[1], "n" .. p[1]} | to = {"c" .. p[1], "n" .. p[1]} | ||
}, | }, | ||
standardChars = "AaBbDdEeFfGgHhIiJjKkLlMmNnÑñOoPpRrSsTtUuXxZz" .. c.punc, | |||
} | } | ||
| Line 684: | Line 709: | ||
"fa-Arab, Hebr", | "fa-Arab, Hebr", | ||
ancestors = "fa-cls", | ancestors = "fa-cls", | ||
display_text = { | |||
Hebr = "Hebr-common", | |||
}, | |||
entry_name = { | |||
["fa-Arab"] = { | ["fa-Arab"] = { | ||
-- character "ۂ" code U+06C2 to "ه" and "هٔ" (U+0647 + U+0654) to "ه"; hamzatu l-waṣli to a regular alif | -- character "ۂ" code U+06C2 to "ه" and "هٔ" (U+0647 + U+0654) to "ه"; hamzatu l-waṣli to a regular alif | ||
from = {"هٔ", "ٱ"}, -- character "ۂ" code U+06C2 to "ه"; hamzatu l-waṣli to a regular alif | from = {"هٔ", "ٱ"}, -- character "ۂ" code U+06C2 to "ه"; hamzatu l-waṣli to a regular alif | ||
to = {"ه", "ا"}, | to = {"ه", "ا"}, | ||
remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.superalef, | remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.superalef, | ||
}, | }, | ||
Hebr = "Hebr-common", | |||
}, | |||
sort_key = { | |||
Hebr = "Hebr-common", | |||
}, | }, | ||
} | } | ||
| Line 711: | Line 742: | ||
to = {"’"} | to = {"’"} | ||
}, | }, | ||
entry_name = { -- used to indicate gemination of the next consonant | |||
remove_diacritics = "ˣ", | remove_diacritics = "ˣ", | ||
from = {"’"}, | from = {"’"}, | ||
| Line 717: | Line 748: | ||
}, | }, | ||
sort_key = { -- [[Appendix:Finnish alphabet#Collation]] + "aͤ" and "oͤ" as historical variants of "ä" and "ö". | sort_key = { -- [[Appendix:Finnish alphabet#Collation]] + "aͤ" and "oͤ" as historical variants of "ä" and "ö". | ||
remove_diacritics = "' | remove_diacritics = "':" .. c.diacritics, | ||
remove_exceptions = { | remove_exceptions = { | ||
"a[" .. c.ringabove .. c.diaer .. c.small_e .. "]", -- åäaͤ | "a[" .. c.ringabove .. c.diaer .. c.small_e .. "]", -- åäaͤ | ||
| Line 726: | Line 757: | ||
to = {"ae", "d", "l", "n", "oe", "ss", "th", "y", "z" .. p[1], "ä", "ö", "ö", "%1"} | to = {"ae", "d", "l", "n", "oe", "ss", "th", "y", "z" .. p[1], "ä", "ö", "ö", "%1"} | ||
}, | }, | ||
standardChars = "AaBbDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvYyÄäÖö" .. c.punc, | |||
} | } | ||
| Line 745: | Line 776: | ||
to = {"a" .. p[1], "d" .. p[1], "i" .. p[1], "o" .. p[1], "u" .. p[1], "y" .. p[1], "z" .. p[1], "z" .. p[2]} | to = {"a" .. p[1], "d" .. p[1], "i" .. p[1], "o" .. p[1], "u" .. p[1], "y" .. p[1], "z" .. p[1], "z" .. p[2]} | ||
}, | }, | ||
standardChars = "AaÁáBbDdÐðEeFfGgHhIiÍíJjKkLlMmNnOoÓóPpRrSsTtUuÚúVvYyÝýÆæØø" .. c.punc, | |||
} | } | ||
| Line 753: | Line 784: | ||
"roa-oil", | "roa-oil", | ||
"Latn, Brai", | "Latn, Brai", | ||
display_text = { | |||
Latn = { | |||
from = {"'"}, | |||
to = {"’"} | |||
}, | |||
}, | |||
entry_name = { | |||
Latn = { | |||
from = {"’"}, | |||
to = {"'"}, | |||
}, | |||
}, | |||
ancestors = "frm", | ancestors = "frm", | ||
sort_key = { | sort_key = { | ||
Latn = s["roa-oil-sortkey"] | Latn = s["roa-oil-sortkey"] | ||
}, | }, | ||
standardChars = { | |||
Latn = "AaÀàÂâBbCcÇçDdEeÉéÈèÊêËëFfGgHhIiÎîÏïJjLlMmNnOoÔôŒœPpQqRrSsTtUuÙùÛûÜüVvXxYyZz", | Latn = "AaÀàÂâBbCcÇçDdEeÉéÈèÊêËëFfGgHhIiÎîÏïJjLlMmNnOoÔôŒœPpQqRrSsTtUuÙùÛûÜüVvXxYyZz", | ||
Brai = c.braille, | Brai = c.braille, | ||
| Line 774: | Line 817: | ||
to = {"i"} | to = {"i"} | ||
}, | }, | ||
standardChars = "AaâäàÆæBbCcDdEeéêëèFfGgHhIiïìYyỳJjKkLlMmNnOoôöòPpRrSsTtUuúûüùVvWwZz" .. c.punc, | |||
} | } | ||
| Line 788: | Line 831: | ||
to = {"bh", "ch", "dh", "fh", "gh", "mh", "ph", "sh", "th"} | to = {"bh", "ch", "dh", "fh", "gh", "mh", "ph", "sh", "th"} | ||
}, | }, | ||
standardChars = "AaÁáBbCcDdEeÉéFfGgHhIiÍíLlMmNnOoÓóPpRrSsTtUuÚúVv" .. c.punc, | |||
} | } | ||
| Line 798: | Line 841: | ||
ancestors = "mga", | ancestors = "mga", | ||
sort_key = {remove_diacritics = c.grave .. c.acute}, | sort_key = {remove_diacritics = c.grave .. c.acute}, | ||
standardChars = "AaÀàBbCcDdEeÈèFfGgHhIiÌìLlMmNnOoÒòPpRrSsTtUuÙù" .. c.punc, | |||
} | } | ||
| Line 811: | Line 854: | ||
to = {"n" .. p[1]} | to = {"n" .. p[1]} | ||
}, | }, | ||
standardChars = "AaÁáBbCcDdEeÉéFfGgHhIiÍíÏïLlMmNnÑñOoÓóPpQqRrSsTtUuÚúÜüVvXxZz" .. c.punc, | |||
} | |||
m["gn"] = { | |||
"Guaraní", | |||
35876, | |||
"tup-gua", | |||
"Latn", | |||
} | } | ||
| Line 823: | Line 873: | ||
Gujr = "gu-translit", | Gujr = "gu-translit", | ||
}, | }, | ||
entry_name = { | |||
Arab = {remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.kasra .. c.shadda .. c.sukun}, | Arab = {remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.kasra .. c.shadda .. c.sukun}, | ||
Gujr = {remove_diacritics = "઼"}, | Gujr = {remove_diacritics = "઼"}, | ||
| Line 836: | Line 886: | ||
ancestors = "mga", | ancestors = "mga", | ||
sort_key = {remove_diacritics = c.cedilla .. "-"}, | sort_key = {remove_diacritics = c.cedilla .. "-"}, | ||
standardChars = "AaBbCcÇçDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwYy" .. c.punc, | |||
} | } | ||
| Line 844: | Line 894: | ||
"cdc-wst", | "cdc-wst", | ||
"Latn, Arab", | "Latn, Arab", | ||
entry_name = { | |||
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.macron} | Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.macron} | ||
}, | }, | ||
| Line 861: | Line 911: | ||
"Hebr, Phnx, Brai, Samr", | "Hebr, Phnx, Brai, Samr", | ||
ancestors = "he-med", | ancestors = "he-med", | ||
- | display_text = { | ||
- | Hebr = "Hebr-common", | ||
-- | }, | ||
entry_name = { | |||
Hebr = "Hebr-common", | |||
Samr = s["Samr-entryname"], | |||
}, | |||
sort_key = { | |||
Hebr = "Hebr-common", | |||
Samr = s["Samr-sortkey"], | |||
}, | |||
} | } | ||
| Line 874: | Line 932: | ||
Deva = "hi-translit" | Deva = "hi-translit" | ||
}, | }, | ||
standardChars = { | |||
Deva = "अआइईउऊएऐओऔकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसहत्रज्ञक्षक़ख़ग़ज़झ़ड़ढ़फ़काखागाघाङाचाछाजाझाञाटाठाडाढाणाताथादाधानापाफाबाभामायारालावाशाषासाहात्राज्ञाक्षाक़ाख़ाग़ाज़ाझ़ाड़ाढ़ाफ़ाकिखिगिघिङिचिछिजिझिञिटिठिडिढिणितिथिदिधिनिपिफिबिभिमियिरिलिविशिषिसिहित्रिज्ञिक्षिक़िख़िग़िज़िझ़िड़िढ़िफ़िकीखीगीघीङीचीछीजीझीञीटीठीडीढीणीतीथीदीधीनीपीफीबीभीमीयीरीलीवीशीषीसीहीत्रीज्ञीक्षीक़ीख़ीग़ीज़ीझ़ीड़ीढ़ीफ़ीकुखुगुघुङुचुछुजुझुञुटुठुडुढुणुतुथुदुधुनुपुफुबुभुमुयुरुलुवुशुषुसुहुत्रुज्ञुक्षुक़ुख़ुग़ुज़ुझ़ुड़ुढ़ुफ़ुकूखूगूघूङूचूछूजूझूञूटूठूडूढूणूतूथूदूधूनूपूफूबूभूमूयूरूलूवूशूषूसूहूत्रूज्ञूक्षूक़ूख़ूग़ूज़ूझ़ूड़ूढ़ूफ़ूकेखेगेघेङेचेछेजेझेञेटेठेडेढेणेतेथेदेधेनेपेफेबेभेमेयेरेलेवेशेषेसेहेत्रेज्ञेक्षेक़ेख़ेग़ेज़ेझ़ेड़ेढ़ेफ़ेकैखैगैघैङैचैछैजैझैञैटैठैडैढैणैतैथैदैधैनैपैफैबैभैमैयैरैलैवैशैषैसैहैत्रैज्ञैक्षैक़ैख़ैग़ैज़ैझ़ैड़ैढ़ैफ़ैकोखोगोघोङोचोछोजोझोञोटोठोडोढोणोतोथोदोधोनोपोफोबोभोमोयोरोलोवोशोषोसोहोत्रोज्ञोक्षोक़ोख़ोग़ोज़ोझ़ोड़ोढ़ोफ़ोकौखौगौघौङौचौछौजौझौञौटौठौडौढौणौतौथौदौधौनौपौफौबौभौमौयौरौलौवौशौषौसौहौत्रौज्ञौक्षौक़ौख़ौग़ौज़ौझ़ौड़ौढ़ौफ़ौक्ख्ग्घ्ङ्च्छ्ज्झ्ञ्ट्ठ्ड्ढ्ण्त्थ्द्ध्न्प्फ्ब्भ्म्य्र्ल्व्श्ष्स्ह्त्र्ज्ञ्क्ष्क़्ख़्ग़्ज़्झ़्ड़्ढ़्फ़्।॥०१२३४५६७८९॰", | Deva = "अआइईउऊएऐओऔकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसहत्रज्ञक्षक़ख़ग़ज़झ़ड़ढ़फ़काखागाघाङाचाछाजाझाञाटाठाडाढाणाताथादाधानापाफाबाभामायारालावाशाषासाहात्राज्ञाक्षाक़ाख़ाग़ाज़ाझ़ाड़ाढ़ाफ़ाकिखिगिघिङिचिछिजिझिञिटिठिडिढिणितिथिदिधिनिपिफिबिभिमियिरिलिविशिषिसिहित्रिज्ञिक्षिक़िख़िग़िज़िझ़िड़िढ़िफ़िकीखीगीघीङीचीछीजीझीञीटीठीडीढीणीतीथीदीधीनीपीफीबीभीमीयीरीलीवीशीषीसीहीत्रीज्ञीक्षीक़ीख़ीग़ीज़ीझ़ीड़ीढ़ीफ़ीकुखुगुघुङुचुछुजुझुञुटुठुडुढुणुतुथुदुधुनुपुफुबुभुमुयुरुलुवुशुषुसुहुत्रुज्ञुक्षुक़ुख़ुग़ुज़ुझ़ुड़ुढ़ुफ़ुकूखूगूघूङूचूछूजूझूञूटूठूडूढूणूतूथूदूधूनूपूफूबूभूमूयूरूलूवूशूषूसूहूत्रूज्ञूक्षूक़ूख़ूग़ूज़ूझ़ूड़ूढ़ूफ़ूकेखेगेघेङेचेछेजेझेञेटेठेडेढेणेतेथेदेधेनेपेफेबेभेमेयेरेलेवेशेषेसेहेत्रेज्ञेक्षेक़ेख़ेग़ेज़ेझ़ेड़ेढ़ेफ़ेकैखैगैघैङैचैछैजैझैञैटैठैडैढैणैतैथैदैधैनैपैफैबैभैमैयैरैलैवैशैषैसैहैत्रैज्ञैक्षैक़ैख़ैग़ैज़ैझ़ैड़ैढ़ैफ़ैकोखोगोघोङोचोछोजोझोञोटोठोडोढोणोतोथोदोधोनोपोफोबोभोमोयोरोलोवोशोषोसोहोत्रोज्ञोक्षोक़ोख़ोग़ोज़ोझ़ोड़ोढ़ोफ़ोकौखौगौघौङौचौछौजौझौञौटौठौडौढौणौतौथौदौधौनौपौफौबौभौमौयौरौलौवौशौषौसौहौत्रौज्ञौक्षौक़ौख़ौग़ौज़ौझ़ौड़ौढ़ौफ़ौक्ख्ग्घ्ङ्च्छ्ज्झ्ञ्ट्ठ्ड्ढ्ण्त्थ्द्ध्न्प्फ्ब्भ्म्य्र्ल्व्श्ष्स्ह्त्र्ज्ञ्क्ष्क़्ख़्ग़्ज़्झ़्ड़्ढ़्फ़्।॥०१२३४५६७८९॰", | ||
c.punc | c.punc | ||
| Line 924: | Line 982: | ||
}, | }, | ||
}, | }, | ||
standardChars = { | |||
Latn = "AaÁáBbCcDdEeÉéFfGgHhIiÍíJjKkLlMmNnOoÓóÖöŐőPpQqRrSsTtUuÚúÜüŰűVvWwXxYyZz", | Latn = "AaÁáBbCcDdEeÉéFfGgHhIiÍíJjKkLlMmNnOoÓóÖöŐőPpQqRrSsTtUuÚúÜüŰűVvWwXxYyZz", | ||
c.punc | c.punc | ||
| Line 936: | Line 994: | ||
"Armn, Brai", | "Armn, Brai", | ||
ancestors = "axm", | ancestors = "axm", | ||
- | translit = { | ||
Armn = "Armn-translit" | |||
}, | |||
override_translit = true, | override_translit = true, | ||
entry_name = { | |||
Armn = { | Armn = { | ||
remove_diacritics = "՛՜՞՟", | remove_diacritics = "՛՜՞՟", | ||
| Line 979: | Line 1,039: | ||
"Latn", | "Latn", | ||
ancestors = "ms", | ancestors = "ms", | ||
standardChars = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz" .. c.punc, | |||
} | } | ||
| Line 988: | Line 1,048: | ||
"Latn", | "Latn", | ||
type = "appendix-constructed", | type = "appendix-constructed", | ||
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ}, | |||
} | } | ||
| Line 996: | Line 1,056: | ||
"alv-igb", | "alv-igb", | ||
"Latn", | "Latn", | ||
entry_name = {remove_diacritics = c.grave .. c.acute .. c.macron}, | |||
sort_key = { | sort_key = { | ||
from = {"gb", "gh", "gw", "ị", "kp", "kw", "ṅ", "nw", "ny", "ọ", "sh", "ụ"}, | from = {"gb", "gh", "gw", "ị", "kp", "kw", "ṅ", "nw", "ny", "ọ", "sh", "ụ"}, | ||
| Line 1,044: | Line 1,104: | ||
to = {"a" .. p[1], "d" .. p[1], "e" .. p[1], "i" .. p[1], "o" .. p[1], "u" .. p[1], "y" .. p[1], "z" .. p[1], "z" .. p[2], "z" .. p[3]} | to = {"a" .. p[1], "d" .. p[1], "e" .. p[1], "i" .. p[1], "o" .. p[1], "u" .. p[1], "y" .. p[1], "z" .. p[1], "z" .. p[2], "z" .. p[3]} | ||
}, | }, | ||
standardChars = "AaÁáBbDdÐðEeÉéFfGgHhIiÍíJjKkLlMmNnOoÓóPpRrSsTtUuÚúVvXxYyÝýÞþÆæÖö" .. c.punc, | |||
} | } | ||
| Line 1,054: | Line 1,114: | ||
ancestors = "roa-oit", | ancestors = "roa-oit", | ||
sort_key = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.ringabove}, | sort_key = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.ringabove}, | ||
standardChars = "AaÀàBbCcDdEeÈèÉéFfGgHhIiÌìLlMmNnOoÒòPpQqRrSsTtUuÙùVvZz" .. c.punc, | |||
} | } | ||
| Line 1,077: | Line 1,137: | ||
link_tr = true, | link_tr = true, | ||
display_text = s["jpx-displaytext"], | display_text = s["jpx-displaytext"], | ||
entry_name = s["jpx-entryname"], | |||
sort_key = s["jpx-sortkey"], | sort_key = s["jpx-sortkey"], | ||
} | } | ||
| Line 1,085: | Line 1,145: | ||
33549, | 33549, | ||
"poz", | "poz", | ||
"Latn, Java | "Latn, Java", | ||
ancestors = "kaw", | ancestors = "kaw", | ||
translit = { | translit = { | ||
| Line 1,091: | Line 1,151: | ||
}, | }, | ||
link_tr = true, | link_tr = true, | ||
entry_name = { | |||
Latn = {remove_diacritics = c.circ} -- Modern jv don't use ê | Latn = {remove_diacritics = c.circ} -- Modern jv don't use ê | ||
}, | }, | ||
| Line 1,108: | Line 1,168: | ||
"Geor, Geok, Hebr", -- Hebr is used to write Judeo-Georgian | "Geor, Geok, Hebr", -- Hebr is used to write Judeo-Georgian | ||
ancestors = "ka-mid", | ancestors = "ka-mid", | ||
- | translit = { | ||
Geor = "Geor-translit", | |||
Geok = "Geok-translit", | |||
}, | |||
override_translit = true, | override_translit = true, | ||
display_text = { | |||
Geor = s["ka- | Hebr = "Hebr-common", | ||
Geok = s["ka- | }, | ||
entry_name = { | |||
Geor = s["ka-entryname"], | |||
Geok = s["ka-entryname"], | |||
Hebr = "Hebr-common", | |||
}, | }, | ||
- | sort_key = { | ||
Hebr = "Hebr-common", | |||
} | |||
} | } | ||
| Line 1,162: | Line 1,231: | ||
}, | }, | ||
}, | }, | ||
standardChars = { | |||
Cyrl = "АаӘәБбВвГгҒғДдЕеЁёЖжЗзИиЙйКкҚқЛлМмНнҢңОоӨөПпРрСсТтУуҰұҮүФфХхҺһЦцЧчШшЩщЪъЫыІіЬьЭэЮюЯя", | Cyrl = "АаӘәБбВвГгҒғДдЕеЁёЖжЗзИиЙйКкҚқЛлМмНнҢңОоӨөПпРрСсТтУуҰұҮүФфХхҺһЦцЧчШшЩщЪъЫыІіЬьЭэЮюЯя", | ||
c.punc | c.punc | ||
| Line 1,194: | Line 1,263: | ||
"Knda, Tutg", | "Knda, Tutg", | ||
ancestors = "dra-mkn", | ancestors = "dra-mkn", | ||
- | translit = { | ||
Knda = "kn-translit", | |||
}, | |||
} | } | ||
| Line 1,206: | Line 1,277: | ||
Kore = "ko-translit", | Kore = "ko-translit", | ||
}, | }, | ||
entry_name = { | |||
Kore = s["Kore-entryname"], | |||
}, | |||
} | } | ||
| Line 1,214: | Line 1,287: | ||
"ssa-sah", | "ssa-sah", | ||
"Latn, Arab", | "Latn, Arab", | ||
-- the sortkey and | -- the sortkey and entry_name are only for standard Kanuri; when dialectal entries get added, someone will have to work out how the dialects should be represented orthographically | ||
entry_name = { | |||
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.breve} | Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.breve} | ||
}, | }, | ||
| Line 1,234: | Line 1,307: | ||
["ks-Arab"] = "ks-Arab-translit", | ["ks-Arab"] = "ks-Arab-translit", | ||
Deva = "ks-Deva-translit", | Deva = "ks-Deva-translit", | ||
- | Shrd = "Shrd-translit", | ||
}, | }, | ||
} | } | ||
-- "kv" | -- "kv" IS TREATED AS "koi", "kpv", SEE WT:LT | ||
m["kw"] = { | m["kw"] = { | ||
| Line 1,273: | Line 1,346: | ||
397, | 397, | ||
"itc-laf", | "itc-laf", | ||
"Latn | "Latn", | ||
ancestors = "itc-ola", | ancestors = "itc-ola", | ||
display_text = { | display_text = { | ||
Latn = s["itc-Latn-displaytext"] | Latn = s["itc-Latn-displaytext"] | ||
}, | }, | ||
entry_name = { | |||
Latn = s["itc-Latn- | Latn = s["itc-Latn-entryname"] | ||
}, | }, | ||
sort_key = { | sort_key = { | ||
Latn = s["itc-Latn-sortkey"] | Latn = s["itc-Latn-sortkey"] | ||
}, | }, | ||
standardChars = { | |||
Latn = "AaBbCcDdEeFfGgHhIiLlMmNnOoPpQqRrSsTtUuVvXx", | Latn = "AaBbCcDdEeFfGgHhIiLlMmNnOoPpQqRrSsTtUuVvXx", | ||
c.punc | c.punc | ||
| Line 1,310: | Line 1,382: | ||
"bnt-nyg", | "bnt-nyg", | ||
"Latn", | "Latn", | ||
entry_name = {remove_diacritics = c.acute .. c.circ}, | |||
sort_key = { | sort_key = { | ||
from = {"ŋ"}, | from = {"ŋ"}, | ||
| Line 1,341: | Line 1,413: | ||
9211, | 9211, | ||
"tai-swe", | "tai-swe", | ||
"Laoo", | "Laoo", | ||
translit = "lo-translit", | translit = "lo-translit", | ||
sort_key = "Laoo-sortkey", | sort_key = "Laoo-sortkey", | ||
standardChars = "0-9ກຂຄງຈຊຍດຕຖທນບປຜຝພຟມຢຣລວສຫອຮຯ-ໝ" .. c.punc, | |||
} | } | ||
| Line 1,354: | Line 1,426: | ||
ancestors = "olt", | ancestors = "olt", | ||
display_text = "lt-common", | display_text = "lt-common", | ||
entry_name = "lt-common", | |||
sort_key = "lt-common", | sort_key = "lt-common", | ||
standardChars = "AaĄąBbCcČčDdEeĘęĖėFfGgHhIiĮįYyJjKkLlMmNnOoPpRrSsŠšTtUuŲųŪūVvZzŽž" .. c.punc, | |||
} | } | ||
| Line 1,371: | Line 1,443: | ||
"bat-eas", | "bat-eas", | ||
"Latn", | "Latn", | ||
entry_name = { | |||
-- This attempts to convert vowels with tone marks to vowels either with or without macrons. Specifically, there should be no macrons if the vowel is part of a diphthong (including resonant diphthongs such pìrksts -> pirksts not #pīrksts). What we do is first convert the vowel + tone mark to a vowel + tilde in a decomposed fashion, then remove the tilde in diphthongs, then convert the remaining vowel + tilde sequences to macroned vowels, then delete any other tilde. We leave already-macroned vowels alone: Both e.g. ar and ār occur before consonants. FIXME: This still might not be sufficient. | -- This attempts to convert vowels with tone marks to vowels either with or without macrons. Specifically, there should be no macrons if the vowel is part of a diphthong (including resonant diphthongs such pìrksts -> pirksts not #pīrksts). What we do is first convert the vowel + tone mark to a vowel + tilde in a decomposed fashion, then remove the tilde in diphthongs, then convert the remaining vowel + tilde sequences to macroned vowels, then delete any other tilde. We leave already-macroned vowels alone: Both e.g. ar and ār occur before consonants. FIXME: This still might not be sufficient. | ||
from = {"([Ee])" .. c.cedilla, "[" .. c.grave .. c.circ .. c.tilde .."]", "([aAeEiIoOuU])" .. c.tilde .."?([lrnmuiLRNMUI])" .. c.tilde .. "?([^aAeEiIoOuU])", "([aAeEiIoOuU])" .. c.tilde .."?([lrnmuiLRNMUI])" .. c.tilde .."?$", "([iI])" .. c.tilde .. "?([eE])" .. c.tilde .. "?", "([aAeEiIuU])" .. c.tilde, c.tilde}, | from = {"([Ee])" .. c.cedilla, "[" .. c.grave .. c.circ .. c.tilde .."]", "([aAeEiIoOuU])" .. c.tilde .."?([lrnmuiLRNMUI])" .. c.tilde .. "?([^aAeEiIoOuU])", "([aAeEiIoOuU])" .. c.tilde .."?([lrnmuiLRNMUI])" .. c.tilde .."?$", "([iI])" .. c.tilde .. "?([eE])" .. c.tilde .. "?", "([aAeEiIuU])" .. c.tilde, c.tilde}, | ||
| Line 1,380: | Line 1,452: | ||
to = {"a" .. p[1], "c" .. p[1], "e" .. p[1], "g" .. p[1], "i" .. p[1], "k" .. p[1], "l" .. p[1], "n" .. p[1], "s" .. p[1], "u" .. p[1], "z" .. p[1]} | to = {"a" .. p[1], "c" .. p[1], "e" .. p[1], "g" .. p[1], "i" .. p[1], "k" .. p[1], "l" .. p[1], "n" .. p[1], "s" .. p[1], "u" .. p[1], "z" .. p[1]} | ||
}, | }, | ||
standardChars = "AaĀāBbCcČčDdEeĒēFfGgĢģHhIiĪīJjKkĶķLlĻļMmNnŅņOoPpRrSsŠšTtUuŪūVvZzŽž" .. c.punc, | |||
} | } | ||
| Line 1,402: | Line 1,474: | ||
m["mi"] = { | m["mi"] = { | ||
" | "Maori", | ||
36451, | 36451, | ||
"poz-pep", | "poz-pep", | ||
| Line 1,409: | Line 1,481: | ||
remove_diacritics = c.macron, | remove_diacritics = c.macron, | ||
from = {"ng", "wh"}, | from = {"ng", "wh"}, | ||
to = {" | to = {"z" .. p[1], "z" .. p[2]} | ||
}, | }, | ||
} | } | ||
| Line 1,420: | Line 1,492: | ||
ancestors = "cu", | ancestors = "cu", | ||
translit = { | translit = { | ||
Cyrl = "mk-translit", | Cyrl = "mk-translit" | ||
}, | |||
display_text = { | |||
Polyt = s["Polyt-displaytext"] | |||
}, | }, | ||
entry_name = { | |||
Cyrl = { | Cyrl = { | ||
remove_diacritics = c.acute, | remove_diacritics = c.acute, | ||
remove_exceptions = {"Ѓ", "ѓ", "Ќ", "ќ"} | remove_exceptions = {"Ѓ", "ѓ", "Ќ", "ќ"} | ||
}, | }, | ||
Polyt = s["Polyt-entryname"], | |||
}, | }, | ||
sort_key = { | sort_key = { | ||
| Line 1,438: | Line 1,511: | ||
to = {"д" .. p[1], "з" .. p[1], "и" .. p[1], "л" .. p[1], "н" .. p[1], "т" .. p[1], "ч" .. p[1]} | to = {"д" .. p[1], "з" .. p[1], "и" .. p[1], "л" .. p[1], "н" .. p[1], "т" .. p[1], "ч" .. p[1]} | ||
}, | }, | ||
Polyt = s["Polyt-sortkey"], | |||
}, | }, | ||
standardChars = { | |||
Cyrl = "АаБбВвГгДдЃѓЕеЖжЗзЅѕИиЈјКкЛлЉљМмНнЊњОоПпРрСсТтЌќУуФфХхЦцЧчЏџШш", | Cyrl = "АаБбВвГгДдЃѓЕеЖжЗзЅѕИиЈјКкЛлЉљМмНнЊњОоПпРрСсТтЌќУуФфХхЦцЧчЏџШш", | ||
c.punc | c.punc | ||
| Line 1,451: | Line 1,524: | ||
"dra-mal", | "dra-mal", | ||
"Mlym", | "Mlym", | ||
translit = "ml-translit", | |||
override_translit = true, | override_translit = true, | ||
} | } | ||
| Line 1,463: | Line 1,536: | ||
translit = { | translit = { | ||
Cyrl = "mn-translit", | Cyrl = "mn-translit", | ||
- | Mong = "Mong-translit", | ||
}, | }, | ||
override_translit = true, | override_translit = true, | ||
display_text = { | |||
Mong = s["Mong-displaytext"] | |||
}, | |||
entry_name = { | |||
Cyrl = {remove_diacritics = c.grave .. c.acute}, | Cyrl = {remove_diacritics = c.grave .. c.acute}, | ||
Mong = s["Mong-entryname"], | |||
}, | }, | ||
sort_key = { | sort_key = { | ||
| Line 1,477: | Line 1,553: | ||
}, | }, | ||
}, | }, | ||
standardChars = { | |||
Cyrl = "АаБбВвГгДдЕеЁёЖжЗзИиЙйЛлМмНнОоӨөРрСсТтУуҮүХхЦцЧчШшЫыЬьЭэЮюЯя—", | Cyrl = "АаБбВвГгДдЕеЁёЖжЗзИиЙйЛлМмНнОоӨөРрСсТтУуҮүХхЦцЧчШшЫыЬьЭэЮюЯя—", | ||
Brai = c.braille, | Brai = c.braille, | ||
| Line 1,484: | Line 1,560: | ||
} | } | ||
-- "mo" | -- "mo" IS TREATED AS "ro", SEE WT:LT | ||
m["mr"] = { | m["mr"] = { | ||
| Line 1,496: | Line 1,572: | ||
Modi = "mr-Modi-translit", | Modi = "mr-Modi-translit", | ||
}, | }, | ||
entry_name = { | |||
Deva = { | Deva = { | ||
from = {"च़", "ज़", "झ़"}, | from = {"च़", "ज़", "झ़"}, | ||
| Line 1,510: | Line 1,586: | ||
"Latn, ms-Arab", | "Latn, ms-Arab", | ||
ancestors = "ms-cla", | ancestors = "ms-cla", | ||
standardChars = { | |||
Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz", | Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz", | ||
c.punc | c.punc | ||
| Line 1,525: | Line 1,601: | ||
to = {"’"} | to = {"’"} | ||
}, | }, | ||
entry_name = { | |||
from = {"’"}, | from = {"’"}, | ||
to = {"'"}, | to = {"'"}, | ||
| Line 1,575: | Line 1,651: | ||
ancestors = "gmq-mno, da", -- da as an (but not the) ancestor of nb was agreed on - do not change without discussion | ancestors = "gmq-mno, da", -- da as an (but not the) ancestor of nb was agreed on - do not change without discussion | ||
sort_key = s["no-sortkey"], | sort_key = s["no-sortkey"], | ||
standardChars = s["no-standardchars"], | |||
} | } | ||
| Line 1,583: | Line 1,659: | ||
"bnt-ngu", | "bnt-ngu", | ||
"Latn", | "Latn", | ||
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron}, | |||
} | } | ||
| Line 1,612: | Line 1,688: | ||
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.diaer .. c.ringabove .. c.cedilla .. "'"}, | Latn = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.tilde .. c.diaer .. c.ringabove .. c.cedilla .. "'"}, | ||
}, | }, | ||
standardChars = { | |||
Latn = " | Latn = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz", | ||
Brai = c.braille, | Brai = c.braille, | ||
c.punc | c.punc | ||
| Line 1,625: | Line 1,701: | ||
"Latn", | "Latn", | ||
ancestors = "gmq-mno", | ancestors = "gmq-mno", | ||
entry_name = { | |||
remove_diacritics = c.grave .. c.acute, | remove_diacritics = c.grave .. c.acute, | ||
}, | }, | ||
sort_key = s["no-sortkey"], | sort_key = s["no-sortkey"], | ||
standardChars = s["no-standardchars"], | |||
} | } | ||
| Line 1,639: | Line 1,715: | ||
ancestors = "gmq-mno", | ancestors = "gmq-mno", | ||
sort_key = s["no-sortkey"], | sort_key = s["no-sortkey"], | ||
standardChars = s["no-standardchars"], | |||
} | } | ||
| Line 1,647: | Line 1,723: | ||
"bnt-ngu", | "bnt-ngu", | ||
"Latn", | "Latn", | ||
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron}, | |||
} | } | ||
| Line 1,675: | Line 1,751: | ||
"bnt-nys", | "bnt-nys", | ||
"Latn", | "Latn", | ||
entry_name = {remove_diacritics = c.acute .. c.circ}, | |||
sort_key = { | sort_key = { | ||
from = {"ng'"}, | from = {"ng'"}, | ||
| Line 1,688: | Line 1,764: | ||
"Latn, Hebr", | "Latn, Hebr", | ||
ancestors = "pro", | ancestors = "pro", | ||
display_text = { | |||
Hebr = "Hebr-common", | |||
}, | |||
entry_name = { | |||
Hebr = "Hebr-common", | |||
}, | |||
sort_key = { | sort_key = { | ||
Latn = { | Latn = { | ||
| Line 1,694: | Line 1,776: | ||
to = {"%1h"} | to = {"%1h"} | ||
}, | }, | ||
Hebr = "Hebr-common", | |||
}, | }, | ||
} | } | ||
| Line 1,735: | Line 1,817: | ||
translit = { | translit = { | ||
Cyrl = "os-translit", | Cyrl = "os-translit", | ||
- | Geor = "Geor-translit", | ||
}, | }, | ||
override_translit = true, | override_translit = true, | ||
| Line 1,748: | Line 1,830: | ||
}, | }, | ||
}, | }, | ||
entry_name = { | |||
Cyrl = { | Cyrl = { | ||
remove_diacritics = c.grave .. c.acute, | remove_diacritics = c.grave .. c.acute, | ||
| Line 1,772: | Line 1,854: | ||
"inc-pan", | "inc-pan", | ||
"Guru, pa-Arab", | "Guru, pa-Arab", | ||
ancestors = "inc-opa", | |||
translit = { | translit = { | ||
Guru = "Guru-translit", | Guru = "Guru-translit", | ||
["pa-Arab"] = "pa-Arab-translit", | ["pa-Arab"] = "pa-Arab-translit", | ||
}, | }, | ||
entry_name = { | |||
["pa-Arab"] = { | ["pa-Arab"] = { | ||
remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.nunghunna, | remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.nunghunna, | ||
| Line 1,792: | Line 1,875: | ||
ancestors = "sa", | ancestors = "sa", | ||
translit = { | translit = { | ||
- | Brah = "Brah-translit", | ||
Deva = "sa-translit", | Deva = "sa-translit", | ||
Beng = "pi-translit", | Beng = "pi-translit", | ||
| Line 1,803: | Line 1,886: | ||
Cakm = "Cakm-translit", | Cakm = "Cakm-translit", | ||
}, | }, | ||
entry_name = { | |||
Thai = { | Thai = { | ||
from = {"ึ", u(0xF700), u(0xF70F)}, -- FIXME: Not clear what's going on with the PUA characters here. | from = {"ึ", u(0xF700), u(0xF70F)}, -- FIXME: Not clear what's going on with the PUA characters here. | ||
to = {"ิํ", "ฐ", "ญ"} | to = {"ิํ", "ฐ", "ญ"} | ||
}, | }, | ||
remove_diacritics = c.VS01 | |||
}, | }, | ||
sort_key = { -- FIXME: This needs to be converted into the current standardized format. | sort_key = { -- FIXME: This needs to be converted into the current standardized format. | ||
| Line 1,828: | Line 1,909: | ||
to = {"a" .. p[1], "c" .. p[1], "e" .. p[1], "l" .. p[1], "n" .. p[1], "o" .. p[1], "s" .. p[1], "z" .. p[1], "z" .. p[2]} | to = {"a" .. p[1], "c" .. p[1], "e" .. p[1], "l" .. p[1], "n" .. p[1], "o" .. p[1], "s" .. p[1], "z" .. p[1], "z" .. p[2]} | ||
}, | }, | ||
standardChars = "AaĄąBbCcĆćDdEeĘęFfGgHhIiJjKkLlŁłMmNnŃńOoÓóPpRrSsŚśTtUuWwYyZzŹźŻż" .. c.punc, | |||
} | } | ||
| Line 1,836: | Line 1,917: | ||
"ira-pat", | "ira-pat", | ||
"ps-Arab", | "ps-Arab", | ||
entry_name = {remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.zwarakay .. c.superalef}, | |||
} | } | ||
| Line 1,851: | Line 1,932: | ||
}, | }, | ||
}, | }, | ||
standardChars = { | |||
Latn = "AaÁáÂâÃãBbCcÇçDdEeÉéÊêFfGgHhIiÍíJjLlMmNnOoÓóÔôÕõPpQqRrSsTtUuÚúVvXxZz", | Latn = "AaÁáÂâÃãBbCcÇçDdEeÉéÊêFfGgHhIiÍíJjLlMmNnOoÓóÔôÕõPpQqRrSsTtUuÚúVvXxZz", | ||
Brai = c.braille, | Brai = c.braille, | ||
| Line 1,866: | Line 1,947: | ||
m["rm"] = { | m["rm"] = { | ||
" | "Romansch", | ||
13199, | 13199, | ||
"roa-rhe", | "roa-rhe", | ||
"Latn", | "Latn", | ||
sort_key = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.small_e}, | sort_key = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.diaer .. c.small_e}, | ||
| Line 1,893: | Line 1,973: | ||
}, | }, | ||
}, | }, | ||
standardChars = { | |||
Latn = "AaĂăÂâBbCcDdEeFfGgHhIiÎîJjLlMmNnOoPpRrSsȘșTtȚțUuVvXxZz", | Latn = "AaĂăÂâBbCcDdEeFfGgHhIiÎîJjLlMmNnOoPpRrSsȘșTtȚțUuVvXxZz", | ||
Cyrl = "АаБбВвГгДдЕеЖжӁӂЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЫыЬьЭэЮюЯя", | Cyrl = "АаБбВвГгДдЕеЖжӁӂЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЫыЬьЭэЮюЯя", | ||
| Line 1,916: | Line 1,995: | ||
}, | }, | ||
}, | }, | ||
entry_name = { | |||
Cyrl = { | Cyrl = { | ||
remove_diacritics = c.grave .. c.acute .. c.diaer, | remove_diacritics = c.grave .. c.acute .. c.diaer, | ||
| Line 1,927: | Line 2,006: | ||
Cyrl = { | Cyrl = { | ||
remove_diacritics = c.grave .. c.acute .. c.diaer, | remove_diacritics = c.grave .. c.acute .. c.diaer, | ||
remove_exceptions = {"ё", "ѣ̈", "я̈"}, | |||
from = { | from = { | ||
"і", "ѣ", "ѳ", "ѵ" | "ё", "ѣ̈", "я̈", -- 2 chars | ||
"і", "ѣ", "ѳ", "ѵ" -- 1 char | |||
}, | }, | ||
to = { | to = { | ||
"е" .. p[1], "ь" .. p[2], "я" .. p[1], | |||
"и" .. p[1], "ь" .. p[1], "я" .. p[2], "я" .. p[3] | "и" .. p[1], "ь" .. p[1], "я" .. p[2], "я" .. p[3] | ||
} | } | ||
}, | }, | ||
}, | }, | ||
standardChars = { | |||
Cyrl = "АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя—", | Cyrl = "АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя—", | ||
Brai = c.braille, | Brai = c.braille, | ||
| Line 1,947: | Line 2,029: | ||
"bnt-glb", | "bnt-glb", | ||
"Latn", | "Latn", | ||
entry_name = {remove_diacritics = c.acute .. c.circ .. c.macron .. c.caron}, | |||
} | } | ||
| Line 1,958: | Line 2,040: | ||
Beng = "sa-Beng-translit", | Beng = "sa-Beng-translit", | ||
["as-Beng"] = "sa-Beng-translit", | ["as-Beng"] = "sa-Beng-translit", | ||
- | Brah = "Brah-translit", | ||
Deva = "sa-translit", | Deva = "sa-translit", | ||
Gujr = "sa-Gujr-translit", | Gujr = "sa-Gujr-translit", | ||
| Line 1,970: | Line 2,052: | ||
Mlym = "sa-Mlym-translit", | Mlym = "sa-Mlym-translit", | ||
Modi = "sa-Modi-translit", | Modi = "sa-Modi-translit", | ||
Mong = "Mong-translit", | |||
- | ["mnc-Mong"] = "mnc-translit", | ||
["xwo-Mong"] = "xal-translit", | |||
Mymr = "pi-translit", | Mymr = "pi-translit", | ||
Orya = "sa-Orya-translit", | Orya = "sa-Orya-translit", | ||
- | Shrd = "Shrd-translit", | ||
- | Sidd = "Sidd-translit", | ||
Sinh = "si-translit", | Sinh = "si-translit", | ||
Taml = "sa-Taml-translit", | Taml = "sa-Taml-translit", | ||
Telu = "sa-Telu-translit", | Telu = "sa-Telu-translit", | ||
Thai = "pi-translit", | Thai = "pi-translit", | ||
-- Tibt | Tibt = "Tibt-translit", | ||
}, | |||
display_text = { | |||
Mong = s["Mong-displaytext"], | |||
Tibt = s["Tibt-displaytext"], | |||
}, | }, | ||
entry_name = { | |||
Mong = s["Mong-entryname"], | |||
Tibt = s["Tibt-entryname"], | |||
Thai = { | Thai = { | ||
from = {"ึ", u(0xF700), u(0xF70F)}, -- FIXME: Not clear what's going on with the PUA characters here. | from = {"ึ", u(0xF700), u(0xF70F)}, -- FIXME: Not clear what's going on with the PUA characters here. | ||
to = {"ิํ", "ฐ", "ญ"} | to = {"ิํ", "ฐ", "ญ"} | ||
}, | }, | ||
remove_diacritics = c.VS01 .. c.udatta .. c.anudatta | |||
}, | }, | ||
sort_key = { | sort_key = { | ||
Tibt = "Tibt-sortkey", | |||
{ -- FIXME: This needs to be converted into the current standardized format. | |||
from = {" | from = {"ā", "ī", "ū", "ḍ", "ḷ", "ḹ", "m[" .. c.dotabove .. c.dotbelow .. "]", "ṅ", "ñ", "ṇ", "ṛ", "ṝ", "ś", "ṣ", "ṭ", "([เโไ])([ก-ฮ])", "([ເໂໄ])([ກ-ຮ])", "ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ", u(0xFE00), u(0x200D)}, | ||
to = {"a~", "i~", "u~", "d~", "l~", "l~~", "m~", "n~", "n~~", "n~~~", "r~", "r~~", "s~", "s~~", "t~", "%2%1", "%2%1", "ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ"}, | |||
to = {"a~", "i~", "u~", "d~", "l~", "l~~", "m~", "n~", "n~~", "n~~~", "r~", "r~~", "s~", "s~~", "t~" | |||
}, | }, | ||
}, | }, | ||
} | } | ||
| Line 2,025: | Line 2,092: | ||
"roa-sou", | "roa-sou", | ||
"Latn", | "Latn", | ||
} | } | ||
| Line 2,034: | Line 2,100: | ||
"sd-Arab, Deva, Sind, Khoj", | "sd-Arab, Deva, Sind, Khoj", | ||
translit = { | translit = { | ||
Sind = "Sind | Sind = "Sind-translit" | ||
}, | }, | ||
entry_name = { | |||
["sd-Arab"] = { | ["sd-Arab"] = { | ||
remove_diacritics = c.kashida .. c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.superalef, | remove_diacritics = c.kashida .. c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.superalef, | ||
| Line 2,055: | Line 2,120: | ||
to = {"ˈ"} | to = {"ˈ"} | ||
}, | }, | ||
entry_name = {remove_diacritics = c.macron .. c.dotbelow .. "'ˈ"}, | |||
sort_key = { | sort_key = { | ||
from = {"á", "č", "đ", "ŋ", "š", "ŧ", "ž"}, | from = {"á", "č", "đ", "ŋ", "š", "ŧ", "ž"}, | ||
to = {"a" .. p[1], "c" .. p[1], "d" .. p[1], "n" .. p[1], "s" .. p[1], "t" .. p[1], "z" .. p[1]} | to = {"a" .. p[1], "c" .. p[1], "d" .. p[1], "n" .. p[1], "s" .. p[1], "t" .. p[1], "z" .. p[1]} | ||
}, | }, | ||
standardChars = "AaÁáBbCcČčDdĐđEeFfGgHhIiJjKkLlMmNnŊŋOoPpRrSsŠšTtŦŧUuVvZzŽž" .. c.punc, | |||
} | } | ||
| Line 2,075: | Line 2,140: | ||
9301, | 9301, | ||
"zls", | "zls", | ||
"Latn, Cyrl, Glag | "Latn, Cyrl, Glag", | ||
ietf_subtag = "hbs", -- ISO 639-3 code, since "sh" is deprecated from ISO 639-1 | ietf_subtag = "hbs", -- ISO 639-3 code, since "sh" is deprecated from ISO 639-1 | ||
wikimedia_codes = "sh, bs, hr, sr", | wikimedia_codes = "sh, bs, hr, sr", | ||
entry_name = { | |||
Latn = { | Latn = { | ||
remove_diacritics = c.grave .. c.acute .. c.tilde .. c.macron .. c.dgrave .. c.invbreve, | remove_diacritics = c.grave .. c.acute .. c.tilde .. c.macron .. c.dgrave .. c.invbreve, | ||
| Line 2,102: | Line 2,167: | ||
}, | }, | ||
}, | }, | ||
standardChars = { | |||
Latn = "AaBbCcČčĆćDdĐđEeFfGgHhIiJjKkLlMmNnOoPpRrSsŠšTtUuVvZzŽž", | Latn = "AaBbCcČčĆćDdĐđEeFfGgHhIiJjKkLlMmNnOoPpRrSsŠšTtUuVvZzŽž", | ||
Cyrl = "АаБбВвГгДдЂђЕеЖжЗзИиЈјКкЛлЉљМмНнЊњОоПпРрСсТтЋћУуФфХхЦцЧчЏџШш", | Cyrl = "АаБбВвГгДдЂђЕеЖжЗзИиЈјКкЛлЉљМмНнЊњОоПпРрСсТтЋћУуФфХхЦцЧчЏџШш", | ||
| Line 2,125: | Line 2,190: | ||
ancestors = "zlw-osk", | ancestors = "zlw-osk", | ||
sort_key = {remove_diacritics = c.acute .. c.circ .. c.diaer .. c.caron}, | sort_key = {remove_diacritics = c.acute .. c.circ .. c.diaer .. c.caron}, | ||
standardChars = "AaÁáÄäBbCcČčDdĎďEeÉéFfGgHhIiÍíJjKkLlĹ弾MmNnŇňOoÓóÔôPpRrŔŕSsŠšTtŤťUuÚúVvYyÝýZzŽž" .. c.punc, | |||
} | } | ||
| Line 2,133: | Line 2,198: | ||
"zls", | "zls", | ||
"Latn", | "Latn", | ||
entry_name = { | |||
remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.dgrave .. c.invbreve .. c.dotbelow, | remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.dgrave .. c.invbreve .. c.dotbelow, | ||
remove_exceptions = {"Ć", "ć", "Ǵ", "ǵ", "Ś", "ś", "Ź", "ź"}, | remove_exceptions = {"Ć", "ć", "Ǵ", "ǵ", "Ś", "ś", "Ź", "ź"}, | ||
| Line 2,145: | Line 2,210: | ||
to = {"a" .. p[1], "c" .. p[1], "c" .. p[2], "d" .. p[1], "e", "e" .. p[1], "g" .. p[1], "g" .. p[2], "i" .. p[1], "l", "o" .. p[1], "s" .. p[1], "s" .. p[2], "u" .. p[1], "z" .. p[1], "z" .. p[2]}, | to = {"a" .. p[1], "c" .. p[1], "c" .. p[2], "d" .. p[1], "e", "e" .. p[1], "g" .. p[1], "g" .. p[2], "i" .. p[1], "l", "o" .. p[1], "s" .. p[1], "s" .. p[2], "u" .. p[1], "z" .. p[1], "z" .. p[2]}, | ||
}, | }, | ||
standardChars = "AaBbCcČčDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsŠšTtUuVvZzŽž" .. c.punc, | |||
} | } | ||
| Line 2,160: | Line 2,225: | ||
"bnt-sho", | "bnt-sho", | ||
"Latn", | "Latn", | ||
entry_name = {remove_diacritics = c.acute}, | |||
} | } | ||
| Line 2,168: | Line 2,233: | ||
"cus-som", | "cus-som", | ||
"Latn, Arab, Osma", | "Latn, Arab, Osma", | ||
entry_name = { | |||
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ} | Latn = {remove_diacritics = c.grave .. c.acute .. c.circ} | ||
}, | }, | ||
| Line 2,180: | Line 2,245: | ||
translit = { | translit = { | ||
Elba = "Elba-translit", | Elba = "Elba-translit", | ||
}, | }, | ||
display_text = { | |||
Grek = s["Grek-displaytext"], | |||
}, | |||
entry_name = { | |||
Latn = { | Latn = { | ||
remove_diacritics = c.acute | remove_diacritics = c.acute, | ||
from = {'^[ie] (%w)', '^të (%w)'}, to = {'%1', '%1'}, | from = {'^[ie] (%w)', '^të (%w)'}, to = {'%1', '%1'}, | ||
}, | }, | ||
Grek = { -- Diacritic removal from Grek- | Grek = { -- Diacritic removal from Grek-entryname excluded. | ||
from = | from = s["Grek-entryname"].from, | ||
to = | to = s["Grek-entryname"].to, | ||
}, | }, | ||
}, | }, | ||
sort_key = { | sort_key = { | ||
Latn = { | Latn = { | ||
remove_diacritics = c.acute .. c.circ | remove_diacritics = c.acute .. c.circ .. c.tilde .. c.breve .. c.caron, | ||
from = {'^[ie] (%w)', '^të (%w)', 'ç', 'dh', 'ë', 'gj', 'll', 'nj', 'rr', 'sh', 'th', 'xh', 'zh'}, | from = {'^[ie] (%w)', '^të (%w)', 'ç', 'dh', 'ë', 'gj', 'll', 'nj', 'rr', 'sh', 'th', 'xh', 'zh'}, | ||
to = {'%1', '%1', 'c'..p[1], 'd'..p[1], 'e'..p[1], 'g'..p[1], 'l'..p[1], 'n'..p[1], 'r'..p[1], 's'..p[1], 't'..p[1], 'x'..p[1], 'z'..p[1]}, | to = {'%1', '%1', 'c'..p[1], 'd'..p[1], 'e'..p[1], 'g'..p[1], 'l'..p[1], 'n'..p[1], 'r'..p[1], 's'..p[1], 't'..p[1], 'x'..p[1], 'z'..p[1]}, | ||
} | } | ||
-- TODO: Grek | -- TODO: Grek | ||
}, | }, | ||
standardChars = { | |||
Latn = "AaBbCcÇçDdEeËëFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvXxYyZz", | Latn = "AaBbCcÇçDdEeËëFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvXxYyZz", | ||
c.punc | c.punc | ||
| Line 2,212: | Line 2,278: | ||
"bnt-ngu", | "bnt-ngu", | ||
"Latn", | "Latn", | ||
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron}, | |||
} | } | ||
| Line 2,220: | Line 2,286: | ||
"bnt-sts", | "bnt-sts", | ||
"Latn", | "Latn", | ||
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron}, | |||
} | } | ||
| Line 2,230: | Line 2,296: | ||
ancestors = "osn", | ancestors = "osn", | ||
translit = { | translit = { | ||
Sund = " | Sund = "su-translit" | ||
}, | }, | ||
} | } | ||
| Line 2,246: | Line 2,312: | ||
to = {"o", "ae", "oe", "ss", "z" .. p[1], "ä", "ö"} | to = {"o", "ae", "oe", "ss", "z" .. p[1], "ä", "ö"} | ||
}, | }, | ||
standardChars = "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpRrSsTtUuVvXxYyÅåÄäÖö" .. c.punc, | |||
} | } | ||
| Line 2,291: | Line 2,357: | ||
}, | }, | ||
override_translit = true, | override_translit = true, | ||
entry_name = { | |||
Cyrl = s["tg- | Cyrl = s["tg-entryname"], | ||
Latn = s["tg- | Latn = s["tg-entryname"], | ||
}, | }, | ||
sort_key = { | sort_key = { | ||
| Line 2,329: | Line 2,395: | ||
"trk-ogz", | "trk-ogz", | ||
"Latn, Cyrl, Arab", | "Latn, Cyrl, Arab", | ||
entry_name = { | |||
Latn = s["tk- | Latn = s["tk-entryname"], | ||
Cyrl = s["tk- | Cyrl = s["tk-entryname"], | ||
}, | }, | ||
sort_key = { | sort_key = { | ||
| Line 2,343: | Line 2,409: | ||
}, | }, | ||
}, | }, | ||
} | } | ||
| Line 2,355: | Line 2,420: | ||
}, | }, | ||
override_translit = true, | override_translit = true, | ||
entry_name = { | |||
Latn = {remove_diacritics = c.grave .. c.acute .. c.circ} | Latn = {remove_diacritics = c.grave .. c.acute .. c.circ} | ||
}, | }, | ||
standardChars = { | |||
Latn = "AaBbKkDdEeGgHhIiLlMmNnOoPpRrSsTtUuWwYy", | Latn = "AaBbKkDdEeGgHhIiLlMmNnOoPpRrSsTtUuWwYy", | ||
c.punc | c.punc | ||
| Line 2,379: | Line 2,444: | ||
"poz-ton", | "poz-ton", | ||
"Latn", | "Latn", | ||
entry_name = {remove_diacritics = c.acute}, | |||
sort_key = {remove_diacritics = c.macron}, | sort_key = {remove_diacritics = c.macron}, | ||
} | } | ||
| Line 2,403: | Line 2,468: | ||
} | } | ||
}, | }, | ||
standardChars = "AaÂâBbCcÇçDdEeFfGgĞğHhIıİiÎîJjKkLlMmNnOoÖöPpRrSsŞşTtUuÛûÜüVvYyZz" .. c.punc, | |||
} | } | ||
| Line 2,419: | Line 2,484: | ||
"Cyrl, Latn, tt-Arab", | "Cyrl, Latn, tt-Arab", | ||
translit = { | translit = { | ||
Cyrl | Cyrl = "tt-translit" | ||
}, | }, | ||
override_translit = true, | |||
dotted_dotless_i = true, | dotted_dotless_i = true, | ||
sort_key = { | sort_key = { | ||
| Line 2,442: | Line 2,506: | ||
} | } | ||
-- "tw" | -- "tw" IS TREATED AS "ak", SEE WT:LT | ||
m["ty"] = { | m["ty"] = { | ||
| Line 2,469: | Line 2,533: | ||
"zle", | "zle", | ||
"Cyrl", | "Cyrl", | ||
ancestors = "zle- | ancestors = "zle-ouk", | ||
translit = "uk-translit", | translit = "uk-translit", | ||
entry_name = {remove_diacritics = c.grave .. c.acute}, | |||
sort_key = { | sort_key = { | ||
remove_diacritics = c.grave .. c.acute, | remove_diacritics = c.grave .. c.acute, | ||
| Line 2,483: | Line 2,547: | ||
} | } | ||
}, | }, | ||
standardChars = "АаБбВвГгДдЕеЄєЖжЗзИиІіЇїЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЬьЮюЯя" .. c.punc:gsub("'", ""), -- Exclude apostrophe. | |||
} | } | ||
| Line 2,494: | Line 2,558: | ||
["ur-Arab"] = "ur-translit" | ["ur-Arab"] = "ur-translit" | ||
}, | }, | ||
display_text = { | |||
Hebr = "Hebr-common", | |||
}, | |||
entry_name = { | |||
["ur-Arab"] = { | ["ur-Arab"] = { | ||
-- character "ۂ" code U+06C2 to "ه" and "هٔ" (U+0647 + U+0654) to "ه"; hamzatu l-waṣli to a regular alif | -- character "ۂ" code U+06C2 to "ه" and "هٔ" (U+0647 + U+0654) to "ه"; hamzatu l-waṣli to a regular alif | ||
from = {"هٔ", "ۂ", "ٱ"}, | from = {"هٔ", "ۂ", "ٱ"}, | ||
to = {"ہ", "ہ", "ا"}, | to = {"ہ", "ہ", "ا"}, | ||
remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.nunghunna .. c.superalef | remove_diacritics = c.fathatan .. c.dammatan .. c.kasratan .. c.fatha .. c.damma .. c.kasra .. c.shadda .. c.sukun .. c.nunghunna .. c.superalef | ||
}, | }, | ||
Hebr = "Hebr-common", | |||
}, | |||
sort_key = { | |||
Hebr = "Hebr-common", | |||
}, | }, | ||
standardChars = { | |||
["ur-Arab"] = "ایببپتثجچحخدذرزژسشصضطظعغفقکگلࣇڷمنݨوؤہھئٹڈڑآے", | ["ur-Arab"] = "ایببپتثجچحخدذرزژسشصضطظعغفقکگلࣇڷمنݨوؤہھئٹڈڑآے", | ||
c.punc, | c.punc, | ||
| Line 2,527: | Line 2,597: | ||
to = {"е" .. p[1], "я" .. p[1], "я" .. p[2], "я" .. p[3], "я" .. p[4]} | to = {"е" .. p[1], "я" .. p[1], "я" .. p[2], "я" .. p[3], "я" .. p[4]} | ||
}, | }, | ||
}, | }, | ||
} | } | ||
| Line 2,579: | Line 2,646: | ||
"bnt-ngu", | "bnt-ngu", | ||
"Latn", | "Latn", | ||
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron}, | |||
} | } | ||
| Line 2,591: | Line 2,658: | ||
Hebr = "yi-translit", | Hebr = "yi-translit", | ||
}, | }, | ||
-- Hebr | display_text = { | ||
Hebr = "Hebr-common", | |||
}, | |||
entry_name = { | |||
Hebr = "Hebr-common", | |||
}, | |||
sort_key = { | |||
Hebr = "Hebr-common", | |||
}, | |||
} | } | ||
| Line 2,599: | Line 2,674: | ||
"alv-yor", | "alv-yor", | ||
"Latn, Arab", | "Latn, Arab", | ||
entry_name = { | |||
Latn = {remove_diacritics = c.grave .. c.acute .. c.macron} | Latn = {remove_diacritics = c.grave .. c.acute .. c.macron} | ||
}, | }, | ||
| Line 2,642: | Line 2,717: | ||
"bnt-ngu", | "bnt-ngu", | ||
"Latn", | "Latn", | ||
entry_name = {remove_diacritics = c.grave .. c.acute .. c.circ .. c.macron .. c.caron}, | |||
} | } | ||
return require("Module:languages").finalizeData(m, "language") | return require("Module:languages").finalizeData(m, "language") | ||