45,647
edits
No edit summary |
No edit summary |
||
Line 1: | Line 1: | ||
local m_scripts = require("Module:scripts") | |||
local table = table | local table = table | ||
local insert = table.insert | local insert = table.insert | ||
Line 34: | Line 36: | ||
ypogegrammeni = u(0x0345), | ypogegrammeni = u(0x0345), | ||
CGJ = u(0x034F), -- combining grapheme joiner | CGJ = u(0x034F), -- combining grapheme joiner | ||
zigzag = u(0x035B), | |||
dbrevebelow = u(0x035C), | dbrevebelow = u(0x035C), | ||
dmacron = u(0x035E), | |||
dtilde = u(0x0360), | |||
dinvbreve = u(0x0361), | dinvbreve = u(0x0361), | ||
small_a = u(0x0363), | |||
small_e = u(0x0364), | small_e = u(0x0364), | ||
small_i = u(0x0365), | |||
small_o = u(0x0366), | |||
small_u = u(0x0367), | |||
kamora = u(0x0484), | kamora = u(0x0484), | ||
dasiapneumata = u(0x0485), | dasiapneumata = u(0x0485), | ||
Line 60: | Line 69: | ||
VS01 = u(0xFE00), -- variation selector 1 | VS01 = u(0xFE00), -- variation selector 1 | ||
-- Punctuation for the standardChars field. | -- Punctuation for the standardChars field. | ||
punc = " ', | -- Note: characters are literal (i.e. no magic characters). | ||
punc = " ',-‐‑‒–—…∅", | |||
-- Range covering all diacritics. | |||
diacritics = u(0x300) .. "-" .. u(0x34E) .. | |||
u(0x350) .. "-" .. u(0x36F) .. | |||
u(0x1AB0) .. "-" .. u(0x1ACE) .. | |||
u(0x1DC0) .. "-" .. u(0x1DFF) .. | |||
u(0x20D0) .. "-" .. u(0x20F0) .. | |||
u(0xFE20) .. "-" .. u(0xFE2F), | |||
} | } | ||
-- Braille characters for the standardChars field. | -- Braille characters for the standardChars field. | ||
Line 108: | Line 125: | ||
s["Grek-sortkey"] = { | s["Grek-sortkey"] = { | ||
remove_diacritics = c.grave .. c.acute .. c.diaer .. c.caron .. c.commaabove .. c.revcommaabove .. c.diaerbelow .. c.brevebelow .. c.perispomeni .. c.ypogegrammeni, | remove_diacritics = c.grave .. c.acute .. c.diaer .. c.caron .. c.commaabove .. c.revcommaabove .. c.macron .. c.breve .. c.diaerbelow .. c.brevebelow .. c.perispomeni .. c.ypogegrammeni, | ||
from = {"ϝ", "ͷ", "ϛ", "ͱ", "ϻ", "ϟ", "ϙ", "ς", "ϡ", "ͳ"}, | from = {"ϝ", "ͷ", "ϛ", "ͱ", "ϻ", "ϟ", "ϙ", "ς", "ϡ", "ͳ"}, | ||
to = {"ε" .. p[1], "ε" .. p[2], "ε" .. p[3], "ζ" .. p[1], "π" .. p[1], "π" .. p[2], "π" .. p[2], "σ", "ω" .. p[1], "ω" .. p[1]} | to = {"ε" .. p[1], "ε" .. p[2], "ε" .. p[3], "ζ" .. p[1], "π" .. p[1], "π" .. p[2], "π" .. p[2], "σ", "ω" .. p[1], "ω" .. p[1]} | ||
} | } | ||
s["Jpan-sortkey"] = { | s["Jpan-standardchars"] = -- exclude ぢづヂヅ | ||
"ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちっつてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろん" .. | |||
"ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチッツテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロン" | |||
local jpx_displaytext = { | |||
from = {"~", "="}, | |||
to = {"〜", "゠"} | |||
} | |||
s["jpx-displaytext"] = { | |||
Jpan = jpx_displaytext, | |||
Hani = jpx_displaytext, | |||
Hrkt = jpx_displaytext, | |||
Hira = jpx_displaytext, | |||
Kana = jpx_displaytext | |||
-- not Latn or Brai | |||
} | |||
s["jpx-entryname"] = s["jpx-displaytext"] | |||
s["jpx-sortkey"] = { | |||
Jpan = "Jpan-sortkey", | Jpan = "Jpan-sortkey", | ||
Hani = "Hani-sortkey", | Hani = "Hani-sortkey", | ||
Hrkt = "Hira-sortkey", -- sort general kana by normalizing to Hira | Hrkt = "Hira-sortkey", -- sort general kana by normalizing to Hira | ||
Hira = "Hira-sortkey", | Hira = "Hira-sortkey", | ||
Kana = "Kana-sortkey" | Kana = "Kana-sortkey", | ||
Latn = {remove_diacritics = c.tilde .. c.macron .. c.diaer} | |||
} | } | ||
s[" | s["jpx-translit"] = { | ||
Hrkt = "Hrkt-translit", | Hrkt = "Hrkt-translit", | ||
Hira = "Hrkt-translit", | Hira = "Hrkt-translit", | ||
Line 127: | Line 165: | ||
} | } | ||
local HaniChars = | local HaniChars = m_scripts.getByCode("Hani"):getCharacters() | ||
-- `漢字(한자)`→`漢字` | -- `漢字(한자)`→`漢字` | ||
-- `가-나-다`→`가나다` | -- `가-나-다`→`가나다`, `가--나--다`→`가-나-다` | ||
-- `온돌(溫突/溫堗)`→`온돌` ([[ondol]]) | -- `온돌(溫突/溫堗)`→`온돌` ([[ondol]]) | ||
s["Kore-entryname"] = { | s["Kore-entryname"] = { | ||
remove_diacritics = u(0x302E) .. u(0x302F), | remove_diacritics = u(0x302E) .. u(0x302F), | ||
from = {"([" .. HaniChars .. "])%(.-%)", "( | from = {"([" .. HaniChars .. "])%(.-%)", "^%-", "%-$", "%-(%-?)", "\1", "%([" .. HaniChars .. "/]+%)"}, | ||
to = {"%1", "%1 | to = {"%1", "\1", "\1", "%1", "-"} | ||
} | } | ||