Module:languages/data: Difference between revisions

Jump to navigation Jump to search
no edit summary
No edit summary
No edit summary
 
Line 1: Line 1:
local m_scripts = require("Module:scripts")
local table = table
local table = table
local insert = table.insert
local insert = table.insert
Line 34: Line 36:
ypogegrammeni = u(0x0345),
ypogegrammeni = u(0x0345),
CGJ = u(0x034F), -- combining grapheme joiner
CGJ = u(0x034F), -- combining grapheme joiner
zigzag = u(0x035B),
dbrevebelow = u(0x035C),
dbrevebelow = u(0x035C),
dmacron = u(0x035E),
dtilde = u(0x0360),
dinvbreve = u(0x0361),
dinvbreve = u(0x0361),
small_a = u(0x0363),
small_e = u(0x0364),
small_e = u(0x0364),
small_i = u(0x0365),
small_o = u(0x0366),
small_u = u(0x0367),
kamora          = u(0x0484),
kamora          = u(0x0484),
dasiapneumata  = u(0x0485),
dasiapneumata  = u(0x0485),
Line 60: Line 69:
VS01 = u(0xFE00), -- variation selector 1
VS01 = u(0xFE00), -- variation selector 1
-- Punctuation for the standardChars field.
-- Punctuation for the standardChars field.
punc = " ',%-–…∅"
-- Note: characters are literal (i.e. no magic characters).
punc = " ',-‐‑‒–—…∅",
-- Range covering all diacritics.
diacritics = u(0x300) .. "-" .. u(0x34E) ..
u(0x350) .. "-" .. u(0x36F) ..
u(0x1AB0) .. "-" .. u(0x1ACE) ..
u(0x1DC0) .. "-" .. u(0x1DFF) ..
u(0x20D0) .. "-" .. u(0x20F0) ..
u(0xFE20) .. "-" .. u(0xFE2F),
}
}
-- Braille characters for the standardChars field.
-- Braille characters for the standardChars field.
Line 108: Line 125:


s["Grek-sortkey"] = {
s["Grek-sortkey"] = {
remove_diacritics = c.grave .. c.acute .. c.diaer .. c.caron .. c.commaabove .. c.revcommaabove .. c.diaerbelow .. c.brevebelow .. c.perispomeni .. c.ypogegrammeni,
remove_diacritics = c.grave .. c.acute .. c.diaer .. c.caron .. c.commaabove .. c.revcommaabove .. c.macron .. c.breve .. c.diaerbelow .. c.brevebelow .. c.perispomeni .. c.ypogegrammeni,
from = {"ϝ", "ͷ", "ϛ", "ͱ", "ϻ", "ϟ", "ϙ", "ς", "ϡ", "ͳ"},
from = {"ϝ", "ͷ", "ϛ", "ͱ", "ϻ", "ϟ", "ϙ", "ς", "ϡ", "ͳ"},
to = {"ε" .. p[1], "ε" .. p[2], "ε" .. p[3], "ζ" .. p[1], "π" .. p[1], "π" .. p[2], "π" .. p[2], "σ", "ω" .. p[1], "ω" .. p[1]}
to = {"ε" .. p[1], "ε" .. p[2], "ε" .. p[3], "ζ" .. p[1], "π" .. p[1], "π" .. p[2], "π" .. p[2], "σ", "ω" .. p[1], "ω" .. p[1]}
}
}


s["Jpan-sortkey"] = {
-- Kana counted as standard characters for Japanese: the hiragana list first,
-- then the matching katakana list, joined into one string.
-- The only deliberate omissions are ぢ, づ, ヂ and ヅ (see the inline note);
-- わ/を and ワ/ヲ are ordinary modern kana and must stay in the lists.
-- NOTE(review): ゎゐゑ / ヮヰヱ, ヴ and the prolonged sound mark ー are not
-- listed either — confirm whether that is intended.
s["Jpan-standardchars"] = -- exclude ぢづヂヅ
"ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちっつてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろわをん" ..
"ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチッツテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロワヲン"
 
-- Substitution table with two parallel lists: `from` holds ASCII "~" and "=",
-- `to` holds "〜" (wave dash) and "゠" (double hyphen) at the same positions.
-- NOTE(review): presumably the consumer pairs from[i] with to[i]; confirm
-- against the code that applies these tables.
local jpx_displaytext = {
from = {"~", "="},
to = {"〜", "゠"}
}
 
-- Display-text settings keyed by script code. Every listed script points at
-- the very same jpx_displaytext table (one shared object, not copies).
-- Latn and Brai are deliberately given no entry (see the trailing note).
s["jpx-displaytext"] = {
Jpan = jpx_displaytext,
Hani = jpx_displaytext,
Hrkt = jpx_displaytext,
Hira = jpx_displaytext,
Kana = jpx_displaytext
-- not Latn or Brai
}
 
-- Entry names reuse the display-text settings: this stores a reference to the
-- same table, so a change to either key's table is visible through both.
s["jpx-entryname"] = s["jpx-displaytext"]
 
s["jpx-sortkey"] = {
Jpan = "Jpan-sortkey",
Jpan = "Jpan-sortkey",
Hani = "Hani-sortkey",
Hani = "Hani-sortkey",
Hrkt = "Hira-sortkey", -- sort general kana by normalizing to Hira
Hrkt = "Hira-sortkey", -- sort general kana by normalizing to Hira
Hira = "Hira-sortkey",
Hira = "Hira-sortkey",
Kana = "Kana-sortkey"
Kana = "Kana-sortkey",
Latn = {remove_diacritics = c.tilde .. c.macron .. c.diaer}
}
}


s["Jpan-translit"] = {
s["jpx-translit"] = {
Hrkt = "Hrkt-translit",
Hrkt = "Hrkt-translit",
Hira = "Hrkt-translit",
Hira = "Hrkt-translit",
Line 127: Line 165:
}
}


local HaniChars = require("Module:scripts").getByCode("Hani"):getCharacters()
local HaniChars = m_scripts.getByCode("Hani"):getCharacters()
-- `漢字(한자)`→`漢字`
-- `漢字(한자)`→`漢字`
-- `가-나-다`→`가나다`
-- `가-나-다`→`가나다`, `가--나--다`→`가-나-다`
-- `온돌(溫突/溫堗)`→`온돌` ([[ondol]])
-- `온돌(溫突/溫堗)`→`온돌` ([[ondol]])
s["Kore-entryname"] = {
s["Kore-entryname"] = {
remove_diacritics = u(0x302E) .. u(0x302F),
remove_diacritics = u(0x302E) .. u(0x302F),
from = {"([" .. HaniChars .. "])%(.-%)", "(.)%-(.)", "%([" .. HaniChars .. "/]+%)"},
from = {"([" .. HaniChars .. "])%(.-%)", "^%-", "%-$", "%-(%-?)", "\1", "%([" .. HaniChars .. "/]+%)"},
to = {"%1", "%1%2"}
to = {"%1", "\1", "\1", "%1", "-"}
}
}


Navigation menu