Module:languages/data: Difference between revisions

m 1 revision imported
No edit summary
Tag: Reverted
Line 9: Line 9:
-- UTF-8 encoded strings for some commonly-used diacritics.
-- UTF-8 encoded strings for some commonly-used diacritics.
local c = {
local c = {
prime = u(0x02B9),
prime = u(0x02B9),
grave = u(0x0300),
grave = u(0x0300),
acute = u(0x0301),
acute = u(0x0301),
circ = u(0x0302),
circ = u(0x0302), -- circumflex
tilde = u(0x0303),
tilde = u(0x0303),
macron = u(0x0304),
macron = u(0x0304),
overline = u(0x0305),
overline = u(0x0305),
breve = u(0x0306),
breve = u(0x0306),
dotabove = u(0x0307),
dotabove = u(0x0307),
diaer = u(0x0308),
diaer = u(0x0308), -- diaeresis
ringabove = u(0x030A),
hook                = u(0x0309),
hook            = u(0x0309),
ringabove = u(0x030A),
dacute = u(0x030B),
dacute = u(0x030B), -- double acute
caron = u(0x030C),
caron = u(0x030C),
lineabove = u(0x030D),
lineabove = u(0x030D),
dgrave = u(0x030F),
dgrave = u(0x030F), -- double grave
invbreve = u(0x0311),
invbreve = u(0x0311), -- inverted breve
commaabove = u(0x0313),
turnedcommaabove = u(0x0312),
revcommaabove = u(0x0314),
commaabove = u(0x0313),
dotbelow = u(0x0323),
revcommaabove = u(0x0314), -- reversed comma above
diaerbelow = u(0x0324),
dotbelow = u(0x0323),
ringbelow = u(0x0325),
diaerbelow = u(0x0324), -- diaeresis below
cedilla = u(0x0327),
ringbelow = u(0x0325),
ogonek = u(0x0328),
cedilla = u(0x0327),
tildebelow     = u(0x0330),
ogonek = u(0x0328),
brevebelow = u(0x032E),
tildebelow         = u(0x0330),
macronbelow = u(0x0331),
caronbelow = u(0x032C),
perispomeni = u(0x0342),
brevebelow = u(0x032E),
ypogegrammeni = u(0x0345),
macronbelow = u(0x0331),
CGJ = u(0x034F), -- combining grapheme joiner
perispomeni = u(0x0342),
zigzag = u(0x035B),
ypogegrammeni = u(0x0345),
dbrevebelow = u(0x035C),
CGJ = u(0x034F), -- combining grapheme joiner
dmacron = u(0x035E),
zigzag = u(0x035B),
dtilde = u(0x0360),
dbrevebelow = u(0x035C), -- double breve below
dinvbreve = u(0x0361),
dmacron = u(0x035E), -- double macron
small_a = u(0x0363),
dtilde = u(0x0360), -- double tilde
small_e = u(0x0364),
dinvbreve = u(0x0361), -- double inverted breve
small_i = u(0x0365),
small_a = u(0x0363),
small_o = u(0x0366),
small_e = u(0x0364),
small_u = u(0x0367),
small_i = u(0x0365),
keraia = u(0x0374),
small_o = u(0x0366),
lowerkeraia = u(0x0375),
small_u = u(0x0367),
tonos = u(0x0384),
keraia = u(0x0374),
palatalization = u(0x0484),
lowerkeraia = u(0x0375),
dasiapneumata = u(0x0485),
tonos = u(0x0384),
psilipneumata = u(0x0486),
palatalization = u(0x0484),
kashida = u(0x0640),
dasiapneumata = u(0x0485),
fathatan = u(0x064B),
psilipneumata = u(0x0486),
dammatan = u(0x064C),
kashida = u(0x0640),
kasratan = u(0x064D),
fathatan = u(0x064B),
fatha = u(0x064E),
dammatan = u(0x064C),
damma = u(0x064F),
kasratan = u(0x064D),
kasra = u(0x0650),
fatha = u(0x064E),
shadda = u(0x0651),
damma = u(0x064F),
sukun = u(0x0652),
kasra = u(0x0650),
hamzaabove = u(0x0654),
shadda = u(0x0651),
nunghunna = u(0x0658),
sukun = u(0x0652),
zwarakay = u(0x0659),
hamzaabove = u(0x0654),
smallv = u(0x065A),
nunghunna = u(0x0658),
superalef = u(0x0670),
zwarakay = u(0x0659),
udatta = u(0x0951),
smallv = u(0x065A),
anudatta = u(0x0952),
superalef = u(0x0670),
dottedgrave = u(0x1DC0),
udatta = u(0x0951),
dottedacute = u(0x1DC1),
anudatta = u(0x0952),
coronis = u(0x1FBD),
tacute = u(0x1ACB), -- triple acute
psili = u(0x1FBF),
dsvarita = u(0x1CDA), -- double svarita
dasia = u(0x1FEF),
tsvarita = u(0x1CDB), -- triple svarita
ZWNJ = u(0x200C), -- zero width non-joiner
dottedgrave = u(0x1DC0),
ZWJ = u(0x200D), -- zero width joiner
dottedacute = u(0x1DC1),
RSQuo = u(0x2019), -- right single quote
coronis = u(0x1FBD),
kavyka = u(0xA67C),
psili = u(0x1FBF),
VS01 = u(0xFE00), -- variation selector 1
dasia = u(0x1FEF),
-- Punctuation for the standardChars field.
ZWNJ = u(0x200C), -- zero width non-joiner
ZWJ = u(0x200D), -- zero width joiner
RSQuo = u(0x2019), -- right single quote
kavyka = u(0xA67C),
VS01 = u(0xFE00), -- variation selector 1
-- Punctuation for the standard_chars field.
-- Note: characters are literal (i.e. no magic characters).
-- Note: characters are literal (i.e. no magic characters).
punc = " ',-‐‑‒–—…∅",
punc = " ',-​‌‍‐‑‒–—…∅◌",
-- Range covering all diacritics.
-- Range covering all diacritics.
diacritics = u(0x300) .. "-" .. u(0x34E) ..
diacritics = u(0x300) .. "-" .. u(0x34E) ..