Module:languages/data: Difference between revisions
m 1 revision imported |
No edit summary Tag: Reverted |
||
| Line 9: | Line 9: | ||
-- UTF-8 encoded strings for some commonly-used diacritics. | -- UTF-8 encoded strings for some commonly-used diacritics. | ||
local c = { | local c = { | ||
prime | prime = u(0x02B9), | ||
grave | grave = u(0x0300), | ||
acute | acute = u(0x0301), | ||
circ | circ = u(0x0302), -- circumflex | ||
tilde | tilde = u(0x0303), | ||
macron | macron = u(0x0304), | ||
overline | overline = u(0x0305), | ||
breve | breve = u(0x0306), | ||
dotabove | dotabove = u(0x0307), | ||
diaer | diaer = u(0x0308), -- diaeresis | ||
hook = u(0x0309), | |||
ringabove = u(0x030A), | |||
dacute | dacute = u(0x030B), -- double acute | ||
caron | caron = u(0x030C), | ||
lineabove | lineabove = u(0x030D), | ||
dgrave | dgrave = u(0x030F), -- double grave | ||
invbreve = u( | invbreve = u(0x0311), -- inverted breve | ||
commaabove | turnedcommaabove = u(0x0312), | ||
revcommaabove = u(0x0314), | commaabove = u(0x0313), | ||
dotbelow | revcommaabove = u(0x0314), -- reversed comma above | ||
diaerbelow | dotbelow = u(0x0323), | ||
ringbelow | diaerbelow = u(0x0324), -- diaeresis below | ||
cedilla | ringbelow = u(0x0325), | ||
ogonek | cedilla = u(0x0327), | ||
tildebelow | ogonek = u(0x0328), | ||
brevebelow | tildebelow = u(0x0330), | ||
macronbelow | caronbelow = u(0x032C), | ||
perispomeni | brevebelow = u(0x032E), | ||
ypogegrammeni = u(0x0345), | macronbelow = u(0x0331), | ||
CGJ | perispomeni = u(0x0342), | ||
zigzag | ypogegrammeni = u(0x0345), | ||
dbrevebelow | CGJ = u(0x034F), -- combining grapheme joiner | ||
dmacron | zigzag = u(0x035B), | ||
dtilde | dbrevebelow = u(0x035C), -- double breve below | ||
dinvbreve | dmacron = u(0x035E), -- double macron | ||
small_a | dtilde = u(0x0360), -- double tilde | ||
small_e | dinvbreve = u(0x0361), -- double inverted breve | ||
small_i | small_a = u(0x0363), | ||
small_o | small_e = u(0x0364), | ||
small_u | small_i = u(0x0365), | ||
keraia | small_o = u(0x0366), | ||
lowerkeraia | small_u = u(0x0367), | ||
tonos | keraia = u(0x0374), | ||
palatalization = u(0x0484), | lowerkeraia = u(0x0375), | ||
dasiapneumata = u(0x0485), | tonos = u(0x0384), | ||
psilipneumata = u(0x0486), | palatalization = u(0x0484), | ||
kashida | dasiapneumata = u(0x0485), | ||
fathatan | psilipneumata = u(0x0486), | ||
dammatan | kashida = u(0x0640), | ||
kasratan | fathatan = u(0x064B), | ||
fatha | dammatan = u(0x064C), | ||
damma | kasratan = u(0x064D), | ||
kasra | fatha = u(0x064E), | ||
shadda | damma = u(0x064F), | ||
sukun | kasra = u(0x0650), | ||
hamzaabove | shadda = u(0x0651), | ||
nunghunna | sukun = u(0x0652), | ||
zwarakay | hamzaabove = u(0x0654), | ||
smallv | nunghunna = u(0x0658), | ||
superalef | zwarakay = u(0x0659), | ||
udatta | smallv = u(0x065A), | ||
anudatta = u( | superalef = u(0x0670), | ||
dottedgrave | udatta = u(0x0951), | ||
dottedacute | anudatta = u(0x0952), | ||
coronis | tacute = u(0x1ACB), -- triple acute | ||
psili | dsvarita = u(0x1CDA), -- double svarita | ||
dasia | tsvarita = u(0x1CDB), -- triple svarita | ||
ZWNJ | dottedgrave = u(0x1DC0), | ||
ZWJ | dottedacute = u(0x1DC1), | ||
RSQuo | coronis = u(0x1FBD), | ||
kavyka | psili = u(0x1FBF), | ||
VS01 | dasia = u(0x1FEF), | ||
-- Punctuation for the | ZWNJ = u(0x200C), -- zero width non-joiner | ||
ZWJ = u(0x200D), -- zero width joiner | |||
RSQuo = u(0x2019), -- right single quote | |||
kavyka = u(0xA67C), | |||
VS01 = u(0xFE00), -- variation selector 1 | |||
-- Punctuation for the standard_chars field. | |||
-- Note: characters are literal (i.e. no magic characters). | -- Note: characters are literal (i.e. no magic characters). | ||
punc = " ',- | punc = " ',-‐‑‒–—…∅◌", | ||
-- Range covering all diacritics. | -- Range covering all diacritics. | ||
diacritics = u(0x300) .. "-" .. u(0x34E) .. | diacritics = u(0x300) .. "-" .. u(0x34E) .. | ||