Module:scripts/data
Jump to navigation
Jump to search
- The following documentation is located at Module:scripts/data/doc.[edit]
- Useful links: root page • root page's subpages • links • transclusions • testcases • sandbox
--[=[
This is imported from Wiktionary - it'll be updated for this wiki - the data is left currently as examples.
When adding new scripts to this file, please don't forget to add
style definitons for the script in [[MediaWiki:Common.css]].
]=]
local concat = table.concat
local insert = table.insert
local ipairs = ipairs
local next = next
local remove = table.remove
local select = select
local sort = table.sort
local u = require("Module:string utilities").char
------------------------------------------------------------------------------------
--
-- Helper functions
--
------------------------------------------------------------------------------------
-- Note: a[2] > b[2] means opens are sorted before closes if otherwise equal.
local function sort_ranges(a, b)
return a[1] < b[1] or a[1] == b[1] and a[2] > b[2]
end
-- Returns the union of two or more range tables.
local function union(...)
local ranges = {}
for i = 1, select("#", ...) do
local argt = select(i, ...)
for j, v in ipairs(argt) do
insert(ranges, {v, j % 2 == 1 and 1 or -1})
end
end
sort(ranges, sort_ranges)
local ret, i = {}, 0
for _, range in ipairs(ranges) do
i = i + range[2]
if i == 0 and range[2] == -1 then -- close
insert(ret, range[1])
elseif i == 1 and range[2] == 1 then -- open
if ret[#ret] and range[1] <= ret[#ret] + 1 then
remove(ret) -- merge adjacent ranges
else
insert(ret, range[1])
end
end
end
return ret
end
-- Adds the `characters` key, which is determined by a script's `ranges` table.
local function process_ranges(sc)
local ranges, chars = sc.ranges, {}
for i = 2, #ranges, 2 do
if ranges[i] == ranges[i - 1] then
insert(chars, u(ranges[i]))
else
insert(chars, u(ranges[i - 1]))
if ranges[i] > ranges[i - 1] + 1 then
insert(chars, "-")
end
insert(chars, u(ranges[i]))
end
end
sc.characters = concat(chars)
ranges.n = #ranges
return sc
end
local function handle_normalization_fixes(fixes)
local combiningClasses = fixes.combiningClasses
if combiningClasses then
local chars, i = {}, 0
for char in next, combiningClasses do
i = i + 1
chars[i] = char
end
fixes.combiningClassCharacters = concat(chars)
end
return fixes
end
------------------------------------------------------------------------------------
--
-- Data
--
------------------------------------------------------------------------------------
local m = {}
--Constructed languages
m["Ayer"] = {
canonicalName = "Tahano Hikamu",
characters = "A-Za-z*ñÑʔəƏꜷꜶ¯ˀ¨ʰʲː[]&+0-9¹²",
character_category = false,
systems = {"abugida"},
}
m["Roka"] = {
canonicalName = "Rokadong Curakjang",
characters = "A-Za-z*[]{}'\"0-9",
character_category = false,
systems = {"abugida"},
}
--Natural languages
m["Adlm"] = {
canonicalName = "Adlam",
characters = "𞤀-𞥟",
direction = "rtl",
}
m["Afak"] = {
canonicalName = "Afaka",
}
m["Aghb"] = {
canonicalName = "Caucasian Albanian",
characters = "𐔰-𐕣𐕯",
}
m["Ahom"] = {
canonicalName = "Ahom",
characters = "𑜀-𑜿",
systems = {"abugida"},
}
m["Arab"] = {
canonicalName = "Arabic",
otherNames = {"Jawi", "Nastaliq", "Nastaleeq"},
characters = "-ۿݐ-ݿࢠ-ࣿﭐ-﷽ﹰ-ﻼ",
direction = "rtl",
systems = {"abjad"}, -- more precisely, impure abjad
}
m["fa-Arab"] = {
canonicalName = "Arabic",
otherNames = {"Perso-Arabic"},
characters = m["Arab"].characters,
direction = "rtl",
parent = "Arab",
}
m["kk-Arab"] = {
canonicalName = "Arabic",
characters = m["Arab"].characters,
direction = "rtl",
parent = "Arab",
}
m["ks-Arab"] = {
canonicalName = "Arabic",
characters = m["Arab"].characters,
direction = "rtl",
parent = "Arab",
}
m["ku-Arab"] = {
canonicalName = "Arabic",
characters = m["Arab"].characters,
direction = "rtl",
parent = "Arab",
}
m["ms-Arab"] = {
canonicalName = "Arabic",
characters = m["Arab"].characters,
direction = "rtl",
parent = "Arab",
}
m["mzn-Arab"] = {
canonicalName = "Arabic",
characters = m["Arab"].characters,
direction = "rtl",
parent = "Arab",
}
m["ota-Arab"] = {
canonicalName = "Arabic",
characters = m["Arab"].characters,
direction = "rtl",
parent = "Arab",
}
m["pa-Arab"] = {
canonicalName = "Shahmukhi",
otherNames = {"Arabic"},
characters = m["Arab"].characters,
direction = "rtl",
parent = "Arab",
}
m["ps-Arab"] = {
canonicalName = "Arabic",
characters = m["Arab"].characters,
direction = "rtl",
parent = "Arab",
}
m["sd-Arab"] = {
canonicalName = "Arabic",
characters = m["Arab"].characters,
direction = "rtl",
parent = "Arab",
}
m["tt-Arab"] = {
canonicalName = "Arabic",
characters = m["Arab"].characters,
direction = "rtl",
parent = "Arab",
}
m["ug-Arab"] = {
canonicalName = "Arabic",
characters = m["Arab"].characters,
direction = "rtl",
parent = "Arab",
}
m["ur-Arab"] = {
canonicalName = "Arabic",
characters = m["Arab"].characters,
direction = "rtl",
parent = "Arab",
}
-- Aran (Nastaliq) is subsumed into Arab
m["Armi"] = {
canonicalName = "Imperial Aramaic",
characters = "𐡀-𐡟",
direction = "rtl",
systems = {"abjad"},
}
m["Armn"] = {
canonicalName = "Armenian",
characters = "Ա-֏ﬓ-ﬗ",
}
m["Avst"] = {
canonicalName = "Avestan",
characters = "𐬀-𐬿",
direction = "rtl",
}
m["Bali"] = {
canonicalName = "Balinese",
characters = "ᬀ-᭼",
systems = {"abugida"},
}
m["Bamu"] = {
canonicalName = "Bamum",
characters = "ꚠ-꛷𖠀-𖨸",
}
m["Bass"] = {
canonicalName = "Bassa",
otherNames = {"Bassa Vah", "Vah"},
characters = "𖫐-𖫵",
}
m["Batk"] = {
canonicalName = "Batak",
characters = "ᯀ-᯿",
systems = {"abugida"},
}
m["Beng"] = {
canonicalName = "Bengali",
otherNames = {"Bengali-Assamese", "Eastern Nagari"},
characters = "ঀ-ঃঅ-ঌএঐও-নপ-রললশ-হ়-ৄেৈো-ৎৗড়ঢ়য়়ৠ-ৣ০-৯",
systems = {"abugida"},
}
m["as-Beng"] = {
canonicalName = "Assamese",
otherNames = {"Bengali-Assamese", "Eastern Nagari"},
characters = "ঁ-ঃঅ-ঌএঐও-নপ-যশ-হ়-ৄেৈো-ৎৗড়ঢ়য়়ৠ-ৣ০-ৱ",
systems = {"abugida"},
}
m["Bhks"] = {
canonicalName = "Bhaiksuki",
characters = "𑰀-𑱬",
systems = {"abugida"},
}
m["Bopo"] = {
canonicalName = "Zhuyin",
otherNames = {"Zhuyin Fuhao", "Bopomofo"},
characters = "ㄅ-ㄯㆠ-ㆺ",
}
m["Brah"] = {
canonicalName = "Brahmi",
characters = "𑀀-𑁿",
systems = {"abugida"},
}
m["Brai"] = {
canonicalName = "Braille",
characters = "⠀-⣿",
}
m["Bugi"] = {
canonicalName = "Buginese",
otherNames = {"Lontara"},
characters = "ᨀ-᨟",
systems = {"abugida"},
}
m["Buhd"] = {
canonicalName = "Buhid",
characters = "ᝀ-ᝓ",
systems = {"abugida"},
}
m["Cakm"] = {
canonicalName = "Chakma",
characters = "𑄀-𑅆",
systems = {"abugida"},
}
m["Cans"] = {
canonicalName = "Canadian syllabics",
characters = "᐀-ᙿ",
systems = {"abugida"},
}
m["Cari"] = {
canonicalName = "Carian",
characters = "𐊠-𐋐",
systems = {"alphabet"},
}
m["Cham"] = {
canonicalName = "Cham",
characters = "ꨀ-꩟",
systems = {"abugida"},
}
m["Cher"] = {
canonicalName = "Cherokee",
characters = "Ꭰ-Ᏼꭰ-ꮿ",
systems = {"syllabary"},
}
m["Copt"] = {
canonicalName = "Coptic",
characters = "Ϣ-ϯⲀ-⳿𐋡-𐋻", -- this is mostly "Coptic", not unified "Greek and Coptic"
systems = {"alphabet"},
}
m["Cprt"] = {
canonicalName = "Cypriot",
characters = "𐠀-𐠿",
direction = "rtl",
systems = {"syllabary"},
}
m["Cyrl"] = {
canonicalName = "Cyrillic",
characters = "Ѐ-џѢѣѪѫѬѭѲѳѴѵҊ-ԧꚀ-ꚗ",
systems = {"alphabet"},
}
m["Cyrs"] = {
canonicalName = "Old Cyrillic",
otherNames = { "Early Cyrillic" },
characters = "Ѐ-ԧꙀ-ꚗ",
wikipedia_article = "Early Cyrillic alphabet",
systems = {"alphabet"},
}
m["Deva"] = {
canonicalName = "Devanagari",
characters = "ऀ-ॿ꣠-ꣿ",
systems = {"abugida"},
}
m["Dogr"] = {
canonicalName = "Dogra",
characters = "𑠀-𑠻",
systems = {"abugida"},
}
m["Dsrt"] = {
canonicalName = "Deseret",
characters = "𐐀-𐑏",
systems = {"alphabet"},
}
m["Dupl"] = {
canonicalName = "Duployan",
characters = "𛰀-𛲟",
}
m["Egyd"] = {
canonicalName = "Demotic",
}
m["Egyp"] = {
canonicalName = "Egyptian hieroglyphic",
characters = "𓀀-𓐮",
}
m["Elba"] = {
canonicalName = "Elbasan",
characters = "𐔀-𐔧",
}
m["Ethi"] = {
canonicalName = "Ethiopic",
otherNames = {"Ge'ez"},
characters = "ሀ-᎙ⶀ-ⷞꬁ-ꬮ",
systems = {"abugida"},
}
m["Geok"] = {
canonicalName = "Khutsuri",
otherNames = {"Nuskhuri", "Asomtavruli"},
characters = "Ⴀ-Ⴭⴀ-ⴭ", -- Ⴀ-Ⴭ is Asomtavruli, ⴀ-ⴭ is Nuskhuri
systems = {"alphabet"},
}
m["Geor"] = {
canonicalName = "Georgian",
otherNames = {"Mkhedruli", "Mtavruli"},
characters = "ა-ჿᲐ-Ჿ", -- ა-ჿ is lowercase Mkhedruli; Ა-Ჿ is uppercase Mkhedruli (Mtavruli)
systems = {"alphabet"},
}
m["Glag"] = {
canonicalName = "Glagolitic",
characters = "Ⰰ-ⱞ𞀀-𞀪",
systems = {"alphabet"},
}
m["Gong"] = {
canonicalName = "Gunjala Gondi",
characters = "𑵠-𑶩",
systems = {"abugida"},
}
m["Gonm"] = {
canonicalName = "Masaram Gondi",
characters = "𑴀-𑵙",
systems = {"abugida"},
}
m["Goth"] = {
canonicalName = "Gothic",
characters = "𐌰-𐍊",
systems = {"alphabet"},
}
m["Gran"] = {
canonicalName = "Grantha",
characters = "𑌁-𑍴",
systems = {"abugida"},
}
m["Grek"] = {
canonicalName = "Greek",
characters = "Ͱ-ϡϰ-Ͽ",
systems = {"alphabet"},
}
m["polytonic"] = {
canonicalName = "Greek",
characters = "ἀ-῾" .. m["Grek"].characters,
parent = "Grek",
systems = {"alphabet"},
}
m["Gujr"] = {
canonicalName = "Gujarati",
characters = "ઁ-૱",
systems = {"abugida"},
}
m["Guru"] = {
canonicalName = "Gurmukhi",
characters = "ਁ-੶",
systems = {"abugida"},
}
m["Gvoz"] = {
canonicalName = "Oz",
systems = {"alphabet"},
}
m["Hang"] = {
canonicalName = "Hangul",
characters = "ᄀ-ᇿ가-힣ㄱ-ㆎ",
systems = {"syllabary"},
}
m["Hani"] = {
canonicalName = "Han",
otherNames = {"Hanzi", "Chu Nom"},
characters = "一-鿿㐀-䶵𠀀-"..u(0x2EBE0).."﨎﨏﨑﨓﨔﨟﨡﨣﨤﨧﨨﨩⺀-㇀-㇣ -〿㍻-㍿",
systems = {"logography"},
}
m["Hans"] = {
canonicalName = "Simplified Han",
characters = m["Hani"].characters,
systems = {"logography"},
}
m["Hant"] = {
canonicalName = "Traditional Han",
characters = m["Hani"].characters,
systems = {"logography"},
}
m["Hatr"] = {
canonicalName = "Hatran",
characters = "𐣠-𐣿",
systems = {"abjad"},
}
m["Hira"] = {
canonicalName = "Hiragana",
otherNames = {"Hentaigana"},
characters = "ぁ-ゟ𛀁-𛄞",
systems = {"syllabary"},
}
m["Hluw"] = {
canonicalName = "Anatolian Hieroglyphs",
characters = "𔐀-𔙆",
wikipedia_article = "Anatolian hieroglyphs",
}
m["Hung"] = {
canonicalName = "Old Hungarian",
otherNames = {"Hungarian runic"},
characters = "𐲀-𐲲",
}
m["Kana"] = {
canonicalName = "Katakana",
characters = "゠-ヿㇰ-ㇿ𛀀㌀-㍗",
systems = {"syllabary"},
}
-- These should be defined after the scripts they are composed of
m["Kore"] = {
canonicalName = "Korean",
characters = m["Hang"].characters .. m["Hani"].characters .. "!-○",
systems = {"syllabary", "logography"},
}
m["Hano"] = {
canonicalName = "Hanunoo",
characters = "ᜠ-᜴",
systems = {"abugida"},
}
m["Hebr"] = {
canonicalName = "Hebrew",
characters = u(0x0590) .. "-" .. u(0x05FF) .. u(0xFB1D) .. "-" .. u(0xFB4F),
direction = "rtl",
systems = {"abjad"}, -- more precisely, impure abjad
}
m["Hmng"] = {
canonicalName = "Hmong",
otherNames = {"Pahawh Hmong"},
characters = "𖬀-𖮏",
}
m["Ibrn"] = {
canonicalName = "Iberian",
}
m["Imag"] = {
-- To be used to avoid any formatting or link processing
canonicalName = "Image-rendered",
-- This should not have any characters listed
character_category = false,
}
m["Inds"] = {
canonicalName = "Indus",
otherNames = {"Harappan", "Indus Valley"},
}
m["IPAchar"] = {
canonicalName = "International Phonetic Alphabet",
}
m["Ital"] = {
canonicalName = "Old Italic",
characters = "𐌀-𐌣",
systems = {"alphabet"},
}
m["Java"] = {
canonicalName = "Javanese",
characters = "ꦀ-꧟",
systems = {"abugida"},
}
m["Jurc"] = {
canonicalName = "Jurchen",
}
m["Kali"] = {
canonicalName = "Kayah Li",
characters = "꤀-꤯",
}
m["Khar"] = {
canonicalName = "Kharoshthi",
characters = "𐨀-𐩘",
systems = {"abugida"},
direction = "rtl",
}
m["Khmr"] = {
canonicalName = "Khmer",
characters = "ក-៹᧠-᧿",
systems = {"abugida"},
}
m["Khoj"] = {
canonicalName = "Khojki",
characters = "𑈀-𑈽",
systems = {"abugida"},
}
m["Kitl"] = {
canonicalName = "Khitan Large",
}
m["Kits"] = {
canonicalName = "Khitan Small",
}
m["Knda"] = {
canonicalName = "Kannada",
characters = "ಀ-ೲ",
systems = {"abugida"},
}
m["Kthi"] = {
canonicalName = "Kaithi",
characters = "𑂀-",
systems = {"abugida"},
}
m["Lana"] = {
canonicalName = "Tai Tham",
otherNames = {"Tham", "Tua Mueang", "Lanna"},
characters = "ᨠ-᪭",
systems = {"abugida"},
}
m["Laoo"] = {
canonicalName = "Lao",
characters = "ກ-ໟ",
systems = {"abugida"},
}
m["Latn"] = {
canonicalName = "Latin",
otherNames = {"Roman", "Rumi", "Romaji", "Rōmaji"},
characters = "A-Za-zÀ-ÖØ-öø-ɏḀ-ỿ",
systems = {"alphabet"},
}
m["Latf"] = {
canonicalName = "Fraktur",
otherNames = {"Blackletter"},
characters = m["Latn"].characters,
}
m["Latinx"] = {
canonicalName = "Latin",
characters = m["Latn"].characters .. "Ⱡ-Ɀ꜠-ꟿꬰ-ꭥ",
parent = "Latn",
}
m["nv-Latn"] = {
canonicalName = "Latin",
characters = m["Latn"].characters,
parent = "Latn",
}
m["pjt-Latn"] = {
canonicalName = "Latin",
characters = m["Latn"].characters,
parent = "Latn",
}
m["Jpan"] = {
canonicalName = "Japanese",
characters = m["Hira"].characters .. m["Kana"].characters .. m["Hani"].characters .. m["Latn"].characters,
systems = {"syllabary", "logography"},
}
m["Kwan"] = {
canonicalName = "Kwandon",
systems = {"abjad"},
}
m["Leke"] = {
canonicalName = "Leke",
systems = {"abugida"},
}
m["Lepc"] = {
canonicalName = "Lepcha",
characters = "ᰀ-ᱏ",
systems = {"abugida"},
}
m["Limb"] = {
canonicalName = "Limbu",
characters = "ᤀ-᥏",
systems = {"abugida"},
}
m["Lina"] = {
canonicalName = "Linear A",
characters = "𐘀-𐝧",
}
m["Linb"] = {
canonicalName = "Linear B",
characters = "𐀀-𐃺",
}
m["Lisu"] = {
canonicalName = "Lisu",
otherNames = {"Fraser"},
characters = "ꓐ-꓿",
systems = {"alphabet"},
}
m["Lyci"] = {
canonicalName = "Lycian",
characters = "𐊀-𐊜",
systems = {"alphabet"},
}
m["Lydi"] = {
canonicalName = "Lydian",
characters = "𐤠-𐤿",
systems = {"alphabet"},
}
m["Mahj"] = {
canonicalName = "Mahajani",
characters = "𑅐-𑅶",
systems = {"abugida"},
}
m["Maka"] = {
canonicalName = "Makasar",
characters = "𑻠-𑻸",
systems = {"abugida"},
}
m["Mand"] = {
canonicalName = "Mandaic",
otherNames = {"Mandaean"},
characters = "ࡀ-࡞",
direction = "rtl",
}
m["Mani"] = {
canonicalName = "Manichaean",
characters = "𐫀-𐫶",
direction = "rtl",
systems = {"abjad"},
}
m["Maya"] = {
canonicalName = "Maya",
otherNames = {"Maya hieroglyphic", "Mayan", "Mayan hieroglyphic"},
characters = "𝋠-𝋳",
}
m["Medf"] = {
canonicalName = "Medefaidrin",
otherNames = {"Oberi Okaime", "Oberi Ɔkaimɛ"},
characters = "𖹀-𖺚",
}
m["Mend"] = {
canonicalName = "Mende",
otherNames = {"Mende Kikakui"},
characters = "𞠀-𞣖",
direction = "rtl",
}
m["Merc"] = {
canonicalName = "Meroitic cursive",
characters = "𐦠-𐦿",
systems = {"abugida"},
}
m["Mero"] = {
canonicalName = "Meroitic hieroglyphic",
characters = "𐦀-𐦟",
systems = {"abugida"},
}
m["Mlym"] = {
canonicalName = "Malayalam",
characters = "ം-ൿ",
systems = {"abugida"},
}
m["Modi"] = {
canonicalName = "Modi",
characters = "𑘀-𑙙",
systems = {"abugida"},
}
m["Mong"] = {
canonicalName = "Mongolian",
characters = "᠀-ᢪ",
direction = "down",
}
m["Morse"] = {
canonicalName = "Morse code",
}
m["Mroo"] = {
canonicalName = "Mro",
characters = "𖩀-𖩯",
}
m["Mtei"] = {
canonicalName = "Meitei Mayek",
characters = "ꯀ-ꫠ-",
}
m["Mult"] = {
canonicalName = "Multani",
characters = "𑊀-𑊩",
systems = {"abugida"},
}
m["musical"] = {
canonicalName = "Musical notation",
characters = "𝄀-𝇝",
systems = {"pictography"},
}
m["Mymr"] = {
canonicalName = "Burmese",
otherNames = {"Myanmar"},
characters = "က-႟ꩠ-ꩿꧠ-ꧾ",
systems = {"abugida"},
}
m["Narb"] = {
canonicalName = "Old North Arabian",
characters = "𐪀-𐪟",
direction = "rtl",
systems = {"abjad"},
}
m["Nbat"] = {
canonicalName = "Nabataean",
otherNames = {"Nabatean"},
characters = "𐢀-𐢯",
direction = "rtl",
systems = {"abjad"},
}
m["Newa"] = {
canonicalName = "Newa",
otherNames = {"Newar", "Newari", "Prachalit Nepal"}, -- and Ranjana?
characters = "𑐀-𑑞",
systems = {"abugida"},
}
m["Nkoo"] = {
canonicalName = "N'Ko",
characters = "߀-߿",
direction = "rtl",
}
m["None"] = {
canonicalName = "Unspecified", -- renders as 'unspecified script'
-- This should not have any characters listed
character_category = false, -- none
}
m["Nshu"] = {
canonicalName = "Nushu",
otherNames = {"Nüshu"},
characters = "𖿡𛅰-𛋻",
systems = {"syllabary"},
}
m["Ogam"] = {
canonicalName = "Ogham",
characters = " -᚜",
}
m["Olck"] = {
canonicalName = "Ol Chiki",
characters = "᱐-᱿",
}
m["Orkh"] = {
canonicalName = "Orkhon runes",
characters = "𐰀-𐱈",
direction = "rtl",
}
m["Orya"] = {
canonicalName = "Oriya",
otherNames = {"Odia"},
characters = "ଁ-୷",
}
m["Osge"] = {
canonicalName = "Osage",
characters = "𐒰-𐓻",
}
m["Osma"] = {
canonicalName = "Osmanya",
characters = "𐒀-𐒩",
}
m["Palm"] = {
canonicalName = "Palmyrene",
characters = "𐡠-𐡿",
}
m["Pauc"] = {
canonicalName = "Pau Cin Hau",
characters = "𑫀-𑫸",
}
m["Perm"] = {
canonicalName = "Old Permic",
characters = "𐍐-𐍺",
}
m["Phag"] = {
canonicalName = "Phags-pa",
characters = "ꡀ-꡷",
systems = {"abugida"},
}
m["Marc"] = {
canonicalName = "Marchen",
characters = "𑱰-𑲶",
systems = {"abugida"},
}
m["Phli"] = {
canonicalName = "Inscriptional Pahlavi",
characters = "𐭠-𐭿",
direction = "rtl",
systems = {"abjad"},
}
m["Phlp"] = {
canonicalName = "Psalter Pahlavi",
characters = "𐮀-𐮯",
direction = "rtl",
systems = {"abjad"},
}
m["Phlv"] = {
canonicalName = "Book Pahlavi",
direction = "rtl",
systems = {"abjad"},
-- Not in Unicode
}
m["Phnx"] = {
canonicalName = "Phoenician",
characters = "𐤀-𐤟",
direction = "rtl",
systems = {"abjad"},
}
m["Plrd"] = {
canonicalName = "Pollard",
characters = "𖼀-𖾟",
systems = {"abugida"},
}
m["Prti"] = {
canonicalName = "Inscriptional Parthian",
characters = "𐭀-𐭟",
direction = "rtl",
}
m["Rjng"] = {
canonicalName = "Rejang",
characters = "ꤰ-꥟",
systems = {"abugida"},
}
m["Rohg"] = {
canonicalName = "Hanifi Rohingya",
characters = "𐴀-𐴹",
direction = "rtl",
systems = {"alphabet"},
}
m["Ruminumerals"] = {
canonicalName = "Rumi numerals",
characters = "𐹠-𐹾",
character_category = "Rumi numerals",
}
m["Runr"] = {
canonicalName = "Runic",
characters = "ᚠ-ᛰ",
systems = {"alphabet"},
}
m["Samr"] = {
canonicalName = "Samaritan",
characters = "ࠀ-࠾",
direction = "rtl",
systems = {"abjad"},
}
m["Sarb"] = {
canonicalName = "Old South Arabian",
characters = "𐩠-𐩿",
direction = "rtl",
systems = {"abjad"},
}
m["Saur"] = {
canonicalName = "Saurashtra",
characters = "ꢀ-꣙",
systems = {"abugida"},
}
m["Semap"] = {
canonicalName = "flag semaphore",
systems = {"pictography"},
}
m["Sgnw"] = {
canonicalName = "SignWriting",
characters = "𝠀-𝪯",
systems = {"pictography"},
}
m["Shaw"] = {
canonicalName = "Shavian",
characters = "𐑐-𐑿",
}
m["Shrd"] = {
canonicalName = "Sharada",
characters = "𑆀-𑇙",
systems = {"abugida"},
}
m["Sidd"] = {
canonicalName = "Siddham",
characters = "𑖀-𑗝",
systems = {"abugida"},
}
m["Sind"] = {
canonicalName = "Khudawadi",
characters = "𑊰-𑋹",
systems = {"abugida"},
}
m["Sinh"] = {
canonicalName = "Sinhalese",
characters = "ං-෴",
systems = {"abugida"},
}
m["Sogd"] = {
canonicalName = "Sogdian",
characters = "𐼰-𐽙",
direction = "rtl",
systems = {"abjad"},
}
m["Sogo"] = {
canonicalName = "Old Sogdian",
characters = "𐼀-𐼧",
direction = "rtl",
systems = {"abjad"},
}
m["Sora"] = {
canonicalName = "Sorang Sompeng",
otherNames = {"Sora Sompeng"},
characters = "𑃐-𑃹",
}
m["Soyo"] = {
canonicalName = "Soyombo",
characters = "𑩐-𑪢",
systems = {"abugida"},
}
m["Sund"] = {
canonicalName = "Sundanese",
characters = "ᮀ-ᮿ",
systems = {"abugida"},
}
m["Sylo"] = {
canonicalName = "Syloti Nagri",
otherNames = {"Sylheti Nagari"},
characters = "ꠀ-꠫",
systems = {"abugida"},
}
m["Syrc"] = {
canonicalName = "Syriac",
characters = "܀-ݏ"..u(0x0860).."-"..u(0x086A),
direction = "rtl",
systems = {"abjad"}, -- more precisely, impure abjad
}
-- Syre, Syrj, Syrn are apparently subsumed into Syrc; discuss if this causes issues
m["Tagb"] = {
canonicalName = "Tagbanwa",
characters = "ᝠ-ᝳ",
systems = {"abugida"},
}
m["Takr"] = {
canonicalName = "Takri",
characters = "𑚀-𑛉",
systems = {"abugida"},
}
m["Tale"] = {
canonicalName = "Tai Nüa",
otherNames = {"Tai Nuea", "New Tai Nüa", "New Tai Nuea", "Dehong Dai", "Tai Dehong", "Tai Le"},
characters = "ᥐ-ᥴ",
systems = {"abugida"},
}
m["Talu"] = {
canonicalName = "New Tai Lue",
characters = "ᦀ-᧟",
systems = {"abugida"},
}
m["Taml"] = {
canonicalName = "Tamil",
characters = "ஂ-௺",
systems = {"abugida"},
}
m["Tang"] = {
canonicalName = "Tangut",
characters = "𖿠𗀀-𘫲",
systems = {"logography", "syllabary"},
}
m["Tavt"] = {
canonicalName = "Tai Viet",
characters = "ꪀ-꫟",
systems = {"abugida"},
}
m["Telu"] = {
canonicalName = "Telugu",
characters = "ఀ-౿",
systems = {"abugida"},
}
m["Teng"] = {
canonicalName = "Tengwar",
}
m["Tfng"] = {
canonicalName = "Tifinagh",
otherNames = {"Libyco-Berber", "Berber"},
characters = "ⴰ-⵿",
systems = {"abjad", "alphabet"},
}
m["Tglg"] = {
canonicalName = "Baybayin",
otherNames = {"Tagalog"},
characters = "ᜀ-᜔",
systems = {"abugida"},
}
m["Thaa"] = {
canonicalName = "Thaana",
characters = "ހ-ޱ",
systems = {"abugida"},
direction = "rtl",
}
m["Thai"] = {
canonicalName = "Thai",
characters = "ก-๛",
systems = {"abugida"},
}
m["Tibt"] = {
canonicalName = "Tibetan",
characters = "ༀ-࿚",
systems = {"abugida"},
}
m["Tirh"] = {
canonicalName = "Tirhuta",
characters = "𑒀-𑓙",
systems = {"abugida"},
}
m["xzh-Tibt"] = {
canonicalName = "Zhang-Zhung",
systems = {"abugida"},
}
m["Todj"] = {
canonicalName = "Todjydheenil",
systems = {"alphabet"},
}
m["Ugar"] = {
canonicalName = "Ugaritic",
characters = "𐎀-𐎟",
systems = {"abjad"},
}
m["Vaii"] = {
canonicalName = "Vai",
characters = "ꔀ-ꘫ",
systems = {"syllabary"},
}
m["Vkht"] = {
canonicalName = "Vukht",
systems = {"abjad"},
}
m["Wara"] = {
canonicalName = "Varang Kshiti",
characters = "𑢠-𑣿",
}
m["Wlqr"] = {
canonicalName = "Welqor",
systems = {"abjad"},
}
m["Xpeo"] = {
canonicalName = "Old Persian",
characters = "𐎠-𐏕",
}
m["Xsux"] = {
canonicalName = "Cuneiform",
otherNames = {"Sumero-Akkadian Cuneiform"},
characters = "𒀀-𒍮𒐀-𒑳",
}
m["Yesu"] = {
canonicalName = "Yesuthoh",
systems = {"alphabet"},
}
m["Yiii"] = {
canonicalName = "Yi",
characters = "ꀀ-꓆",
systems = {"syllabary"},
}
m["Zadu"] = {
canonicalName = "Zaduusel",
systems = {"alphabet"},
}
m["Zanb"] = {
canonicalName = "Zanabazar Square",
characters = u(0x11A00).."-"..u(0x11A47),
}
m["Zmth"] = {
canonicalName = "mathematical notation",
characters = "ℵ∀-⋿⟀-⟯⦀-⫿𝐀-𝟿",
character_category = "Mathematical notation symbols", -- ?
}
m["Zsym"] = {
canonicalName = "symbol",
characters = "─-➿←-⇿⌀-⏿⬀-⯾🀀-🃵🌀-🩭",
character_category = false, -- none
systems = {"pictography"},
}
m["Zyyy"] = {
canonicalName = "undetermined",
-- This should not have any characters listed, probably
character_category = false, -- none
characters = m["Latn"].characters,
}
m["Zzzz"] = {
canonicalName = "uncoded",
-- This should not have any characters listed
character_category = false, -- none
}
return m