45,647
edits
Chrysophylax (talk | contribs) (Created page with "local data = {} data.high_memory_entries = { "a", "animal", "book", "coffee", "do", "e", "language", "night", "smoke", "son", "sun", "water", "wind", } local U...") |
No edit summary Tag: Reverted |
||
Line 1: | Line 1: | ||
local encode = mw.text.encode | |||
local u = mw.ustring.char | |||
local data = {} | local data = {} | ||
data. | data.ignore_cap = { | ||
" | ["ko"] = true, | ||
} | } | ||
data.phonetic_extraction = { | |||
["th"] = "Module:th", | |||
["km"] = "Module:km", | |||
} | |||
data.pos_tags = { | |||
["a"] = "adjective", | |||
["adv"] = "adverb", | |||
["int"] = "interjection", | |||
["n"] = "noun", | |||
["pron"] = "pronoun", | |||
["v"] = "verb", | |||
["vi"] = "intransitive verb", | |||
["vt"] = "transitive verb", | |||
["vti"] = "transitive and intransitive verb", | |||
} | |||
--[[ The "actual title" is the page name with the prefix "Unsupported titles/" removed. | --[[ The "actual title" is the page name with the prefix "Unsupported titles/" removed. | ||
Line 25: | Line 28: | ||
data.unsupported_titles = { | data.unsupported_titles = { | ||
[" "] = "Space", | [" "] = "Space", | ||
["# #"] = "Enclosing number signs", | |||
["#"] = "Number sign", | |||
["#MeToo"] = "MeToo", | |||
["#MeTooed"] = "MeTooed", | |||
["#MeTooing"] = "MeTooing", | |||
["#MeToos"] = "MeToos", | |||
["&"] = "Amp", | |||
["¯\\_(ツ)_/¯"] = "¯\\ (ツ) /¯", | |||
["¯_(ツ)_/¯"] = "¯ (ツ) /¯", | |||
["(^_^)"] = "(^ ^)", | |||
["*_*"] = "* *", | |||
["."] = "Full stop", | |||
[".."] = "Double period", | |||
["./."] = "Period slash period", | |||
[": :"] = ": :", | |||
[":"] = ":", | |||
[":-{"] = "Colon hyphen left curly bracket", | |||
[":{"] = "Colon left curly bracket", | |||
[":|"] = "Colon vertical line", | |||
["=_="] = "= =", | |||
["[ ]"] = "Square brackets", | |||
["["] = "Left square bracket", | |||
["[…]"] = "Square bracketed ellipsis", | |||
["[...]"] = "Left square bracket ... right square bracket", | |||
["[-0-]"] = "Australian Aboriginal Flag emoticon alternative", | |||
["[citation needed]"] = "Square bracketed citation needed", | |||
["[-o-]"] = "Australian Aboriginal Flag emoticon", | |||
["]"] = "Right square bracket", | |||
["^_^"] = "^ ^", | |||
["_ _"] = "Underscore space underscore", | |||
["-_-"] = "- -", | |||
["_"] = "Underscore", | |||
["{ }"] = "Curly brackets", | |||
["{"] = "Left curly bracket", | ["{"] = "Left curly bracket", | ||
["| |"] = "Enclosing vertical lines", | |||
["|"] = "Vertical line", | |||
["-||-"] = "Hyphen vertical line vertical line hyphen", | |||
["||"] = "Vertical line vertical line", | |||
["}"] = "Right curly bracket", | ["}"] = "Right curly bracket", | ||
["["] = " | ["</s>"] = "End s tag", | ||
["]"] = " | ["< />"] = "Less than trailing slash greater than", | ||
["< > </ >"] = "HTML start tag end tag", | |||
["< >"] = "Enclosing less than greater than", | |||
["<!-- -->"] = "HTML comment", | |||
["<-"] = "Less than hyphen", | |||
["<"] = "Less than", | ["<"] = "Less than", | ||
[">"] = " | ["</3"] = "Less than slash three", | ||
["<\\3"] = "Less than backslash three", | |||
["<<"] = "Double less than", | |||
["<<<"] = "Triple less than", | |||
["<="] = "Less than equal", | |||
["<>"] = "Less than greater than", | |||
["<3"] = "Less than three", | |||
["<g>"] = "g tag", | |||
["=<"] = "Equal less than", | ["=<"] = "Equal less than", | ||
["=>"] = "Equal greater than", | ["=>"] = "Equal greater than", | ||
["> | [">"] = "Greater than", | ||
["->"] = "Hyphen greater than", | ["->"] = "Hyphen greater than", | ||
[">_<"] = "Greater than low line less than", | [">_<"] = "Greater than low line less than", | ||
[" | [">="] = "Greater than equal", | ||
[">>"] = "Double greater than", | |||
[">>>"] = "Triple greater than", | |||
["×_×"] = "× ×", | |||
["9_9"] = "9 9", | |||
[" | |||
[" | |||
[" | |||
[" | |||
["C#"] = "C sharp", | ["C#"] = "C sharp", | ||
["C|N>K"] = "C through N to K", | ["C|N>K"] = "C through N to K", | ||
["# | ["eq #"] = "eq number sign", | ||
["f##k"] = "f double number sign k", | |||
["f##ked"] = "f double number sign ked", | |||
["f##king"] = "f double number sign king", | |||
["f##ks"] = "f double number sign ks", | |||
["hr #"] = "hr number sign", | |||
["n_n"] = "n n", | |||
["O_O"] = "O O", | |||
["O_o"] = "O o", | |||
["o_O"] = "o O", | |||
["o_o"] = "o o", | |||
["snake_case"] = "snake case", | |||
["T_T"] = "T T", | |||
["u_u"] = "u u", | |||
["X_X"] = "X X", | |||
["x_x"] = "x x", | |||
["x86_64"] = "x86 64", | |||
["λοπαδοτεμαχοσελαχογαλεοκρανιολειψανοδριμυποτριμματοσιλφιοκαραβομελιτοκατακεχυμενοκιχλεπικοσσυφοφαττοπεριστεραλεκτρυονοπτοκεφαλλιοκιγκλοπελειολαγῳοσιραιοβαφητραγανοπτερύγων"] = "Ancient Greek dish", | |||
["о/."] = "о slash dot", | ["о/."] = "о slash dot", | ||
["ಠ_ಠ"] = "ಠ ಠ", | |||
["ಥ_ಥ"] = "ಥ ಥ", | |||
["┬─┬ノ( º _ ºノ)"] = "┬─┬ノ( º ºノ)", | |||
["กรุงเทพมหานคร อมรรัตนโกสินทร์ มหินทรายุธยา มหาดิลกภพ นพรัตนราชธานีบูรีรมย์ อุดมราชนิเวศน์มหาสถาน อมรพิมานอวตารสถิต สักกะทัตติยวิษณุกรรมประสิทธิ์"] = "Thai name of Bangkok", | ["กรุงเทพมหานคร อมรรัตนโกสินทร์ มหินทรายุธยา มหาดิลกภพ นพรัตนราชธานีบูรีรมย์ อุดมราชนิเวศน์มหาสถาน อมรพิมานอวตารสถิต สักกะทัตติยวิษณุกรรมประสิทธิ์"] = "Thai name of Bangkok", | ||
[" | [u(0x1680)] = "Ogham space", | ||
[" | [u(0x3000)] = "Ideographic space", | ||
[" | [u(0xFFFD)] = "Replacement character", | ||
[" | } | ||
[" | |||
[" | data.display_change = { | ||
[" | [" "] = "] [", -- Space | ||
[" | [u(0x00A0)] = "]" .. u(0x00A0) .. "[", -- No-break space | ||
[" | [u(0x180E)] = "]" .. u(0x180E) .. "[", -- Mongolian vowel separator | ||
[" | [u(0x2000)] = "]" .. u(0x2000) .. "[", -- En quad | ||
[" | [u(0x2001)] = "]" .. u(0x2001) .. "[", -- Em quad | ||
[u(0x2002)] = "]" .. u(0x2002) .. "[", -- En space | |||
[u(0x2003)] = "]" .. u(0x2003) .. "[", -- Em space | |||
[u(0x2004)] = "]" .. u(0x2004) .. "[", -- Three-per-em space | |||
[u(0x2005)] = "]" .. u(0x2005) .. "[", -- Four-per-em space | |||
[u(0x2006)] = "]" .. u(0x2006) .. "[", -- Six-per-em space | |||
[u(0x2007)] = "]" .. u(0x2007) .. "[", -- Figure space | |||
[u(0x2008)] = "]" .. u(0x2008) .. "[", -- Punctuation space | |||
[u(0x2009)] = "]" .. u(0x2009) .. "[", -- Thin space | |||
[u(0x200A)] = "]" .. u(0x200A) .. "[", -- Hair space | |||
[u(0x202F)] = "]" .. u(0x202F) .. "[", -- Narrow no-break space | |||
[u(0x205F)] = "]" .. u(0x205F) .. "[", -- Medium mathematical space | |||
[u(0x3000)] = "]" .. u(0x3000) .. "[", -- Ideographic space | |||
} | } | ||
-- Valid URI schemes in external links, which therefore have to be escaped if used in entry names (e.g. [[sms:a]]). | |||
local uri_schemes = { | |||
"bitcoin:", | |||
"ftp://", | |||
"ftps://", | |||
"geo:", | |||
"git://", | |||
"gopher://", | |||
"http://", | |||
"https://", | |||
"irc:", | |||
"ircs:", | |||
"magnet:", | |||
"mailto:", | |||
"mms://", | |||
"news:", | |||
"nntp://", | |||
"redis://", | |||
"sftp://", | |||
"sip:", | |||
"sips:", | |||
"sms:", | |||
"ssh://", | |||
"svn://", | |||
"tel:", | |||
"telnet://", | |||
"urn:", | |||
"worldwind://", | |||
"xmpp:", | |||
} | |||
-- Convert into lookup table. | |||
local uri_lookup = {} | |||
for _, scheme in ipairs(uri_schemes) do | |||
uri_lookup[scheme] = encode(scheme, ":") | |||
end | end | ||
data.uri_schemes = uri_lookup | |||
return data | return data |