Module:links/data: Difference between revisions

From Linguifex
Jump to navigation Jump to search
No edit summary
Tag: Reverted
(Undo revision 320507 by Sware (talk))
Tag: Undo
 
Line 1: Line 1:
local encode = mw.text.encode
local u = mw.ustring.char
local data = {}
local data = {}


data.ignore_cap = {
data.high_memory_entries = {
["ko"] = true,
"a",
"animal",
"book",
"coffee",
"do",
"e",
"language",
"night",
"smoke",
"son",
"sun",
"water",
"wind",
}
}


data.phonetic_extraction = {
["th"] = "Module:th",
["km"] = "Module:km",
}


data.pos_tags = {
local U = mw.ustring.char
["a"] = "adjective",
local soft_hyphen = U(0xAD)
["adv"] = "adverb",
["int"] = "interjection",
["n"] = "noun",
["pron"] = "pronoun",
["v"] = "verb",
["vi"] = "intransitive verb",
["vt"] = "transitive verb",
["vti"] = "transitive and intransitive verb",
}


--[[ The "actual title" is the page name with the prefix "Unsupported titles/" removed.
--[[ The "actual title" is the page name with the prefix "Unsupported titles/" removed.
Line 28: Line 25:
data.unsupported_titles = {
data.unsupported_titles = {
[" "] = "Space",
[" "] = "Space",
["# #"] = "Enclosing number signs",
["{"] = "Left curly bracket",
["#"] = "Number sign",
["}"] = "Right curly bracket",
["#MeToo"] = "MeToo",
["#MeTooed"] = "MeTooed",
["#MeTooing"] = "MeTooing",
["#MeToos"] = "MeToos",
["&"] = "Amp",
["¯\\_(ツ)_/¯"] = "¯\\ (ツ) /¯",
["¯_(ツ)_/¯"] = "¯ (ツ) /¯",
["(^_^)"] = "(^ ^)",
["*_*"] = "* *",
["."] = "Full stop",
[".."] = "Double period",
["./."] = "Period slash period",
[": :"] = ": :",
[":"] = ":",
[":-{"] = "Colon hyphen left curly bracket",
[":{"] = "Colon left curly bracket",
[":|"] = "Colon vertical line",
["=_="] = "= =",
["[ ]"] = "Square brackets",
["["] = "Left square bracket",
["["] = "Left square bracket",
["[…]"] = "Square bracketed ellipsis",
["[...]"] = "Left square bracket ... right square bracket",
["[-0-]"] = "Australian Aboriginal Flag emoticon alternative",
["[citation needed]"] = "Square bracketed citation needed",
["[-o-]"] = "Australian Aboriginal Flag emoticon",
["]"] = "Right square bracket",
["]"] = "Right square bracket",
["^_^"] = "^ ^",
["_ _"] = "Underscore space underscore",
["-_-"] = "- -",
["_"] = "Underscore",
["{ }"] = "Curly brackets",
["{"] = "Left curly bracket",
["| |"] = "Enclosing vertical lines",
["|"] = "Vertical line",
["-||-"] = "Hyphen vertical line vertical line hyphen",
["||"] = "Vertical line vertical line",
["}"] = "Right curly bracket",
["</s>"] = "End s tag",
["< />"] = "Less than trailing slash greater than",
["< > </ >"] = "HTML start tag end tag",
["< >"] = "Enclosing less than greater than",
["<!-- -->"] = "HTML comment",
["<-"] = "Less than hyphen",
["<"] = "Less than",
["<"] = "Less than",
["</3"] = "Less than slash three",
[">"] = "Greater than",
["<\\3"] = "Less than backslash three",
["=<"] = "Equal less than",
["<<"] = "Double less than",
["=>"] = "Equal greater than",
["<<<"] = "Triple less than",
[">="] = "Greater than equal",
["<="] = "Less than equal",
["<="] = "Less than equal",
["->"] = "Hyphen greater than",
["<-"] = "Less than hyphen",
[">_<"] = "Greater than low line less than",
["::"] = "Double colon",
[": :"] = "Enclosing colons",
[":="] = "Colon equals",
[":Þ"] = "Colon capital thorn",
[":("] = "Colon left paren",
[":)"] = "Colon right paren",
["<>"] = "Less than greater than",
["<>"] = "Less than greater than",
["<3"] = "Less than three",
["<3"] = "Less than three",
["</3"] = "Less than slash three",
["< >"] = "Enclosing less than greater than",
["< />"] = "Less than trailing slash greater than",
["< > </ >"] = "HTML start tag end tag",
["<!-- -->"] = "HTML comment",
["<g>"] = "g tag",
["<g>"] = "g tag",
["=<"] = "Equal less than",
[":-("] = "Colon hyphen left paren",
["=>"] = "Equal greater than",
[":-)"] = "Colon hyphen right paren",
[">"] = "Greater than",
["|"] = "Vertical line",
["->"] = "Hyphen greater than",
["||"] = "Vertical line vertical line",
[">_<"] = "Greater than low line less than",
["| |"] = "Enclosing vertical lines",
[">="] = "Greater than equal",
[">>"] = "Double greater than",
[">>>"] = "Triple greater than",
["×_×"] = "× ×",
["9_9"] = "9 9",
["C#"] = "C sharp",
["C#"] = "C sharp",
["#"] = "Number sign",
["# #"] = "Enclosing number signs",
[":"] = "Colon",
[".."] = "Double period",
["."] = "Full stop",
["_"] = "Low line",
["-_-"] = "Low line interfix",
[U(0xFFFD)] = "Replacement character",
[U(0x1680)] = "Ogham space",
["[ ]"] = "Square brackets",
["{ }"] = "Curly brackets",
["[…]"] = "Square bracketed ellipsis",
["_ _"] = "Enclosing low lines",
["C|N>K"] = "C through N to K",
["C|N>K"] = "C through N to K",
["eq #"] = "eq number sign",
["#MeToo"] = "MeToo",
["f##k"] = "f double number sign k",
["f##ked"] = "f double number sign ked",
["f##king"] = "f double number sign king",
["f##ks"] = "f double number sign ks",
["hr #"] = "hr number sign",
["n_n"] = "n n",
["O_O"] = "O O",
["O_o"] = "O o",
["o_O"] = "o O",
["o_o"] = "o o",
["snake_case"] = "snake case",
["T_T"] = "T T",
["u_u"] = "u u",
["X_X"] = "X X",
["x_x"] = "x x",
["x86_64"] = "x86 64",
["λοπαδοτεμαχοσελαχογαλεοκρανιολειψανοδριμυποτριμματοσιλφιοκαραβομελιτοκατακεχυμενοκιχλεπικοσσυφοφαττοπεριστεραλεκτρυονοπτοκεφαλλιοκιγκλοπελειολαγῳοσιραιοβαφητραγανοπτερύγων"] = "Ancient Greek dish",
["о/."] = "о slash dot",
["о/."] = "о slash dot",
["ಠ_ಠ"] = "ಠ ಠ",
["ಥ_ಥ"] = "ಥ ಥ",
["┬─┬ノ( º _ ºノ)"] = "┬─┬ノ( º ºノ)",
["กรุงเทพมหานคร อมรรัตนโกสินทร์ มหินทรายุธยา มหาดิลกภพ นพรัตนราชธานีบูรีรมย์ อุดมราชนิเวศน์มหาสถาน อมรพิมานอวตารสถิต สักกะทัตติยวิษณุกรรมประสิทธิ์"] = "Thai name of Bangkok",
["กรุงเทพมหานคร อมรรัตนโกสินทร์ มหินทรายุธยา มหาดิลกภพ นพรัตนราชธานีบูรีรมย์ อุดมราชนิเวศน์มหาสถาน อมรพิมานอวตารสถิต สักกะทัตติยวิษณุกรรมประสิทธิ์"] = "Thai name of Bangkok",
[u(0x1680)] = "Ogham space",
["λοπαδοτεμαχοσελαχογαλεοκρανιολειψανοδριμυποτριμματοσιλφιοκαραβομελιτοκατακεχυμενοκιχλ" .. soft_hyphen .. "επικοσσυφοφαττοπεριστεραλεκτρυονοπτοκεφαλλιοκιγκλοπελειολαγῳοσιραιοβαφητραγανοπτερύγων"] = "Ancient Greek dish",
[u(0x3000)] = "Ideographic space",
[":≠"] = ":≠",
[u(0xFFFD)] = "Replacement character",
["S:t"] = "S:t",
}
["S:ta"] = "S:ta",
 
["c:a"] = "c:a",
data.display_change = {
["n:a"] = "n:a",
[" "] = "] [", -- Space
["n:o"] = "n:o",
[u(0x00A0)] = "]" .. u(0x00A0) .. "[", -- No-break space
["n:r"] = "n:r",
[u(0x180E)] = "]" .. u(0x180E) .. "[", -- Mongolian vowel separator
["s:a"] = "s:a",
[u(0x2000)] = "]" .. u(0x2000) .. "[", -- En quad
["st:a"] = "st:a",
[u(0x2001)] = "]" .. u(0x2001) .. "[", -- Em quad
["v:a"] = "v:a",
[u(0x2002)] = "]" .. u(0x2002) .. "[", -- En space
[u(0x2003)] = "]" .. u(0x2003) .. "[", -- Em space
[u(0x2004)] = "]" .. u(0x2004) .. "[", -- Three-per-em space
[u(0x2005)] = "]" .. u(0x2005) .. "[", -- Four-per-em space
[u(0x2006)] = "]" .. u(0x2006) .. "[", -- Six-per-em space
[u(0x2007)] = "]" .. u(0x2007) .. "[", -- Figure space
[u(0x2008)] = "]" .. u(0x2008) .. "[", -- Punctuation space
[u(0x2009)] = "]" .. u(0x2009) .. "[", -- Thin space
[u(0x200A)] = "]" .. u(0x200A) .. "[", -- Hair space
[u(0x202F)] = "]" .. u(0x202F) .. "[", -- Narrow no-break space
[u(0x205F)] = "]" .. u(0x205F) .. "[", -- Medium mathematical space
[u(0x3000)] = "]" .. u(0x3000) .. "[", -- Ideographic space
}
}


-- Valid URI schemes in external links, which therefore have to be escaped if used in entry names (e.g. [[sms:a]]).
for i, item in ipairs(data.high_memory_entries) do
local uri_schemes = {
data.high_memory_entries[i] = nil
"bitcoin:",
data.high_memory_entries[item] = true
"ftp://",
"ftps://",
"geo:",
"git://",
"gopher://",
"http://",
"https://",
"irc:",
"ircs:",
"magnet:",
"mailto:",
"mms://",
"news:",
"nntp://",
"redis://",
"sftp://",
"sip:",
"sips:",
"sms:",
"ssh://",
"svn://",
"tel:",
"telnet://",
"urn:",
"worldwind://",
"xmpp:",
}
-- Convert into lookup table.
local uri_lookup = {}
for _, scheme in ipairs(uri_schemes) do
uri_lookup[scheme] = encode(scheme, ":")
end
end
data.uri_schemes = uri_lookup


return data
return data

Latest revision as of 20:29, 31 July 2023



-- Module table; every field assigned below is returned to the caller.
local data = {}

-- Listed here as a plain sequence of titles. The loop near the end of this
-- file rewrites the sequence in place into a set (title -> true).
-- NOTE(review): presumably these are entry titles that need special handling
-- because of memory use -- confirm against the modules that read this field.
data.high_memory_entries = {
	"a", "animal", "book", "coffee", "do", "e", "language",
	"night", "smoke", "son", "sun", "water", "wind",
}


-- Builds a string from Unicode code points.
local to_char = mw.ustring.char
-- U+00AD SOFT HYPHEN, spliced into the long Greek key below.
-- NOTE(review): presumably kept as a named constant so the otherwise
-- invisible character stays visible in the source -- confirm.
local SOFT_HYPHEN = to_char(0xAD)

--[[	Lookup table from a displayed title to its "actual title".
		The "actual title" is the page name with the prefix "Unsupported titles/" removed.
		Entry format: ["displayed_title"] = "actual title"
		Entries are grouped by the characters they involve; the grouping is
		purely for readability.	]]
data.unsupported_titles = {
	-- Whitespace and special code points
	[" "] = "Space",
	[to_char(0x1680)] = "Ogham space",
	[to_char(0xFFFD)] = "Replacement character",

	-- Curly and square brackets
	["{"] = "Left curly bracket",
	["}"] = "Right curly bracket",
	["{ }"] = "Curly brackets",
	["["] = "Left square bracket",
	["]"] = "Right square bracket",
	["[ ]"] = "Square brackets",
	["[…]"] = "Square bracketed ellipsis",

	-- Angle brackets, comparisons and arrows
	["<"] = "Less than",
	[">"] = "Greater than",
	["<>"] = "Less than greater than",
	["< >"] = "Enclosing less than greater than",
	["=<"] = "Equal less than",
	["=>"] = "Equal greater than",
	[">="] = "Greater than equal",
	["<="] = "Less than equal",
	["->"] = "Hyphen greater than",
	["<-"] = "Less than hyphen",
	[">_<"] = "Greater than low line less than",
	["<3"] = "Less than three",
	["</3"] = "Less than slash three",
	["< />"] = "Less than trailing slash greater than",
	["< > </ >"] = "HTML start tag end tag",
	["<!-- -->"] = "HTML comment",
	["<g>"] = "g tag",

	-- Colons
	[":"] = "Colon",
	["::"] = "Double colon",
	[": :"] = "Enclosing colons",
	[":="] = "Colon equals",
	[":Þ"] = "Colon capital thorn",
	[":("] = "Colon left paren",
	[":)"] = "Colon right paren",
	[":-("] = "Colon hyphen left paren",
	[":-)"] = "Colon hyphen right paren",

	-- Vertical lines
	["|"] = "Vertical line",
	["||"] = "Vertical line vertical line",
	["| |"] = "Enclosing vertical lines",
	["C|N>K"] = "C through N to K",

	-- Number signs
	["#"] = "Number sign",
	["# #"] = "Enclosing number signs",
	["C#"] = "C sharp",
	["#MeToo"] = "MeToo",

	-- Full stops and low lines
	["."] = "Full stop",
	[".."] = "Double period",
	["о/."] = "о slash dot",
	["_"] = "Low line",
	["-_-"] = "Low line interfix",
	["_ _"] = "Enclosing low lines",

	-- Very long titles, mapped to short descriptive names
	["กรุงเทพมหานคร อมรรัตนโกสินทร์ มหินทรายุธยา มหาดิลกภพ นพรัตนราชธานีบูรีรมย์ อุดมราชนิเวศน์มหาสถาน อมรพิมานอวตารสถิต สักกะทัตติยวิษณุกรรมประสิทธิ์"] = "Thai name of Bangkok",
	["λοπαδοτεμαχοσελαχογαλεοκρανιολειψανοδριμυποτριμματοσιλφιοκαραβομελιτοκατακεχυμενοκιχλ" .. SOFT_HYPHEN .. "επικοσσυφοφαττοπεριστεραλεκτρυονοπτοκεφαλλιοκιγκλοπελειολαγῳοσιραιοβαφητραγανοπτερύγων"] = "Ancient Greek dish",

	-- Entries whose actual title is identical to the displayed title
	[":≠"] = ":≠",
	["S:t"] = "S:t",
	["S:ta"] = "S:ta",
	["c:a"] = "c:a",
	["n:a"] = "n:a",
	["n:o"] = "n:o",
	["n:r"] = "n:r",
	["s:a"] = "s:a",
	["st:a"] = "st:a",
	["v:a"] = "v:a",
}

-- Turn data.high_memory_entries from a list of titles into a set, in place,
-- so that a title can be tested with data.high_memory_entries[title].
do
	local entries = data.high_memory_entries
	for position, title in ipairs(entries) do
		-- Clear the numeric slot, then key the same table by the title.
		-- ipairs reads by index, so clearing the current slot and adding
		-- string keys does not disturb the traversal.
		entries[position] = nil
		entries[title] = true
	end
end

return data