Module:template parser/data: Difference between revisions
Jump to navigation
Jump to search
(Created page with "local string = string local gsub = string.gsub local load_data = mw.loadData local pairs = pairs local upper = string.upper local data = {} do local magic_words = load_data("Module:data/magic words") local parser_functions_s = {} local parser_functions_i = {} local parser_variables_s = {} local parser_variables_i = {} local transclusion_modifiers = {} for k, v in pairs(magic_words) do local name, s = v.name, v.case_sensitive if v.parser_function then...") |
No edit summary |
||
| Line 1: | Line 1: | ||
local string = string | local string = string | ||
local case_insensitive_pattern = require("Module:string utilities").case_insensitive_pattern | |||
local gsub = string.gsub | local gsub = string.gsub | ||
local upper = string.upper | local upper = string.upper | ||
| Line 9: | Line 8: | ||
do | do | ||
local | local tags = mw.loadData("Module:data/parser extension tags") | ||
local data_end_tags = {} | |||
local | |||
-- Generates the string pattern for the end tag. | -- Generates the string pattern for the end tag. | ||
-- The preprocessor uses the regex "/<\/TAG\s*>/i", so only ASCII characters | |||
-- are case-insensitive. | |||
local function end_tag_pattern(tag) | local function end_tag_pattern(tag) | ||
data_end_tags[tag] = "</" .. case_insensitive_pattern(tag, nil, true) .. "%s*>" | |||
end | end | ||
for tag in pairs(tags) do | for tag in pairs(tags) do | ||
end_tag_pattern(tag) | |||
end | end | ||
end_tag_pattern("includeonly") | |||
end_tag_pattern("noinclude") | |||
data_end_tags["onlyinclude"] = true -- Pattern is not required, but a key is needed for tag validity checks. | |||
data. | data.end_tags = data_end_tags | ||
end | end | ||
-- | -- Character escapes from PHP's htmlspecialchars. | ||
data. | data.php_htmlspecialchars = { | ||
["\""] = """, | |||
" | ["&"] = "&", | ||
" | ["'"] = "'", | ||
" | ["<"] = "<", | ||
" | [">"] = ">", | ||
} | |||
-- The parser's HTML sanitizer validates tag attributes with the regex | |||
-- "/^([:_\p{L}\p{N}][:_\.\-\p{L}\p{N}]*)$/sxu". Ustring's "%w" is defined as | |||
-- "[\p{L}\p{Nd}]", so any characters in \p{N} but not \p{Nd} must be added | |||
-- manually. | |||
-- NOTE: \p{N} *MUST* be defined according to the same version of Unicode that | |||
-- the sanitizer uses in order to remain in sync. As of September 2024, this is | |||
-- version 11.0. | |||
local N_not_Nd = "\194\178" .. -- U+00B2 | |||
"\194\179" .. -- U+00B3 | |||
"\194\185" .. -- U+00B9 | |||
"\194\188-\194\190" .. -- U+00BC-U+00BE | |||
"\224\167\180-\224\167\185" .. -- U+09F4-U+09F9 | |||
"\224\173\178-\224\173\183" .. -- U+0B72-U+0B77 | |||
"\224\175\176-\224\175\178" .. -- U+0BF0-U+0BF2 | |||
"\224\177\184-\224\177\190" .. -- U+0C78-U+0C7E | |||
"\224\181\152-\224\181\158" .. -- U+0D58-U+0D5E | |||
"\224\181\176-\224\181\184" .. -- U+0D70-U+0D78 | |||
"\224\188\170-\224\188\179" .. -- U+0F2A-U+0F33 | |||
"\225\141\169-\225\141\188" .. -- U+1369-U+137C | |||
"\225\155\174-\225\155\176" .. -- U+16EE-U+16F0 | |||
"\225\159\176-\225\159\185" .. -- U+17F0-U+17F9 | |||
"\225\167\154" .. -- U+19DA | |||
"\226\129\176" .. -- U+2070 | |||
"\226\129\180-\226\129\185" .. -- U+2074-U+2079 | |||
"\226\130\128-\226\130\137" .. -- U+2080-U+2089 | |||
"\226\133\144-\226\134\130" .. -- U+2150-U+2182 | |||
"\226\134\133-\226\134\137" .. -- U+2185-U+2189 | |||
"\226\145\160-\226\146\155" .. -- U+2460-U+249B | |||
"\226\147\170-\226\147\191" .. -- U+24EA-U+24FF | |||
"\226\157\182-\226\158\147" .. -- U+2776-U+2793 | |||
"\226\179\189" .. -- U+2CFD | |||
"\227\128\135" .. -- U+3007 | |||
"\227\128\161-\227\128\169" .. -- U+3021-U+3029 | |||
"\227\128\184-\227\128\186" .. -- U+3038-U+303A | |||
"\227\134\146-\227\134\149" .. -- U+3192-U+3195 | |||
"\227\136\160-\227\136\169" .. -- U+3220-U+3229 | |||
"\227\137\136-\227\137\143" .. -- U+3248-U+324F | |||
"\227\137\145-\227\137\159" .. -- U+3251-U+325F | |||
"\227\138\128-\227\138\137" .. -- U+3280-U+3289 | |||
"\227\138\177-\227\138\191" .. -- U+32B1-U+32BF | |||
"\234\155\166-\234\155\175" .. -- U+A6E6-U+A6EF | |||
"\234\160\176-\234\160\181" .. -- U+A830-U+A835 | |||
"\240\144\132\135-\240\144\132\179" .. -- U+10107-U+10133 | |||
"\240\144\133\128-\240\144\133\184" .. -- U+10140-U+10178 | |||
"\240\144\134\138" .. -- U+1018A | |||
"\240\144\134\139" .. -- U+1018B | |||
"\240\144\139\161-\240\144\139\187" .. -- U+102E1-U+102FB | |||
"\240\144\140\160-\240\144\140\163" .. -- U+10320-U+10323 | |||
"\240\144\141\129" .. -- U+10341 | |||
"\240\144\141\138" .. -- U+1034A | |||
"\240\144\143\145-\240\144\143\149" .. -- U+103D1-U+103D5 | |||
"\240\144\161\152-\240\144\161\159" .. -- U+10858-U+1085F | |||
"\240\144\161\185-\240\144\161\191" .. -- U+10879-U+1087F | |||
"\240\144\162\167-\240\144\162\175" .. -- U+108A7-U+108AF | |||
"\240\144\163\187-\240\144\163\191" .. -- U+108FB-U+108FF | |||
"\240\144\164\150-\240\144\164\155" .. -- U+10916-U+1091B | |||
"\240\144\166\188" .. -- U+109BC | |||
"\240\144\166\189" .. -- U+109BD | |||
"\240\144\167\128-\240\144\167\143" .. -- U+109C0-U+109CF | |||
"\240\144\167\146-\240\144\167\191" .. -- U+109D2-U+109FF | |||
"\240\144\169\128-\240\144\169\136" .. -- U+10A40-U+10A48 | |||
"\240\144\169\189" .. -- U+10A7D | |||
"\240\144\169\190" .. -- U+10A7E | |||
"\240\144\170\157-\240\144\170\159" .. -- U+10A9D-U+10A9F | |||
"\240\144\171\171-\240\144\171\175" .. -- U+10AEB-U+10AEF | |||
"\240\144\173\152-\240\144\173\159" .. -- U+10B58-U+10B5F | |||
"\240\144\173\184-\240\144\173\191" .. -- U+10B78-U+10B7F | |||
"\240\144\174\169-\240\144\174\175" .. -- U+10BA9-U+10BAF | |||
"\240\144\179\186-\240\144\179\191" .. -- U+10CFA-U+10CFF | |||
"\240\144\185\160-\240\144\185\190" .. -- U+10E60-U+10E7E | |||
"\240\144\188\157-\240\144\188\166" .. -- U+10F1D-U+10F26 | |||
"\240\144\189\145-\240\144\189\148" .. -- U+10F51-U+10F54 | |||
"\240\145\129\146-\240\145\129\165" .. -- U+11052-U+11065 | |||
"\240\145\135\161-\240\145\135\180" .. -- U+111E1-U+111F4 | |||
"\240\145\156\186" .. -- U+1173A | |||
"\240\145\156\187" .. -- U+1173B | |||
"\240\145\163\170-\240\145\163\178" .. -- U+118EA-U+118F2 | |||
"\240\145\177\154-\240\145\177\172" .. -- U+11C5A-U+11C6C | |||
"\240\146\144\128-\240\146\145\174" .. -- U+12400-U+1246E | |||
"\240\150\173\155-\240\150\173\161" .. -- U+16B5B-U+16B61 | |||
"\240\150\186\128-\240\150\186\150" .. -- U+16E80-U+16E96 | |||
"\240\157\139\160-\240\157\139\179" .. -- U+1D2E0-U+1D2F3 | |||
"\240\157\141\160-\240\157\141\184" .. -- U+1D360-U+1D378 | |||
"\240\158\163\135-\240\158\163\143" .. -- U+1E8C7-U+1E8CF | |||
"\240\158\177\177-\240\158\178\171" .. -- U+1EC71-U+1ECAB | |||
"\240\158\178\173-\240\158\178\175" .. -- U+1ECAD-U+1ECAF | |||
"\240\158\178\177-\240\158\178\180" .. -- U+1ECB1-U+1ECB4 | |||
"\240\159\132\128-\240\159\132\140" -- U+1F100-U+1F10C | |||
data.valid_attribute_name = "^[:_%w" .. N_not_Nd .."][:_.%-%w" .. N_not_Nd .. "]*$" | |||
-- Value is the namespace number of the linked page at parameter 0, where 0 is mainspace. | |||
-- If the namespace is the mainspace, it can be overridden by an explicitly specified category (e.g. {{PAGENAME:Category:Foo}} refers to "Category:Foo"). This does not apply to any other namespace (e.g. {{#SPECIAL:Category:Foo}} refers to "Special:Category:Foo"). | |||
data.template_link_param_1 = { | |||
["#CATEGORYTREE:"] = 14, -- Category: | |||
["#IFEXIST:"] = 0, | |||
["#INVOKE:"] = 828, -- Module: | |||
["#LST:"] = 0, | |||
["#LSTH:"] = 0, | |||
["#LSTX:"] = 0, | |||
["#SPECIAL:"] = -1, -- Special: | |||
["#SPECIALE:"] = -1, -- Special: | |||
["#TITLEPARTS:"] = 0, | |||
["BASEPAGENAME:"] = 0, | |||
["BASEPAGENAMEE:"] = 0, | |||
["CANONICALURL:"] = 0, | |||
["CANONICALURLE:"] = 0, | |||
["CASCADINGSOURCES:"] = 0, | |||
["FILEPATH:"] = 6, -- File: | |||
["FULLPAGENAME:"] = 0, | |||
["FULLPAGENAMEE:"] = 0, | |||
["FULLURL:"] = 0, | |||
["FULLURLE:"] = 0, | |||
["INT:"] = 8, -- MediaWiki: | |||
["LOCALURL:"] = 0, | |||
["LOCALURLE:"] = 0, | |||
["NAMESPACE:"] = 0, | |||
["NAMESPACEE:"] = 0, | |||
["NAMESPACENUMBER:"] = 0, | |||
["PAGEID:"] = 0, | |||
["PAGENAME:"] = 0, | |||
["PAGENAMEE:"] = 0, | |||
["PAGESINCATEGORY:"] = 14, -- Category: | |||
["PAGESIZE:"] = 0, | |||
["REVISIONDAY:"] = 0, | |||
["REVISIONDAY2:"] = 0, | |||
["REVISIONID:"] = 0, | |||
["REVISIONMONTH:"] = 0, | |||
["REVISIONMONTH1:"] = 0, | |||
["REVISIONTIMESTAMP:"] = 0, | |||
["REVISIONUSER:"] = 0, | |||
["REVISIONYEAR:"] = 0, | |||
["ROOTPAGENAME:"] = 0, | |||
["ROOTPAGENAMEE:"] = 0, | |||
["SUBJECTPAGENAME:"] = 0, | |||
["SUBJECTPAGENAMEE:"] = 0, | |||
["SUBJECTSPACE:"] = 0, | |||
["SUBJECTSPACEE:"] = 0, | |||
["SUBPAGENAME:"] = 0, | |||
["SUBPAGENAMEE:"] = 0, | |||
["TALKPAGENAME:"] = 0, | |||
["TALKPAGENAMEE:"] = 0, | |||
["TALKSPACE:"] = 0, | |||
["TALKSPACEE:"] = 0, | |||
} | |||
-- Value is the namespace number of the linked page at parameter 1. | |||
data.template_link_param_2 = { | |||
["PROTECTIONEXPIRY:"] = 0, | |||
["PROTECTIONLEVEL:"] = 0, | |||
} | |||
return data | return data | ||
Latest revision as of 21:16, 8 January 2025
Documentation for this module may be created at Module:template parser/data/doc
local string = string
local case_insensitive_pattern = require("Module:string utilities").case_insensitive_pattern
local gsub = string.gsub
local upper = string.upper
-- Export table: the fields assigned to it below make up the value this module
-- returns.
local data = {}
do
	-- Keys are the tag names accepted by the validity checks; values are the
	-- Lua patterns matching the corresponding end tag (or `true` where no
	-- pattern is needed).
	local end_tags = {}

	-- Returns the string pattern for the end tag of `name`.
	-- The preprocessor uses the regex "/<\/TAG\s*>/i", so only ASCII
	-- characters are case-insensitive.
	-- NOTE(review): the meaning of the (nil, true) arguments is defined in
	-- Module:string utilities - presumably they select the ASCII-only
	-- behaviour described above; confirm there.
	local function make_pattern(name)
		return "</" .. case_insensitive_pattern(name, nil, true) .. "%s*>"
	end

	-- Tags listed in the parser extension tag data.
	for name in pairs(mw.loadData("Module:data/parser extension tags")) do
		end_tags[name] = make_pattern(name)
	end

	-- Inclusion-control tags, registered after the loop so they overwrite any
	-- same-named entry from the loaded data.
	end_tags["includeonly"] = make_pattern("includeonly")
	end_tags["noinclude"] = make_pattern("noinclude")
	-- Pattern is not required, but a key is needed for tag validity checks.
	end_tags["onlyinclude"] = true

	data.end_tags = end_tags
end
-- Character escapes from PHP's htmlspecialchars: each key is a raw character
-- and each value is the HTML entity that replaces it. The values must be the
-- literal entity strings; if they are written as the decoded characters, the
-- double-quote entry is not even valid Lua and the rest map every character
-- to itself. The apostrophe uses the numeric form "&#039;", which is what
-- htmlspecialchars emits for single quotes under its HTML 4.01 document type.
data.php_htmlspecialchars = {
	["\""] = "&quot;",
	["&"] = "&amp;",
	["'"] = "&#039;",
	["<"] = "&lt;",
	[">"] = "&gt;",
}
-- Attribute-name validation.
-- The parser's HTML sanitizer accepts a tag attribute name only if it matches
-- the regex "/^([:_\p{L}\p{N}][:_\.\-\p{L}\p{N}]*)$/sxu". Ustring's "%w" is
-- defined as "[\p{L}\p{Nd}]", so every character that is in \p{N} but not in
-- \p{Nd} has to be listed explicitly; that list is N_not_Nd.
-- NOTE: \p{N} *MUST* be defined according to the same version of Unicode that
-- the sanitizer uses in order to remain in sync. As of September 2024, this is
-- version 11.0.
-- Each entry below is either a single character or a "first-last" range,
-- written as escaped UTF-8 bytes, ready to be placed inside a character class.
local N_not_Nd = table.concat{
	"\194\178", -- U+00B2
	"\194\179", -- U+00B3
	"\194\185", -- U+00B9
	"\194\188-\194\190", -- U+00BC-U+00BE
	"\224\167\180-\224\167\185", -- U+09F4-U+09F9
	"\224\173\178-\224\173\183", -- U+0B72-U+0B77
	"\224\175\176-\224\175\178", -- U+0BF0-U+0BF2
	"\224\177\184-\224\177\190", -- U+0C78-U+0C7E
	"\224\181\152-\224\181\158", -- U+0D58-U+0D5E
	"\224\181\176-\224\181\184", -- U+0D70-U+0D78
	"\224\188\170-\224\188\179", -- U+0F2A-U+0F33
	"\225\141\169-\225\141\188", -- U+1369-U+137C
	"\225\155\174-\225\155\176", -- U+16EE-U+16F0
	"\225\159\176-\225\159\185", -- U+17F0-U+17F9
	"\225\167\154", -- U+19DA
	"\226\129\176", -- U+2070
	"\226\129\180-\226\129\185", -- U+2074-U+2079
	"\226\130\128-\226\130\137", -- U+2080-U+2089
	"\226\133\144-\226\134\130", -- U+2150-U+2182
	"\226\134\133-\226\134\137", -- U+2185-U+2189
	"\226\145\160-\226\146\155", -- U+2460-U+249B
	"\226\147\170-\226\147\191", -- U+24EA-U+24FF
	"\226\157\182-\226\158\147", -- U+2776-U+2793
	"\226\179\189", -- U+2CFD
	"\227\128\135", -- U+3007
	"\227\128\161-\227\128\169", -- U+3021-U+3029
	"\227\128\184-\227\128\186", -- U+3038-U+303A
	"\227\134\146-\227\134\149", -- U+3192-U+3195
	"\227\136\160-\227\136\169", -- U+3220-U+3229
	"\227\137\136-\227\137\143", -- U+3248-U+324F
	"\227\137\145-\227\137\159", -- U+3251-U+325F
	"\227\138\128-\227\138\137", -- U+3280-U+3289
	"\227\138\177-\227\138\191", -- U+32B1-U+32BF
	"\234\155\166-\234\155\175", -- U+A6E6-U+A6EF
	"\234\160\176-\234\160\181", -- U+A830-U+A835
	"\240\144\132\135-\240\144\132\179", -- U+10107-U+10133
	"\240\144\133\128-\240\144\133\184", -- U+10140-U+10178
	"\240\144\134\138", -- U+1018A
	"\240\144\134\139", -- U+1018B
	"\240\144\139\161-\240\144\139\187", -- U+102E1-U+102FB
	"\240\144\140\160-\240\144\140\163", -- U+10320-U+10323
	"\240\144\141\129", -- U+10341
	"\240\144\141\138", -- U+1034A
	"\240\144\143\145-\240\144\143\149", -- U+103D1-U+103D5
	"\240\144\161\152-\240\144\161\159", -- U+10858-U+1085F
	"\240\144\161\185-\240\144\161\191", -- U+10879-U+1087F
	"\240\144\162\167-\240\144\162\175", -- U+108A7-U+108AF
	"\240\144\163\187-\240\144\163\191", -- U+108FB-U+108FF
	"\240\144\164\150-\240\144\164\155", -- U+10916-U+1091B
	"\240\144\166\188", -- U+109BC
	"\240\144\166\189", -- U+109BD
	"\240\144\167\128-\240\144\167\143", -- U+109C0-U+109CF
	"\240\144\167\146-\240\144\167\191", -- U+109D2-U+109FF
	"\240\144\169\128-\240\144\169\136", -- U+10A40-U+10A48
	"\240\144\169\189", -- U+10A7D
	"\240\144\169\190", -- U+10A7E
	"\240\144\170\157-\240\144\170\159", -- U+10A9D-U+10A9F
	"\240\144\171\171-\240\144\171\175", -- U+10AEB-U+10AEF
	"\240\144\173\152-\240\144\173\159", -- U+10B58-U+10B5F
	"\240\144\173\184-\240\144\173\191", -- U+10B78-U+10B7F
	"\240\144\174\169-\240\144\174\175", -- U+10BA9-U+10BAF
	"\240\144\179\186-\240\144\179\191", -- U+10CFA-U+10CFF
	"\240\144\185\160-\240\144\185\190", -- U+10E60-U+10E7E
	"\240\144\188\157-\240\144\188\166", -- U+10F1D-U+10F26
	"\240\144\189\145-\240\144\189\148", -- U+10F51-U+10F54
	"\240\145\129\146-\240\145\129\165", -- U+11052-U+11065
	"\240\145\135\161-\240\145\135\180", -- U+111E1-U+111F4
	"\240\145\156\186", -- U+1173A
	"\240\145\156\187", -- U+1173B
	"\240\145\163\170-\240\145\163\178", -- U+118EA-U+118F2
	"\240\145\177\154-\240\145\177\172", -- U+11C5A-U+11C6C
	"\240\146\144\128-\240\146\145\174", -- U+12400-U+1246E
	"\240\150\173\155-\240\150\173\161", -- U+16B5B-U+16B61
	"\240\150\186\128-\240\150\186\150", -- U+16E80-U+16E96
	"\240\157\139\160-\240\157\139\179", -- U+1D2E0-U+1D2F3
	"\240\157\141\160-\240\157\141\184", -- U+1D360-U+1D378
	"\240\158\163\135-\240\158\163\143", -- U+1E8C7-U+1E8CF
	"\240\158\177\177-\240\158\178\171", -- U+1EC71-U+1ECAB
	"\240\158\178\173-\240\158\178\175", -- U+1ECAD-U+1ECAF
	"\240\158\178\177-\240\158\178\180", -- U+1ECB1-U+1ECB4
	"\240\159\132\128-\240\159\132\140", -- U+1F100-U+1F10C
}

-- First character: ":", "_", a letter or any number. Subsequent characters may
-- additionally be "." or "-".
data.valid_attribute_name = "^[:_%w" .. N_not_Nd .. "][:_.%-%w" .. N_not_Nd .. "]*$"
-- data.template_link_param_1: keys are parser function / variable names
-- (including the trailing colon); each value is the namespace number of the
-- linked page at parameter 0, where 0 is mainspace.
-- A mainspace default can be overridden by an explicit namespace prefix in the
-- argument (e.g. {{PAGENAME:Category:Foo}} refers to "Category:Foo"). This does
-- not apply to any other namespace (e.g. {{#SPECIAL:Category:Foo}} refers to
-- "Special:Category:Foo").
do
	local link_namespaces = {}

	-- Records `namespace` as the value for every name in the list `names`.
	local function assign(namespace, names)
		for i = 1, #names do
			link_namespaces[names[i]] = namespace
		end
	end

	-- Mainspace.
	assign(0, {
		"#IFEXIST:",
		"#LST:",
		"#LSTH:",
		"#LSTX:",
		"#TITLEPARTS:",
		"BASEPAGENAME:",
		"BASEPAGENAMEE:",
		"CANONICALURL:",
		"CANONICALURLE:",
		"CASCADINGSOURCES:",
		"FULLPAGENAME:",
		"FULLPAGENAMEE:",
		"FULLURL:",
		"FULLURLE:",
		"LOCALURL:",
		"LOCALURLE:",
		"NAMESPACE:",
		"NAMESPACEE:",
		"NAMESPACENUMBER:",
		"PAGEID:",
		"PAGENAME:",
		"PAGENAMEE:",
		"PAGESIZE:",
		"REVISIONDAY:",
		"REVISIONDAY2:",
		"REVISIONID:",
		"REVISIONMONTH:",
		"REVISIONMONTH1:",
		"REVISIONTIMESTAMP:",
		"REVISIONUSER:",
		"REVISIONYEAR:",
		"ROOTPAGENAME:",
		"ROOTPAGENAMEE:",
		"SUBJECTPAGENAME:",
		"SUBJECTPAGENAMEE:",
		"SUBJECTSPACE:",
		"SUBJECTSPACEE:",
		"SUBPAGENAME:",
		"SUBPAGENAMEE:",
		"TALKPAGENAME:",
		"TALKPAGENAMEE:",
		"TALKSPACE:",
		"TALKSPACEE:",
	})
	-- Special:
	assign(-1, {"#SPECIAL:", "#SPECIALE:"})
	-- File:
	assign(6, {"FILEPATH:"})
	-- MediaWiki:
	assign(8, {"INT:"})
	-- Category:
	assign(14, {"#CATEGORYTREE:", "PAGESINCATEGORY:"})
	-- Module:
	assign(828, {"#INVOKE:"})

	data.template_link_param_1 = link_namespaces
end
-- data.template_link_param_2: same layout as a name -> namespace-number map,
-- but the value is the namespace number of the linked page at parameter 1.
do
	local link_namespaces = {}
	-- Both entries use 0, i.e. mainspace.
	link_namespaces["PROTECTIONEXPIRY:"] = 0
	link_namespaces["PROTECTIONLEVEL:"] = 0
	data.template_link_param_2 = link_namespaces
end

-- Hand the assembled table back to whoever loads this module.
return data