Module:template parser: Difference between revisions

(Created page with "--[[ NOTE: This module works by using recursive backtracking to build a node tree, which can then be traversed as necessary. Because it is called by a number of high-use modules, it has been optimised for speed using a profiler, since it is used to scrape data from large numbers of pages very quickly. To that end, it rolls some of its own methods in cases where this is faster than using a function from one of the standard libraries. Please DO NOT "simplify" the code by...")
 
m (1 revision imported)
 
(2 intermediate revisions by 2 users not shown)
Line 6: Line 6:
It has also been designed to emulate the native parser's behaviour as much as possible, which in some cases means replicating bugs or unintuitive behaviours in that code; these should not be "fixed", since it is important that the outputs are the same. Most of these originate from deficient regular expressions, which can't be used here, so the bugs have to be manually reintroduced as special cases (e.g. onlyinclude tags being case-sensitive and whitespace intolerant, unlike all other tags). If any of these are fixed, this module should also be updated accordingly.
It has also been designed to emulate the native parser's behaviour as much as possible, which in some cases means replicating bugs or unintuitive behaviours in that code; these should not be "fixed", since it is important that the outputs are the same. Most of these originate from deficient regular expressions, which can't be used here, so the bugs have to be manually reintroduced as special cases (e.g. onlyinclude tags being case-sensitive and whitespace intolerant, unlike all other tags). If any of these are fixed, this module should also be updated accordingly.
]]
]]
local export = {}
local data_module = "Module:template parser/data"
local load_module = "Module:load"
local magic_words_data_module = "Module:data/magic words"
local pages_module = "Module:pages"
local parser_extension_tags_data_module = "Module:data/parser extension tags"
local parser_module = "Module:parser"
local scribunto_module = "Module:Scribunto"
local string_pattern_escape_module = "Module:string/patternEscape"
local string_replacement_escape_module = "Module:string/replacementEscape"
local string_utilities_module = "Module:string utilities"
local table_length_module = "Module:table/length"
local table_shallow_copy_module = "Module:table/shallowCopy"
local table_sorted_pairs_module = "Module:table/sortedPairs"
local title_is_title_module = "Module:title/isTitle"
local title_make_title_module = "Module:title/makeTitle"
local title_new_title_module = "Module:title/newTitle"
local title_redirect_target_module = "Module:title/redirectTarget"
local require = require
local require = require
local m_parser = require("Module:parser")
 
local m_str_utils = require("Module:string utilities")
local m_parser = require(parser_module)
local mw = mw
local mw = mw
local mw_title = mw.title
local mw_title = mw.title
local mw_uri = mw.uri
local string = string
local string = string
local table = table
local table = table


local anchor_encode = mw_uri.anchorEncode
local build_template -- defined as export.buildTemplate below
local class_else_type = m_parser.class_else_type
local concat = table.concat
local concat = table.concat
local decode_entities = m_str_utils.decode_entities
local encode_uri = mw_uri.encode
local find = string.find
local find = string.find
local format = string.format
local format = string.format
local gsub = string.gsub
local gsub = string.gsub
local html_create = mw.html.create
local insert = table.insert
local insert = table.insert
local is_node = m_parser.is_node
local is_node = m_parser.is_node
local lower = m_str_utils.lower
local lower = string.lower
local match = string.match
local match = string.match
local new_title = mw_title.new
local next = next
local next = next
local pairs = pairs
local parse -- defined as export.parse below
local parse -- defined as export.parse below
local parse_template_name -- defined as export.parseTemplateName below
local parse_template_name -- defined below
local pcall = pcall
local pcall = pcall
local php_trim = m_str_utils.php_trim
local rep = string.rep
local rep = string.rep
local reverse = string.reverse
local scribunto_param_key = m_str_utils.scribunto_param_key
local select = select
local select = select
local sub = string.sub
local sub = string.sub
Line 37: Line 59:
local tostring = m_parser.tostring
local tostring = m_parser.tostring
local type = type
local type = type
local type_or_class = m_parser.type_or_class
local umatch = mw.ustring.match
local umatch = mw.ustring.match
local upper = m_str_utils.upper


local data = mw.loadData("Module:template parser/data")
--[==[
local frame = mw.getCurrentFrame()
Loaders for functions in other modules, which overwrite themselves with the target function when called. This ensures modules are only loaded when needed, retains the speed/convenience of locally-declared pre-loaded functions, and has no overhead after the first call, since the target functions are called directly in any subsequent calls.]==]
local invalid_tag_attribute_name_char = data.invalid_tag_attribute_name_char
-- Lazy loader: the first call rebinds the local `decode_entities` to the
-- function of that name exported by string_utilities_module, then forwards
-- its arguments to it. Every loader below follows the same shape.
local function decode_entities(...)
decode_entities = require(string_utilities_module).decode_entities
return decode_entities(...)
end
 
-- Lazy loader for `encode_entities` from string_utilities_module.
local function encode_entities(...)
encode_entities = require(string_utilities_module).encode_entities
return encode_entities(...)
end
 
-- Lazy loader for `get_link_target` from pages_module.
local function get_link_target(...)
get_link_target = require(pages_module).get_link_target
return get_link_target(...)
end
 
-- Lazy loader: rebinds `is_title` to the value returned by requiring
-- title_is_title_module (the module itself is the function).
local function is_title(...)
is_title = require(title_is_title_module)
return is_title(...)
end
 
-- Lazy loader for `load_data` from load_module.
local function load_data(...)
load_data = require(load_module).load_data
return load_data(...)
end
 
-- Lazy loader: rebinds `make_title` to the value returned by requiring
-- title_make_title_module.
local function make_title(...)
make_title = require(title_make_title_module)
return make_title(...)
end
 
-- Lazy loader: rebinds `new_title` to the value returned by requiring
-- title_new_title_module.
local function new_title(...)
new_title = require(title_new_title_module)
return new_title(...)
end
 
-- Lazy loader: rebinds `pattern_escape` to the value returned by requiring
-- string_pattern_escape_module.
local function pattern_escape(...)
pattern_escape = require(string_pattern_escape_module)
return pattern_escape(...)
end
 
-- Lazy loader for `php_htmlspecialchars` from scribunto_module.
local function php_htmlspecialchars(...)
php_htmlspecialchars = require(scribunto_module).php_htmlspecialchars
return php_htmlspecialchars(...)
end
 
-- Lazy loader for `php_ltrim` from scribunto_module.
local function php_ltrim(...)
php_ltrim = require(scribunto_module).php_ltrim
return php_ltrim(...)
end
 
-- Lazy loader for `php_trim` from scribunto_module.
local function php_trim(...)
php_trim = require(scribunto_module).php_trim
return php_trim(...)
end
 
-- Lazy loader: rebinds `redirect_target` to the value returned by requiring
-- title_redirect_target_module.
local function redirect_target(...)
redirect_target = require(title_redirect_target_module)
return redirect_target(...)
end
 
-- Lazy loader: rebinds `replacement_escape` to the value returned by
-- requiring string_replacement_escape_module.
local function replacement_escape(...)
replacement_escape = require(string_replacement_escape_module)
return replacement_escape(...)
end
 
-- Lazy loader for `scribunto_parameter_key` from scribunto_module.
local function scribunto_parameter_key(...)
scribunto_parameter_key = require(scribunto_module).scribunto_parameter_key
return scribunto_parameter_key(...)
end
 
-- Lazy loader: rebinds `shallow_copy` to the value returned by requiring
-- table_shallow_copy_module.
local function shallow_copy(...)
shallow_copy = require(table_shallow_copy_module)
return shallow_copy(...)
end
 
-- Lazy loader: rebinds `sorted_pairs` to the value returned by requiring
-- table_sorted_pairs_module.
local function sorted_pairs(...)
sorted_pairs = require(table_sorted_pairs_module)
return sorted_pairs(...)
end


local Parser, Node = m_parser.new()
-- Lazy loader: the first call rebinds the local `split` to the `split`
-- function exported by string_utilities_module, then forwards its arguments.
local function split(...)
split = require(string_utilities_module).split
return split(...)
end


local function preprocess(text, args)
local function table_len(...)
return is_node(text) and text:preprocess(args) or text
table_len = require(table_length_module)
return table_len(...)
end
end


local export = {}
-- Lazy loader: the first call rebinds the local `uupper` to the `upper`
-- function exported by string_utilities_module (note the different name),
-- then forwards its arguments.
local function uupper(...)
uupper = require(string_utilities_module).upper
return uupper(...)
end
 
--[==[
Loaders for objects, which load data (or some other object) into some variable, which can then be accessed as "foo or get_foo()", where the function get_foo sets the object to "foo" and then returns it. This ensures they are only loaded when needed, and avoids the need to check for the existence of the object each time, since once "foo" has been set, "get_foo" will not be called again.]==]
-- Cached result of load_data(data_module); nil until get_data() has run.
local data
-- One-shot getter, intended to be used as `data or get_data()`: loads the
-- module data into `data`, unsets itself, and returns the loaded value.
local function get_data()
	data = load_data(data_module)
	get_data = nil
	return data
end
 
-- Cached result of mw.getCurrentFrame(); nil until get_frame() has run.
local frame
-- One-shot getter, intended to be used as `frame or get_frame()`: stores the
-- current frame in `frame`, unsets itself, and returns the frame.
local function get_frame()
	frame = mw.getCurrentFrame()
	get_frame = nil
	return frame
end
 
-- Cached result of load_data(magic_words_data_module); nil until
-- get_magic_words() has run.
local magic_words
-- One-shot getter, intended to be used as `magic_words or get_magic_words()`:
-- loads the magic word data, unsets itself, and returns the loaded value.
local function get_magic_words()
	magic_words = load_data(magic_words_data_module)
	get_magic_words = nil
	return magic_words
end
 
-- Cached result of load_data(parser_extension_tags_data_module); nil until
-- get_parser_extension_tags() has run.
local parser_extension_tags
-- One-shot getter, intended to be used as
-- `parser_extension_tags or get_parser_extension_tags()`: loads the tag data,
-- unsets itself, and returns the loaded value.
local function get_parser_extension_tags()
	parser_extension_tags = load_data(parser_extension_tags_data_module)
	get_parser_extension_tags = nil
	return parser_extension_tags
end


------------------------------------------------------------------------------------
------------------------------------------------------------------------------------
Line 59: Line 190:
------------------------------------------------------------------------------------
------------------------------------------------------------------------------------


function Node:preprocess(args)
local Node = m_parser.node()
local new_node = Node.new
 
-- Expands `obj` against `frame_args` if it is a node (as judged by is_node),
-- by calling its own expand method. Anything that is not a node is returned
-- unchanged, as is the node itself if its expand method yields nil or false.
local function expand(obj, frame_args)
	if is_node(obj) then
		local expanded = obj:expand(frame_args)
		if expanded then
			return expanded
		end
	end
	return obj
end
export.expand = expand
 
function Node:expand(frame_args)
local output = {}
local output = {}
for i = 1, #self do
for i = 1, #self do
output[i] = preprocess(self[i], args)
output[i] = expand(self[i], frame_args)
end
end
return concat(output)
return concat(output)
Line 69: Line 208:
local Wikitext = Node:new_class("wikitext")
local Wikitext = Node:new_class("wikitext")


-- force_node ensures the output will always be a node.
-- force_node ensures the output will always be a Wikitext node.
function Wikitext:new(this, force_node)
function Wikitext:new(this, force_node)
if type(this) ~= "table" then
if type(this) ~= "table" then
return force_node and Node.new(self, {this}) or this
return force_node and new_node(self, {this}) or this
elseif #this == 1 then
elseif #this == 1 then
local this1 = this[1]
local this1 = this[1]
return force_node and not is_node(this1) and Node.new(self, this) or this1
return force_node and class_else_type(this1) ~= "wikitext" and new_node(self, this) or this1
end
end
local success, str = pcall(concat, this)
local success, str = pcall(concat, this)
if success then
if success then
return force_node and Node.new(self, {str}) or str
return force_node and new_node(self, {str}) or str
end
end
return Node.new(self, this)
return new_node(self, this)
end
end


-- First value is the argument name.
-- First value is the parameter name.
-- Second value is the argument's default value.
-- Second value is the parameter's default value.
-- Any additional values are ignored: "{{{a|b|c}}}" is argument "a" with default value "b" (*not* "b|c").
-- Any additional values are ignored: e.g. "{{{a|b|c}}}" is parameter "a" with default value "b" (*not* "b|c").
local Argument = Node:new_class("argument")
local Parameter = Node:new_class("parameter")


function Argument:new(this)
function Parameter:new(this)
local this2 = this[2]
local this2 = this[2]
if type_or_class(this2) == "parameter" then
if class_else_type(this2) == "argument" then
insert(this2, 2, "=")
insert(this2, 2, "=")
this2 = Wikitext:new(this2)
this2 = Wikitext:new(this2)
end
end
return Node.new(self, {this[1], this2})
if this[3] == nil then
this[2] = this2
else
this = {this[1], this2}
end
return new_node(self, this)
end
end


function Argument:__tostring()
function Parameter:__tostring()
local output = {}
local output = {}
for i = 1, #self do
for i = 1, #self do
Line 106: Line 250:
end
end


function Argument:next(i)
function Parameter:get_name(frame_args)
i = i + 1
return scribunto_parameter_key(expand(self[1], frame_args))
if i <= 2 then
end
return self[i], i
 
function Parameter:get_default(frame_args)
local default = self[2]
if default ~= nil then
return expand(default, frame_args)
end
end
return "{{{" .. expand(self[1], frame_args) .. "}}}"
end
end


function Argument:get_name(args)
function Parameter:expand(frame_args)
return scribunto_param_key(preprocess(self[1], args))
if frame_args == nil then
return self:get_default()
end
local name = expand(self[1], frame_args)
local val = frame_args[scribunto_parameter_key(name)] -- Parameter in use.
if val ~= nil then
return val
end
val = self[2] -- Default.
if val ~= nil then
return expand(val, frame_args)
end
return "{{{" .. name .. "}}}"
end
end


function Argument:get_default(args)
local Argument = Node:new_class("argument")
return tostring(self[2]) or "{{{" .. tostring(self[1]) .. "}}}"
end


function Argument:preprocess(args)
function Argument:new(this)
if not args then
local key = this._parse_data.key
return preprocess(self[2], args) or
this = Wikitext:new(this)
"{{{" .. preprocess(self[1], args) .. "}}}"
if key == nil then
return this
end
end
local name = preprocess(self[1], args)
return new_node(self, {Wikitext:new(key), this})
return args[php_trim(name)] or
preprocess(self[2], args) or
"{{{" .. name .. "}}}"
end
end


local Parameter = Node:new_class("parameter")
function Argument:__tostring()
return tostring(self[1]) .. "=" .. tostring(self[2])
end


function Parameter:__tostring()
function Argument:expand(frame_args)
return tostring(self[1]) .. "=" .. tostring(self[2])
return expand(self[1], frame_args) .. "=" .. expand(self[2], frame_args)
end
end


Line 146: Line 305:
end
end
return "{{" .. concat(output, "|") .. "}}"
return "{{" .. concat(output, "|") .. "}}"
end
-- Builds a table of this template's parameters from self[2] onwards (self[1]
-- is not read here). `args` is passed through to preprocess() when resolving
-- parameter names. Returns the table of parameters.
function Template:get_params(args)
local params, implicit = {}, 0
for i = 2, #self do
local param = self[i]
if type_or_class(param) == "parameter" then
-- Named parameter: the key is the preprocessed name normalised by
-- scribunto_param_key; the value is stringified and trimmed by php_trim.
params[scribunto_param_key(preprocess(param[1], args))] = php_trim(tostring(param[2]))
else
-- Unnamed parameter: stored under the next consecutive integer key.
implicit = implicit + 1
params[implicit] = tostring(param) -- Not trimmed.
end
end
return params
end
end


Line 166: Line 311:
-- FIXME: Some parser functions have special argument handling (e.g. {{#SWITCH:}}).
-- FIXME: Some parser functions have special argument handling (e.g. {{#SWITCH:}}).
do
do
local memo = {}
local templates, parser_variables, parser_functions = {}, {}, {}
-- Looks up the magic word data for `chunk`. An exact-key match is returned
-- as-is; failing that, the uppercased form (via uupper) is tried, and is only
-- accepted if the entry is not flagged `case_sensitive`. Returns nothing if
-- neither lookup succeeds.
local function retrieve_magic_word_data(chunk)
	local lookup = magic_words or get_magic_words()
	local entry = lookup[chunk]
	if entry then
		return entry
	end
	entry = lookup[uupper(chunk)]
	if entry and not entry.case_sensitive then
		return entry
	end
end
local function get_array_params(self)
-- Returns the name required to transclude the title object `title` using
local params = {}
-- template {{ }} syntax. If the `shortcut` flag is set, then any calls
for i = 2, #self do
-- which require a namespace prefix will use the abbreviated form where one
params[i - 1] = tostring(self[i])
-- exists (e.g. "Template:PAGENAME" becomes "T:PAGENAME").
local function get_template_invocation_name(title, shortcut)
if not (is_title(title) and not title.isExternal) then
error("Template invocations require a valid page title, which cannot contain an interwiki prefix.")
end
local namespace = title.namespace
-- If not in the template namespace, include the prefix (or ":" if
-- mainspace).
if namespace ~= 10 then
return get_link_target(title, shortcut)
end
-- If in the template namespace and it shares a name with a magic word,
-- it needs the prefix "Template:".
local text, fragment = title.text, title.fragment
if fragment and fragment ~= "" then
text = text .. "#" .. fragment
end
local colon = find(text, ":", nil, true)
if not colon then
local mgw_data = retrieve_magic_word_data(text)
return mgw_data and mgw_data.parser_variable and get_link_target(title, shortcut) or text
end
local mgw_data = retrieve_magic_word_data(sub(text, 1, colon - 1))
if mgw_data and (mgw_data.parser_function or mgw_data.transclusion_modifier) then
return get_link_target(title, shortcut)
end
end
return params
-- Also if "Template:" is necessary for disambiguation (e.g.
-- "Template:Category:Foo" can't be called with "Category:Foo").
local check = new_title(text, namespace)
return check and title_equals(title, check) and text or get_link_target(title, shortcut)
end
end
export.getTemplateInvocationName = get_template_invocation_name
local function convert_to_parser_function(self, name, arg1)
function parse_template_name(name, has_args, fragment, force_transclusion)
insert(self, 2, arg1)
local chunks, colon, start, n, p = {}, find(name, ":", nil, true), 1, 0, 0
self.get_params = get_array_params
while colon do
return name
local mgw_data = retrieve_magic_word_data(php_ltrim(sub(name, start, colon - 1)))
if not mgw_data then
break
end
local priority = mgw_data.priority
if not (priority and priority > p) then
local pf = mgw_data.parser_function and mgw_data.name or nil
if pf then
n = n + 1
chunks[n] = pf .. ":"
return chunks, "parser function", sub(name, colon + 1)
end
break
end
n = n + 1
chunks[n] = mgw_data.name .. ":"
start, p = colon + 1, priority
colon = find(name, ":", start, true)
end
if start > 1 then
name = sub(name, start)
end
name = php_trim(name)
-- Parser variables can only take SUBST:/SAFESUBST: as modifiers.
if not has_args and p <= 1 then
local mgw_data = retrieve_magic_word_data(name)
local pv = mgw_data and mgw_data.parser_variable and mgw_data.name or nil
if pv then
n = n + 1
chunks[n] = pv
return chunks, "parser variable"
end
end
-- Get the template title with the custom new_title() function in
-- [[Module:title/newTitle]], with `allowOnlyFragment` set to false
-- (e.g. "{{#foo}}" is invalid) and `allowRelative` set to true, for
-- relative links for namespaces with subpages (e.g. "{{/foo}}").
local title = new_title(name, 10, false, true)
if not (title and not title.isExternal) then
return nil
end
-- Resolve any redirects. If the redirect target is an interwiki link,
-- the template won't fail, but the redirect does not get resolved (i.e.
-- the redirect page itself gets transcluded, so the template name
-- should not be normalized to the target).
local redirect = redirect_target(title, force_transclusion)
if redirect and not redirect.isExternal then
title = redirect
end
-- If `fragment` is not true, unset it from the title object to prevent
-- it from being included by get_template_invocation_name.
if not fragment then
title.fragment = ""
end
chunks[n + 1] = get_template_invocation_name(title)
return chunks, "template"
end
end
function Template:get_name(args)
-- Note: force_transclusion avoids incrementing the expensive parser
local name = preprocess(self[1], args)
-- function count by forcing transclusion instead. This should only be used
local norm = memo[name]
-- when there is a real risk that the expensive parser function limit of
-- 500 will be hit.
local function process_name(self, frame_args, force_transclusion)
local name = expand(self[1], frame_args)
local has_args, norm = #self > 1
if not has_args then
norm = parser_variables[name]
if norm then
return norm, "parser variable"
end
end
norm = templates[name]
if norm then
if norm then
if type(norm) == "table" then
local pf_arg1 = parser_functions[name]
return convert_to_parser_function(self, norm[1], norm[2])
return norm, pf_arg1 and "parser function" or "template", pf_arg1
end
return norm
elseif norm == false then
elseif norm == false then
return
return nil
end
end
local chunks, pf_arg1 = parse_template_name(name, #self > 1)
local chunks, subclass, pf_arg1 = parse_template_name(name, has_args, nil, force_transclusion)
-- Fail if invalid.
-- Fail if invalid.
if not chunks then
if not chunks then
memo[name] = false
templates[name] = false
return
return nil
end
end
local chunk1 = chunks[1]
local chunk1 = chunks[1]
-- Fail on SUBST:.
-- Fail on SUBST:.
if chunk1 == "SUBST:" then
if chunk1 == "SUBST:" then
memo[name] = false
templates[name] = false
return
return nil
-- If pf_arg1 is returned, it's a parser function with pf_arg1 as the first argument.
-- Any modifiers are ignored.
-- Any modifiers are ignored.
elseif pf_arg1 then
elseif subclass == "parser function" then
local pf = chunks[#chunks]
local pf = chunks[#chunks]
memo[name] = {pf, pf_arg1}
templates[name] = pf
return convert_to_parser_function(self, pf, pf_arg1)
parser_functions[name] = pf_arg1
return pf, "parser function", pf_arg1
end
end
-- Ignore SAFESUBST:, and treat MSGNW: as a parser function with the pagename as its first argument (ignoring any RAW: that comes after).
-- Ignore SAFESUBST:, and treat MSGNW: as a parser function with the pagename as its first argument (ignoring any RAW: that comes after).
if chunks[chunk1 == "SAFESUBST:" and 2 or 1] == "MSGNW:" then
if chunks[chunk1 == "SAFESUBST:" and 2 or 1] == "MSGNW:" then
pf_arg1 = chunks[#chunks]
pf_arg1 = chunks[#chunks]
memo[name] = {"MSGNW:", pf_arg1}
local pf = "MSGNW:"
return convert_to_parser_function(self, "MSGNW:", pf_arg1)
templates[name] = pf
parser_functions[name] = pf_arg1
return pf, "parser function", pf_arg1
end
end
-- Ignore any remaining modifiers, as they've done their job.
-- Ignore any remaining modifiers, as they've done their job.
local output = chunks[#chunks]
local output = chunks[#chunks]
memo[name] = output
if subclass == "parser variable" then
return output
parser_variables[name] = output
else
templates[name] = output
end
return output, subclass
end
-- Returns the normalized name produced by process_name for this template, or
-- nil if process_name reports failure. The extra return values of
-- process_name (subclass and first parser function argument) are discarded.
function Template:get_name(frame_args, force_transclusion)
	local name = process_name(self, frame_args, force_transclusion)
	return name
end
-- Returns a table of this template's expanded arguments, or nil if
-- process_name cannot resolve the name.
--   * Parser variables yield an empty table.
--   * Parser functions yield an array: pf_arg1 at index 1, followed by the
--     expansion of self[2], self[3], ... at the same indices (untrimmed).
--   * Templates yield named arguments keyed by scribunto_parameter_key (values
--     trimmed by php_trim) and unnamed arguments at consecutive integer keys
--     (untrimmed).
function Template:get_arguments(frame_args)
	local name, subclass, pf_arg1 = process_name(self, frame_args)
	if name == nil then
		return nil
	end
	if subclass == "parser variable" then
		return {}
	end
	local result, count = {}, #self
	if subclass == "parser function" then
		result[1] = pf_arg1
		for index = 2, count do
			result[index] = expand(self[index], frame_args) -- Not trimmed.
		end
		return result
	end
	local positional = 0
	for index = 2, count do
		local node = self[index]
		if class_else_type(node) ~= "argument" then
			positional = positional + 1
			result[positional] = expand(node, frame_args) -- Not trimmed.
		else
			local key = scribunto_parameter_key(expand(node[1], frame_args))
			result[key] = php_trim((expand(node[2], frame_args)))
		end
	end
	return result
end
end
end


function Template:preprocess()
-- BIG TODO: manual template expansion.
return frame:preprocess(tostring(self))
-- Expands this template node.
--   * Unresolvable name: returns the "{{...}}" wikitext rebuilt from the
--     expanded parts, without preprocessing it.
--   * Parser variable: preprocesses "{{name}}" with the current frame.
--   * Parser function: preprocesses the stringified node; if frame_args is
--     given, a child frame carrying those args is used when it can be created
--     (failure to create one silently falls back to the current frame).
--   * Template: preprocesses the "{{...}}" wikitext rebuilt from the expanded
--     parts with the current frame.
function Template:expand(frame_args)
	local name, subclass = process_name(self, frame_args)
	if name ~= nil then
		if subclass == "parser variable" then
			return (frame or get_frame()):preprocess("{{" .. name .. "}}")
		elseif subclass == "parser function" then
			local current = frame or get_frame()
			if frame_args ~= nil then
				local ok, child = pcall(current.newChild, current, {args = frame_args})
				if ok then
					current = child
				end
			end
			return current:preprocess(tostring(self))
		end
	end
	-- Shared by the invalid-name and template cases: expand every part and
	-- rejoin them with pipes inside double braces.
	local parts = {}
	for index = 1, #self do
		parts[index] = expand(self[index], frame_args)
	end
	local wikitext = "{{" .. concat(parts, "|") .. "}}"
	if name == nil then
		return wikitext
	end
	return (frame or get_frame()):preprocess(wikitext)
end
end
end


Line 231: Line 543:


function Tag:__tostring()
function Tag:__tostring()
local open_tag, attributes, i = {"<", self.name}, self:get_attributes(), 2
local open_tag, attributes, n = {"<", self.name}, self:get_attributes(), 2
for attr, value in next, attributes do
for attr, value in next, attributes do
i = i + 1
n = n + 1
-- Quote value using "" by default, '' if it contains ", and leave unquoted if it contains both.
open_tag[n] = " " .. php_htmlspecialchars(attr) .. "=\"" .. php_htmlspecialchars(value, "compat") .. "\""
local quoter = not find(value, "\"", 1, true) and "\"" or
not find(value, "'", 1, true) and "'" or
match(value, "^()[^\t\n\f\r ]*$") and "" or
-- This shouldn't happen, unless the node has been edited manually. Not possible to stringify in a way that can be interpreted by the native parser, since it doesn't recognise escapes.
error("Tag attribute values cannot contain all three of \", ' and whitespace simultaneously.")
open_tag[i] = " " .. attr .. "=" .. quoter .. value .. quoter
end
end
if self.self_closing then
if self.self_closing then
Line 248: Line 554:
end
end


function Tag:get_attributes()
do
local raw = self.attributes
local valid_attribute_name
if not raw then
local function get_valid_attribute_name()
self.attributes = {}
valid_attribute_name, get_valid_attribute_name = (data or get_data()).valid_attribute_name, nil
return self.attributes
return valid_attribute_name
elseif type(raw) == "table" then
return raw
end
end
if sub(raw, -1) == "/" then
raw = sub(raw, 1, -2)
function Tag:get_attributes()
end
local raw = self.attributes
local attributes, head = {}, 1
if not raw then
-- Semi-manual implementation of the native regex.
self.attributes = {}
while true do
return self.attributes
local name, loc = match(raw, "([^\t\n\f\r />][^\t\n\f\r /=>]*)()", head)
elseif type(raw) == "table" then
if not name then
return raw
break
end
if sub(raw, -1) == "/" then
raw = sub(raw, 1, -2)
end
end
head = loc
local attributes, head = {}, 1
local value
-- Semi-manual implementation of the native regex.
loc = match(raw, "^[\t\n\f\r ]*=[\t\n\f\r ]*()", head)
while true do
if loc then
local name, loc = match(raw, "([^\t\n\f\r />][^\t\n\f\r /=>]*)()", head)
if not name then
break
end
head = loc
head = loc
value = match(raw, "^%b\"\"", head) or match(raw, "^%b''", head)
local value
if value then
loc = match(raw, "^[\t\n\f\r ]*=[\t\n\f\r ]*()", head)
head = head + #value
if loc then
value = sub(value, 2, -2)
head = loc
else
-- Either "", '' or the value ends on a space/at the end. Missing
local raw_value
-- end quotes are repaired by closing the value at the end.
raw_value, value = match(raw, "^([\"']?([^\t\n\f\r ]*))", head)
value, loc = match(raw, "^\"([^\"]*)\"?()", head)
head = head + #raw_value
if not value then
value, loc = match(raw, "^'([^']*)'?()", head)
if not value then
value, loc = match(raw, "^([^\t\n\f\r ]*)()", head)
end
end
head = loc
end
-- valid_attribute_name is a pattern matching a valid attribute name.
-- Defined in the data due to its length - see there for more info.
if umatch(name, valid_attribute_name or get_valid_attribute_name()) then
-- Sanitizer applies PHP strtolower (ASCII-only).
attributes[lower(name)] = value and decode_entities(
php_trim((gsub(value, "[\t\n\r ]+", " ")))
) or ""
end
end
end
end
if not (
self.attributes = attributes
match(name, "^[%-.]") or
return attributes
umatch(name, invalid_tag_attribute_name_char)
) then
attributes[lower(name)] = value and decode_entities(
php_trim(gsub(value, "[\t\n\r ]+", " "))
) or ""
end
end
end
self.attributes = attributes
return attributes
end
end


function Tag:preprocess()
function Tag:expand()
return frame:preprocess(tostring(self))
return (frame or get_frame()):preprocess(tostring(self))
end
end


Line 304: Line 619:
local success, str = pcall(concat, this)
local success, str = pcall(concat, this)
if success then
if success then
return Node.new(self, {
return new_node(self, {
str,
str,
level = this.level,
level = this.level,
section = this.section,
section = this.section,
pos = this.pos
index = this.index
})
})
end
end
end
end
return Node.new(self, this)
return new_node(self, this)
end
end


function Heading:__tostring()
do
local eq = rep("=", self.level)
local node_tostring = Node.__tostring
return eq .. Node.__tostring(self) .. eq
 
function Heading:__tostring()
local eq = rep("=", self.level)
return eq .. node_tostring(self) .. eq
end
end
end


function Heading:get_name(args)
do
return php_trim(Node.preprocess(self, args))
local expand_node = Node.expand
end


function Heading:preprocess(args)
-- Expanded heading names can contain "\n" (e.g. inside nowiki tags), which
local eq = rep("=", self.level)
-- causes any heading containing them to fail. However, in such cases, the
return eq .. Node.preprocess(self, args) .. eq
-- native parser still treats it as a heading for the purpose of section
-- numbers.
-- Expands the heading's contents with expand_node and returns the result,
-- or nil if the expanded text contains a newline character.
local function validate_name(self, frame_args)
	local name = expand_node(self, frame_args)
	if not find(name, "\n", nil, true) then
		return name
	end
	return nil
end
-- Returns the heading's expanded name trimmed with php_trim, or nil if
-- validate_name rejects it.
function Heading:get_name(frame_args)
	local name = validate_name(self, frame_args)
	if name == nil then
		return nil
	end
	local trimmed = php_trim(name)
	return trimmed or nil
end
-- Returns the anchor for this heading: the validated name passed through
-- anchor_encode and then decode_entities, or nil if validate_name rejects it.
-- FIXME: account for anchor disambiguation.
function Heading:get_anchor(frame_args)
	local name = validate_name(self, frame_args)
	if name == nil then
		return nil
	end
	local anchor = decode_entities(anchor_encode(name))
	return anchor or nil
end
-- Expands the heading's contents and wraps them in `self.level` equals signs
-- on each side.
function Heading:expand(frame_args)
	local marker = rep("=", self.level)
	local body = expand_node(self, frame_args)
	return marker .. body .. marker
end
end
end


Line 335: Line 677:
------------------------------------------------------------------------------------
------------------------------------------------------------------------------------


function Parser:read(i, j)
local Parser = m_parser.string_parser()
local head, i = self.head, i or 0
return sub(self.text, head + i, head + (j or i))
end


function Parser:advance(n)
-- Template or parameter.
self.head = self.head + (n or self[-1].step or 1)
end


function Parser:consume(this)
-- Parsed by matching the opening braces innermost-to-outermost (ignoring lone closing braces). Parameters {{{ }}} take priority over templates {{ }} where possible, but a double closing brace will always result in a closure, even if there are 3+ opening braces.
local layer = self[-1]
if not this then
local text, head = self.text, self.head
local loc1, loc2 = find(text, layer.pattern, head)
this = sub(text, head, loc1 and (loc1 == head and loc2 or loc1 - 1) or nil)
end
layer.step = #this
return layer.handler(self, this)
end


-- Template or argument.
-- For example, "{{{{foo}}}}" (4) is parsed as a parameter enclosed by single braces, and "{{{{{foo}}}}}" (5) is a parameter inside a template. However, "{{{{{foo }} }}}" is a template inside a parameter, due to "}}" forcing the closure of the inner node.
 
-- Parsed by matching the opening braces innermost-to-outermost (ignoring lone closing braces). Arguments {{{ }}} take priority over templates {{ }} where possible, but a double closing brace will always result in a closure, even if there are 3+ opening braces.
 
-- For example, "{{{{foo}}}}" (4) is parsed as an argument enclosed by single braces, and "{{{{{foo}}}}}" (5) is an argument inside a template. However, "{{{{{foo }} }}}" is a template inside an argument, due to "}}" forcing the closure of the inner node.
do
do
-- Handlers.
-- Handlers.
local handle_name
local handle_name
local handle_parameter
local handle_argument
local handle_value
-- Begins parsing a template or parameter: pushes a new sublayer handled by
-- handle_name and sets the pattern used for scanning. If `inner_node` is
-- given, it is emitted into the new sublayer first.
local function do_template_or_parameter(self, inner_node)
self:push_sublayer(handle_name)
self:set_pattern("[\n<[{|}]")
-- If a node has already been parsed, nest it at the start of the new
-- outer node (e.g. when parsing "{{{{foo}}bar}}", the template "{{foo}}"
-- is parsed first, since it's the innermost, and becomes the first
-- node of the outer template).
if inner_node then
self:emit(inner_node)
end
end
-- Handler for "|": pops the current sublayer and emits it wrapped by
-- Wikitext:new, then pushes a new sublayer handled by handle_argument, using
-- a pattern that additionally stops on "=".
-- NOTE(review): every value returned by pop_sublayer is forwarded to
-- Wikitext:new, so keep the nested call form.
local function pipe(self)
self:emit(Wikitext:new(self:pop_sublayer()))
self:push_sublayer(handle_argument)
self:set_pattern("[\n<=[{|}]")
end
-- Handler for "}": if self:read(1) is also "}" (presumably the character
-- after the current one - confirm against the parser's read method), pops
-- the current sublayer, emits it wrapped by Wikitext:new, and returns the
-- result of popping the layer. Otherwise `this` is emitted unchanged.
local function rbrace(self, this)
if self:read(1) == "}" then
self:emit(Wikitext:new(self:pop_sublayer()))
return self:pop()
end
self:emit(this)
end
function handle_name(self, ...)
function handle_name(self, ...)
handle_name = self:switch(handle_name, {
handle_name = self:switch(handle_name, {
Line 371: Line 722:
["["] = Parser.wikilink_block,
["["] = Parser.wikilink_block,
["{"] = Parser.braces,
["{"] = Parser.braces,
["|"] = pipe,
["|"] = function(self)
["}"] = rbrace,
self:emit(Wikitext:new(self:pop_sublayer()))
self:push_sublayer(handle_parameter)
self[-1].pattern = "[\n<=[{|}]"
end,
["}"] = function(self)
if self:read(1) == "}" then
self:emit(Wikitext:new(self:pop_sublayer()))
return self:pop()
end
self:emit("}")
end,
[""] = Parser.fail_route,
[""] = Parser.fail_route,
[false] = Parser.emit
[false] = Parser.emit
Line 391: Line 729:
return handle_name(self, ...)
return handle_name(self, ...)
end
end
 
function handle_parameter(self, ...)
function handle_argument(self, ...)
local function emit_parameter(self)
handle_argument = self:switch(handle_argument, {
local param = Wikitext:new(self:pop_sublayer())
["\n"] = function(self, this)
local layer = self[-1]
return self:heading_block(this, "==")
local key = layer.key
if key then
param = Parameter:new{key, param}
layer.key = nil
end
self:emit(param)
end
handle_parameter = self:switch(handle_parameter, {
["\n"] = function(self)
if self[-1].key then
return self:heading_block()
end
self:newline()
while self:read(0, 2) == "\n==" do
self:advance()
self:emit(select(2, self:get("do_heading_block")))
end
end,
end,
 
["<"] = Parser.tag,
["<"] = Parser.tag,
 
["="] = function(self)
["="] = function(self)
local key = Wikitext:new(self:pop_sublayer())
local key = self:pop_sublayer()
self[-1].key = key
self:push_sublayer(handle_value)
self:push_sublayer(handle_parameter)
self:set_pattern("[\n<[{|}]")
self[-1].pattern = "[\n<[{|}]"
self.current_layer._parse_data.key = key
end,
end,
 
["["] = Parser.wikilink_block,
["{"] = Parser.braces,
["|"] = pipe,
["}"] = rbrace,
[""] = Parser.fail_route,
[false] = Parser.emit
})
return handle_argument(self, ...)
end
 
function handle_value(self, ...)
handle_value = self:switch(handle_value, {
["\n"] = Parser.heading_block,
["<"] = Parser.tag,
["["] = Parser.wikilink_block,
["["] = Parser.wikilink_block,
["{"] = Parser.braces,
["{"] = Parser.braces,
 
["|"] = function(self)
["|"] = function(self)
emit_parameter(self)
self:emit(Argument:new(self:pop_sublayer()))
self:push_sublayer(handle_parameter)
self:push_sublayer(handle_argument)
self[-1].pattern = "[\n<=[{|}]"
self:set_pattern("[\n<=[{|}]")
end,
end,
 
["}"] = function(self)
["}"] = function(self, this)
if self:read(1) == "}" then
if self:read(1) == "}" then
emit_parameter(self)
self:emit(Argument:new(self:pop_sublayer()))
return self:pop()
return self:pop()
end
end
self:emit("}")
self:emit(this)
end,
end,
 
[""] = Parser.fail_route,
[""] = Parser.fail_route,
[false] = Parser.emit
[false] = Parser.emit
})
})
return handle_parameter(self, ...)
return handle_value(self, ...)
end
end
function Parser:do_template_or_argument()
function Parser:template_or_parameter()
self:push_sublayer(handle_name)
local text, head, node_to_emit, failed = self.text, self.head
self[-1].pattern = "[\n<[{|}]"
end
function Parser:template_or_argument()
local text, head, node_to_emit = self.text, self.head
-- Comments/tags interrupt the brace count.
-- Comments/tags interrupt the brace count.
local braces = match(text, "^{+()", head) - head
local braces = match(text, "^{+()", head) - head
self:advance(braces)
self:advance(braces)
repeat
while true do
local success, node = self:get("do_template_or_argument")
local success, node = self:try(do_template_or_parameter, node_to_emit)
-- Fail means no "}}" or "}}}" was found, so emit any remaining
-- unmatched opening braces before any templates/parameters that
-- were found.
if not success then
if not success then
self:emit(rep("{", braces))
self:emit(rep("{", braces))
failed = true
break
break
elseif node_to_emit then
-- If there are 3+ opening and closing braces, it's a parameter.
-- Nest the already-parsed node at the start of the new node.
elseif braces >= 3 and self:read(2) == "}" then
local node1 = node[1]
node[1] = (
node1 == "" and node_to_emit or
Wikitext:new{node_to_emit, node1}
)
end
if self:read(2) == "}" and braces > 2 then
self:advance(3)
self:advance(3)
braces = braces - 3
braces = braces - 3
node = Argument:new(node)
node = Parameter:new(node)
-- Otherwise, it's a template.
else
else
self:advance(2)
self:advance(2)
Line 480: Line 807:
node = Template:new(node)
node = Template:new(node)
end
end
local pos = head + braces
local index = head + braces
node.pos = pos
node.index = index
node.raw = sub(text, pos, self.head - 1)
node.raw = sub(text, index, self.head - 1)
node_to_emit = node
node_to_emit = node
if braces == 1 then
-- Terminate once not enough braces remain for further matches.
if braces == 0 then
break
-- Emit any stray opening brace before any matched nodes.
elseif braces == 1 then
self:emit("{")
self:emit("{")
break
break
end
end
until braces == 0
end
if node_to_emit then
if node_to_emit then
self:emit(node_to_emit)
self:emit(node_to_emit)
end
end
return braces
return braces, failed
end
end
end
end
Line 498: Line 829:
-- Tag.
-- Tag.
do
do
local tags = data.tags
local end_tags
-- One-shot lazy loader for `end_tags`: reads the `end_tags` field from `data`
-- (calling get_data() when `data` is not set), caches it in the `end_tags`
-- upvalue, and sets get_end_tags itself to nil. Callers are expected to use
-- `end_tags or get_end_tags()` so this only runs while the cache is empty.
local function get_end_tags()
end_tags, get_end_tags = (data or get_data()).end_tags, nil
return end_tags
end
-- Handlers.
-- Handlers.
local handle_start
local handle_start
local handle_tag
local handle_tag
-- Route function handed to self:try() when a possible tag is encountered.
-- Installs handle_start as the current layer's handler, records self.head as
-- the layer's index, sets the pattern to whitespace, "/" or ">", and then
-- advances.
local function do_tag(self)
local layer = self.current_layer
layer._parse_data.handler, layer.index = handle_start, self.head
self:set_pattern("[%s/>]")
self:advance()
end
local function is_ignored_tag(self, this)
local function is_ignored_tag(self, this)
Line 516: Line 859:
return self:fail_route()
return self:fail_route()
end
end
self.head = loc
self:jump(loc)
self[-1].ignored = true
local tag = self:pop()
return self:pop()
tag.ignored = true
return tag
end
end
function handle_start(self, this)
function handle_start(self, this)
if this == "/" then
if this == "/" then
Line 535: Line 879:
return self:fail_route()
return self:fail_route()
end
end
-- Tags are only case-insensitive with ASCII characters.
local raw_name = this
this = lower(this)
this = lower(this)
if not tags[this] then
local end_tag_pattern = (end_tags or get_end_tags())[this]
if not end_tag_pattern then -- Validity check.
return self:fail_route()
return self:fail_route()
end
end
local layer = self[-1]
local layer = self.current_layer
local text, head = self.text, self.head + layer.step
local pdata = layer._parse_data
local text, head = self.text, self.head + pdata.step
if match(text, "^/[^>]", head) then
if match(text, "^/[^>]", head) then
return self:fail_route()
return self:fail_route()
elseif is_ignored_tag(self, this) then
elseif is_ignored_tag(self, this) then
return ignored_tag(self, text, head)
return ignored_tag(self, text, head)
-- If an onlyinclude tag is not ignored (and cannot be active since it
-- would have triggered special handling earlier), it must be plaintext.
elseif this == "onlyinclude" then
return self:fail_route()
elseif this == "noinclude" or this == "includeonly" then
elseif this == "noinclude" or this == "includeonly" then
layer.ignored = true -- Ignored block.
layer.ignored = true -- Ignored block.
layer.raw_name = raw_name
end
end
layer.name, layer.handler, layer.pattern = this, handle_tag, ">"
layer.name, pdata.handler, pdata.end_tag_pattern = this, handle_tag, end_tag_pattern
self:set_pattern(">")
end
end
Line 554: Line 908:
if this == "" then
if this == "" then
return self:fail_route()
return self:fail_route()
elseif this ~= ">" then
end
self[-1].attributes = this
local layer = self.current_layer
if this ~= ">" then
layer.attributes = this
return
return
elseif self:read(-1) == "/" then
elseif self:read(-1) == "/" then
self[-1].self_closing = true
layer.self_closing = true
return self:pop()
return self:pop()
end
end
local text, head, layer = self.text, self.head + 1, self[-1]
local text, head = self.text, self.head + 1
local loc1, loc2 = find(text, tags[layer.name], head)
local loc1, loc2 = find(text, layer._parse_data.end_tag_pattern, head)
if loc1 then
if loc1 then
if loc1 > head then
if loc1 > head then
self:emit(sub(text, head, loc1 - 1))
self:emit(sub(text, head, loc1 - 1))
end
end
self.head = loc2
self:jump(loc2)
return self:pop()
return self:pop()
-- noinclude and includeonly will tolerate having no closing tag, but
-- only if given in lowercase. This is due to a preprocessor bug, as
-- it uses a regex with the /i (case-insensitive) flag to check for
-- end tags, but a simple array lookup with lowercase tag names when
-- looking up which tags should tolerate no closing tag (exact match
-- only, so case-sensitive).
elseif layer.ignored then
elseif layer.ignored then
self.head = #self.text
local raw_name = layer.raw_name
return self:pop()
if raw_name == "noinclude" or raw_name == "includeonly" then
self:jump(#text)
return self:pop()
end
end
end
return self:fail_route()
return self:fail_route()
end
-- Route method (looked up by name via self:get("do_tag")) for a possible tag.
-- Installs handle_start as the handler of the top layer (self[-1]), sets its
-- pattern to whitespace, "/" or ">", and then advances.
function Parser:do_tag()
local layer = self[-1]
layer.handler, layer.pattern = handle_start, "[%s/>]"
self:advance()
end
-- Plain-text search for `pattern` in `text`, starting at `head`. Returns the
-- index of the last character of the first occurrence, or the length of
-- `text` when there is no occurrence.
local function find_next_chunk(text, pattern, head)
	local _, chunk_end = find(text, pattern, head, true)
	return chunk_end or #text
end
end
Line 589: Line 944:
-- HTML comment.
-- HTML comment.
if self:read(1, 3) == "!--" then
if self:read(1, 3) == "!--" then
self.head = find_next_chunk(self.text, "-->", self.head + 4)
local text = self.text
-- onlyinclude closing tag (whitespace intolerant).
self:jump(select(2, find(text, "-->", self.head + 4, true)) or #text)
-- onlyinclude tags (which must be lowercase with no whitespace).
elseif self.onlyinclude and self:read(1, 13) == "/onlyinclude>" then
elseif self.onlyinclude and self:read(1, 13) == "/onlyinclude>" then
self.head = find_next_chunk(self.text, "<onlyinclude>", self.head + 14)
local text = self.text
self:jump(select(2, find(text, "<onlyinclude>", self.head + 14, true)) or #text)
else
else
local success, tag = self:get("do_tag")
local success, tag = self:try(do_tag)
if not success then
if not success then
self:emit("<")
self:emit("<")
Line 605: Line 962:


-- Heading.
-- Heading.
-- The preparser assigns each heading a number, which is used for things like section edit links. The preparser will only do this for heading blocks which aren't nested inside templates, arguments and parser tags. In some cases (e.g. when template blocks contain untrimmed newlines), a preparsed heading may not be treated as a heading in the final output. That does not affect the preparser, however, which will always count sections based on the preparser heading count, since it can't know what a template's final output will be.
-- The preparser assigns each heading a number, which is used for things like section edit links. The preparser will only do this for heading blocks which aren't nested inside templates, parameters and parser tags. In some cases (e.g. when template blocks contain untrimmed newlines), a preparsed heading may not be treated as a heading in the final output. That does not affect the preparser, however, which will always count sections based on the preparser heading count, since it can't know what a template's final output will be.
do
do
-- Handlers.
-- Handlers.
Line 611: Line 968:
local handle_body
local handle_body
local handle_possible_end
local handle_possible_end
-- Route function handed to self:try() by Parser:heading. Installs
-- handle_start on the current layer, records the starting head as the layer's
-- index, and sets the pattern to tab, newline or space. It then counts the
-- run of "=" at the head, stores that count as the layer's level, and
-- advances past the run.
local function do_heading(self)
local layer, head = self.current_layer, self.head
layer._parse_data.handler, layer.index = handle_start, head
self:set_pattern("[\t\n ]")
-- Comments/tags interrupt the equals count.
-- The "()" position capture yields the index just past the run of "=", so
-- subtracting `head` gives the number of "=" characters.
local eq = match(self.text, "^=+()", head) - head
layer.level = eq
self:advance(eq)
end
-- Route function handed to self:try() while checking whether a heading ends
-- here: installs handle_possible_end on the current layer and sets the
-- pattern to newline or "<".
local function do_heading_possible_end(self)
self.current_layer._parse_data.handler = handle_possible_end
self:set_pattern("[\n<]")
end
function handle_start(self, ...)
function handle_start(self, ...)
-- ===== is "=" as an L2; ======== is "==" as an L3 etc.
-- ===== is "=" as an L2; ======== is "==" as an L3 etc.
local function newline(self)
local function newline(self)
local layer = self[-1]
local layer = self.current_layer
local eq = layer.level
local eq = layer.level
if eq <= 2 then
if eq <= 2 then
Line 630: Line 1,002:
local function whitespace(self)
local function whitespace(self)
local success, possible_end = self:get("do_heading_possible_end")
local success, possible_end = self:try(do_heading_possible_end)
if success then
if success then
self:emit(Wikitext:new(possible_end))
self:emit(Wikitext:new(possible_end))
local layer = self[-1]
self.current_layer._parse_data.handler = handle_body
layer.handler, layer.pattern = handle_body, "[\n<={]"
self:set_pattern("[\n<=[{]")
return self:consume()
return self:consume()
end
end
Line 648: Line 1,020:
[false] = function(self)
[false] = function(self)
-- Emit any excess = signs once we know it's a conventional heading. Up till now, we couldn't know if the heading is just a string of = signs (e.g. ========), so it wasn't guaranteed that the heading text starts after the 6th.
-- Emit any excess = signs once we know it's a conventional heading. Up till now, we couldn't know if the heading is just a string of = signs (e.g. ========), so it wasn't guaranteed that the heading text starts after the 6th.
local layer = self[-1]
local layer = self.current_layer
local eq = layer.level
local eq = layer.level
if eq > 6 then
if eq > 6 then
Line 654: Line 1,026:
layer.level = 6
layer.level = 6
end
end
layer.handler, layer.pattern = handle_body, "[\n<=[{]"
layer._parse_data.handler = handle_body
self:set_pattern("[\n<=[{]")
return self:consume()
return self:consume()
end
end
Line 671: Line 1,044:
local eq_len = #eq
local eq_len = #eq
self:advance(eq_len)
self:advance(eq_len)
local success, possible_end = self:get("do_heading_possible_end")
local success, possible_end = self:try(do_heading_possible_end)
if success then
if success then
self:emit(eq)
self:emit(eq)
Line 677: Line 1,050:
return self:consume()
return self:consume()
end
end
local layer = self[-1]
local layer = self.current_layer
local level = layer.level
local level = layer.level
if eq_len > level then
if eq_len > level then
Line 689: Line 1,062:
["["] = Parser.wikilink_block,
["["] = Parser.wikilink_block,
["{"] = Parser.braces,
["{"] = function(self, this)
return self:braces(this, true)
end,
[""] = Parser.fail_route,
[""] = Parser.fail_route,
[false] = Parser.emit
[false] = Parser.emit
Line 701: Line 1,078:
["<"] = function(self)
["<"] = function(self)
local head = (
if self:read(1, 3) ~= "!--" then
self:read(1, 3) == "!--" and
return self:pop()
select(2, find(self.text, "-->", self.head + 4, true))
end
)
local head = select(2, find(self.text, "-->", self.head + 4, true))
if not head then
if not head then
return self:pop()
return self:pop()
end
end
self.head = head
self:jump(head)
end,
end,
Line 714: Line 1,091:
[false] = function(self, this)
[false] = function(self, this)
if not match(this, "^[\t ]+$") then
if not match(this, "^[\t ]+()$") then
return self:pop()
return self:pop()
end
end
Line 721: Line 1,098:
})
})
return handle_possible_end(self, ...)
return handle_possible_end(self, ...)
end
-- Route method (looked up by name via self:get("do_heading")) for a heading.
-- Installs handle_start on the top layer (self[-1]) with the pattern tab,
-- newline or space, and records the starting head as the layer's pos. It then
-- counts the run of "=" at the head, stores that count as the layer's level,
-- and advances past the run.
function Parser:do_heading()
local layer, head = self[-1], self.head
layer.handler, layer.pattern, layer.pos = handle_start, "[\t\n ]", head
-- Comments/tags interrupt the equals count.
-- The "()" position capture yields the index just past the run of "=", so
-- subtracting `head` gives the number of "=" characters.
local eq = match(self.text, "^=+()", head) - head
layer.level = eq
self:advance(eq)
end
-- Route method (looked up by name via self:get("do_heading_possible_end"))
-- used while checking whether a heading ends here: installs
-- handle_possible_end on the top layer with the pattern newline or "<".
function Parser:do_heading_possible_end()
local layer = self[-1]
layer.handler, layer.pattern = handle_possible_end, "[\n<]"
end
end
function Parser:heading()
function Parser:heading()
local success, heading = self:get("do_heading")
local success, heading = self:try(do_heading)
if success then
if success then
local section = self.section + 1
local section = self.section + 1
Line 759: Line 1,122:
-- Block handlers.
-- Block handlers.


-- These are blocks which can affect template/argument parsing, since they're also parsed by Parsoid at the same time (even though they aren't processed until later).
-- These are blocks which can affect template/parameter parsing, since they're also parsed by Parsoid at the same time (even though they aren't processed until later).


-- All blocks (including templates/arguments) can nest inside each other, but an inner block must be closed before the outer block which contains it. This is why, for example, the wikitext "{{template| [[ }}" will result in an unprocessed template, since the inner "[[" is treated as the opening of a wikilink block, which prevents "}}" from being treated as the closure of the template block. On the other hand, "{{template| [[ ]] }}" will process correctly, since the wikilink block is closed before the template closure. It makes no difference whether the block will be treated as valid or not when it's processed later on, so "{{template| [[ }} ]] }}" would also work, even though "[[ }} ]]" is not a valid wikilink.
-- All blocks (including templates/parameters) can nest inside each other, but an inner block must be closed before the outer block which contains it. This is why, for example, the wikitext "{{template| [[ }}" will result in an unprocessed template, since the inner "[[" is treated as the opening of a wikilink block, which prevents "}}" from being treated as the closure of the template block. On the other hand, "{{template| [[ ]] }}" will process correctly, since the wikilink block is closed before the template closure. It makes no difference whether the block will be treated as valid or not when it's processed later on, so "{{template| [[ }} ]] }}" would also work, even though "[[ }} ]]" is not a valid wikilink.


-- Note that nesting also affects pipes and equals signs, in addition to block closures.
-- Note that nesting also affects pipes and equals signs, in addition to block closures.
Line 770: Line 1,133:


-- Language conversion block.
-- Language conversion block.
-- Opens with "-{" and closes with "}-". However, templates/arguments take priority, so "-{{" is parsed as "-" followed by the opening of a template/argument block (depending on what comes after).
-- Opens with "-{" and closes with "}-". However, templates/parameters take priority, so "-{{" is parsed as "-" followed by the opening of a template/parameter block (depending on what comes after).
-- Note: Language conversion blocks aren't actually enabled on the English Wiktionary, but Parsoid still parses them at this stage, so they can affect the closure of outer blocks: e.g. "[[ -{ ]]" is not a valid wikilink block, since the "]]" falls inside the new language conversion block.
-- Note: Language conversion blocks aren't actually enabled on the English Wiktionary, but Parsoid still parses them at this stage, so they can affect the closure of outer blocks: e.g. "[[ -{ ]]" is not a valid wikilink block, since the "]]" falls inside the new language conversion block.
do
do
local function handle_language_conversion_block(self, ...)
-- Handler.
-- Forward-declared so that do_language_conversion_block can reference it
-- before the function itself is defined.
local handle_language_conversion_block
-- Route function for the body of a language conversion block: installs
-- handle_language_conversion_block on the current layer and sets the pattern
-- to newline, "<", "[", "{" or "}".
local function do_language_conversion_block(self)
self.current_layer._parse_data.handler = handle_language_conversion_block
self:set_pattern("[\n<[{}]")
end
function handle_language_conversion_block(self, ...)
handle_language_conversion_block = self:switch(handle_language_conversion_block, {
handle_language_conversion_block = self:switch(handle_language_conversion_block, {
["\n"] = Parser.heading_block,
["\n"] = Parser.heading_block,
Line 780: Line 1,151:
["{"] = Parser.braces,
["{"] = Parser.braces,
["}"] = function(self)
["}"] = function(self, this)
if self:read(1) == "-" then
if self:read(1) == "-" then
self:emit("}-")
self:emit("}-")
Line 786: Line 1,157:
return self:pop()
return self:pop()
end
end
self:emit("}")
self:emit(this)
end,
end,
Line 795: Line 1,166:
end
end
function Parser:do_language_conversion_block()
function Parser:braces(this, fail_on_unclosed_braces)
local layer = self[-1]
layer.handler, layer.pattern = handle_language_conversion_block, "[\n<[{}]"
end
function Parser:braces()
local language_conversion_block = self:read(-1) == "-"
local language_conversion_block = self:read(-1) == "-"
if self:read(1) == "{" then
if self:read(1) == "{" then
local braces = self:template_or_argument()
local braces, failed = self:template_or_parameter()
if not (braces == 1 and language_conversion_block) then
-- Headings will fail if they contain an unclosed brace block.
if failed and fail_on_unclosed_braces then
return self:fail_route()
-- Language conversion blocks cannot begin "-{{", but can begin
-- "-{{{" iff parsed as "-{" + "{{".
elseif not (language_conversion_block and braces == 1) then
return self:consume()
return self:consume()
end
end
else
else
self:emit("{")
self:emit(this)
if not language_conversion_block then
if not language_conversion_block then
return
return
Line 814: Line 1,185:
self:advance()
self:advance()
end
end
self:emit(Wikitext:new(select(2, self:get("do_language_conversion_block"))))
self:emit(Wikitext:new(self:get(do_language_conversion_block)))
end
end
end
end
Line 825: Line 1,196:
Note: Heading blocks are only parsed like this if they occur inside a template, since they do not iterate the preparser's heading count (i.e. they aren't proper headings).
Note: Heading blocks are only parsed like this if they occur inside a template, since they do not iterate the preparser's heading count (i.e. they aren't proper headings).


Note 2: if directly inside a template parameter with no previous equals signs, a newline followed by a single equals sign is parsed as a parameter equals sign, not the opening of a new L1 heading block. This does not apply to any other heading levels. As such, {{template|parameter\n=}}, {{template|key\n=value}} or even {{template|\n=}} will successfully close, but {{template|parameter\n==}}, {{template|key=value\n=more value}}, {{template\n=}} etc. will not, since in the latter cases the "}}" would fall inside the new heading block.
Note 2: if directly inside a template argument with no previous equals signs, a newline followed by a single equals sign is parsed as an argument equals sign, not the opening of a new L1 heading block. This does not apply to any other heading levels. As such, {{template|key\n=}}, {{template|key\n=value}} or even {{template|\n=}} will successfully close, but {{template|key\n==}}, {{template|key=value\n=more value}}, {{template\n=}} etc. will not, since in the latter cases the "}}" would fall inside the new heading block.
]==]
]==]
do
do
local function handle_heading_block(self, ...)
-- Handler.
-- Forward-declared so that do_heading_block can reference it before the
-- function itself is defined.
local handle_heading_block
-- Route function for the body of a heading block: installs
-- handle_heading_block on the current layer and sets the pattern to newline,
-- "<", "[" or "{".
local function do_heading_block(self)
self.current_layer._parse_data.handler = handle_heading_block
self:set_pattern("[\n<[{]")
end
function handle_heading_block(self, ...)
handle_heading_block = self:switch(handle_heading_block, {
handle_heading_block = self:switch(handle_heading_block, {
["\n"] = function(self)
["\n"] = function(self)
Line 844: Line 1,223:
end
end
function Parser:do_heading_block()
function Parser:heading_block(this, nxt)
local layer = self[-1]
layer.handler, layer.pattern = handle_heading_block, "[\n<[{]"
end
function Parser:heading_block()
self:newline()
self:newline()
while self:read(0, 1) == "\n=" do
this = this .. (nxt or "=")
local loc = #this - 1
while self:read(0, loc) == this do
self:advance()
self:advance()
self:emit(Wikitext:new(select(2, self:get("do_heading_block"))))
self:emit(Wikitext:new(self:get(do_heading_block)))
end
end
end
end
Line 861: Line 1,237:
-- Opens with "[[" and closes with "]]".
-- Opens with "[[" and closes with "]]".
do
do
local function handle_wikilink_block(self, ...)
-- Handler.
local handle_wikilink_block
-- Route function for the body of a wikilink block: installs
-- handle_wikilink_block on the current layer and sets the pattern to newline,
-- "<", "[", "]" or "{" ("%]" is the escaped closing bracket inside the set).
local function do_wikilink_block(self)
self.current_layer._parse_data.handler = handle_wikilink_block
self:set_pattern("[\n<[%]{]")
end
function handle_wikilink_block(self, ...)
handle_wikilink_block = self:switch(handle_wikilink_block, {
handle_wikilink_block = self:switch(handle_wikilink_block, {
["\n"] = Parser.heading_block,
["\n"] = Parser.heading_block,
Line 867: Line 1,251:
["["] = Parser.wikilink_block,
["["] = Parser.wikilink_block,
["]"] = function(self)
["]"] = function(self, this)
if self:read(1) == "]" then
if self:read(1) == "]" then
self:emit("]]")
self:emit("]]")
Line 873: Line 1,257:
return self:pop()
return self:pop()
end
end
self:emit("]")
self:emit(this)
end,
end,
Line 881: Line 1,265:
})
})
return handle_wikilink_block(self, ...)
return handle_wikilink_block(self, ...)
end
-- Route method (looked up by name via self:get("do_wikilink_block")) for the
-- body of a wikilink block: installs handle_wikilink_block on the top layer
-- with the pattern newline, "<", "[", "]" or "{" ("%]" is the escaped closing
-- bracket inside the set).
function Parser:do_wikilink_block()
local layer = self[-1]
layer.handler, layer.pattern = handle_wikilink_block, "[\n<[%]{]"
end
end
Line 892: Line 1,271:
self:emit("[[")
self:emit("[[")
self:advance(2)
self:advance(2)
self:emit(Wikitext:new(select(2, self:get("do_wikilink_block"))))
self:emit(Wikitext:new(self:get(do_wikilink_block)))
else
else
self:emit("[")
self:emit("[")
Line 920: Line 1,299:
break
break
end
end
self.head = head
self:jump(head)
end
end
self:emit("\n")
self:emit("\n")
Line 929: Line 1,308:
local handle_start
local handle_start
local main_handler
local main_handler
-- Top-level route function passed to Parser:parse by export.parse. Installs
-- handle_start on the current layer with a pattern matching any single
-- character, and resets the section counter to 0.
-- If `transcluded` is true, then the text is checked for a pair of
-- onlyinclude tags. If these are found (even if they're in the wrong
-- order), then the start of the page is treated as though it is preceded
-- by a closing onlyinclude tag.
-- Note 1: unlike other parser extension tags, onlyinclude tags are case-
-- sensitive and cannot contain whitespace.
-- Note 2: onlyinclude tags *can* be implicitly closed by the end of the
-- text, but the hard requirement above means this can only happen if
-- either the tags are in the wrong order or there are multiple onlyinclude
-- blocks.
local function do_parse(self, transcluded)
self.current_layer._parse_data.handler = handle_start
self:set_pattern(".")
self.section = 0
if not transcluded then
return
end
self.transcluded = true
local text = self.text
-- Both searches pass `true` as the fourth argument (a plain, non-pattern
-- search, assuming `find` is string.find) and start from the default position.
if find(text, "</onlyinclude>", nil, true) then
local head = find(text, "<onlyinclude>", nil, true)
if head then
self.onlyinclude = true
-- "<onlyinclude>" is 13 characters long, so head + 13 is the position
-- immediately after the first opening tag.
self:jump(head + 13)
end
end
end
-- If the first character is "=", try parsing it as a heading.
-- If the first character is "=", try parsing it as a heading.
function handle_start(self, this)
function handle_start(self, this)
local layer = self[-1]
self.current_layer._parse_data.handler = main_handler
layer.handler, layer.pattern = main_handler, "[\n<{]"
self:set_pattern("[\n<{]")
if this == "=" then
if this == "=" then
return self:heading()
return self:heading()
Line 952: Line 1,359:
["<"] = Parser.tag,
["<"] = Parser.tag,
["{"] = function(self)
["{"] = function(self, this)
if self:read(1) == "{" then
if self:read(1) == "{" then
self:template_or_argument()
self:template_or_parameter()
return self:consume()
return self:consume()
end
end
self:emit("{")
self:emit(this)
end,
end,
Line 964: Line 1,371:
})
})
return main_handler(self, ...)
return main_handler(self, ...)
end
-- Top-level route method (named as "do_parse" in the route passed to
-- Parser:parse). Installs handle_start on the top layer with a pattern
-- matching any single character, and resets the section counter to 0.
-- If `transcluded` is true, then the text is checked for a pair of
-- onlyinclude tags. If these are found (even if they're in the wrong
-- order), then the start of the page is treated as though it is preceded
-- by a closing onlyinclude tag.
-- Note 1: unlike other parser extension tags, onlyinclude tags are case-
-- sensitive and cannot contain whitespace.
-- Note 2: onlyinclude tags *can* be implicitly closed by the end of the
-- text, but the hard requirement above means this can only happen if
-- either the tags are in the wrong order or there are multiple onlyinclude
-- blocks.
function Parser:do_parse(transcluded)
local layer = self[-1]
layer.handler, layer.pattern = handle_start, "."
self.section = 0
if not transcluded then
return
end
self.transcluded = true
local text = self.text
-- Both searches start at position 1 and pass `true` as the fourth argument
-- (a plain, non-pattern search, assuming `find` is string.find).
if find(text, "</onlyinclude>", 1, true) then
local head = find(text, "<onlyinclude>", 1, true)
if head then
self.onlyinclude = true
-- "<onlyinclude>" is 13 characters long, so head + 13 is the position
-- immediately after the first opening tag.
self.head = head + 13
end
end
end
end
Line 998: Line 1,377:
return (select(2, Parser:parse{
return (select(2, Parser:parse{
text = text_type == "string" and text or
text = text_type == "string" and text or
text_type == "number" and format("%.14g", text) or
text_type == "number" and tostring(text) or
error("bad argument #1 (string expected, got " .. text_type .. ")"),
error("bad argument #1 (string expected, got " .. text_type .. ")"),
node = {Wikitext, true},
node = {Wikitext, true},
route = {"do_parse", transcluded}
route = {do_parse, transcluded}
}))
}))
end
end
parse = export.parse
parse = export.parse
end
-- Parses `text` and returns the result of calling iterate_nodes("template")
-- on the parsed tree. The second argument given to parse() is the negation of
-- `not_transcluded`, so the text is parsed as transcluded unless
-- `not_transcluded` is truthy.
function export.find_templates(text, not_transcluded)
return parse(text, not not_transcluded):iterate_nodes("template")
end
end


do
do
local page_title = mw.title.getCurrentTitle()
local link_parameter_1, link_parameter_2
local namespace_has_subpages = mw.site.namespaces[page_title.namespace].hasSubpages
local raw_pagename = page_title.fullText
local parser_functions_s = data.parser_functions_case_sensitive
local function get_link_parameter_1()
local parser_functions_i = data.parser_functions_case_insensitive
link_parameter_1, get_link_parameter_1 = (data or get_data()).template_link_param_1, nil
local parser_variables_s = data.parser_variables_case_sensitive
return link_parameter_1
local parser_variables_i = data.parser_variables_case_insensitive
end
local transclusion_modifiers = data.transclusion_modifiers
-- Mainspace titles starting with "#" should be invalid, but a bug in
local function get_link_parameter_2()
-- mw.title.new means a title object is returned that has the empty string
link_parameter_2, get_link_parameter_2 = (data or get_data()).template_link_param_2, nil
-- for prefixedText, so we need to filter them out. Interwiki links aren't
return link_parameter_2
-- valid as templates, either.
local function is_valid_title(title)
return title and
#title.prefixedText > 0 and
#title.interwiki == 0
end
end
function export.parseTemplateName(name, has_args, fragment)
-- Generate a link. If the target title doesn't have a fragment, use "#top"
local chunks, colon, start, n, p = {}, find(name, ":", 1, true), 1, 0, 0
-- (which is an implicit anchor at the top of every page), as this ensures
while colon do
-- self-links still display as links, since bold display is distracting and
-- Pattern is a PHP left-trim.
-- unintuitive for template links.
local chunk = match(sub(name, start, colon), "[^%z\t-\v\r ].*") or ""
local function link_page(title, display)
local normalized = upper(chunk)
local fragment = title.fragment
local priority = transclusion_modifiers[normalized]
if fragment == "" then
if not (priority and priority > p) then
fragment = "top"
local pf = parser_functions_s[chunk] or parser_functions_i[normalized]
if pf then
n = n + 1
chunks[n] = normalized
return chunks, sub(name, colon + 1)
end
break
end
n = n + 1
chunks[n] = normalized
start, p = colon + 1, priority
colon = find(name, ":", start, true)
end
end
if start > 1 then
return format(
name = sub(name, start)
"[[:%s|%s]]",
encode_uri(title.prefixedText .. "#" .. fragment, "WIKI"),
display
)
end
-- pf_arg1 or pf_arg2 may need to be linked if a given parser function
-- treats them as a pagename. If a key exists in `namespace`, the value is
-- the namespace for the page: if not 0, then the namespace prefix will
-- always be added to the input (e.g. {{#invoke:}} can only target the
-- Module: namespace, so inputting "Template:foo" gives
-- "Module:Template:foo", and "Module:foo" gives "Module:Module:foo").
-- However, this isn't possible with mainspace (namespace 0), so prefixes
-- are respected. make_title() handles all of this automatically.
local function finalize_arg(pagename, namespace)
if namespace == nil then
return pagename
end
end
name = php_trim(name)
local title = make_title(namespace, pagename)
-- Parser variables can only take SUBST:/SAFESUBST: as modifiers.
return title and not title.isExternal and link_page(title, pagename) or pagename
if not has_args and p <= 1 then
end
local pv = parser_variables_s[name] or parser_variables_i[upper(name)]
if pv then
local function render_title(name, args)
n = n + 1
-- parse_template_name returns a table of transclusion modifiers plus
chunks[n] = pv
-- the normalized template/magic word name, which will be used as link
return chunks
-- targets. The third return value pf_arg1 is the first argument of a
end
-- a parser function, which comes after the colon (e.g. "foo" in
-- "{{#IF:foo|bar|baz}}"). This means args[1] (i.e. the first argument
-- that comes after a pipe is actually argument 2, and so on. Note: the
-- second parameter of parse_template_name checks if there are any
-- arguments, since parser variables cannot take arguments (e.g.
-- {{CURRENTYEAR}} is a parser variable, but {{CURRENTYEAR|foo}}
-- transcludes "Template:CURRENTYEAR"). In such cases, the returned
-- table explicitly includes the "Template:" prefix in the template
-- name. The third parameter instructs it to retain any fragment in the
-- template name in the returned table, if present.
local chunks, subclass, pf_arg1 = parse_template_name(
name,
args and pairs(args)(args) ~= nil,
true
)
if chunks == nil then
return name, args
end
end
-- Handle relative template names.
local chunks_len = #chunks
if namespace_has_subpages then
-- Additionally, generate the corresponding table `rawchunks`, which
-- If the name starts with "/", it's treated as a subpage of the
-- is a list of colon-separated chunks in the raw input. This is used
-- current page. Final slashes are trimmed, but this can't affect
-- to retrieve the display forms for each chunk.
-- the intervening slash (e.g. {{///}} refers to "{{PAGENAME}}/").
local rawchunks = split(name, ":")
if sub(name, 1, 1) == "/" then
for i = 1, chunks_len - 1 do
name = raw_pagename .. (match(name, "^/.*[^/]") or "/")
chunks[i] = format(
-- If it starts with "../", trim it and any that follow, and go up
"[[%s|%s]]",
-- that many subpage levels. Then, treat any additional text as
encode_uri((magic_words or get_magic_words())[sub(chunks[i], 1, -2)].transclusion_modifier, "WIKI"),
-- a subpage of that page; final slashes are trimmed.
rawchunks[i]
elseif match(name, "^()%.%./") then
)
local n = 4
while sub(name, n, n + 2) == "../" do
n = n + 3
end
-- Retain an initial "/".
name = sub(name, n - 1)
-- Trim the relevant number of subpages from the pagename.
local pagename, i = reverse(raw_pagename), 0
for _ = 1, (n - 1) / 3 do
i = find(pagename, "/", i + 1, true)
-- Fail if there aren't enough slashes.
if not i then
return nil
end
end
-- Add the subpage text; since the intervening "/" is retained
-- in `name`, it can be trimmed along with any other final
-- slashes (e.g. {{..///}} refers to "{{BASEPAGENAME}}".)
name = reverse(sub(pagename, i + 1)) .. (match(name, "^.*[^/]") or "")
end
end
end
local title = new_title(name, 10)
local chunk = chunks[chunks_len]
if not is_valid_title(title) then
-- If it's a template, return a link to it with link_page, concatenating
return nil
-- the remaining chunks in `rawchunks` to form the display text.
-- Use new_title with the default namespace 10 (Template:) to generate
-- a target title, which is the same setting used for retrieving
-- templates (including those in other namespaces, as prefixes override
-- the default).
if subclass == "template" then
chunks[chunks_len] = link_page(
new_title(chunk, 10),
concat(rawchunks, "&#58;", chunks_len) -- :
)
return concat(chunks, "&#58;"), args -- :
elseif subclass == "parser variable" then
chunks[chunks_len] = format(
"[[%s|%s]]",
encode_uri((magic_words or get_magic_words())[chunk].parser_variable, "WIKI"),
rawchunks[chunks_len]
)
return concat(chunks, "&#58;"), args -- :
end
end
-- If `fragment` is set, save the original title's fragment, since it
-- Otherwise, it must be a parser function.
-- won't carry through to any redirect targets.
local mgw_data = (magic_words or get_magic_words())[sub(chunk, 1, -2)]
if fragment then
local link = mgw_data.parser_function or mgw_data.transclusion_modifier
fragment = title.fragment
local pf_arg2 = args and args[1] or nil
-- Some magic words have different links, depending on whether argument
-- 2 is specified (e.g. "baz" in {{foo:bar|baz}}).
if type(link) == "table" then
link = pf_arg2 and link[2] or link[1]
end
end
-- Resolve any redirects. Unlike links, double redirects seem to work
chunks[chunks_len] = format(
-- (but not higher). If the redirect target is an interwiki link, then
"[[%s|%s]]",
-- the template won't fail, but the redirect page itself gets
encode_uri(link, "WIKI"),
-- transcluded (i.e. the template name shouldn't be normalized to the
rawchunks[chunks_len]
-- target).
)
for _ = 1, 2 do
-- #TAG: has special handling, because documentation links for parser
local redirect = title.redirectTarget
-- extension tags come from [[Module:data/parser extension tags]].
if not is_valid_title(redirect) then
if chunk == "#TAG:" then
break
-- Tags are only case-insensitive with ASCII characters.
end
local tag = (parser_extension_tags or get_parser_extension_tags())[lower(php_trim(pf_arg1))]
title = redirect
if tag then
end
pf_arg1 = format(
local namespace, chunk = title.namespace
"[[%s|%s]]",
-- Normalize name.
encode_uri(tag, "WIKI"),
if namespace == 10 then
pf_arg1
-- If in the template namespace and it shares a name with a magic
)
-- word, it needs the prefix "Template:".
local text = title.text
if parser_functions_s[text] or parser_variables_s[text] then
chunk = title.prefixedText
else
local normalized = upper(text)
if (
parser_functions_i[normalized] or
parser_variables_i[normalized] or
transclusion_modifiers[normalized]
) then
chunk = title.prefixedText
elseif not find(text, ":", 1, true) then
chunk = text
-- Also if the prefix is necessary for disambiguation (e.g.
-- "Template:Category:Foo" can't be abbreviated to
-- "Category:Foo").
else
local check = new_title(text, 10)
chunk = check and title_equals(title, check) and text or title.prefixedText
end
end
end
-- If not in the template namespace, include the prefix (or ":" if
-- Otherwise, finalize pf_arg1 and add it to `chunks`.
-- mainspace).
else
else
chunk = namespace == 0 and ":" .. title.text or title.prefixedText
pf_arg1 = finalize_arg(pf_arg1, (link_parameter_1 or get_link_parameter_1())[chunk])
end
end
-- Set the fragment (if applicable).
chunks[chunks_len + 1] = pf_arg1
if fragment then
-- Finalize pf_arg2 (if applicable), then return.
chunk = chunk .. "#" .. fragment
if pf_arg2 then
args = shallow_copy(args) -- Avoid destructively modifying args.
args[1] = finalize_arg(pf_arg2, (link_parameter_2 or get_link_parameter_2())[chunk])
end
end
chunks[n + 1] = chunk
return concat(chunks, "&#58;"), args -- :
return chunks
end
end
parse_template_name = export.parseTemplateName
end
function export.buildTemplate(title, args)
 
local output = {title}
function export.parseTemplate(text, not_transcluded)
if not args then
text = parse(text, not not_transcluded)
return output
if type_or_class(text) == "template" then
local name = text:get_name()
if name then
return name, text:get_params()
end
end
end
-- Iterate over all numbered parameters in order, followed by any
return nil, nil
-- remaining parameters in codepoint order. Implicit parameters are
end
-- used wherever possible, even if explicit numbers are interpolated
 
-- between them (e.g. 0 would go before any implicit parameters, and
do
-- 2.5 between 2 and 3).
local function next_template(iter)
-- TODO: handle "=" and "|" in params/values.
while true do repeat -- break acts like continue
local implicit
local node = iter()
for k, v in sorted_pairs(args) do
if not node then
if type(k) == "number" and k >= 1 and k % 1 == 0 then
return nil, nil, nil, nil
if implicit == nil then
elseif type_or_class(node) ~= "template" then
implicit = table_len(args)
break
end
insert(output, k <= implicit and v or k .. "=" .. v)
else
insert(output, k .. "=" .. v)
end
end
local name = node:get_name()
end
if name then
return output
return name, node:get_params(), node.raw, node.pos
end
until true end
end
end
build_template = export.buildTemplate
function export.findTemplates(text, not_transcluded)
function export.templateLink(title, args, no_link)
return next_template, parse(text, not not_transcluded):__pairs("next_node")
if not no_link then
title, args = render_title(title, args)
end
local output = build_template(title, args)
for i = 1, #output do
output[i] = encode_entities(output[i], "={}", true, true)
end
return tostring(html_create("code")
:css("white-space", "pre-wrap")
:wikitext("&#123;&#123;" .. concat(output, "&#124;") .. "&#125;&#125;") -- {{ | }}
)
end
end
end
end


do
do
local function next_argument(iter)
function export.find_parameters(text, not_transcluded)
while true do
return parse(text, not not_transcluded):iterate_nodes("parameter")
local node = iter()
if not node then
return nil, nil, nil, nil
elseif type_or_class(node) == "argument" then
local args = iter.args
return node:get_name(args), node:get_default(args), node.raw, node.pos
end
end
end
end
function export.findArguments(text, args, not_transcluded)
function export.displayParameter(name, default)
local iter = parse(text, not not_transcluded):__pairs("next_node")
return tostring(html_create("code")
iter.args = args
:css("white-space", "pre-wrap")
return next_argument, iter
:wikitext("&#123;&#123;&#123;" .. concat({name, default}, "&#124;") .. "&#125;&#125;&#125;") -- {{{ | }}}
)
end
end
end
end
Line 1,218: Line 1,593:
end
end
local function next_heading(iter)
-- FIXME: should headings which contain "\n" be returned? This may depend
while true do repeat -- break acts like continue
-- on variable factors, like template expansion. They iterate the heading
local node = iter()
-- count number, but fail on rendering. However, in some cases a different
if not node then
-- heading might still be rendered due to intermediate equals signs; it
return nil, nil, nil, nil
-- may even be of a different heading level: e.g., this is parsed as an
elseif type_or_class(node) ~= "heading" then
-- L2 heading with a newline (due to the wikilink block), but renders as the
break
-- L1 heading "=foo[[". Section edit links are sometimes (but not always)
end
-- present in such cases.
local level = node.level
if level < iter.i or level > iter.j then
break
end
local name = node:get_name()
if not find(name, "\n", 1, true) then
return name, level, node.section, node.pos
end
until true end
end
-- Note: heading names can contain "\n" (e.g. inside nowiki tags), which
-- ==[[=
-- causes any heading containing them to fail. When that happens, the
-- ]]==
-- heading is not returned by this function, but the heading count is still
-- iterated, since Parsoid's preprocessor still counts it as a heading for
-- the purpose of heading strip markers (i.e. the section number).
-- TODO: section numbers for edit links seem to also include headings
-- TODO: section numbers for edit links seem to also include headings
-- nested inside templates and arguments (but apparently not those in
-- nested inside templates and parameters (but apparently not those in
-- parser extension tags - need to test this more). If we ever want to add
-- parser extension tags - need to test this more). If we ever want to add
-- section edit links manually, this will need to be accounted for.
-- section edit links manually, this will need to be accounted for.
function export.findHeadings(text, i, j)
function export.find_headings(text, i, j)
local iter = parse(text):__pairs("next_node")
local parsed = parse(text)
iter.i, iter.j = i and check_level(i) or 1, j and check_level(j) or 6
if i == nil and j == nil then
return next_heading, iter
return parse(text):iterate_nodes("heading")
end
i = i and check_level(i) or 1
j = j and check_level(j) or 6
return parsed:iterate(function(v)
if class_else_type(v) == "heading" then
local level = v.level
return level >= i and level <= j
end
end)
end
end
 
do
local function make_tag(tag)
return tostring(html_create("code")
:css("white-space", "pre-wrap")
:wikitext("&lt;" .. tag .. "&gt;")
)
end
 
-- Note: invalid tags are returned without links.
function export.wikitagLink(tag)
-- ">" can't appear in tags (including attributes) since the parser
-- unconditionally treats ">" as the end of a tag.
if find(tag, ">", nil, true) then
return make_tag(tag)
end
-- Tags must start "<tagname..." or "</tagname...", with no whitespace
-- after "<" or "</".
local slash, tagname, remainder = match(tag, "^(/?)([^/%s]+)(.*)$")
if not tagname then
return make_tag(tag)
end
-- Tags are only case-insensitive with ASCII characters.
local link = lower(tagname)
if (
-- onlyinclude tags must be lowercase and are whitespace intolerant.
link == "onlyinclude" and (link ~= tagname or remainder ~= "") or
-- Closing wikitags (except onlyinclude) can only have whitespace
-- after the tag name.
slash == "/" and not match(remainder, "^%s*()$") or
-- Tagnames cannot be followed immediately by "/", unless it comes
-- at the end (e.g. "<nowiki/>", but not "<nowiki/ >").
remainder ~= "/" and sub(remainder, 1, 1) == "/"
) then
-- Output with no link.
return make_tag(tag)
end
-- Partial transclusion tags aren't in the table of parser extension
-- tags.
if link == "noinclude" or link == "includeonly" or link == "onlyinclude" then
link = "mw:Transclusion#Partial transclusion"
else
link = (parser_extension_tags or get_parser_extension_tags())[link]
end
if link then
tag = gsub(tag, pattern_escape(tagname), "[[" .. replacement_escape(encode_uri(link, "WIKI")) .. "|%0]]", 1)
end
return make_tag(tag)
end
end
end
end
-- For convenience.
export.class_else_type = class_else_type


return export
return export

Latest revision as of 17:47, 4 November 2025

Documentation for this module may be created at Module:template parser/doc

--[[
NOTE: This module works by using recursive backtracking to build a node tree, which can then be traversed as necessary.

Because it is called by a number of high-use modules, it has been optimised for speed using a profiler, since it is used to scrape data from large numbers of pages very quickly. To that end, it rolls some of its own methods in cases where this is faster than using a function from one of the standard libraries. Please DO NOT "simplify" the code by removing these, since you are almost guaranteed to slow things down, which could seriously impact performance on pages which call this module hundreds or thousands of times.

It has also been designed to emulate the native parser's behaviour as much as possible, which in some cases means replicating bugs or unintuitive behaviours in that code; these should not be "fixed", since it is important that the outputs are the same. Most of these originate from deficient regular expressions, which can't be used here, so the bugs have to be manually reintroduced as special cases (e.g. onlyinclude tags being case-sensitive and whitespace intolerant, unlike all other tags). If any of these are fixed, this module should also be updated accordingly.
]]
local export = {}

local data_module = "Module:template parser/data"
local load_module = "Module:load"
local magic_words_data_module = "Module:data/magic words"
local pages_module = "Module:pages"
local parser_extension_tags_data_module = "Module:data/parser extension tags"
local parser_module = "Module:parser"
local scribunto_module = "Module:Scribunto"
local string_pattern_escape_module = "Module:string/patternEscape"
local string_replacement_escape_module = "Module:string/replacementEscape"
local string_utilities_module = "Module:string utilities"
local table_length_module = "Module:table/length"
local table_shallow_copy_module = "Module:table/shallowCopy"
local table_sorted_pairs_module = "Module:table/sortedPairs"
local title_is_title_module = "Module:title/isTitle"
local title_make_title_module = "Module:title/makeTitle"
local title_new_title_module = "Module:title/newTitle"
local title_redirect_target_module = "Module:title/redirectTarget"

local require = require

local m_parser = require(parser_module)
local mw = mw
local mw_title = mw.title
local mw_uri = mw.uri
local string = string
local table = table

local anchor_encode = mw_uri.anchorEncode
local build_template -- defined as export.buildTemplate below
local class_else_type = m_parser.class_else_type
local concat = table.concat
local encode_uri = mw_uri.encode
local find = string.find
local format = string.format
local gsub = string.gsub
local html_create = mw.html.create
local insert = table.insert
local is_node = m_parser.is_node
local lower = string.lower
local match = string.match
local next = next
local pairs = pairs
local parse -- defined as export.parse below
local parse_template_name -- defined below
local pcall = pcall
local rep = string.rep
local select = select
local sub = string.sub
local title_equals = mw_title.equals
local tostring = m_parser.tostring
local type = type
local umatch = mw.ustring.match

--[==[
Loaders for functions in other modules, which overwrite themselves with the target function when called. This ensures modules are only loaded when needed, retains the speed/convenience of locally-declared pre-loaded functions, and has no overhead after the first call, since the target functions are called directly in any subsequent calls.]==]
-- Each stub below fetches its target on the first call, rebinds its own
-- upvalue to that target, and then forwards the call through the upvalue.
local function decode_entities(...)
	local m_string_utilities = require(string_utilities_module)
	decode_entities = m_string_utilities.decode_entities
	return decode_entities(...)
end

local function encode_entities(...)
	local m_string_utilities = require(string_utilities_module)
	encode_entities = m_string_utilities.encode_entities
	return encode_entities(...)
end

local function get_link_target(...)
	local m_pages = require(pages_module)
	get_link_target = m_pages.get_link_target
	return get_link_target(...)
end

local function is_title(...)
	local target = require(title_is_title_module)
	is_title = target
	return is_title(...)
end

local function load_data(...)
	local m_load = require(load_module)
	load_data = m_load.load_data
	return load_data(...)
end

local function make_title(...)
	local target = require(title_make_title_module)
	make_title = target
	return make_title(...)
end

local function new_title(...)
	local target = require(title_new_title_module)
	new_title = target
	return new_title(...)
end

local function pattern_escape(...)
	local target = require(string_pattern_escape_module)
	pattern_escape = target
	return pattern_escape(...)
end

local function php_htmlspecialchars(...)
	local m_scribunto = require(scribunto_module)
	php_htmlspecialchars = m_scribunto.php_htmlspecialchars
	return php_htmlspecialchars(...)
end

local function php_ltrim(...)
	local m_scribunto = require(scribunto_module)
	php_ltrim = m_scribunto.php_ltrim
	return php_ltrim(...)
end

local function php_trim(...)
	local m_scribunto = require(scribunto_module)
	php_trim = m_scribunto.php_trim
	return php_trim(...)
end

local function redirect_target(...)
	local target = require(title_redirect_target_module)
	redirect_target = target
	return redirect_target(...)
end

local function replacement_escape(...)
	local target = require(string_replacement_escape_module)
	replacement_escape = target
	return replacement_escape(...)
end

local function scribunto_parameter_key(...)
	local m_scribunto = require(scribunto_module)
	scribunto_parameter_key = m_scribunto.scribunto_parameter_key
	return scribunto_parameter_key(...)
end

local function shallow_copy(...)
	local target = require(table_shallow_copy_module)
	shallow_copy = target
	return shallow_copy(...)
end

local function sorted_pairs(...)
	local target = require(table_sorted_pairs_module)
	sorted_pairs = target
	return sorted_pairs(...)
end

local function split(...)
	local m_string_utilities = require(string_utilities_module)
	split = m_string_utilities.split
	return split(...)
end

local function table_len(...)
	local target = require(table_length_module)
	table_len = target
	return table_len(...)
end

-- Note: the local is `uupper`, but the exported name it binds to is `upper`.
local function uupper(...)
	local m_string_utilities = require(string_utilities_module)
	uupper = m_string_utilities.upper
	return uupper(...)
end

--[==[
Loaders for objects, which load data (or some other object) into some variable, which can then be accessed as "foo or get_foo()", where the function get_foo sets the object to "foo" and then returns it. This ensures they are only loaded when needed, and avoids the need to check for the existence of the object each time, since once "foo" has been set, "get_foo" will not be called again.]==]
-- Data table loaded from `data_module`; nil until get_data() has run.
local data
local function get_data()
	data = load_data(data_module)
	get_data = nil -- One-shot: callers use `data or get_data()`.
	return data
end

-- Current frame object; nil until get_frame() has run.
local frame
local function get_frame()
	frame = mw.getCurrentFrame()
	get_frame = nil
	return frame
end

-- Data table loaded from `magic_words_data_module`; nil until requested.
local magic_words
local function get_magic_words()
	magic_words = load_data(magic_words_data_module)
	get_magic_words = nil
	return magic_words
end

-- Data table loaded from `parser_extension_tags_data_module`; nil until
-- requested.
local parser_extension_tags
local function get_parser_extension_tags()
	parser_extension_tags = load_data(parser_extension_tags_data_module)
	get_parser_extension_tags = nil
	return parser_extension_tags
end

------------------------------------------------------------------------------------
--
-- Nodes
--
------------------------------------------------------------------------------------

local Node = m_parser.node()
local new_node = Node.new

-- Expands `obj` with the given frame arguments if it is a node; anything else
-- (e.g. a plain string) is passed back unchanged.
local function expand(obj, frame_args)
	if not is_node(obj) then
		return obj
	end
	return obj:expand(frame_args) or obj
end
export.expand = expand

-- Default expansion: expand every child in order and join the results.
function Node:expand(frame_args)
	local pieces, len = {}, #self
	for index = 1, len do
		pieces[index] = expand(self[index], frame_args)
	end
	return concat(pieces)
end

local Wikitext = Node:new_class("wikitext")

-- Collapses `this` to the simplest equivalent value:
--  * a non-table value is used as-is;
--  * a one-item table collapses to that item;
--  * a table that table.concat accepts collapses to the joined string;
--  * anything else becomes a Wikitext node.
-- force_node ensures the output will always be a Wikitext node.
function Wikitext:new(this, force_node)
	if type(this) == "table" then
		if #this == 1 then
			local only = this[1]
			return force_node and class_else_type(only) ~= "wikitext" and new_node(self, this) or only
		end
		-- concat throws if any item can't be joined (e.g. a child node), in
		-- which case the table has to stay a node.
		local joined_ok, joined = pcall(concat, this)
		if not joined_ok then
			return new_node(self, this)
		end
		this = joined
	end
	return force_node and new_node(self, {this}) or this
end

-- A parameter node holds at most two values: [1] is the parameter name and
-- [2] is the default value. Anything after that is discarded, so "{{{a|b|c}}}"
-- is parameter "a" with default value "b" (*not* "b|c").
local Parameter = Node:new_class("parameter")

function Parameter:new(this)
	local default = this[2]
	-- A default parsed as an argument (key/value pair) is turned back into
	-- wikitext by reinserting the "=" between its two parts.
	if class_else_type(default) == "argument" then
		insert(default, 2, "=")
		default = Wikitext:new(default)
	end
	if this[3] ~= nil then
		-- Drop everything after the default.
		return new_node(self, {this[1], default})
	end
	this[2] = default
	return new_node(self, this)
end

function Parameter:__tostring()
	local pieces, len = {}, #self
	for index = 1, len do
		pieces[index] = tostring(self[index])
	end
	return "{{{" .. concat(pieces, "|") .. "}}}"
end

-- Returns the expanded parameter name, passed through
-- scribunto_parameter_key.
function Parameter:get_name(frame_args)
	local raw_name = expand(self[1], frame_args)
	return scribunto_parameter_key(raw_name)
end

-- Returns the expanded default value, or the literal "{{{name}}}" if the
-- parameter has no default.
function Parameter:get_default(frame_args)
	local default = self[2]
	if default == nil then
		return "{{{" .. expand(self[1], frame_args) .. "}}}"
	end
	return expand(default, frame_args)
end

-- Resolution order: the value supplied in `frame_args`, then the default,
-- then the literal "{{{name}}}". With no `frame_args` at all, only the
-- default is considered.
function Parameter:expand(frame_args)
	if frame_args == nil then
		return self:get_default()
	end
	local name = expand(self[1], frame_args)
	local supplied = frame_args[scribunto_parameter_key(name)]
	if supplied ~= nil then
		return supplied
	end
	local default = self[2]
	if default == nil then
		return "{{{" .. name .. "}}}"
	end
	return expand(default, frame_args)
end

local Argument = Node:new_class("argument")

-- An argument with no key collapses to plain wikitext; otherwise the node is
-- the pair {key, value}.
function Argument:new(this)
	local key = this._parse_data.key
	local value = Wikitext:new(this)
	if key ~= nil then
		return new_node(self, {Wikitext:new(key), value})
	end
	return value
end

function Argument:__tostring()
	local key, value = tostring(self[1]), tostring(self[2])
	return key .. "=" .. value
end

function Argument:expand(frame_args)
	local key = expand(self[1], frame_args)
	local value = expand(self[2], frame_args)
	return key .. "=" .. value
end

local Template = Node:new_class("template")

-- Serialises back to wikitext: the children joined by "|" inside "{{ }}".
function Template:__tostring()
	local pieces, len = {}, #self
	for index = 1, len do
		pieces[index] = tostring(self[index])
	end
	return "{{" .. concat(pieces, "|") .. "}}"
end

-- Normalize the template name, check it's a valid template, then memoize results (using false for invalid titles).
-- Parser functions (e.g. {{#IF:a|b|c}}) need to have the first argument extracted from the title, as it comes after the colon. Because of this, the parser function and first argument are memoized as a table.
-- FIXME: Some parser functions have special argument handling (e.g. {{#SWITCH:}}).
do
	local templates, parser_variables, parser_functions = {}, {}, {}
	
	-- Looks up `chunk` in the magic words data: first as given, then
	-- uppercased, where the uppercased entry only counts if it is not flagged
	-- `case_sensitive`. Returns nothing if there is no match.
	local function retrieve_magic_word_data(chunk)
		local words = magic_words or get_magic_words()
		local exact = words[chunk]
		if exact then
			return exact
		end
		local folded = words[uupper(chunk)]
		if folded and not folded.case_sensitive then
			return folded
		end
	end
	
	-- Returns the name needed to transclude the title object `title` with
	-- {{ }} syntax. Raises an error if `title` is not a title object or has
	-- an interwiki prefix. Where a namespace prefix is required and the
	-- `shortcut` flag is set, the abbreviated prefix is used if one exists
	-- (e.g. "Template:PAGENAME" becomes "T:PAGENAME").
	local function get_template_invocation_name(title, shortcut)
		if not is_title(title) or title.isExternal then
			error("Template invocations require a valid page title, which cannot contain an interwiki prefix.")
		end
		local namespace = title.namespace
		-- Outside the template namespace the prefix is always needed (or ":"
		-- for mainspace).
		if namespace ~= 10 then
			return get_link_target(title, shortcut)
		end
		local text = title.text
		local fragment = title.fragment
		if fragment and fragment ~= "" then
			text = text .. "#" .. fragment
		end
		-- Inside the template namespace, the prefix is only needed when the
		-- bare name would be read as a magic word. Check the part before the
		-- first colon if there is one, and the whole name otherwise.
		local colon = find(text, ":", nil, true)
		local mgw_data = retrieve_magic_word_data(colon and sub(text, 1, colon - 1) or text)
		if not colon then
			if mgw_data and mgw_data.parser_variable then
				return get_link_target(title, shortcut) or text
			end
			return text
		end
		if mgw_data and (mgw_data.parser_function or mgw_data.transclusion_modifier) then
			return get_link_target(title, shortcut)
		end
		-- The prefix is also needed if "Template:" is necessary for
		-- disambiguation (e.g. "Template:Category:Foo" can't be called with
		-- "Category:Foo").
		local check = new_title(text, namespace)
		if check and title_equals(title, check) then
			return text
		end
		return (get_link_target(title, shortcut))
	end
	export.getTemplateInvocationName = get_template_invocation_name
	
	-- Splits a raw template name into its leading magic-word chunks and the
	-- final target, and classifies the result.
	--
	-- Parameters:
	--   name: the (already expanded) text before the first pipe.
	--   has_args: truthy if the call has any arguments after the name; this
	--     disables the parser variable check.
	--   fragment: if falsy, the title's fragment is blanked before the
	--     invocation name is generated.
	--   force_transclusion: passed straight through to redirect_target().
	--
	-- Returns one of:
	--   chunks, "parser function", pf_arg1 - the last chunk is the parser
	--     function name plus ":", and pf_arg1 is the raw text after its colon;
	--   chunks, "parser variable" - the last chunk is the variable's name;
	--   chunks, "template" - the last chunk is the template invocation name;
	--   nil - the name does not give a usable (non-interwiki) title.
	-- In every non-nil case, any preceding chunks are the names of the
	-- modifiers that were consumed, each with ":" appended.
	function parse_template_name(name, has_args, fragment, force_transclusion)
		-- `start` is where the current chunk begins, `n` is the number of
		-- chunks stored so far, and `p` is the priority of the last modifier
		-- that was accepted.
		local chunks, colon, start, n, p = {}, find(name, ":", nil, true), 1, 0, 0
		while colon do
			local mgw_data = retrieve_magic_word_data(php_ltrim(sub(name, start, colon - 1)))
			if not mgw_data then
				break
			end
			local priority = mgw_data.priority
			-- A chunk is only consumed as a modifier if it has a priority
			-- that is strictly higher than the previous modifier's. Otherwise
			-- it is either a parser function (which ends parsing), or part of
			-- the template name.
			if not (priority and priority > p) then
				local pf = mgw_data.parser_function and mgw_data.name or nil
				if pf then
					n = n + 1
					chunks[n] = pf .. ":"
					return chunks, "parser function", sub(name, colon + 1)
				end
				break
			end
			n = n + 1
			chunks[n] = mgw_data.name .. ":"
			start, p = colon + 1, priority
			colon = find(name, ":", start, true)
		end
		-- Strip any modifiers that were consumed.
		if start > 1 then
			name = sub(name, start)
		end
		name = php_trim(name)
		-- Parser variables can only take SUBST:/SAFESUBST: as modifiers.
		if not has_args and p <= 1 then
			local mgw_data = retrieve_magic_word_data(name)
			local pv = mgw_data and mgw_data.parser_variable and mgw_data.name or nil
			if pv then
				n = n + 1
				chunks[n] = pv
				return chunks, "parser variable"
			end
		end
		-- Get the template title with the custom new_title() function in
		-- [[Module:title/newTitle]], with `allowOnlyFragment` set to false
		-- (e.g. "{{#foo}}" is invalid) and `allowRelative` set to true, for
		-- relative links for namespaces with subpages (e.g. "{{/foo}}").
		local title = new_title(name, 10, false, true)
		if not (title and not title.isExternal) then
			return nil
		end
		-- Resolve any redirects. If the redirect target is an interwiki link,
		-- the template won't fail, but the redirect does not get resolved (i.e.
		-- the redirect page itself gets transcluded, so the template name
		-- should not be normalized to the target).
		local redirect = redirect_target(title, force_transclusion)
		if redirect and not redirect.isExternal then
			title = redirect
		end
		-- If `fragment` is not true, unset it from the title object to prevent
		-- it from being included by get_template_invocation_name.
		if not fragment then
			title.fragment = ""
		end
		chunks[n + 1] = get_template_invocation_name(title)
		return chunks, "template"
	end
	
	-- Resolves the name of the template node `self` and memoizes the result.
	--
	-- Returns:
	--   name, "parser variable"
	--   name, "parser function", pf_arg1 (the text after the function's colon)
	--   name, "template"
	--   nil, if the name is invalid or uses SUBST:.
	--
	-- Caches (all keyed by the expanded raw name):
	--   templates[name]: normalized template or parser function name, or
	--     false if resolution failed.
	--   parser_functions[name]: pf_arg1, for names which are parser functions.
	--   parser_variables[name]: normalized parser variable name, or false once
	--     the name has been resolved without arguments and found not to be a
	--     parser variable.
	--
	-- A name can resolve differently depending on whether the call has
	-- arguments: "{{CURRENTYEAR}}" is a parser variable, but
	-- "{{CURRENTYEAR|foo}}" is a template. An entry in `templates` written by
	-- a call with arguments therefore says nothing about the argument-free
	-- form, so the argument-free path only trusts `templates` once
	-- parser_variables[name] has been set to false.
	--
	-- Note: force_transclusion avoids incrementing the expensive parser
	-- function count by forcing transclusion instead. This should only be used
	-- when there is a real risk that the expensive parser function limit of
	-- 500 will be hit.
	local function process_name(self, frame_args, force_transclusion)
		local name = expand(self[1], frame_args)
		local has_args = #self > 1
		local use_template_cache = true
		if not has_args then
			local pv = parser_variables[name]
			if pv then
				return pv, "parser variable"
			end
			-- nil means the argument-free form hasn't been resolved yet.
			use_template_cache = pv == false
		end
		if use_template_cache then
			local norm = templates[name]
			if norm then
				local pf_arg1 = parser_functions[name]
				return norm, pf_arg1 and "parser function" or "template", pf_arg1
			elseif norm == false then
				return nil
			end
		end
		local chunks, subclass, pf_arg1 = parse_template_name(name, has_args, nil, force_transclusion)
		local chunk1 = chunks and chunks[1]
		-- Fail if invalid, and fail on SUBST:. Both failures apply with or
		-- without arguments.
		if not chunks or chunk1 == "SUBST:" then
			templates[name] = false
			if not has_args then
				parser_variables[name] = false
			end
			return nil
		end
		local output = chunks[#chunks]
		if subclass == "parser variable" then
			-- Only reachable without arguments. Any SAFESUBST: modifier is
			-- ignored.
			parser_variables[name] = output
			return output, subclass
		end
		-- From here on, the result is the same with or without arguments, so
		-- the argument-free path can use `templates` in future.
		if not has_args then
			parser_variables[name] = false
		end
		-- Any modifiers are ignored.
		if subclass == "parser function" then
			templates[name] = output
			parser_functions[name] = pf_arg1
			return output, "parser function", pf_arg1
		end
		-- Ignore SAFESUBST:, and treat MSGNW: as a parser function with the
		-- pagename as its first argument (ignoring any RAW: that comes after).
		if chunks[chunk1 == "SAFESUBST:" and 2 or 1] == "MSGNW:" then
			templates[name] = "MSGNW:"
			parser_functions[name] = output
			return "MSGNW:", "parser function", output
		end
		-- Ignore any remaining modifiers, as they've done their job.
		templates[name] = output
		return output, subclass
	end
	
	-- Returns the normalized name only (nil if the name is invalid); the
	-- subclass and parser function argument from process_name are dropped.
	function Template:get_name(frame_args, force_transclusion)
		local name = process_name(self, frame_args, force_transclusion)
		return name
	end
	
	-- Returns a table of the call's expanded arguments, or nil if the name is
	-- invalid. Parser variables give an empty table. For parser functions,
	-- argument 1 is the text after the colon and the rest follow in order,
	-- untrimmed. For templates, "key=value" arguments are stored under the
	-- normalized key with the value trimmed, and all other arguments are
	-- numbered from 1 and left untrimmed.
	function Template:get_arguments(frame_args)
		local name, subclass, pf_arg1 = process_name(self, frame_args)
		if name == nil then
			return nil
		end
		local result = {}
		if subclass == "parser variable" then
			return result
		elseif subclass == "parser function" then
			result[1] = pf_arg1
			for index = 2, #self do
				result[index] = expand(self[index], frame_args) -- Not trimmed.
			end
			return result
		end
		local positional = 0
		for index = 2, #self do
			local arg = self[index]
			if class_else_type(arg) ~= "argument" then
				positional = positional + 1
				result[positional] = expand(arg, frame_args) -- Not trimmed.
			else
				local key = scribunto_parameter_key(expand(arg[1], frame_args))
				result[key] = php_trim((expand(arg[2], frame_args)))
			end
		end
		return result
	end

	-- BIG TODO: manual template expansion.
	-- Expands the template call. An invalid name gives back the call as
	-- literal wikitext, with its parts expanded. Everything else goes through
	-- frame:preprocess: parser variables by name alone, parser functions as
	-- the node's own string form (in a child frame carrying `frame_args`,
	-- where one can be created), and templates with each part expanded first.
	function Template:expand(frame_args)
		local name, subclass = process_name(self, frame_args)
		if name ~= nil then
			if subclass == "parser variable" then
				return (frame or get_frame()):preprocess("{{" .. name .. "}}")
			elseif subclass == "parser function" then
				local target = frame or get_frame()
				if frame_args ~= nil then
					-- newChild can throw, in which case the current frame is
					-- used as a fallback.
					local created, child = pcall(target.newChild, target, {args = frame_args})
					if created then
						target = child
					end
				end
				return target:preprocess(tostring(self))
			end
		end
		local pieces = {}
		for index = 1, #self do
			pieces[index] = expand(self[index], frame_args)
		end
		if name == nil then
			return "{{" .. concat(pieces, "|") .. "}}"
		end
		return (frame or get_frame()):preprocess("{{" .. concat(pieces, "|") .. "}}")
	end
end

local Tag = Node:new_class("tag")

-- Serialises the tag: the opening tag with its attributes escaped, then
-- either "/>" for a self-closing tag, or the contents and a closing tag.
function Tag:__tostring()
	local parts = {"<", self.name}
	local count = 2
	for attr, value in next, self:get_attributes() do
		count = count + 1
		parts[count] = " " .. php_htmlspecialchars(attr) .. "=\"" .. php_htmlspecialchars(value, "compat") .. "\""
	end
	if self.self_closing then
		return concat(parts) .. "/>"
	end
	local opening = concat(parts)
	return opening .. ">" .. concat(self) .. "</" .. self.name .. ">"
end

do
	-- Pattern for valid attribute names, loaded from the data module on first
	-- use; the loader deletes itself once it has run.
	local valid_attribute_name
	local function get_valid_attribute_name()
		valid_attribute_name = (data or get_data()).valid_attribute_name
		get_valid_attribute_name = nil
		return valid_attribute_name
	end
	
	-- Returns the tag's attributes as a table of lowercase name -> value.
	-- The raw attribute string is parsed on the first call and the resulting
	-- table is cached in self.attributes; a tag with no attribute string gets
	-- (and caches) an empty table.
	function Tag:get_attributes()
		local raw = self.attributes
		if not raw then
			self.attributes = {}
			return self.attributes
		elseif type(raw) == "table" then
			-- Already parsed.
			return raw
		end
		-- Drop the trailing slash of a self-closing tag.
		if sub(raw, -1) == "/" then
			raw = sub(raw, 1, -2)
		end
		local parsed, pos = {}, 1
		-- Semi-manual implementation of the native regex.
		while true do
			local name, after_name = match(raw, "([^\t\n\f\r />][^\t\n\f\r /=>]*)()", pos)
			if not name then
				break
			end
			pos = after_name
			local value
			local value_start = match(raw, "^[\t\n\f\r ]*=[\t\n\f\r ]*()", pos)
			if value_start then
				-- Either "", '' or the value ends on a space/at the end. Missing
				-- end quotes are repaired by closing the value at the end.
				value, pos = match(raw, "^\"([^\"]*)\"?()", value_start)
				if not value then
					value, pos = match(raw, "^'([^']*)'?()", value_start)
				end
				if not value then
					value, pos = match(raw, "^([^\t\n\f\r ]*)()", value_start)
				end
			end
			-- valid_attribute_name is a pattern matching a valid attribute name.
			-- Defined in the data due to its length - see there for more info.
			if umatch(name, valid_attribute_name or get_valid_attribute_name()) then
				-- Sanitizer applies PHP strtolower (ASCII-only).
				local key = lower(name)
				-- Whitespace runs collapse to one space before trimming and
				-- entity decoding; attributes with no value become "".
				local decoded = value and decode_entities(
					php_trim((gsub(value, "[\t\n\r ]+", " ")))
				)
				parsed[key] = decoded or ""
			end
		end
		self.attributes = parsed
		return parsed
	end
end

-- Expands the tag by preprocessing its string form with the current frame.
function Tag:expand()
	local current = frame or get_frame()
	return current:preprocess(tostring(self))
end

local Heading = Node:new_class("heading")

-- Creates a heading node from `this`. If it holds more than one item and
-- they can all be concatenated (i.e. concat() doesn't throw), they are
-- merged into a single string in a fresh table which keeps the `level`,
-- `section` and `index` fields; otherwise `this` is used unchanged.
function Heading:new(this)
	if #this <= 1 then
		return new_node(self, this)
	end
	local ok, merged = pcall(concat, this)
	if not ok then
		return new_node(self, this)
	end
	return new_node(self, {
		merged,
		level = this.level,
		section = this.section,
		index = this.index
	})
end

do
	local base_tostring = Node.__tostring

	-- Serialises the heading as its contents (via the base Node serialiser)
	-- wrapped on both sides in `level` equals signs.
	function Heading:__tostring()
		local marker = rep("=", self.level)
		local body = base_tostring(self)
		return marker .. body .. marker
	end
end

do
	local base_expand = Node.expand

	-- Expanded heading names can contain "\n" (e.g. inside nowiki tags), which
	-- causes any heading containing them to fail. However, in such cases, the
	-- native parser still treats it as a heading for the purpose of section
	-- numbers.
	-- Returns the expanded name, or nil if it contains a newline.
	local function validate_name(self, frame_args)
		local expanded = base_expand(self, frame_args)
		if not find(expanded, "\n", nil, true) then
			return expanded
		end
		return nil
	end
	
	-- Returns the trimmed, expanded heading name, or nil if it is invalid.
	function Heading:get_name(frame_args)
		local expanded = validate_name(self, frame_args)
		if expanded == nil then
			return nil
		end
		return php_trim(expanded) or nil
	end
	
	-- FIXME: account for anchor disambiguation.
	-- Returns the anchor for the heading (the anchor-encoded name with
	-- entities decoded), or nil if the name is invalid.
	function Heading:get_anchor(frame_args)
		local expanded = validate_name(self, frame_args)
		if expanded == nil then
			return nil
		end
		return decode_entities(anchor_encode(expanded)) or nil
	end
	
	-- Expands the heading's contents and wraps them in `level` equals signs.
	function Heading:expand(frame_args)
		local marker = rep("=", self.level)
		return marker .. base_expand(self, frame_args) .. marker
	end
end

------------------------------------------------------------------------------------
--
-- Parser
--
------------------------------------------------------------------------------------

-- The wikitext parser object, obtained from m_parser.string_parser(). The
-- Parser methods and handlers defined in the rest of this section are attached
-- to it.
local Parser = m_parser.string_parser()

-- Template or parameter.

-- Parsed by matching the opening braces innermost-to-outermost (ignoring lone closing braces). Parameters {{{ }}} take priority over templates {{ }} where possible, but a double closing brace will always result in a closure, even if there are 3+ opening braces.

-- For example, "{{{{foo}}}}" (4) is parsed as a parameter enclosed by single braces, and "{{{{{foo}}}}}" (5) is a parameter inside a template. However, "{{{{{foo }} }}}" is a template inside a parameter, due to "}}" forcing the closure of the inner node.
do
	-- Handlers. Each one replaces itself on first call with the value returned
	-- by self:switch(), then delegates to that replacement.
	local handle_name
	local handle_argument
	local handle_value
	
	-- Route function passed to self:try(): opens a sublayer for the name part
	-- of a template/parameter, handled by handle_name.
	local function do_template_or_parameter(self, inner_node)
		self:push_sublayer(handle_name)
		self:set_pattern("[\n<[{|}]")
		-- If a node has already been parsed, nest it at the start of the new
		-- outer node (e.g. when parsing "{{{{foo}}bar}}", the template
		-- "{{foo}}" is parsed first, since it's the innermost, and becomes the
		-- first node of the outer template).
		if inner_node then
			self:emit(inner_node)
		end
	end

	-- "|" in a name or key: emit the finished sublayer as Wikitext, then open
	-- a new sublayer for the next argument. The pattern for arguments also
	-- includes "=", which may separate a key from its value.
	local function pipe(self)
		self:emit(Wikitext:new(self:pop_sublayer()))
		self:push_sublayer(handle_argument)
		self:set_pattern("[\n<=[{|}]")
	end

	-- "}" in a name or key: if the next character is also "}", emit the
	-- finished sublayer as Wikitext and pop the layer; otherwise the brace is
	-- emitted as ordinary text.
	local function rbrace(self, this)
		if self:read(1) == "}" then
			self:emit(Wikitext:new(self:pop_sublayer()))
			return self:pop()
		end
		self:emit(this)
	end

	-- Handler for the name part (everything before the first "|"). Reaching
	-- the end of the text ("") fails the route.
	function handle_name(self, ...)
		handle_name = self:switch(handle_name, {
			["\n"] = Parser.heading_block,
			["<"] = Parser.tag,
			["["] = Parser.wikilink_block,
			["{"] = Parser.braces,
			["|"] = pipe,
			["}"] = rbrace,
			[""] = Parser.fail_route,
			[false] = Parser.emit
		})
		return handle_name(self, ...)
	end

	-- Handler for an argument in which no "=" has been seen yet, so it is
	-- either a positional argument or the key of a key=value argument.
	function handle_argument(self, ...)
		handle_argument = self:switch(handle_argument, {
			-- A heading block is only checked for with "==" here (passed as
			-- the second argument to heading_block in place of its default
			-- "="), so a newline followed by a single "=" falls through to the
			-- "=" handler below.
			["\n"] = function(self, this)
				return self:heading_block(this, "==")
			end,

			["<"] = Parser.tag,

			-- The sublayer so far is the key: pop it, open a sublayer for the
			-- value, and store the key in the new layer's parse data. The
			-- value pattern no longer includes "=".
			["="] = function(self)
				local key = self:pop_sublayer()
				self:push_sublayer(handle_value)
				self:set_pattern("[\n<[{|}]")
				self.current_layer._parse_data.key = key
			end,

			["["] = Parser.wikilink_block,
			["{"] = Parser.braces,
			["|"] = pipe,
			["}"] = rbrace,
			[""] = Parser.fail_route,
			[false] = Parser.emit
		})
		return handle_argument(self, ...)
	end

	-- Handler for the value of a key=value argument. Same as handle_name,
	-- except that the finished sublayer is emitted as an Argument rather than
	-- as Wikitext.
	function handle_value(self, ...)
		handle_value = self:switch(handle_value, {
			["\n"] = Parser.heading_block,
			["<"] = Parser.tag,
			["["] = Parser.wikilink_block,
			["{"] = Parser.braces,

			["|"] = function(self)
				self:emit(Argument:new(self:pop_sublayer()))
				self:push_sublayer(handle_argument)
				self:set_pattern("[\n<=[{|}]")
			end,

			["}"] = function(self, this)
				if self:read(1) == "}" then
					self:emit(Argument:new(self:pop_sublayer()))
					return self:pop()
				end
				self:emit(this)
			end,

			[""] = Parser.fail_route,
			[false] = Parser.emit
		})
		return handle_value(self, ...)
	end
	
	-- Parses a run of 2+ opening braces at the current head, matching them
	-- innermost-to-outermost, and emits the result.
	-- Returns the number of opening braces left unmatched, plus `failed`,
	-- which is true if an attempt found no closure (nil otherwise).
	function Parser:template_or_parameter()
		local text, head, node_to_emit, failed = self.text, self.head
		-- Comments/tags interrupt the brace count.
		local braces = match(text, "^{+()", head) - head
		self:advance(braces)
		while true do
			-- Each pass tries to close one more level, nesting the node from
			-- the previous pass (if any) at the start of the new one.
			local success, node = self:try(do_template_or_parameter, node_to_emit)
			-- Fail means no "}}" or "}}}" was found, so emit any remaining
			-- unmatched opening braces before any templates/parameters that
			-- were found.
			if not success then
				self:emit(rep("{", braces))
				failed = true
				break
			-- If there are 3+ opening and closing braces, it's a parameter.
			elseif braces >= 3 and self:read(2) == "}" then
				self:advance(3)
				braces = braces - 3
				node = Parameter:new(node)
			-- Otherwise, it's a template.
			else
				self:advance(2)
				braces = braces - 2
				node = Template:new(node)
			end
			-- The node starts at the first opening brace it consumed, and its
			-- raw text runs up to just before the current head.
			local index = head + braces
			node.index = index
			node.raw = sub(text, index, self.head - 1)
			node_to_emit = node
			-- Terminate once not enough braces remain for further matches.
			if braces == 0 then
				break
			-- Emit any stray opening brace before any matched nodes.
			elseif braces == 1 then
				self:emit("{")
				break
			end
		end
		if node_to_emit then
			self:emit(node_to_emit)
		end
		return braces, failed
	end
end

-- Tag.
do
	-- Table mapping lowercase tag names to the pattern for their end tag,
	-- loaded from the data module on first use; the loader deletes itself once
	-- it has run.
	local end_tags
	
	local function get_end_tags()
		end_tags, get_end_tags = (data or get_data()).end_tags, nil
		return end_tags
	end
	
	-- Handlers.
	local handle_start
	local handle_tag
	
	-- Route function passed to self:try(): records the tag's start position in
	-- the layer, sets handle_start as the handler, and advances past the
	-- current character (the opening "<", given how Parser:tag is used).
	local function do_tag(self)
		local layer = self.current_layer
		layer._parse_data.handler, layer.index = handle_start, self.head
		self:set_pattern("[%s/>]")
		self:advance()
	end
	
	-- Returns true if the (lowercase) tag name `this` is one whose tags are
	-- dropped while the contents are kept: includeonly when transcluded,
	-- noinclude and onlyinclude otherwise.
	local function is_ignored_tag(self, this)
		if self.transcluded then
			return this == "includeonly"
		end
		return this == "noinclude" or this == "onlyinclude"
	end
	
	-- Consumes an ignored tag up to the next ">" found from `head`, failing the
	-- route if there isn't one. Returns the popped layer with `ignored` set,
	-- which tells Parser:tag not to emit it.
	local function ignored_tag(self, text, head)
		local loc = find(text, ">", head, true)
		if not loc then
			return self:fail_route()
		end
		self:jump(loc)
		local tag = self:pop()
		tag.ignored = true
		return tag
	end
	
	-- Handler for the start of the tag, i.e. the tag name.
	function handle_start(self, this)
		-- Closing tag: only valid here for ignored tags, which are consumed;
		-- any other closing tag fails the route.
		if this == "/" then
			local text, head = self.text, self.head + 1
			local this = match(text, "^[^%s/>]+", head)
			if this and is_ignored_tag(self, lower(this)) then
				head = head + #this
				-- The name must not be followed by "/" plus a character other
				-- than ">".
				if not match(text, "^/[^>]", head) then
					return ignored_tag(self, text, head)
				end
			end
			return self:fail_route()
		elseif this == "" then
			return self:fail_route()
		end
		-- Tags are only case-insensitive with ASCII characters.
		local raw_name = this
		this = lower(this)
		local end_tag_pattern = (end_tags or get_end_tags())[this]
		if not end_tag_pattern then -- Validity check.
			return self:fail_route()
		end
		local layer = self.current_layer
		local pdata = layer._parse_data
		-- NOTE(review): pdata.step is set elsewhere; presumably the length of
		-- the chunk just read, making `head` the position after the tag name -
		-- confirm.
		local text, head = self.text, self.head + pdata.step
		if match(text, "^/[^>]", head) then
			return self:fail_route()
		elseif is_ignored_tag(self, this) then
			return ignored_tag(self, text, head)
		-- If an onlyinclude tag is not ignored (and cannot be active since it
		-- would have triggered special handling earlier), it must be plaintext.
		elseif this == "onlyinclude" then
			return self:fail_route()
		elseif this == "noinclude" or this == "includeonly" then
			layer.ignored = true -- Ignored block.
			-- Keep the name as written, since handle_tag's check for a missing
			-- closing tag is case-sensitive.
			layer.raw_name = raw_name
		end
		layer.name, pdata.handler, pdata.end_tag_pattern = this, handle_tag, end_tag_pattern
		self:set_pattern(">")
	end
	
	-- Handler for the remainder of the tag: the attribute string, then the
	-- closing ">" of the opening tag, followed by the contents and end tag.
	function handle_tag(self, this)
		if this == "" then
			return self:fail_route()
		end
		local layer = self.current_layer
		-- Anything before the ">" is stored as the raw attribute string.
		if this ~= ">" then
			layer.attributes = this
			return
		elseif self:read(-1) == "/" then
			layer.self_closing = true
			return self:pop()
		end
		-- Look for the end tag; the text before it is emitted as a single raw
		-- string.
		local text, head = self.text, self.head + 1
		local loc1, loc2 = find(text, layer._parse_data.end_tag_pattern, head)
		if loc1 then
			if loc1 > head then
				self:emit(sub(text, head, loc1 - 1))
			end
			self:jump(loc2)
			return self:pop()
		-- noinclude and includeonly will tolerate having no closing tag, but
		-- only if given in lowercase. This is due to a preprocessor bug, as
		-- it uses a regex with the /i (case-insensitive) flag to check for
		-- end tags, but a simple array lookup with lowercase tag names when
		-- looking up which tags should tolerate no closing tag (exact match
		-- only, so case-sensitive).
		elseif layer.ignored then
			local raw_name = layer.raw_name
			if raw_name == "noinclude" or raw_name == "includeonly" then
				self:jump(#text)
				return self:pop()
			end
		end
		return self:fail_route()
	end
	
	-- Handles "<": skips HTML comments and (when onlyinclude is active) the
	-- text between "</onlyinclude>" and the next "<onlyinclude>"; otherwise
	-- tries to parse a tag. A failed parse emits "<" as text, and a successful
	-- one emits a Tag node unless the tag is marked as ignored.
	function Parser:tag()
		-- HTML comment.
		if self:read(1, 3) == "!--" then
			local text = self.text
			-- An unclosed comment runs to the end of the text.
			self:jump(select(2, find(text, "-->", self.head + 4, true)) or #text)
		-- onlyinclude tags (which must be lowercase with no whitespace).
		elseif self.onlyinclude and self:read(1, 13) == "/onlyinclude>" then
			local text = self.text
			-- If there is no further opening tag, skip to the end of the text.
			self:jump(select(2, find(text, "<onlyinclude>", self.head + 14, true)) or #text)
		else
			local success, tag = self:try(do_tag)
			if not success then
				self:emit("<")
			elseif not tag.ignored then
				self:emit(Tag:new(tag))
			end
		end
	end
end

-- Heading.
-- The preparser assigns each heading a number, which is used for things like section edit links. The preparser will only do this for heading blocks which aren't nested inside templates, parameters and parser tags. In some cases (e.g. when template blocks contain untrimmed newlines), a preparsed heading may not be treated as a heading in the final output. That does not affect the preparser, however, which will always count sections based on the preparser heading count, since it can't know what a template's final output will be.
do
	-- Handlers.
	local handle_start
	local handle_body
	local handle_possible_end
	
	-- Route function passed to self:try(): counts the run of "=" at the head,
	-- stores the count as the layer's provisional level, and advances past it.
	local function do_heading(self)
		local layer, head = self.current_layer, self.head
		layer._parse_data.handler, layer.index = handle_start, head
		self:set_pattern("[\t\n ]")
		-- Comments/tags interrupt the equals count.
		local eq = match(self.text, "^=+()", head) - head
		layer.level = eq
		self:advance(eq)
	end
	
	-- Route function used to look ahead after a point where the heading might
	-- end, to see what follows it on the line (see handle_possible_end).
	local function do_heading_possible_end(self)
		self.current_layer._parse_data.handler = handle_possible_end
		self:set_pattern("[\n<]")
	end
	
	-- Handler for the text immediately after the opening run of "=".
	function handle_start(self, ...)
		-- The line has ended with nothing but "=" (and possibly trailing
		-- whitespace/comments), so the heading consists only of equals signs.
		-- ===== is "=" as an L2; ======== is "==" as an L3 etc.
		local function newline(self)
			local layer = self.current_layer
			local eq = layer.level
			if eq <= 2 then
				return self:fail_route()
			end
			-- Calculate which equals signs determine the heading level.
			local level_eq = eq - (2 - eq % 2)
			level_eq = level_eq > 12 and 12 or level_eq
			-- Emit the excess.
			self:emit(rep("=", eq - level_eq))
			layer.level = level_eq / 2
			return self:pop()
		end
		
		-- Whitespace after the opening "=": look ahead. If the lookahead
		-- succeeds, the text it consumed is emitted and parsing continues as
		-- the heading body; if it fails, the line is treated the same as one
		-- which ends straight after the equals signs.
		local function whitespace(self)
			local success, possible_end = self:try(do_heading_possible_end)
			if success then
				self:emit(Wikitext:new(possible_end))
				self.current_layer._parse_data.handler = handle_body
				self:set_pattern("[\n<=[{]")
				return self:consume()
			end
			return newline(self)
		end
		
		handle_start = self:switch(handle_start, {
			["\t"] = whitespace,
			["\n"] = newline,
			[" "] = whitespace,
			[""] = newline,
			
			[false] = function(self)
				-- Emit any excess = signs once we know it's a conventional heading. Up till now, we couldn't know if the heading is just a string of = signs (e.g. ========), so it wasn't guaranteed that the heading text starts after the 6th.
				local layer = self.current_layer
				local eq = layer.level
				if eq > 6 then
					self:emit(1, rep("=", eq - 6))
					layer.level = 6
				end
				layer._parse_data.handler = handle_body
				self:set_pattern("[\n<=[{]")
				return self:consume()
			end
		})
		return handle_start(self, ...)
	end
	
	-- Handler for the heading text. A newline or the end of the text fails the
	-- route: a heading is only closed via the "=" handler.
	function handle_body(self, ...)
		handle_body = self:switch(handle_body, {
			["\n"] = Parser.fail_route,
			["<"] = Parser.tag,
			
			["="] = function(self)
				-- Comments/tags interrupt the equals count.
				local eq = match(self.text, "^=+", self.head)
				local eq_len = #eq
				self:advance(eq_len)
				-- If the lookahead succeeds, this run of "=" is part of the
				-- heading text, so emit it with what the lookahead consumed
				-- and carry on.
				local success, possible_end = self:try(do_heading_possible_end)
				if success then
					self:emit(eq)
					self:emit(Wikitext:new(possible_end))
					return self:consume()
				end
				-- Otherwise, this is the closing run. The level is the shorter
				-- of the two runs; any excess "=" on the longer side is kept
				-- as heading text (appended if trailing, emitted at position 1
				-- if leading).
				local layer = self.current_layer
				local level = layer.level
				if eq_len > level then
					self:emit(rep("=", eq_len - level))
				elseif level > eq_len then
					layer.level = eq_len
					self:emit(1, rep("=", level - eq_len))
				end
				return self:pop()
			end,
			
			["["] = Parser.wikilink_block,
			
			-- Passes true as fail_on_unclosed_braces to Parser:braces.
			["{"] = function(self, this)
				return self:braces(this, true)
			end,
			
			[""] = Parser.fail_route,
			[false] = Parser.emit
		})
		return handle_body(self, ...)
	end
	
	-- Handler for the lookahead route. It fails on a newline or the end of the
	-- text, pops at the first thing which isn't a tab, space or complete HTML
	-- comment, and emits tabs/spaces and skips comments in the meantime.
	function handle_possible_end(self, ...)
		handle_possible_end = self:switch(handle_possible_end, {
			["\n"] = Parser.fail_route,
			
			["<"] = function(self)
				if self:read(1, 3) ~= "!--" then
					return self:pop()
				end
				-- An unclosed comment is not skipped.
				local head = select(2, find(self.text, "-->", self.head + 4, true))
				if not head then
					return self:pop()
				end
				self:jump(head)
			end,
			
			[""] = Parser.fail_route,
			
			[false] = function(self, this)
				if not match(this, "^[\t ]+()$") then
					return self:pop()
				end
				self:emit(this)
			end
		})
		return handle_possible_end(self, ...)
	end
	
	-- Tries to parse a heading at the current head. On success, the section
	-- counter is incremented, the new count is stored on the heading, and a
	-- Heading node is emitted; on failure, a single "=" is emitted as text.
	function Parser:heading()
		local success, heading = self:try(do_heading)
		if success then
			local section = self.section + 1
			heading.section = section
			self.section = section
			self:emit(Heading:new(heading))
			return self:consume()
		else
			self:emit("=")
		end
	end
end

------------------------------------------------------------------------------------
--
-- Block handlers
--
------------------------------------------------------------------------------------

-- Block handlers.

-- These are blocks which can affect template/parameter parsing, since they're also parsed by Parsoid at the same time (even though they aren't processed until later).

-- All blocks (including templates/parameters) can nest inside each other, but an inner block must be closed before the outer block which contains it. This is why, for example, the wikitext "{{template| [[ }}" will result in an unprocessed template, since the inner "[[" is treated as the opening of a wikilink block, which prevents "}}" from being treated as the closure of the template block. On the other hand, "{{template| [[ ]] }}" will process correctly, since the wikilink block is closed before the template closure. It makes no difference whether the block will be treated as valid or not when it's processed later on, so "{{template| [[ }} ]] }}" would also work, even though "[[ }} ]]" is not a valid wikilink.

-- Note that nesting also affects pipes and equals signs, in addition to block closures.

-- These blocks can be nested to any degree, so "{{template| [[ [[ [[ ]] }}" will not work, since only one of the three wikilink blocks has been closed. On the other hand, "{{template| [[ [[ [[ ]] ]] ]] }}" will work.

-- All blocks are implicitly closed by the end of the text, since their validity is irrelevant at this stage.

-- Language conversion block.
-- Opens with "-{" and closes with "}-". However, templates/parameters take priority, so "-{{" is parsed as "-" followed by the opening of a template/parameter block (depending on what comes after).
-- Note: Language conversion blocks aren't actually enabled on the English Wiktionary, but Parsoid still parses them at this stage, so they can affect the closure of outer blocks: e.g. "[[ -{ ]]" is not a valid wikilink block, since the "]]" falls inside the new language conversion block.
do
	-- Handler (assigned below).
	local handle_language_conversion_block
	
	-- Route function for the inside of a language conversion block.
	local function do_language_conversion_block(self)
		self.current_layer._parse_data.handler = handle_language_conversion_block
		self:set_pattern("[\n<[{}]")
	end
	
	-- "}" closes the block only when followed by "-"; otherwise it is text.
	local function closing_brace(self, this)
		if self:read(1) ~= "-" then
			self:emit(this)
			return
		end
		self:emit("}-")
		self:advance()
		return self:pop()
	end
	
	-- Handler for the block's contents. The end of the text closes the block
	-- implicitly.
	function handle_language_conversion_block(self, ...)
		handle_language_conversion_block = self:switch(handle_language_conversion_block, {
			["\n"] = Parser.heading_block,
			["<"] = Parser.tag,
			["["] = Parser.wikilink_block,
			["{"] = Parser.braces,
			["}"] = closing_brace,
			[""] = Parser.pop,
			[false] = Parser.emit
		})
		return handle_language_conversion_block(self, ...)
	end
	
	-- Handles "{". A run of 2+ braces is parsed as a template/parameter; a
	-- brace preceded by "-" opens a language conversion block. If
	-- `fail_on_unclosed_braces` is set, an unclosed template/parameter fails
	-- the current route.
	function Parser:braces(this, fail_on_unclosed_braces)
		local after_hyphen = self:read(-1) == "-"
		if self:read(1) ~= "{" then
			-- Lone brace: plain text unless it follows "-".
			self:emit(this)
			if not after_hyphen then
				return
			end
			self:advance()
		else
			local unmatched, failed = self:template_or_parameter()
			-- Headings will fail if they contain an unclosed brace block.
			if failed and fail_on_unclosed_braces then
				return self:fail_route()
			end
			-- Language conversion blocks cannot begin "-{{", but can begin
			-- "-{{{" iff parsed as "-{" + "{{".
			if not after_hyphen or unmatched ~= 1 then
				return self:consume()
			end
		end
		self:emit(Wikitext:new(self:get(do_language_conversion_block)))
	end
end

--[==[
Headings

Opens with "\n=" (or "=" at the start of the text), and closes with "\n" or the end of the text. Note that it doesn't matter whether the heading will fail to process due to a premature newline (e.g. if there are no closing signs), so at this stage the only thing that matters for closure is the newline or end of text.

Note: Heading blocks are only parsed like this if they occur inside a template, since they do not iterate the preparser's heading count (i.e. they aren't proper headings).

Note 2: if directly inside a template argument with no previous equals signs, a newline followed by a single equals sign is parsed as an argument equals sign, not the opening of a new L1 heading block. This does not apply to any other heading levels. As such, {{template|key\n=}}, {{template|key\n=value}} or even {{template|\n=}} will successfully close, but {{template|key\n==}}, {{template|key=value\n=more value}}, {{template\n=}} etc. will not, since in the latter cases the "}}" would fall inside the new heading block.
]==]
do
	-- Handler (assigned below).
	local handle_heading_block
	
	-- Route function for the inside of a heading block.
	local function do_heading_block(self)
		self.current_layer._parse_data.handler = handle_heading_block
		self:set_pattern("[\n<[{]")
	end
	
	-- A newline closes the heading block (after the usual newline handling).
	local function close_on_newline(self)
		self:newline()
		return self:pop()
	end
	
	-- Handler for the block's contents. The end of the text closes the block
	-- implicitly.
	function handle_heading_block(self, ...)
		handle_heading_block = self:switch(handle_heading_block, {
			["\n"] = close_on_newline,
			["<"] = Parser.tag,
			["["] = Parser.wikilink_block,
			["{"] = Parser.braces,
			[""] = Parser.pop,
			[false] = Parser.emit
		})
		return handle_heading_block(self, ...)
	end
	
	-- Handles a newline which might open a heading block. `this` is the
	-- character being handled and `nxt` is what must follow it for a block to
	-- open (default "="). Blocks are parsed for as long as the text at the
	-- head matches that opener.
	function Parser:heading_block(this, nxt)
		self:newline()
		local opener = this .. (nxt or "=")
		local last_offset = #opener - 1
		while self:read(0, last_offset) == opener do
			self:advance()
			self:emit(Wikitext:new(self:get(do_heading_block)))
		end
	end
end

-- Wikilink block.
-- Opens with "[[" and closes with "]]".
do
	-- Handler (assigned below).
	local handle_wikilink_block
	
	-- Route function for the inside of a wikilink block.
	local function do_wikilink_block(self)
		self.current_layer._parse_data.handler = handle_wikilink_block
		self:set_pattern("[\n<[%]{]")
	end
	
	-- "]" closes the block only when followed by another "]"; otherwise it is
	-- text.
	local function closing_bracket(self, this)
		if self:read(1) ~= "]" then
			self:emit(this)
			return
		end
		self:emit("]]")
		self:advance()
		return self:pop()
	end
	
	-- Handler for the block's contents. The end of the text closes the block
	-- implicitly.
	function handle_wikilink_block(self, ...)
		handle_wikilink_block = self:switch(handle_wikilink_block, {
			["\n"] = Parser.heading_block,
			["<"] = Parser.tag,
			["["] = Parser.wikilink_block,
			["]"] = closing_bracket,
			["{"] = Parser.braces,
			[""] = Parser.pop,
			[false] = Parser.emit
		})
		return handle_wikilink_block(self, ...)
	end
	
	-- Handles "[": a lone bracket is emitted as text, while "[[" is emitted
	-- and followed by the parsed contents of a new wikilink block.
	function Parser:wikilink_block()
		if self:read(1) ~= "[" then
			self:emit("[")
			return
		end
		self:emit("[[")
		self:advance(2)
		self:emit(Wikitext:new(self:get(do_wikilink_block)))
	end
end

-- Lines which only contain comments, " " and "\t" are eaten, so long as
-- they're bookended by "\n" (i.e. not the first or last line).
-- Called with the head on a newline; always emits exactly one "\n".
function Parser:newline()
	local text, pos = self.text, self.head
	while true do
		-- Skip over any run of closed comments (each optionally preceded by
		-- tabs/spaces) at the start of the next line.
		while true do
			local comment_body = match(text, "^[\t ]*<!%-%-()", pos + 1)
			if not comment_body then
				break
			end
			local _, comment_end = find(text, "-->", comment_body, true)
			if not comment_end then
				break
			end
			pos = comment_end
		end
		-- Fail if no comments found.
		if pos == self.head then
			break
		end
		-- The rest of the line must be blank and end with a newline.
		local line_end = match(text, "^[\t ]*()\n", pos + 1)
		if not line_end then
			break
		end
		self:jump(line_end)
		pos = line_end
	end
	self:emit("\n")
end

do
	-- Handlers.
	local handle_start
	local main_handler
	
	-- Route function for the top-level parse.
	-- If `transcluded` is true, then the text is checked for a pair of
	-- onlyinclude tags. If these are found (even if they're in the wrong
	-- order), then the start of the page is treated as though it is preceded
	-- by a closing onlyinclude tag.
	-- Note 1: unlike other parser extension tags, onlyinclude tags are case-
	-- sensitive and cannot contain whitespace.
	-- Note 2: onlyinclude tags *can* be implicitly closed by the end of the
	-- text, but the hard requirement above means this can only happen if
	-- either the tags are in the wrong order or there are multiple onlyinclude
	-- blocks.
	local function do_parse(self, transcluded)
		self.current_layer._parse_data.handler = handle_start
		self:set_pattern(".")
		self.section = 0
		if not transcluded then
			return
		end
		self.transcluded = true
		local text = self.text
		if find(text, "</onlyinclude>", nil, true) then
			local head = find(text, "<onlyinclude>", nil, true)
			if head then
				self.onlyinclude = true
				-- Start just after the first "<onlyinclude>" (13 characters).
				self:jump(head + 13)
			end
		end
	end
	
	-- Handler for the first character only: it installs main_handler for
	-- everything afterwards.
	-- If the first character is "=", try parsing it as a heading.
	function handle_start(self, this)
		self.current_layer._parse_data.handler = main_handler
		self:set_pattern("[\n<{]")
		if this == "=" then
			return self:heading()
		end
		return self:consume()
	end
	
	-- Handler for top-level text.
	function main_handler(self, ...)
		main_handler = self:switch(main_handler, {
			-- A newline followed by "=" may open a heading.
			["\n"] = function(self)
				self:newline()
				if self:read(1) == "=" then
					self:advance()
					return self:heading()
				end
			end,
			
			["<"] = Parser.tag,
			
			-- Only a run of 2+ braces can open a template/parameter.
			["{"] = function(self, this)
				if self:read(1) == "{" then
					self:template_or_parameter()
					return self:consume()
				end
				self:emit(this)
			end,
			
			[""] = Parser.pop,
			[false] = Parser.emit
		})
		return main_handler(self, ...)
	end
	
	-- Parses `text` (a string, or a number which is converted to a string)
	-- and returns the second value returned by Parser:parse (the node tree).
	-- If `transcluded` is true, the text is parsed as though it were being
	-- transcluded (see do_parse). Throws an error if `text` is any other type;
	-- the error is raised at level 2, so that it is attributed to the caller
	-- which passed the bad argument.
	function export.parse(text, transcluded)
		local text_type = type(text)
		if text_type == "number" then
			text = tostring(text)
		elseif text_type ~= "string" then
			error("bad argument #1 (string expected, got " .. text_type .. ")", 2)
		end
		return (select(2, Parser:parse{
			text = text,
			node = {Wikitext, true},
			route = {do_parse, transcluded}
		}))
	end
	parse = export.parse
end

-- Parses `text` and iterates over its "template" nodes. The text is parsed as
-- transcluded unless `not_transcluded` is truthy.
function export.find_templates(text, not_transcluded)
	local transcluded = not not_transcluded
	local tree = parse(text, transcluded)
	return tree:iterate_nodes("template")
end

do
	-- Lazily-loaded lookup tables; callers use the pattern
	-- `(link_parameter_N or get_link_parameter_N())`.
	local link_parameter_1, link_parameter_2
	
	-- Loads template_link_param_1 from the data module on first use, then
	-- discards this getter.
	local function get_link_parameter_1()
		link_parameter_1 = (data or get_data()).template_link_param_1
		get_link_parameter_1 = nil
		return link_parameter_1
	end
	
	-- Loads template_link_param_2 from the data module on first use, then
	-- discards this getter.
	local function get_link_parameter_2()
		link_parameter_2 = (data or get_data()).template_link_param_2
		get_link_parameter_2 = nil
		return link_parameter_2
	end
	
	-- Build a wikilink to `title`, shown as `display`. A title with an empty
	-- fragment is pointed at "#top" (an implicit anchor at the top of every
	-- page), which keeps self-links displaying as links; the bold display
	-- they would otherwise get is distracting and unintuitive for template
	-- links.
	local function link_page(title, display)
		local fragment = title.fragment
		local target = title.prefixedText .. "#" .. (fragment == "" and "top" or fragment)
		return format("[[:%s|%s]]", encode_uri(target, "WIKI"), display)
	end
	
	-- pf_arg1 or pf_arg2 may need to be linked if a given parser function
	-- treats them as a pagename. If a key exists in `namespace`, the value is
	-- the namespace for the page: if not 0, then the namespace prefix will
	-- always be added to the input (e.g. {{#invoke:}} can only target the
	-- Module: namespace, so inputting "Template:foo" gives
	-- "Module:Template:foo", and "Module:foo" gives "Module:Module:foo").
	-- However, this isn't possible with mainspace (namespace 0), so prefixes
	-- are respected. make_title() handles all of this automatically.
	local function finalize_arg(pagename, namespace)
		-- A nil namespace means the argument is not a pagename, so no title
		-- is made for it.
		if namespace ~= nil then
			local title = make_title(namespace, pagename)
			-- Only link when a title could be made and it is not external.
			if title and not title.isExternal then
				return link_page(title, pagename)
			end
		end
		return pagename
	end
	
	-- Converts a raw template/magic word name into its linked display form.
	-- Returns two values: the display string (the colon-separated chunks,
	-- each linked where applicable, joined with "&#58;") and the argument
	-- table. If parse_template_name cannot parse `name`, the inputs are
	-- returned unchanged. `args` is never modified in place: when the first
	-- piped argument has to be finalized, a shallow copy is returned instead.
	local function render_title(name, args)
		-- parse_template_name returns a table of transclusion modifiers plus
		-- the normalized template/magic word name, which will be used as link
		-- targets. The third return value pf_arg1 is the first argument of a
		-- parser function, which comes after the colon (e.g. "foo" in
		-- "{{#IF:foo|bar|baz}}"). This means args[1] (i.e. the first argument
		-- that comes after a pipe) is actually argument 2, and so on. Note: the
		-- second parameter of parse_template_name checks if there are any
		-- arguments, since parser variables cannot take arguments (e.g.
		-- {{CURRENTYEAR}} is a parser variable, but {{CURRENTYEAR|foo}}
		-- transcludes "Template:CURRENTYEAR"). In such cases, the returned
		-- table explicitly includes the "Template:" prefix in the template
		-- name. The third parameter instructs it to retain any fragment in the
		-- template name in the returned table, if present.
		-- pairs(args)(args) calls the iterator function returned by pairs
		-- (normally next) once, giving nil only if `args` has no keys.
		local chunks, subclass, pf_arg1 = parse_template_name(
			name,
			args and pairs(args)(args) ~= nil,
			true
		)
		if chunks == nil then
			return name, args
		end
		local chunks_len = #chunks
		-- Additionally, generate the corresponding table `rawchunks`, which
		-- is a list of colon-separated chunks in the raw input. This is used
		-- to retrieve the display forms for each chunk.
		local rawchunks = split(name, ":")
		-- Every chunk before the last is a transclusion modifier; the key in
		-- the magic words table is the chunk with its final character removed.
		for i = 1, chunks_len - 1 do
			chunks[i] = format(
				"[[%s|%s]]",
				encode_uri((magic_words or get_magic_words())[sub(chunks[i], 1, -2)].transclusion_modifier, "WIKI"),
				rawchunks[i]
			)
		end
		local chunk = chunks[chunks_len]
		-- If it's a template, return a link to it with link_page, concatenating
		-- the remaining chunks in `rawchunks` to form the display text.
		-- Use new_title with the default namespace 10 (Template:) to generate
		-- a target title, which is the same setting used for retrieving
		-- templates (including those in other namespaces, as prefixes override
		-- the default).
		if subclass == "template" then
			chunks[chunks_len] = link_page(
				new_title(chunk, 10),
				concat(rawchunks, "&#58;", chunks_len) -- :
			)
			return concat(chunks, "&#58;"), args -- :
		elseif subclass == "parser variable" then
			chunks[chunks_len] = format(
				"[[%s|%s]]",
				encode_uri((magic_words or get_magic_words())[chunk].parser_variable, "WIKI"),
				rawchunks[chunks_len]
			)
			return concat(chunks, "&#58;"), args -- :
		end
		-- Otherwise, it must be a parser function.
		local mgw_data = (magic_words or get_magic_words())[sub(chunk, 1, -2)]
		local link = mgw_data.parser_function or mgw_data.transclusion_modifier
		local pf_arg2 = args and args[1] or nil
		-- Some magic words have different links, depending on whether argument
		-- 2 is specified (e.g. "baz" in {{foo:bar|baz}}).
		if type(link) == "table" then
			link = pf_arg2 and link[2] or link[1]
		end
		chunks[chunks_len] = format(
			"[[%s|%s]]",
			encode_uri(link, "WIKI"),
			rawchunks[chunks_len]
		)
		-- #TAG: has special handling, because documentation links for parser
		-- extension tags come from [[Module:data/parser extension tags]].
		if chunk == "#TAG:" then
			-- Tags are only case-insensitive with ASCII characters.
			local tag = (parser_extension_tags or get_parser_extension_tags())[lower(php_trim(pf_arg1))]
			if tag then
				pf_arg1 = format(
					"[[%s|%s]]",
					encode_uri(tag, "WIKI"),
					pf_arg1
				)
			end
		-- Otherwise, finalize pf_arg1 and add it to `chunks`.
		else
			pf_arg1 = finalize_arg(pf_arg1, (link_parameter_1 or get_link_parameter_1())[chunk])
		end
		chunks[chunks_len + 1] = pf_arg1
		-- Finalize pf_arg2 (if applicable), then return.
		if pf_arg2 then
			args = shallow_copy(args) -- Avoid destructively modifying args.
			args[1] = finalize_arg(pf_arg2, (link_parameter_2 or get_link_parameter_2())[chunk])
		end
		return concat(chunks, "&#58;"), args -- :
	end
	
	-- Returns a list whose first item is `title`, followed by one item per
	-- entry in `args` (if given), each being either a bare value or a
	-- "key=value" string.
	function export.buildTemplate(title, args)
		local output = {title}
		if args then
			-- Parameters are visited in the order given by sorted_pairs:
			-- all numbered parameters in order, followed by any remaining
			-- parameters in codepoint order. Implicit parameters are used
			-- wherever possible, even if explicit numbers are interpolated
			-- between them (e.g. 0 would go before any implicit parameters,
			-- and 2.5 between 2 and 3).
			-- TODO: handle "=" and "|" in params/values.
			local implicit
			for k, v in sorted_pairs(args) do
				local piece
				if type(k) == "number" and k >= 1 and k % 1 == 0 then
					-- The length is only computed once a positive integer
					-- key has actually been seen.
					if implicit == nil then
						implicit = table_len(args)
					end
					piece = k <= implicit and v or k .. "=" .. v
				else
					piece = k .. "=" .. v
				end
				insert(output, piece)
			end
		end
		return output
	end
	build_template = export.buildTemplate
	
	-- Returns a <code> element (as a string) displaying a template call for
	-- `title` with `args`. Unless `no_link` is truthy, the title (and, where
	-- applicable, the first argument) is first converted to links by
	-- render_title.
	function export.templateLink(title, args, no_link)
		if not no_link then
			title, args = render_title(title, args)
		end
		local parts = build_template(title, args)
		-- Entity-encode "=", "{" and "}" in every part.
		for index = 1, #parts do
			parts[index] = encode_entities(parts[index], "={}", true, true)
		end
		local code = html_create("code"):css("white-space", "pre-wrap")
		code = code:wikitext("&#123;&#123;" .. concat(parts, "&#124;") .. "&#125;&#125;") -- {{ | }}
		return tostring(code)
	end
end

do
	-- Parses `text` and iterates over its "parameter" nodes. The text is
	-- parsed as transcluded unless `not_transcluded` is truthy.
	function export.find_parameters(text, not_transcluded)
		local transcluded = not not_transcluded
		local tree = parse(text, transcluded)
		return tree:iterate_nodes("parameter")
	end
	
	-- Returns a <code> element (as a string) displaying a parameter with the
	-- given `name` and, if supplied, its `default` after a pipe.
	function export.displayParameter(name, default)
		local code = html_create("code"):css("white-space", "pre-wrap")
		local inner = concat({name, default}, "&#124;")
		code = code:wikitext("&#123;&#123;&#123;" .. inner .. "&#125;&#125;&#125;") -- {{{ | }}}
		return tostring(code)
	end
end

do
	-- Validates a heading level and returns it unchanged. Raises an error
	-- unless `level` is a number that is an integer from 1 to 6.
	local function check_level(level)
		if type(level) ~= "number" then
			error("Heading levels must be numbers.")
		end
		local in_range = level >= 1 and level <= 6
		if not (in_range and level % 1 == 0) then
			error("Heading levels must be integers between 1 and 6.")
		end
		return level
	end
	
	-- FIXME: should headings which contain "\n" be returned? This may depend
	-- on variable factors, like template expansion. They iterate the heading
	-- count number, but fail on rendering. However, in some cases a different
	-- heading might still be rendered due to intermediate equals signs; it
	-- may even be of a different heading level: e.g., this is parsed as an
	-- L2 heading with a newline (due to the wikilink block), but renders as the
	-- L1 heading "=foo[[". Section edit links are sometimes (but not always)
	-- present in such cases.
	
	-- ==[[=
	-- ]]==
	
	-- TODO: section numbers for edit links seem to also include headings
	-- nested inside templates and parameters (but apparently not those in
	-- parser extension tags - need to test this more). If we ever want to add
	-- section edit links manually, this will need to be accounted for.
	function export.find_headings(text, i, j)
		-- Parses `text` and iterates over its heading nodes. `i` and `j` are
		-- optional minimum and maximum heading levels (integers from 1 to 6,
		-- defaulting to 1 and 6 respectively); if neither is given, every
		-- heading node is iterated over without level filtering.
		-- Raises an error (via check_level) if a supplied level is not an
		-- integer between 1 and 6.
		local parsed = parse(text)
		if i == nil and j == nil then
			-- Reuse the tree parsed above rather than parsing `text` a
			-- second time.
			return parsed:iterate_nodes("heading")
		end
		i = i and check_level(i) or 1
		j = j and check_level(j) or 6
		-- Only accept heading nodes whose level is within [i, j].
		return parsed:iterate(function(v)
			if class_else_type(v) == "heading" then
				local level = v.level
				return level >= i and level <= j
			end
		end)
	end
end

do
	-- Wraps `tag` in "&lt;" and "&gt;" inside a <code> element, returned as a
	-- string.
	local function make_tag(tag)
		local code = html_create("code"):css("white-space", "pre-wrap")
		code = code:wikitext("&lt;" .. tag .. "&gt;")
		return tostring(code)
	end

	-- Displays `tag` (given without the surrounding "<" and ">") with its tag
	-- name linked to the relevant documentation, if any is known. Invalid
	-- tags are displayed without a link.
	function export.wikitagLink(tag)
		-- The parser unconditionally treats ">" as the end of a tag, so it
		-- can't appear anywhere inside one (including attributes).
		if find(tag, ">", nil, true) then
			return make_tag(tag)
		end
		-- A tag must start "<tagname..." or "</tagname...", with no
		-- whitespace after "<" or "</".
		local slash, tagname, remainder = match(tag, "^(/?)([^/%s]+)(.*)$")
		if not tagname then
			return make_tag(tag)
		end
		-- Tags are only case-insensitive with ASCII characters.
		local lowered = lower(tagname)
		if lowered == "onlyinclude" and (lowered ~= tagname or remainder ~= "") then
			-- onlyinclude tags must be lowercase and are whitespace
			-- intolerant.
			return make_tag(tag)
		elseif slash == "/" and not match(remainder, "^%s*()$") then
			-- Closing wikitags (except onlyinclude) can only have whitespace
			-- after the tag name.
			return make_tag(tag)
		elseif remainder ~= "/" and sub(remainder, 1, 1) == "/" then
			-- Tagnames cannot be followed immediately by "/", unless it comes
			-- at the end (e.g. "<nowiki/>", but not "<nowiki/ >").
			return make_tag(tag)
		end
		local target
		if lowered == "noinclude" or lowered == "includeonly" or lowered == "onlyinclude" then
			-- Partial transclusion tags aren't in the table of parser
			-- extension tags.
			target = "mw:Transclusion#Partial transclusion"
		else
			target = (parser_extension_tags or get_parser_extension_tags())[lowered]
		end
		if not target then
			return make_tag(tag)
		end
		-- Replace only the first occurrence of the tag name with the link.
		local linked = gsub(tag, pattern_escape(tagname), "[[" .. replacement_escape(encode_uri(target, "WIKI")) .. "|%0]]", 1)
		return make_tag(linked)
	end
end

-- For convenience: re-export the class_else_type helper (used above to
-- identify heading nodes) so that callers can reach it through this module.
export.class_else_type = class_else_type

return export