Module:template parser

From Linguifex
Revision as of 09:44, 31 July 2024 by Sware (talk | contribs) (Created page with "--[[ NOTE: This module works by using recursive backtracking to build a node tree, which can then be traversed as necessary. Because it is called by a number of high-use modules, it has been optimised for speed using a profiler, since it is used to scrape data from large numbers of pages very quickly. To that end, it rolls some of its own methods in cases where this is faster than using a function from one of the standard libraries. Please DO NOT "simplify" the code by...")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

Documentation for this module may be created at Module:template parser/doc

--[[
NOTE: This module works by using recursive backtracking to build a node tree, which can then be traversed as necessary.

Because it is called by a number of high-use modules, it has been optimised for speed using a profiler, since it is used to scrape data from large numbers of pages very quickly. To that end, it rolls some of its own methods in cases where this is faster than using a function from one of the standard libraries. Please DO NOT "simplify" the code by removing these, since you are almost guaranteed to slow things down, which could seriously impact performance on pages which call this module hundreds or thousands of times.

It has also been designed to emulate the native parser's behaviour as much as possible, which in some cases means replicating bugs or unintuitive behaviours in that code; these should not be "fixed", since it is important that the outputs are the same. Most of these originate from deficient regular expressions, which can't be used here, so the bugs have to be manually reintroduced as special cases (e.g. onlyinclude tags being case-sensitive and whitespace intolerant, unlike all other tags). If any of these are fixed, this module should also be updated accordingly.
]]
local require = require
local m_parser = require("Module:parser")
local m_str_utils = require("Module:string utilities")
local mw = mw
local mw_title = mw.title
local string = string
local table = table

local concat = table.concat
local decode_entities = m_str_utils.decode_entities
local find = string.find
local format = string.format
local gsub = string.gsub
local insert = table.insert
local is_node = m_parser.is_node
local lower = m_str_utils.lower
local match = string.match
local new_title = mw_title.new
local next = next
local parse -- defined as export.parse below
local parse_template_name -- defined as export.parseTemplateName below
local pcall = pcall
local php_trim = m_str_utils.php_trim
local rep = string.rep
local reverse = string.reverse
local scribunto_param_key = m_str_utils.scribunto_param_key
local select = select
local sub = string.sub
local title_equals = mw_title.equals
local tostring = m_parser.tostring
local type = type
local type_or_class = m_parser.type_or_class
local umatch = mw.ustring.match
local upper = m_str_utils.upper

local data = mw.loadData("Module:template parser/data")
local frame = mw.getCurrentFrame()
local invalid_tag_attribute_name_char = data.invalid_tag_attribute_name_char

local Parser, Node = m_parser.new()

-- Preprocesses `text` if it is a node (by calling its own preprocess method
-- with `args`); anything else is returned unchanged. If the node's method
-- returns a false value, the node itself is returned.
local function preprocess(text, args)
	if is_node(text) then
		local result = text:preprocess(args)
		if result then
			return result
		end
	end
	return text
end

local export = {}

------------------------------------------------------------------------------------
--
-- Nodes
--
------------------------------------------------------------------------------------

-- Default preprocessing for a node: preprocess each child in order and join
-- the results into a single string.
function Node:preprocess(args)
	local pieces, count = {}, #self
	for index = 1, count do
		local child = self[index]
		pieces[index] = preprocess(child, args)
	end
	return concat(pieces)
end

local Wikitext = Node:new_class("wikitext")

-- force_node ensures the output will always be a node.
-- Builds the most compact representation of `this`:
--   * a non-table value is returned as-is;
--   * a single-item table collapses to that item;
--   * a table made up only of concatenable values collapses to one string;
--   * anything else becomes a wikitext node.
-- If `force_node` is set, the collapsed value is wrapped in a node (unless it
-- is already one).
function Wikitext:new(this, force_node)
	if type(this) ~= "table" then
		if force_node then
			return Node.new(self, {this})
		end
		return this
	end
	if #this == 1 then
		local only = this[1]
		if force_node and not is_node(only) then
			return Node.new(self, this)
		end
		return only
	end
	-- concat throws if any item is a table (i.e. a node), in which case the
	-- items have to stay separate.
	local joined, text = pcall(concat, this)
	if not joined then
		return Node.new(self, this)
	elseif force_node then
		return Node.new(self, {text})
	end
	return text
end

-- First value is the argument name.
-- Second value is the argument's default value.
-- Any additional values are ignored: "{{{a|b|c}}}" is argument "a" with default value "b" (*not* "b|c").
local Argument = Node:new_class("argument")

-- Creates an argument node from the first two items of `this` (name and
-- default). A default which was parsed as a parameter (e.g. the "b=c" in
-- "{{{a|b=c}}}") is converted back to wikitext, with the "=" restored between
-- its key and value.
function Argument:new(this)
	local name, default = this[1], this[2]
	if type_or_class(default) == "parameter" then
		insert(default, 2, "=")
		default = Wikitext:new(default)
	end
	return Node.new(self, {name, default})
end

-- Stringifies the argument as "{{{name|default}}}" (or "{{{name}}}" if there
-- is no default).
function Argument:__tostring()
	local parts = {}
	for index = 1, #self do
		parts[index] = tostring(self[index])
	end
	local inner = concat(parts, "|")
	return "{{{" .. inner .. "}}}"
end

-- Iterator step: returns the item after index `i` together with its index.
-- Only indices 1 and 2 (name and default) are ever visited; nothing is
-- returned beyond that.
function Argument:next(i)
	local following = i + 1
	if following > 2 then
		return
	end
	return self[following], following
end

-- Returns the argument name after preprocessing, normalized with
-- scribunto_param_key.
function Argument:get_name(args)
	local name = preprocess(self[1], args)
	return scribunto_param_key(name)
end

-- Returns the stringified default value, falling back to the raw argument
-- ("{{{name}}}") if stringifying the default gives a false value.
-- NOTE(review): `args` is accepted but not used here, and the fallback relies
-- on m_parser.tostring returning nil for a missing default - confirm.
function Argument:get_default(args)
	local default = tostring(self[2])
	if default then
		return default
	end
	return "{{{" .. tostring(self[1]) .. "}}}"
end

-- Resolves the argument. With `args`, the value stored under the trimmed name
-- is used if present; otherwise the preprocessed default is used; failing
-- that, the argument is returned in its raw form ("{{{name}}}").
function Argument:preprocess(args)
	if args then
		local name = preprocess(self[1], args)
		local value = args[php_trim(name)] or preprocess(self[2], args)
		if value then
			return value
		end
		return "{{{" .. name .. "}}}"
	end
	-- No arguments supplied, so only the default can apply.
	local default = preprocess(self[2], args)
	if default then
		return default
	end
	return "{{{" .. preprocess(self[1], args) .. "}}}"
end

local Parameter = Node:new_class("parameter")

-- Stringifies the parameter as "key=value".
function Parameter:__tostring()
	local key = tostring(self[1])
	local value = tostring(self[2])
	return key .. "=" .. value
end

local Template = Node:new_class("template")

-- Stringifies the template as "{{name|param1|param2...}}".
function Template:__tostring()
	local parts = {}
	for index = 1, #self do
		parts[index] = tostring(self[index])
	end
	local inner = concat(parts, "|")
	return "{{" .. inner .. "}}"
end

-- Returns a table of the template's parameters. Named parameters are stored
-- under their normalized key with the value trimmed; unnamed parameters are
-- numbered in order of appearance and are left untrimmed.
function Template:get_params(args)
	local params = {}
	local positional = 0
	for index = 2, #self do
		local param = self[index]
		if type_or_class(param) ~= "parameter" then
			positional = positional + 1
			params[positional] = tostring(param) -- Not trimmed.
		else
			local key = scribunto_param_key(preprocess(param[1], args))
			params[key] = php_trim(tostring(param[2]))
		end
	end
	return params
end

-- Normalize the template name, check it's a valid template, then memoize results (using false for invalid titles).
-- Parser functions (e.g. {{#IF:a|b|c}}) need to have the first argument extracted from the title, as it comes after the colon. Because of this, the parser function and first argument are memoized as a table.
-- FIXME: Some parser functions have special argument handling (e.g. {{#SWITCH:}}).
do
	-- Results are memoized separately for templates with and without
	-- arguments: parse_template_name only treats a name as a parser variable
	-- when there are no arguments, so the same name can normalize differently
	-- in the two cases and must not share a cache entry.
	local memo_with_args, memo_without_args = {}, {}
	
	-- Replacement for Template:get_params on parser function nodes: all of
	-- the arguments are returned as strings in a plain array, in order.
	local function get_array_params(self)
		local params = {}
		for i = 2, #self do
			params[i - 1] = tostring(self[i])
		end
		return params
	end
	
	-- Converts a template node into a parser function node, by inserting the
	-- first argument (which was embedded in the name, after the colon) at
	-- index 2 and swapping in get_array_params. The conversion is only done
	-- once per node, so calling get_name repeatedly on the same node can't
	-- insert the argument more than once.
	local function convert_to_parser_function(self, name, arg1)
		if self.get_params ~= get_array_params then
			insert(self, 2, arg1)
			self.get_params = get_array_params
		end
		return name
	end
	
	-- Returns the normalized template (or parser function/variable) name, or
	-- nothing if the name is invalid or uses SUBST:.
	function Template:get_name(args)
		local name = preprocess(self[1], args)
		-- A node which has already been converted holds the parser function's
		-- first argument at index 2, so that slot isn't counted.
		local has_args = #self > (self.get_params == get_array_params and 2 or 1)
		local memo = has_args and memo_with_args or memo_without_args
		local norm = memo[name]
		if norm then
			if type(norm) == "table" then
				return convert_to_parser_function(self, norm[1], norm[2])
			end
			return norm
		elseif norm == false then
			return
		end
		local chunks, pf_arg1 = parse_template_name(name, has_args)
		-- Fail if invalid.
		if not chunks then
			memo[name] = false
			return
		end
		local chunk1 = chunks[1]
		-- Fail on SUBST:.
		if chunk1 == "SUBST:" then
			memo[name] = false
			return
		-- If pf_arg1 is returned, it's a parser function with pf_arg1 as the first argument.
		-- Any modifiers are ignored.
		elseif pf_arg1 then
			local pf = chunks[#chunks]
			memo[name] = {pf, pf_arg1}
			return convert_to_parser_function(self, pf, pf_arg1)
		end
		-- Ignore SAFESUBST:, and treat MSGNW: as a parser function with the pagename as its first argument (ignoring any RAW: that comes after).
		if chunks[chunk1 == "SAFESUBST:" and 2 or 1] == "MSGNW:" then
			pf_arg1 = chunks[#chunks]
			memo[name] = {"MSGNW:", pf_arg1}
			return convert_to_parser_function(self, "MSGNW:", pf_arg1)
		end
		-- Ignore any remaining modifiers, as they've done their job.
		local output = chunks[#chunks]
		memo[name] = output
		return output
	end
end

-- Expands the template by stringifying it and passing the result to the
-- current frame's preprocessor.
function Template:preprocess()
	local wikitext = tostring(self)
	return frame:preprocess(wikitext)
end

local Tag = Node:new_class("tag")

-- Stringifies the tag as "<name attr="value">content</name>", or as
-- "<name attr="value"/>" if it is self-closing.
function Tag:__tostring()
	local pieces, count = {"<", self.name}, 2
	for attr, value in next, self:get_attributes() do
		-- Quote value using "" by default, '' if it contains ", and leave unquoted if it contains both.
		local quoter
		if not find(value, "\"", 1, true) then
			quoter = "\""
		elseif not find(value, "'", 1, true) then
			quoter = "'"
		elseif match(value, "^()[^\t\n\f\r ]*$") then
			quoter = ""
		else
			-- This shouldn't happen, unless the node has been edited manually. Not possible to stringify in a way that can be interpreted by the native parser, since it doesn't recognise escapes.
			error("Tag attribute values cannot contain all three of \", ' and whitespace simultaneously.")
		end
		count = count + 1
		pieces[count] = " " .. attr .. "=" .. quoter .. value .. quoter
	end
	local opening = concat(pieces)
	if self.self_closing then
		return opening .. "/>"
	end
	return opening .. ">" .. concat(self) .. "</" .. self.name .. ">"
end

-- Returns the tag's attributes as a table of name/value pairs. The raw
-- attribute string captured by the parser is parsed on the first call, and the
-- resulting table is cached on the node in place of the string.
function Tag:get_attributes()
	local raw = self.attributes
	if not raw then
		-- No attributes were captured, so cache and return an empty table.
		self.attributes = {}
		return self.attributes
	elseif type(raw) == "table" then
		-- Already parsed.
		return raw
	end
	-- Discard the final "/" of a self-closing tag.
	if sub(raw, -1) == "/" then
		raw = sub(raw, 1, -2)
	end
	local attributes, head = {}, 1
	-- Semi-manual implementation of the native regex.
	while true do
		-- Find the next attribute name, plus the position just after it.
		local name, loc = match(raw, "([^\t\n\f\r />][^\t\n\f\r /=>]*)()", head)
		if not name then
			break
		end
		head = loc
		local value
		-- Check for "=" (optionally surrounded by whitespace), which means a
		-- value follows.
		loc = match(raw, "^[\t\n\f\r ]*=[\t\n\f\r ]*()", head)
		if loc then
			head = loc
			-- Quoted value: take everything between the matching quotes.
			value = match(raw, "^%b\"\"", head) or match(raw, "^%b''", head)
			if value then
				head = head + #value
				value = sub(value, 2, -2)
			else
				-- Unquoted value (or an unpaired opening quote, which is
				-- skipped over but left out of the value): runs up to the
				-- next whitespace.
				local raw_value
				raw_value, value = match(raw, "^([\"']?([^\t\n\f\r ]*))", head)
				head = head + #raw_value
			end
		end
		-- Skip attributes with invalid names: those starting with "-" or ".",
		-- or containing a character matched by the invalid-character pattern
		-- from the data module.
		if not (
			match(name, "^[%-.]") or
			umatch(name, invalid_tag_attribute_name_char)
		) then
			-- Names are lowercased; values have runs of whitespace collapsed
			-- to a single space, are trimmed, then have entities decoded. An
			-- attribute without a value gets the empty string.
			attributes[lower(name)] = value and decode_entities(
				php_trim(gsub(value, "[\t\n\r ]+", " "))
			) or ""
		end
	end
	self.attributes = attributes
	return attributes
end

-- Expands the tag by stringifying it and passing the result to the current
-- frame's preprocessor.
function Tag:preprocess()
	local wikitext = tostring(self)
	return frame:preprocess(wikitext)
end

local Heading = Node:new_class("heading")

-- Creates a heading node. If the heading has several items which can all be
-- concatenated, they are merged into a single string, with the level,
-- section and pos fields carried over to the new table.
function Heading:new(this)
	if #this > 1 then
		local joined, text = pcall(concat, this)
		if joined then
			this = {
				text,
				level = this.level,
				section = this.section,
				pos = this.pos
			}
		end
	end
	return Node.new(self, this)
end

-- Stringifies the heading, with `level` equals signs on either side.
function Heading:__tostring()
	local marker = rep("=", self.level)
	local content = Node.__tostring(self)
	return marker .. content .. marker
end

-- Returns the preprocessed heading text, trimmed, without the equals signs.
function Heading:get_name(args)
	local content = Node.preprocess(self, args)
	return php_trim(content)
end

-- Preprocesses the heading content, then restores the `level` equals signs on
-- either side.
function Heading:preprocess(args)
	local marker = rep("=", self.level)
	local content = Node.preprocess(self, args)
	return marker .. content .. marker
end

------------------------------------------------------------------------------------
--
-- Parser
--
------------------------------------------------------------------------------------

-- Returns part of the text relative to the head: from offset `i` (default 0)
-- to offset `j` (default `i`), both inclusive. With no arguments, this is the
-- character at the head.
function Parser:read(i, j)
	local first = i or 0
	local last = j or first
	local head = self.head
	return sub(self.text, head + first, head + last)
end

-- Moves the head forward by `n`. If `n` is not given, the current layer's
-- step is used, or 1 if that isn't set either.
function Parser:advance(n)
	if not n then
		n = self[-1].step or 1
	end
	self.head = self.head + n
end

-- Passes the next chunk of text to the current layer's handler. Unless `this`
-- is given, the chunk is either the match for the layer's pattern (if it
-- occurs right at the head), or all the text before the next match (which is
-- the rest of the text if there are no more matches). The length of the chunk
-- is stored as the layer's step.
function Parser:consume(this)
	local layer = self[-1]
	if not this then
		local text, head = self.text, self.head
		local first, last = find(text, layer.pattern, head)
		local stop
		if first == head then
			stop = last
		elseif first then
			stop = first - 1
		end
		this = sub(text, head, stop)
	end
	layer.step = #this
	return layer.handler(self, this)
end

-- Template or argument.

-- Parsed by matching the opening braces innermost-to-outermost (ignoring lone closing braces). Arguments {{{ }}} take priority over templates {{ }} where possible, but a double closing brace will always result in a closure, even if there are 3+ opening braces.

-- For example, "{{{{foo}}}}" (4) is parsed as an argument enclosed by single braces, and "{{{{{foo}}}}}" (5) is an argument inside a template. However, "{{{{{foo }} }}}" is a template inside an argument, due to "}}" forcing the closure of the inner node.
do
	-- Handlers.
	local handle_name
	local handle_parameter
	
	-- Handler for the name part of a template/argument block (everything up to
	-- the first "|" or the closing braces). On the first call, the handler is
	-- replaced by the value returned by self:switch for the table below, and
	-- the call is forwarded to the replacement.
	-- NOTE(review): self:switch is defined in Module:parser; presumably it
	-- dispatches on the consumed chunk, with [false] as the fallback - confirm.
	function handle_name(self, ...)
		handle_name = self:switch(handle_name, {
			["\n"] = Parser.heading_block,
			["<"] = Parser.tag,
			["["] = Parser.wikilink_block,
			["{"] = Parser.braces,
			
			-- End of the name: emit it, then open a sublayer for the first
			-- parameter (in which "=" is also significant).
			["|"] = function(self)
				self:emit(Wikitext:new(self:pop_sublayer()))
				self:push_sublayer(handle_parameter)
				self[-1].pattern = "[\n<=[{|}]"
			end,
			
			-- "}}" closes the block; a lone "}" is just text.
			["}"] = function(self)
				if self:read(1) == "}" then
					self:emit(Wikitext:new(self:pop_sublayer()))
					return self:pop()
				end
				self:emit("}")
			end,
			
			-- Parser:consume gives the empty string once the head is past the
			-- end of the text, which means the block was never closed.
			[""] = Parser.fail_route,
			[false] = Parser.emit
		})
		return handle_name(self, ...)
	end
	
	-- Handler for the parameters of a template/argument block (everything
	-- after the first "|"). On the first call, the handler is replaced by the
	-- value returned by self:switch for the table below, and the call is
	-- forwarded to the replacement.
	function handle_parameter(self, ...)
		-- Closes the current parameter sublayer and emits it: as a Parameter
		-- node if a key was recorded by the "=" handler, or as plain wikitext
		-- if not.
		local function emit_parameter(self)
			local param = Wikitext:new(self:pop_sublayer())
			local layer = self[-1]
			local key = layer.key
			if key then
				param = Parameter:new{key, param}
				layer.key = nil
			end
			self:emit(param)
		end
		
		handle_parameter = self:switch(handle_parameter, {
			["\n"] = function(self)
				-- Once the parameter has a key, "\n=" opens a heading block
				-- as normal.
				if self[-1].key then
					return self:heading_block()
				end
				-- Otherwise, "\n=" has to be left for the "=" handler, so
				-- only "\n==" can open a heading block.
				self:newline()
				while self:read(0, 2) == "\n==" do
					self:advance()
					-- The layer returned by get is converted with
					-- Wikitext:new before being emitted, as is done in
					-- Parser:heading_block; emitting the raw layer table
					-- would leave a non-node table in the output.
					self:emit(Wikitext:new(select(2, self:get("do_heading_block"))))
				end
			end,
			
			["<"] = Parser.tag,
			
			-- The first "=" splits the key from the value: the text so far is
			-- stored as the key, and a new sublayer is opened for the value,
			-- in which "=" is no longer significant.
			["="] = function(self)
				local key = Wikitext:new(self:pop_sublayer())
				self[-1].key = key
				self:push_sublayer(handle_parameter)
				self[-1].pattern = "[\n<[{|}]"
			end,
			
			["["] = Parser.wikilink_block,
			["{"] = Parser.braces,
			
			-- End of the parameter: emit it, then open a sublayer for the
			-- next one.
			["|"] = function(self)
				emit_parameter(self)
				self:push_sublayer(handle_parameter)
				self[-1].pattern = "[\n<=[{|}]"
			end,
			
			-- "}}" closes the block; a lone "}" is just text.
			["}"] = function(self)
				if self:read(1) == "}" then
					emit_parameter(self)
					return self:pop()
				end
				self:emit("}")
			end,
			
			-- The empty string means the end of the text was reached without
			-- the block being closed.
			[""] = Parser.fail_route,
			[false] = Parser.emit
		})
		return handle_parameter(self, ...)
	end
	
	-- Route for a template/argument block: opens a sublayer for the name, in
	-- which "=" is not significant.
	function Parser:do_template_or_argument()
		self:push_sublayer(handle_name)
		local sublayer = self[-1]
		sublayer.pattern = "[\n<[{|}]"
	end
	
	-- Parses the run of opening braces at the head into nested
	-- templates/arguments, working from the innermost braces outwards, and
	-- emits the result. Returns the number of opening braces which were left
	-- unmatched.
	function Parser:template_or_argument()
		local text, head, node_to_emit = self.text, self.head
		-- Comments/tags interrupt the brace count.
		local braces = match(text, "^{+()", head) - head
		self:advance(braces)
		repeat
			local success, node = self:get("do_template_or_argument")
			if not success then
				-- No closure found, so the remaining braces are plain text.
				self:emit(rep("{", braces))
				break
			elseif node_to_emit then
				-- Nest the already-parsed node at the start of the new node.
				local node1 = node[1]
				node[1] = (
					node1 == "" and node_to_emit or
					Wikitext:new{node_to_emit, node1}
				)
			end
			-- "}}}" closes an argument if at least three opening braces
			-- remain; otherwise "}}" closes a template.
			if self:read(2) == "}" and braces > 2 then
				self:advance(3)
				braces = braces - 3
				node = Argument:new(node)
			else
				self:advance(2)
				braces = braces - 2
				node = Template:new(node)
			end
			-- Record where the node starts, and its raw text: the remaining
			-- `braces` opening braces come before it.
			local pos = head + braces
			node.pos = pos
			node.raw = sub(text, pos, self.head - 1)
			node_to_emit = node
			-- A single leftover brace can't open anything, so it's plain text
			-- which comes before the node.
			if braces == 1 then
				self:emit("{")
				break
			end
		until braces == 0
		if node_to_emit then
			self:emit(node_to_emit)
		end
		return braces
	end
end

-- Tag.
do
	local tags = data.tags
	
	-- Handlers.
	local handle_start
	local handle_tag
	
	-- Returns true if `this` (a lowercase tag name) is an inclusion tag which
	-- is disregarded in the current mode: includeonly when the text is being
	-- transcluded, noinclude and onlyinclude when it is not.
	local function is_ignored_tag(self, this)
		if not self.transcluded then
			return this == "noinclude" or this == "onlyinclude"
		end
		return this == "includeonly"
	end
	
	-- Skips over an ignored tag: moves the head to the tag's closing ">",
	-- marks the layer as ignored and pops it. Fails the route if there is no
	-- ">" at or after `head`.
	local function ignored_tag(self, text, head)
		local close = find(text, ">", head, true)
		if close then
			self.head = close
			self[-1].ignored = true
			return self:pop()
		end
		return self:fail_route()
	end
		
	-- Handles the first chunk after "<", which is either the tag name, "/"
	-- (a closing tag) or the empty string (end of text).
	function handle_start(self, this)
		if this == "/" then
			-- Closing tag: only valid here if it's for an ignored inclusion
			-- tag, in which case the tag is skipped over. A "/" after the
			-- name invalidates it, unless ">" comes straight after.
			local text, head = self.text, self.head + 1
			local this = match(text, "^[^%s/>]+", head)
			if this and is_ignored_tag(self, lower(this)) then
				head = head + #this
				if not match(text, "^/[^>]", head) then
					return ignored_tag(self, text, head)
				end
			end
			return self:fail_route()
		elseif this == "" then
			return self:fail_route()
		end
		-- Tag names are case-insensitive; fail if the name isn't listed in
		-- the tag data.
		this = lower(this)
		if not tags[this] then
			return self:fail_route()
		end
		local layer = self[-1]
		-- Position just after the tag name.
		local text, head = self.text, self.head + layer.step
		-- A "/" after the name is only valid if ">" comes straight after.
		if match(text, "^/[^>]", head) then
			return self:fail_route()
		elseif is_ignored_tag(self, this) then
			return ignored_tag(self, text, head)
		elseif this == "noinclude" or this == "includeonly" then
			layer.ignored = true -- Ignored block.
		end
		-- Hand over to handle_tag, which deals with everything up to ">".
		layer.name, layer.handler, layer.pattern = this, handle_tag, ">"
	end
	
	-- Handles the rest of an opening tag: any text before ">" is stored as
	-- the raw attribute string, and ">" itself triggers the search for the
	-- closing tag.
	function handle_tag(self, this)
		if this == "" then
			-- End of text before ">".
			return self:fail_route()
		elseif this ~= ">" then
			self[-1].attributes = this
			return
		elseif self:read(-1) == "/" then
			-- The character before ">" is "/", so the tag is self-closing.
			self[-1].self_closing = true
			return self:pop()
		end
		local text, head, layer = self.text, self.head + 1, self[-1]
		-- Find the closing tag, using the pattern stored for this tag name in
		-- the tag data.
		local loc1, loc2 = find(text, tags[layer.name], head)
		if loc1 then
			-- Emit any text between the opening and closing tags unparsed.
			if loc1 > head then
				self:emit(sub(text, head, loc1 - 1))
			end
			self.head = loc2
			return self:pop()
		-- Ignored blocks are implicitly closed by the end of the text.
		elseif layer.ignored then
			self.head = #self.text
			return self:pop()
		end
		return self:fail_route()
	end
	
	-- Route for a tag: installs the handler for the tag name, which ends at
	-- whitespace, "/" or ">", then moves past the opening "<".
	function Parser:do_tag()
		local layer = self[-1]
		layer.handler = handle_start
		layer.pattern = "[%s/>]"
		self:advance()
	end
	
	-- Returns the position of the last character of the first plain-text
	-- occurrence of `pattern` at or after `head`, or the length of the text if
	-- it doesn't occur.
	local function find_next_chunk(text, pattern, head)
		local _, stop = find(text, pattern, head, true)
		return stop or #text
	end
	
	-- Handles "<": skips an HTML comment, skips the text between a closing
	-- onlyinclude tag and the next opening one, or tries to parse a tag. If
	-- no tag can be parsed, "<" is emitted as plain text; ignored tags are
	-- dropped.
	function Parser:tag()
		-- HTML comment.
		if self:read(1, 3) == "!--" then
			self.head = find_next_chunk(self.text, "-->", self.head + 4)
			return
		end
		-- onlyinclude closing tag (whitespace intolerant).
		if self.onlyinclude and self:read(1, 13) == "/onlyinclude>" then
			self.head = find_next_chunk(self.text, "<onlyinclude>", self.head + 14)
			return
		end
		local success, tag = self:get("do_tag")
		if not success then
			self:emit("<")
		elseif not tag.ignored then
			self:emit(Tag:new(tag))
		end
	end
end

-- Heading.
-- The preparser assigns each heading a number, which is used for things like section edit links. The preparser will only do this for heading blocks which aren't nested inside templates, arguments and parser tags. In some cases (e.g. when template blocks contain untrimmed newlines), a preparsed heading may not be treated as a heading in the final output. That does not affect the preparser, however, which will always count sections based on the preparser heading count, since it can't know what a template's final output will be.
do
	-- Handlers.
	local handle_start
	local handle_body
	local handle_possible_end
	
	-- Handles the first chunk after the opening equals signs of a heading,
	-- which determines whether the line is a conventional heading or just a
	-- string of equals signs. On the first call, the handler is replaced by
	-- the value returned by self:switch for the table below.
	function handle_start(self, ...)
		-- ===== is "=" as an L2; ======== is "==" as an L3 etc.
		-- Called when the line contains nothing but equals signs (optionally
		-- followed by whitespace/comments), so the same run has to supply both
		-- the opening and closing signs. At least three are needed.
		local function newline(self)
			local layer = self[-1]
			local eq = layer.level
			if eq <= 2 then
				return self:fail_route()
			end
			-- Calculate which equals signs determine the heading level.
			-- (Subtracts 1 from an odd count and 2 from an even count, then
			-- caps the result at 12, i.e. level 6.)
			local level_eq = eq - (2 - eq % 2)
			level_eq = level_eq > 12 and 12 or level_eq
			-- Emit the excess.
			self:emit(rep("=", eq - level_eq))
			layer.level = level_eq / 2
			return self:pop()
		end
		
		-- Whitespace after the equals signs: if only whitespace and comments
		-- follow before the end of the line, it's treated like a newline;
		-- otherwise, the whitespace is part of the heading text.
		local function whitespace(self)
			local success, possible_end = self:get("do_heading_possible_end")
			if success then
				self:emit(Wikitext:new(possible_end))
				local layer = self[-1]
				layer.handler, layer.pattern = handle_body, "[\n<={]"
				return self:consume()
			end
			return newline(self)
		end
		
		handle_start = self:switch(handle_start, {
			["\t"] = whitespace,
			["\n"] = newline,
			[" "] = whitespace,
			[""] = newline,
			
			[false] = function(self)
				-- Emit any excess = signs once we know it's a conventional heading. Up till now, we couldn't know if the heading is just a string of = signs (e.g. ========), so it wasn't guaranteed that the heading text starts after the 6th.
				local layer = self[-1]
				local eq = layer.level
				if eq > 6 then
					self:emit(1, rep("=", eq - 6))
					layer.level = 6
				end
				-- Hand over to handle_body, and reprocess from the head.
				layer.handler, layer.pattern = handle_body, "[\n<=[{]"
				return self:consume()
			end
		})
		return handle_start(self, ...)
	end
	
	-- Handler for the text of a heading, once the opening equals signs have
	-- been dealt with. The heading fails if the line (or text) ends without a
	-- valid run of closing equals signs. On the first call, the handler is
	-- replaced by the value returned by self:switch for the table below.
	function handle_body(self, ...)
		handle_body = self:switch(handle_body, {
			["\n"] = Parser.fail_route,
			["<"] = Parser.tag,
			
			["="] = function(self)
				-- Comments/tags interrupt the equals count.
				local eq = match(self.text, "^=+", self.head)
				local eq_len = #eq
				self:advance(eq_len)
				-- If the possible-end route succeeds, the equals signs and
				-- what follows them are emitted as heading text, and
				-- parsing of the heading continues.
				local success, possible_end = self:get("do_heading_possible_end")
				if success then
					self:emit(eq)
					self:emit(Wikitext:new(possible_end))
					return self:consume()
				end
				-- Otherwise, these are the closing equals signs. The level is
				-- the smaller of the opening and closing counts, and any
				-- excess on either side becomes part of the heading text
				-- (at the end, or at position 1 respectively).
				local layer = self[-1]
				local level = layer.level
				if eq_len > level then
					self:emit(rep("=", eq_len - level))
				elseif level > eq_len then
					layer.level = eq_len
					self:emit(1, rep("=", level - eq_len))
				end
				return self:pop()
			end,
			
			["["] = Parser.wikilink_block,
			["{"] = Parser.braces,
			[""] = Parser.fail_route,
			[false] = Parser.emit
		})
		return handle_body(self, ...)
	end
	
	-- Handler which checks what follows a possible run of closing equals
	-- signs. The route fails if only tabs, spaces and comments come before the
	-- end of the line/text, and succeeds (pops) as soon as anything else is
	-- found. On the first call, the handler is replaced by the value returned
	-- by self:switch for the table below.
	function handle_possible_end(self, ...)
		handle_possible_end = self:switch(handle_possible_end, {
			["\n"] = Parser.fail_route,
			
			["<"] = function(self)
				-- If this is a comment which is closed, `head` is the position
				-- of the final character of its "-->"; otherwise, it's a false
				-- value.
				local head = (
					self:read(1, 3) == "!--" and
					select(2, find(self.text, "-->", self.head + 4, true))
				)
				if not head then
					return self:pop()
				end
				-- Skip the comment.
				self.head = head
			end,
			
			[""] = Parser.fail_route,
			
			[false] = function(self, this)
				-- Anything other than tabs and spaces ends the check.
				if not match(this, "^[\t ]+$") then
					return self:pop()
				end
				self:emit(this)
			end
		})
		return handle_possible_end(self, ...)
	end
	
	-- Route for a heading: records the start position, counts the opening
	-- equals signs (stored provisionally as the level) and moves past them.
	function Parser:do_heading()
		local layer = self[-1]
		local start = self.head
		layer.handler = handle_start
		layer.pattern = "[\t\n ]"
		layer.pos = start
		-- Comments/tags interrupt the equals count.
		local count = match(self.text, "^=+()", start) - start
		layer.level = count
		self:advance(count)
	end
	
	-- Route which checks the text after a possible run of closing equals
	-- signs; only newlines and "<" need special handling.
	function Parser:do_heading_possible_end()
		local layer = self[-1]
		layer.handler = handle_possible_end
		layer.pattern = "[\n<]"
	end
	
	-- Tries to parse a heading at the head. On success, the heading is given
	-- the next section number and emitted, and parsing carries on from the
	-- head; on failure, "=" is emitted as plain text.
	function Parser:heading()
		local success, heading = self:get("do_heading")
		if not success then
			self:emit("=")
			return
		end
		local section = self.section + 1
		heading.section = section
		self.section = section
		self:emit(Heading:new(heading))
		return self:consume()
	end
end

------------------------------------------------------------------------------------
--
-- Block handlers
--
------------------------------------------------------------------------------------

-- Block handlers.

-- These are blocks which can affect template/argument parsing, since they're also parsed by Parsoid at the same time (even though they aren't processed until later).

-- All blocks (including templates/arguments) can nest inside each other, but an inner block must be closed before the outer block which contains it. This is why, for example, the wikitext "{{template| [[ }}" will result in an unprocessed template, since the inner "[[" is treated as the opening of a wikilink block, which prevents "}}" from being treated as the closure of the template block. On the other hand, "{{template| [[ ]] }}" will process correctly, since the wikilink block is closed before the template closure. It makes no difference whether the block will be treated as valid or not when it's processed later on, so "{{template| [[ }} ]] }}" would also work, even though "[[ }} ]]" is not a valid wikilink.

-- Note that nesting also affects pipes and equals signs, in addition to block closures.

-- These blocks can be nested to any degree, so "{{template| [[ [[ [[ ]] }}" will not work, since only one of the three wikilink blocks has been closed. On the other hand, "{{template| [[ [[ [[ ]] ]] ]] }}" will work.

-- All blocks are implicitly closed by the end of the text, since their validity is irrelevant at this stage.

-- Language conversion block.
-- Opens with "-{" and closes with "}-". However, templates/arguments take priority, so "-{{" is parsed as "-" followed by the opening of a template/argument block (depending on what comes after).
-- Note: Language conversion blocks aren't actually enabled on the English Wiktionary, but Parsoid still parses them at this stage, so they can affect the closure of outer blocks: e.g. "[[ -{ ]]" is not a valid wikilink block, since the "]]" falls inside the new language conversion block.
do
	-- Handler for the inside of a language conversion block. On the first
	-- call, the handler is replaced by the value returned by self:switch for
	-- the table below, and the call is forwarded to the replacement.
	local function handle_language_conversion_block(self, ...)
		handle_language_conversion_block = self:switch(handle_language_conversion_block, {
			["\n"] = Parser.heading_block,
			["<"] = Parser.tag,
			["["] = Parser.wikilink_block,
			["{"] = Parser.braces,
			
			-- "}-" closes the block; a lone "}" is just text.
			["}"] = function(self)
				if self:read(1) == "-" then
					self:emit("}-")
					self:advance()
					return self:pop()
				end
				self:emit("}")
			end,
			
			-- The end of the text implicitly closes the block.
			[""] = Parser.pop,
			[false] = Parser.emit
		})
		return handle_language_conversion_block(self, ...)
	end
	
	-- Route for a language conversion block: installs its handler and the
	-- characters which need special handling inside it.
	function Parser:do_language_conversion_block()
		local layer = self[-1]
		layer.handler = handle_language_conversion_block
		layer.pattern = "[\n<[{}]"
	end
	
	-- Handles "{" inside a block: parses a template/argument if there are two
	-- or more braces, and opens a language conversion block if the braces
	-- were preceded by "-" and exactly one opening brace is left over.
	function Parser:braces()
		-- Check for "-" immediately before the first brace.
		local language_conversion_block = self:read(-1) == "-"
		if self:read(1) == "{" then
			-- template_or_argument returns the number of unmatched braces;
			-- unless there's exactly one after "-", carry on parsing from
			-- the head.
			local braces = self:template_or_argument()
			if not (braces == 1 and language_conversion_block) then
				return self:consume()
			end
		else
			-- Lone "{": plain text, unless it's the end of "-{".
			self:emit("{")
			if not language_conversion_block then
				return
			end
			self:advance()
		end
		self:emit(Wikitext:new(select(2, self:get("do_language_conversion_block"))))
	end
end

--[==[
Headings

Opens with "\n=" (or "=" at the start of the text), and closes with "\n" or the end of the text. Note that it doesn't matter whether the heading will fail to process due to a premature newline (e.g. if there are no closing signs), so at this stage the only thing that matters for closure is the newline or end of text.

Note: Heading blocks are only parsed like this if they occur inside a template, since they do not iterate the preparser's heading count (i.e. they aren't proper headings).

Note 2: if directly inside a template parameter with no previous equals signs, a newline followed by a single equals sign is parsed as a parameter equals sign, not the opening of a new L1 heading block. This does not apply to any other heading levels. As such, {{template|parameter\n=}}, {{template|key\n=value}} or even {{template|\n=}} will successfully close, but {{template|parameter\n==}}, {{template|key=value\n=more value}}, {{template\n=}} etc. will not, since in the latter cases the "}}" would fall inside the new heading block.
]==]
do
	-- Handler for the inside of a heading block. On the first call, the
	-- handler is replaced by the value returned by self:switch for the table
	-- below, and the call is forwarded to the replacement.
	local function handle_heading_block(self, ...)
		handle_heading_block = self:switch(handle_heading_block, {
			-- A newline closes the block.
			["\n"] = function(self)
				self:newline()
				return self:pop()
			end,
			
			["<"] = Parser.tag,
			["["] = Parser.wikilink_block,
			["{"] = Parser.braces,
			-- The end of the text implicitly closes the block.
			[""] = Parser.pop,
			[false] = Parser.emit
		})
		return handle_heading_block(self, ...)
	end
	
	-- Route for a heading block: installs its handler and the characters
	-- which need special handling inside it.
	function Parser:do_heading_block()
		local layer = self[-1]
		layer.handler = handle_heading_block
		layer.pattern = "[\n<[{]"
	end
	
	-- Handles a newline inside a block: emits it (after skipping any
	-- comment-only lines), then parses a heading block for as long as the
	-- head is at a newline which is directly followed by "=".
	function Parser:heading_block()
		self:newline()
		while self:read(0, 1) == "\n=" do
			-- Move on to the "=", then parse the block and emit its contents.
			self:advance()
			self:emit(Wikitext:new(select(2, self:get("do_heading_block"))))
		end
	end
end

-- Wikilink block.
-- Opens with "[[" and closes with "]]".
do
	-- Handler for the inside of a wikilink block. On the first call, the
	-- handler is replaced by the value returned by self:switch for the table
	-- below, and the call is forwarded to the replacement.
	local function handle_wikilink_block(self, ...)
		handle_wikilink_block = self:switch(handle_wikilink_block, {
			["\n"] = Parser.heading_block,
			["<"] = Parser.tag,
			["["] = Parser.wikilink_block,
			
			-- "]]" closes the block; a lone "]" is just text.
			["]"] = function(self)
				if self:read(1) == "]" then
					self:emit("]]")
					self:advance()
					return self:pop()
				end
				self:emit("]")
			end,
			
			["{"] = Parser.braces,
			-- The end of the text implicitly closes the block.
			[""] = Parser.pop,
			[false] = Parser.emit
		})
		return handle_wikilink_block(self, ...)
	end
	
	-- Route for a wikilink block: installs its handler and the characters
	-- which need special handling inside it.
	function Parser:do_wikilink_block()
		local layer = self[-1]
		layer.handler = handle_wikilink_block
		layer.pattern = "[\n<[%]{]"
	end
	
	-- Handles "[": a lone bracket is emitted as plain text, while "[[" is
	-- emitted and followed by the parsed contents of the wikilink block.
	function Parser:wikilink_block()
		if self:read(1) ~= "[" then
			self:emit("[")
			return
		end
		self:emit("[[")
		self:advance(2)
		self:emit(Wikitext:new(select(2, self:get("do_wikilink_block"))))
	end
end

-- Lines which only contain comments, " " and "\t" are eaten, so long as
-- they're bookended by "\n" (i.e. not the first or last line).
-- Emits a newline for the one at the head, after first moving the head over
-- any lines that follow which contain nothing but comments, spaces and tabs.
function Parser:newline()
	local text, head = self.text, self.head
	while true do
		-- Skip over consecutive comments (each optionally preceded by
		-- spaces/tabs), leaving `head` on the final character of the last
		-- closed comment.
		repeat
			local loc = match(text, "^[\t ]*<!%-%-()", head + 1)
			if not loc then
				break
			end
			-- Find the end of the comment; if it's unclosed, `head` is left
			-- where it was.
			loc = select(2, find(text, "-->", loc, true))
			head = loc or head
		until not loc
		-- Fail if no comments found.
		if head == self.head then
			break
		end
		-- The line is only eaten if nothing but spaces/tabs comes between the
		-- last comment and the next newline; `head` becomes the position of
		-- that newline.
		head = match(text, "^[\t ]*()\n", head + 1)
		if not head then
			break
		end
		-- Commit, then check the following line as well.
		self.head = head
	end
	self:emit("\n")
end

do
	-- Handlers.
	local handle_start
	local main_handler
	
	-- If the first character is "=", try parsing it as a heading.
	-- Handles the very first character of the text: the main handler is
	-- installed for everything that follows, then an initial "=" is tried as
	-- a heading, while anything else is reprocessed by the main handler.
	function handle_start(self, this)
		local layer = self[-1]
		layer.handler = main_handler
		layer.pattern = "[\n<{]"
		if this ~= "=" then
			return self:consume()
		end
		return self:heading()
	end
	
	-- Handler for the top level of the text. On the first call, the handler is
	-- replaced by the value returned by self:switch for the table below, and
	-- the call is forwarded to the replacement.
	function main_handler(self, ...)
		main_handler = self:switch(main_handler, {
			-- A newline followed by "=" might be the start of a heading.
			["\n"] = function(self)
				self:newline()
				if self:read(1) == "=" then
					self:advance()
					return self:heading()
				end
			end,
			
			["<"] = Parser.tag,
			
			-- Two or more braces might open a template/argument; a lone brace
			-- is plain text.
			["{"] = function(self)
				if self:read(1) == "{" then
					self:template_or_argument()
					return self:consume()
				end
				self:emit("{")
			end,
			
			-- End of the text.
			[""] = Parser.pop,
			[false] = Parser.emit
		})
		return main_handler(self, ...)
	end
	
	-- If `transcluded` is true, then the text is checked for a pair of
	-- onlyinclude tags. If these are found (even if they're in the wrong
	-- order), then the start of the page is treated as though it is preceded
	-- by a closing onlyinclude tag.
	-- Note 1: unlike other parser extension tags, onlyinclude tags are case-
	-- sensitive and cannot contain whitespace.
	-- Note 2: onlyinclude tags *can* be implicitly closed by the end of the
	-- text, but the hard requirement above means this can only happen if
	-- either the tags are in the wrong order or there are multiple onlyinclude
	-- blocks.
	-- Main route. Installs the handler for the first character and resets the
	-- section count. If `transcluded` is set and the text contains both a
	-- closing and an opening onlyinclude tag (in either order), parsing
	-- starts straight after the first opening tag.
	function Parser:do_parse(transcluded)
		local layer = self[-1]
		layer.handler = handle_start
		layer.pattern = "."
		self.section = 0
		if transcluded then
			self.transcluded = true
			local text = self.text
			-- Position of the first opening tag, so long as there's a closing
			-- tag somewhere in the text as well.
			local opening = find(text, "</onlyinclude>", 1, true) and
				find(text, "<onlyinclude>", 1, true)
			if opening then
				self.onlyinclude = true
				self.head = opening + 13
			end
		end
	end
	
	-- Parses `text` (a string, or a number which gets converted to one) and
	-- returns the second value returned by Parser:parse. `transcluded` is
	-- passed on to the do_parse route. Throws an error if `text` is any other
	-- type.
	function export.parse(text, transcluded)
		local text_type = type(text)
		if text_type == "number" then
			text = format("%.14g", text)
		elseif text_type ~= "string" then
			error("bad argument #1 (string expected, got " .. text_type .. ")")
		end
		local _, result = Parser:parse{
			text = text,
			node = {Wikitext, true},
			route = {"do_parse", transcluded}
		}
		return result
	end
	parse = export.parse
end

do
	local page_title = mw.title.getCurrentTitle()
	local namespace_has_subpages = mw.site.namespaces[page_title.namespace].hasSubpages
	local raw_pagename = page_title.fullText
	
	local parser_functions_s = data.parser_functions_case_sensitive
	local parser_functions_i = data.parser_functions_case_insensitive
	local parser_variables_s = data.parser_variables_case_sensitive
	local parser_variables_i = data.parser_variables_case_insensitive
	local transclusion_modifiers = data.transclusion_modifiers
	
	-- Mainspace titles starting with "#" should be invalid, but a bug in
	-- mw.title.new means a title object is returned that has the empty string
	-- for prefixedText, so we need to filter them out. Interwiki links aren't
	-- valid as templates, either.
	-- Checks that `title` exists, has a non-empty prefixedText and has no
	-- interwiki prefix. A false `title` is returned as-is; otherwise the
	-- result is a boolean.
	local function is_valid_title(title)
		if not title then
			return title
		elseif not (#title.prefixedText > 0) then
			return false
		end
		return #title.interwiki == 0
	end
	
	-- Splits a template-style name into any leading modifiers followed by a
	-- normalized parser function, parser variable or template name.
	--
	-- Parameters:
	--   name     (string) the raw name text, including any "xxx:" prefixes.
	--   has_args (any) if truthy, the name is never treated as a parser
	--            variable.
	--   fragment (any) if truthy, the fragment of the originally-named title
	--            is appended to the normalized template name after "#".
	--
	-- Returns:
	--   * parser function: `chunks`, plus the remainder of `name` after the
	--     colon that ended the function name;
	--   * parser variable or template: `chunks` only;
	--   * nil if a relative name can't be resolved or the title is invalid.
	-- `chunks` is an array: the upper-cased modifiers in order, followed by
	-- one final entry for the function / variable / template.
	function export.parseTemplateName(name, has_args, fragment)
		-- `n` counts the entries in `chunks`; `p` is the priority of the last
		-- accepted modifier (0 if none); `start` is where the unconsumed part
		-- of `name` begins.
		local chunks, colon, start, n, p = {}, find(name, ":", 1, true), 1, 0, 0
		while colon do
			-- Pattern is a PHP left-trim.
			-- Note that the chunk includes the trailing colon.
			local chunk = match(sub(name, start, colon), "[^%z\t-\v\r ].*") or ""
			local normalized = upper(chunk)
			local priority = transclusion_modifiers[normalized]
			-- A chunk is only accepted as a modifier if its priority is
			-- strictly higher than that of the previous one.
			if not (priority and priority > p) then
				local pf = parser_functions_s[chunk] or parser_functions_i[normalized]
				if pf then
					n = n + 1
					chunks[n] = normalized
					return chunks, sub(name, colon + 1)
				end
				-- Neither a modifier nor a parser function, so the colon is
				-- part of the name itself.
				break
			end
			n = n + 1
			chunks[n] = normalized
			start, p = colon + 1, priority
			colon = find(name, ":", start, true)
		end
		-- Drop any modifiers that were consumed above.
		if start > 1 then
			name = sub(name, start)
		end
		name = php_trim(name)
		-- Parser variables can only take SUBST:/SAFESUBST: as modifiers.
		if not has_args and p <= 1 then
			local pv = parser_variables_s[name] or parser_variables_i[upper(name)]
			if pv then
				n = n + 1
				chunks[n] = pv
				return chunks
			end
		end
		-- Handle relative template names.
		if namespace_has_subpages then
			-- If the name starts with "/", it's treated as a subpage of the
			-- current page. Final slashes are trimmed, but this can't affect
			-- the intervening slash (e.g. {{///}} refers to "{{PAGENAME}}/").
			if sub(name, 1, 1) == "/" then
				name = raw_pagename .. (match(name, "^/.*[^/]") or "/")
			-- If it starts with "../", trim it and any that follow, and go up
			-- that many subpage levels. Then, treat any additional text as
			-- a subpage of that page; final slashes are trimmed.
			elseif match(name, "^()%.%./") then
				-- `pos` is the index just past the run of leading "../"
				-- groups. (It is deliberately not called `n`, so that it
				-- can't be confused with the chunk counter above.)
				local pos = 4
				while sub(name, pos, pos + 2) == "../" do
					pos = pos + 3
				end
				-- Retain an initial "/".
				name = sub(name, pos - 1)
				-- Trim the relevant number of subpages from the pagename.
				-- The pagename is reversed so that slashes are found from
				-- the end; (pos - 1) / 3 is the number of "../" groups.
				local pagename, i = reverse(raw_pagename), 0
				for _ = 1, (pos - 1) / 3 do
					i = find(pagename, "/", i + 1, true)
					-- Fail if there aren't enough slashes.
					if not i then
						return nil
					end
				end
				-- Add the subpage text; since the intervening "/" is retained
				-- in `name`, it can be trimmed along with any other final
				-- slashes (e.g. {{..///}} refers to "{{BASEPAGENAME}}".)
				name = reverse(sub(pagename, i + 1)) .. (match(name, "^.*[^/]") or "")
			end
		end
		-- Namespace 10 is the default namespace for names without a prefix.
		local title = new_title(name, 10)
		if not is_valid_title(title) then
			return nil
		end
		-- If `fragment` is set, save the original title's fragment, since it
		-- won't carry through to any redirect targets.
		-- NOTE(review): if title.fragment is "" when the name has no
		-- fragment, a bare "#" is appended below - confirm that callers
		-- passing `fragment` expect this.
		if fragment then
			fragment = title.fragment
		end
		-- Resolve any redirects. Unlike links, double redirects seem to work
		-- (but not higher). If the redirect target is an interwiki link, then
		-- the template won't fail, but the redirect page itself gets
		-- transcluded (i.e. the template name shouldn't be normalized to the
		-- target).
		for _ = 1, 2 do
			local redirect = title.redirectTarget
			if not is_valid_title(redirect) then
				break
			end
			title = redirect
		end
		local namespace, chunk = title.namespace
		-- Normalize name.
		if namespace == 10 then
			-- If in the template namespace and it shares a name with a magic
			-- word, it needs the prefix "Template:".
			local text = title.text
			if parser_functions_s[text] or parser_variables_s[text] then
				chunk = title.prefixedText
			else
				local normalized = upper(text)
				if (
					parser_functions_i[normalized] or
					parser_variables_i[normalized] or
					transclusion_modifiers[normalized]
				) then
					chunk = title.prefixedText
				elseif not find(text, ":", 1, true) then
					chunk = text
				-- Also if the prefix is necessary for disambiguation (e.g.
				-- "Template:Category:Foo" can't be abbreviated to
				-- "Category:Foo").
				else
					-- Re-parse the unprefixed text; it may only be used if
					-- it resolves back to the same title.
					local check = new_title(text, 10)
					chunk = check and title_equals(title, check) and text or title.prefixedText
				end
			end
		-- If not in the template namespace, include the prefix (or ":" if
		-- mainspace).
		else
			chunk = namespace == 0 and ":" .. title.text or title.prefixedText
		end
		-- Set the fragment (if applicable).
		if fragment then
			chunk = chunk .. "#" .. fragment
		end
		chunks[n + 1] = chunk
		return chunks
	end
	parse_template_name = export.parseTemplateName
end

-- Parses `text` and, if the result is a single template node whose name can
-- be resolved, returns that name followed by whatever node:get_params()
-- returns. Otherwise returns nil, nil.
--
-- The text is parsed as transcluded unless `not_transcluded` is truthy.
function export.parseTemplate(text, not_transcluded)
	local node = parse(text, not not_transcluded)
	if type_or_class(node) ~= "template" then
		return nil, nil
	end
	local name = node:get_name()
	if not name then
		return nil, nil
	end
	return name, node:get_params()
end

do
	-- Iterator step for findTemplates. Pulls nodes from `iter` until it
	-- finds a template node whose get_name() is truthy, then returns the
	-- name, the result of get_params(), and the node's `raw` and `pos`
	-- fields. Returns four nils once `iter` is exhausted.
	local function next_template(iter)
		local node = iter()
		while node do
			if type_or_class(node) == "template" then
				local name = node:get_name()
				if name then
					return name, node:get_params(), node.raw, node.pos
				end
			end
			-- Not a template, or its name couldn't be resolved: skip it.
			node = iter()
		end
		return nil, nil, nil, nil
	end
	
	-- Returns an iterator function and its state, for use in a generic `for`
	-- loop; each step yields the values returned by next_template.
	--
	-- The text is parsed as transcluded unless `not_transcluded` is truthy.
	function export.findTemplates(text, not_transcluded)
		local tree = parse(text, not not_transcluded)
		-- The call is kept as the final expression so that every value
		-- returned by __pairs is passed on.
		return next_template, tree:__pairs("next_node")
	end
end

do
	-- Iterator step for findArguments. Pulls nodes from `iter` until it
	-- finds an argument node, then returns its name and default (both
	-- computed with `iter.args`) and the node's `raw` and `pos` fields.
	-- Returns four nils once `iter` is exhausted.
	local function next_argument(iter)
		local node = iter()
		while node do
			if type_or_class(node) == "argument" then
				local args = iter.args
				return node:get_name(args), node:get_default(args), node.raw, node.pos
			end
			node = iter()
		end
		return nil, nil, nil, nil
	end
	
	-- Returns an iterator function and its state, for use in a generic `for`
	-- loop; each step yields the values returned by next_argument. `args`
	-- is stored on the iterator state and handed to each argument node's
	-- get_name and get_default.
	--
	-- The text is parsed as transcluded unless `not_transcluded` is truthy.
	function export.findArguments(text, args, not_transcluded)
		local tree = parse(text, not not_transcluded)
		local iter = tree:__pairs("next_node")
		iter.args = args
		return next_argument, iter
	end
end

do
	-- Validates a heading level: it must be a number, and an integer from 1
	-- to 6 inclusive. Returns `level` unchanged, or raises an error.
	local function check_level(level)
		if type(level) ~= "number" then
			error("Heading levels must be numbers.")
		end
		local in_range = level >= 1 and level <= 6
		if not (in_range and level % 1 == 0) then
			error("Heading levels must be integers between 1 and 6.")
		end
		return level
	end
	
	-- Iterator step for findHeadings. Pulls nodes from `iter` until it finds
	-- a heading node whose level lies within [iter.i, iter.j] and whose name
	-- contains no newline, then returns the name, the level, and the node's
	-- `section` and `pos` fields. Returns four nils once `iter` is
	-- exhausted.
	local function next_heading(iter)
		local node = iter()
		while node do
			if type_or_class(node) == "heading" then
				local level = node.level
				if not (level < iter.i or level > iter.j) then
					local name = node:get_name()
					-- Headings with a newline in the name are skipped.
					if not find(name, "\n", 1, true) then
						return name, level, node.section, node.pos
					end
				end
			end
			node = iter()
		end
		return nil, nil, nil, nil
	end
	
	-- Returns an iterator function and its state, for use in a generic `for`
	-- loop; each step yields the values returned by next_heading. `i` and
	-- `j` are the optional minimum and maximum heading levels (defaulting to
	-- 1 and 6); if given, each must pass check_level.
	--
	-- Note: a heading name can contain "\n" (e.g. inside nowiki tags), and
	-- any heading that does so fails. Such a heading is not returned by this
	-- function, but the heading count is still iterated for it, because
	-- Parsoid's preprocessor still counts it as a heading for the purpose of
	-- heading strip markers (i.e. the section number).
	--
	-- TODO: section numbers for edit links seem to also include headings
	-- nested inside templates and arguments (but apparently not those in
	-- parser extension tags - need to test this more). If we ever want to add
	-- section edit links manually, this will need to be accounted for.
	function export.findHeadings(text, i, j)
		local iter = parse(text):__pairs("next_node")
		local lowest, highest = 1, 6
		if i then
			lowest = check_level(i)
		end
		if j then
			highest = check_level(j)
		end
		iter.i, iter.j = lowest, highest
		return next_heading, iter
	end
end

return export