Module:parser

From Linguifex
Jump to navigation Jump to search

Documentation for this module may be created at Module:parser/doc

local m_table

local concat = table.concat
local getmetatable = getmetatable
local insert = table.insert
local next = next
local rawget = rawget
local rawset = rawset
local remove = table.remove
local select = select
local setmetatable = setmetatable
local type = type
local unpack = unpack

local classes = {}
local metamethods = mw.loadData("Module:parser/data").metamethods

------------------------------------------------------------------------------------
--
-- Helper functions
--
------------------------------------------------------------------------------------

local function get_nested(a, b, ...)
	if not a then
		return nil
	elseif ... then
		return get_nested(a[b], ...)
	end
	return a[b]
end

local function set_nested(a, b, c, ...)
	if not (a and b) then
		return
	elseif c and ... then
		a[b] = a[b] or {}
		return set_nested(a[b], c, ...)
	end
	a[b] = c
end

local function inherit_metamethods(child, parent)
	if parent then
		for method, value in next, parent do
			if metamethods[method] then
				child[method] = value
			end
		end
	end
	return child
end

local function signed_index(t, n)
	return n and n <= 0 and #t + 1 + n or n
end

local function is_node(value)
	return classes[getmetatable(value)] and true or false
end

-- Recursively calling tostring() adds to the C stack (limit: 200), whereas calling __tostring metamethods directly does not. Occasionally relevant when dealing with very deep nesting.
local tostring
do
	local _tostring = _G.tostring
	
	function tostring(value)
		if is_node(value) then
			return value:__tostring(value)
		end
		return _tostring(value)
	end
end

local function type_or_class(value)
	return classes[getmetatable(value)] or type(value)
end

------------------------------------------------------------------------------------
--
-- Nodes
--
------------------------------------------------------------------------------------

local Node = {}
Node.__index = Node

function Node:next(i)
	i = i + 1
	return self[i], i
end

function Node:next_node(i)
	local v
	repeat
		v, i = self:next(i)
	until v == nil or is_node(v)
	return v, i
end

-- Implements recursive iteration over a node tree, using functors to maintain state (which uses a lot less memory than closures). Iterator1 exists only to return the calling node on the first iteration, while Iterator2 uses a stack to store the state of each layer in the tree.

-- When a node is encountered (which may contain other nodes), it is returned on the first iteration, and then any child nodes are returned on each subsequent iteration; the same process is followed if any of those children contain nodes themselves. Once a particular node has been fully traversed, the iterator moves back up one layer and continues with any sibling nodes.

-- Each iteration returns three values: `value`, `node` and `key`. Together, these can be used to manipulate the node tree at any given point without needing to know the full structure. Note that when the input node is returned on the first iteration, `node` and `key` will be nil.

-- By default, the iterator will use the `next` method of each node, but this can be changed with the `next_func` parameter, which accepts a string argument with the name of a next method. This is because trees might consist of several different classes of node, and each might have different next methods that are tailored to their particular structures. In addition, each class of node might have multiple different next methods, which can be named according to their purposes. `next_func` ensures that the iterator uses equivalent next methods between different types of node.

-- Currently, two next methods are available: `next`, which simply iterates over the node conventionally, and `next_node`, which only returns children that are themselves nodes. Custom next methods can be declared by any calling module.
do
	local Iterator1, Iterator2 = {}, {}
	Iterator1.__index = Iterator2 -- Not a typo.
	Iterator2.__index = Iterator2
	
	function Iterator1:__call()
		setmetatable(self, Iterator2)
		return self[1].node
	end
	
	function Iterator2:push(node)
		local layer = {
			k = 0,
			node = node
		}
		self[#self + 1] = layer
		self[-1] = layer
		return self
	end
	
	function Iterator2:pop()
		local len = #self
		self[len] = nil
		self[-1] = self[len - 1]
	end
	
	function Iterator2:catch_values(node, ...)
		local v, k = ...
		if v == nil then
			self:pop()
			if self[-1] then
				return self:__call()
			end
			return
		end
		self[-1].k = k
		if is_node(v) then
			self:push(v)
		end
		return v, node, select(2, ...)
	end
	
	function Iterator2:__call()
		local layer = self[-1]
		local node = layer.node
		return self:catch_values(node, node[self.next_func](node, layer.k))
	end
	
	function Node:__pairs(next_func)
		return setmetatable({
			next_func = next_func or "next"
		}, Iterator1):push(self)
	end
end

function Node:rawpairs()
	return next, self
end

function Node:__tostring()
	local output = {}
	for i = 1, #self do
		insert(output, tostring(self[i]))
	end
	return concat(output)
end

function Node:clone()
	return require("Module:table").deepcopy(self, "keep", true)
end

function Node:new_class(class)
	local t = inherit_metamethods({type = class}, self)
	t.__index = t
	classes[t] = class
	return setmetatable(t, self)
end

function Node:new(t)
	setmetatable(t, nil)
	t.handler = nil
	t.override = nil
	t.route = nil
	return setmetatable(t, self)
end

do
	local Proxy = {}

	function Proxy:__index(k)
		return Proxy[k] or self.__chars[k]
	end

	function Proxy:__newindex(k, v)
		local key = self.__keys[k]
		if key then
			self.__chars[k] = v
			self.__parents[key] = v
		elseif key == false then
			error("Character is immutable.")
		else
			error("Invalid key.")
		end
	end

	function Proxy:build(a, b, c)
		local len = self.__len + 1
		self.__chars[len] = a
		self.__parents[len] = b
		self.__keys[len] = c
		self.__len = len
	end

	function Proxy:iter(i)
		i = i + 1
		local char = self.__chars[i]
		if char then
			return i, self[i], self, self.__parents[i], self.__keys[i]
		end
	end
	
	function Node:new_proxy()
		return setmetatable({
			__node = self,
			__chars = {},
			__parents = {},
			__keys = {},
			__len = 0
		}, Proxy)
	end
end

------------------------------------------------------------------------------------
--
-- Parser
--
------------------------------------------------------------------------------------

local Parser = {}
Parser.__index = Parser

function Parser:read(delta)
	return self.text[self.head + (delta or 0)] or ""
end

function Parser:advance(n)
	self.head = self.head + (n or 1)
end

function Parser:layer(n)
	if n then
		return rawget(self, #self + n)
	end
	return self[-1]
end

function Parser:emit(a, b)
	local layer = self[-1]
	if b then
		insert(layer, signed_index(layer, a), b)
	else
		rawset(layer, #layer + 1, a)
	end
end

function Parser:emit_tokens(a, b)
	local layer = self[-1]
	if b then
		a = signed_index(layer, a)
		for i = 1, #b do
			insert(layer, a + i - 1, b[i])
		end
	else
		local len = #layer
		for i = 1, #a do
			len = len + 1
			rawset(layer, len, a[i])
		end
	end
end

function Parser:remove(n)
	local layer = self[-1]
	if n then
		return remove(layer, signed_index(layer, n))
	end
	local len = #layer
	local token = layer[len]
	layer[len] = nil
	return token
end

function Parser:replace(a, b)
	local layer = self[-1]
	layer[signed_index(layer, a)] = b
end

-- Unlike default table.concat, this respects __tostring metamethods.
function Parser:concat(a, b, c)
	if not a or a > 0 then
		return self:concat(0, a, b)
	end
	local layer = self:layer(a)
	local ret = {}
	for i = signed_index(layer, b) or 1, signed_index(layer, c) or #layer do
		insert(ret, tostring(layer[i]))
	end
	return concat(ret)
end

function Parser:emitted(delta)
	delta = delta or -1
	local i = 0
	while true do
		local layer = self:layer(i)
		if not layer then
			return nil
		end
		local layer_len = #layer
		if -delta <= layer_len then
			return rawget(layer, layer_len + delta + 1)
		end
		delta = delta + layer_len
		i = i - 1
	end
end

function Parser:push(route)
	local layer = {
		head = self.head,
		route = route
	}
	self[#self + 1] = layer
	self[-1] = layer
end

function Parser:push_sublayer(handler, inherit)
	local sublayer = {
		handler = handler,
		sublayer = true
	}
	if inherit then
		local layer = self[-1]
		setmetatable(sublayer, inherit_metamethods({
			__index = layer,
			__newindex = layer
		}, getmetatable(layer)))
	end
	self[#self + 1] = sublayer
	self[-1] = sublayer
end

function Parser:pop()
	local len, layer = #self
	while true do
		layer = self[len]
		self[len] = nil
		len = len - 1
		self[-1] = self[len] or self
		if not layer.sublayer then
			break
		end
		self:emit_tokens(layer)
	end
	return layer
end

function Parser:pop_sublayer()
	local len, layer = #self, self[-1]
	self[len] = nil
	self[-1] = self[len - 1] or self
	return setmetatable(layer, nil)
end

function Parser:get(route, ...)
	local failed_route = get_nested(self.failed_routes, self.head, route)
	if failed_route then
		return false, failed_route
	end
	self:push(route)
	local layer = self[route](self, ...)
	if layer == nil then
		layer = self:traverse()
	end
	if layer.fail then
		return false, layer
	end
	return true, layer
end

function Parser:consume(this, ...)
	local layer = self[-1]
	return (layer.override or layer.handler)(self, this or self:read(), ...)
end

function Parser:fail_route()
	local layer = self:pop()
	layer.fail = true
	set_nested(self, "failed_routes", layer.head, layer.route, layer)
	self.head = layer.head
	return layer
end

function Parser:traverse()
	while true do
		local layer = self:consume()
		if layer then
			return layer
		end
		self:advance()
	end
end

-- Converts a handler into a switch table the first time it's called, which avoids creating unnecessary objects, and prevents any scoping issues caused by parser methods being assigned to table keys before they've been declared.
-- false is used as the default key.
do
	local Switch = {}
	
	function Switch:__call(parser, this)
		return (self[this] or self[false])(parser, this)
	end
	
	function Parser:switch(func, t)
		local layer = self[-1]
		-- Point handler to the new switch table if the calling function is the current handler.
		if layer.handler == func then
			layer.handler = t
		end
		return setmetatable(t, Switch)
	end
end

-- Generate a new parser class object, which is used as the template for any parser objects. These should be customized with additional/modified methods as needed.
function Parser:new_class()
	local t = inherit_metamethods({}, self)
	t.__index = t
	return setmetatable(t, self)
end

-- Generate a new parser object, which is used for a specific parse.
function Parser:new(text)
	return setmetatable({
		text = text,
		head = 1
	}, self)
end

function Parser:parse(data)
	local parser = self:new(data.text)
	local success, tokens = parser:get(unpack(data.route))
	if #parser > 0 then
		-- This shouldn't happen.
		error("Parser exited with non-empty stack.")
	elseif success then
		local node = data.node
		return true, node[1]:new(tokens, unpack(node, 2)), parser
	elseif data.allow_fail then
		return false, nil, parser
	end
	error("Parser exited with bad route.")
end

local export = {}

export.is_node = is_node
export.tostring = tostring
export.type_or_class = type_or_class

function export.new()
	return Parser:new_class(), Node:new_class("node")
end

return export