Module:parser
Jump to navigation
Jump to search
Documentation for this module may be created at Module:parser/doc
local m_table
local concat = table.concat
local getmetatable = getmetatable
local insert = table.insert
local next = next
local rawget = rawget
local rawset = rawset
local remove = table.remove
local select = select
local setmetatable = setmetatable
local type = type
local unpack = unpack
local classes = {}
local metamethods = mw.loadData("Module:parser/data").metamethods
------------------------------------------------------------------------------------
--
-- Helper functions
--
------------------------------------------------------------------------------------
local function get_nested(a, b, ...)
if not a then
return nil
elseif ... then
return get_nested(a[b], ...)
end
return a[b]
end
local function set_nested(a, b, c, ...)
if not (a and b) then
return
elseif c and ... then
a[b] = a[b] or {}
return set_nested(a[b], c, ...)
end
a[b] = c
end
local function inherit_metamethods(child, parent)
if parent then
for method, value in next, parent do
if metamethods[method] then
child[method] = value
end
end
end
return child
end
local function signed_index(t, n)
return n and n <= 0 and #t + 1 + n or n
end
local function is_node(value)
return classes[getmetatable(value)] and true or false
end
-- Recursively calling tostring() adds to the C stack (limit: 200), whereas calling __tostring metamethods directly does not. Occasionally relevant when dealing with very deep nesting.
local tostring
do
local _tostring = _G.tostring
function tostring(value)
if is_node(value) then
return value:__tostring(value)
end
return _tostring(value)
end
end
local function type_or_class(value)
return classes[getmetatable(value)] or type(value)
end
------------------------------------------------------------------------------------
--
-- Nodes
--
------------------------------------------------------------------------------------
local Node = {}
Node.__index = Node
function Node:next(i)
i = i + 1
return self[i], i
end
function Node:next_node(i)
local v
repeat
v, i = self:next(i)
until v == nil or is_node(v)
return v, i
end
-- Implements recursive iteration over a node tree, using functors to maintain state (which uses a lot less memory than closures). Iterator1 exists only to return the calling node on the first iteration, while Iterator2 uses a stack to store the state of each layer in the tree.
-- When a node is encountered (which may contain other nodes), it is returned on the first iteration, and then any child nodes are returned on each subsequent iteration; the same process is followed if any of those children contain nodes themselves. Once a particular node has been fully traversed, the iterator moves back up one layer and continues with any sibling nodes.
-- Each iteration returns three values: `value`, `node` and `key`. Together, these can be used to manipulate the node tree at any given point without needing to know the full structure. Note that when the input node is returned on the first iteration, `node` and `key` will be nil.
-- By default, the iterator will use the `next` method of each node, but this can be changed with the `next_func` parameter, which accepts a string argument with the name of a next method. This is because trees might consist of several different classes of node, and each might have different next methods that are tailored to their particular structures. In addition, each class of node might have multiple different next methods, which can be named according to their purposes. `next_func` ensures that the iterator uses equivalent next methods between different types of node.
-- Currently, two next methods are available: `next`, which simply iterates over the node conventionally, and `next_node`, which only returns children that are themselves nodes. Custom next methods can be declared by any calling module.
do
local Iterator1, Iterator2 = {}, {}
Iterator1.__index = Iterator2 -- Not a typo.
Iterator2.__index = Iterator2
function Iterator1:__call()
setmetatable(self, Iterator2)
return self[1].node
end
function Iterator2:push(node)
local layer = {
k = 0,
node = node
}
self[#self + 1] = layer
self[-1] = layer
return self
end
function Iterator2:pop()
local len = #self
self[len] = nil
self[-1] = self[len - 1]
end
function Iterator2:catch_values(node, ...)
local v, k = ...
if v == nil then
self:pop()
if self[-1] then
return self:__call()
end
return
end
self[-1].k = k
if is_node(v) then
self:push(v)
end
return v, node, select(2, ...)
end
function Iterator2:__call()
local layer = self[-1]
local node = layer.node
return self:catch_values(node, node[self.next_func](node, layer.k))
end
function Node:__pairs(next_func)
return setmetatable({
next_func = next_func or "next"
}, Iterator1):push(self)
end
end
function Node:rawpairs()
return next, self
end
function Node:__tostring()
local output = {}
for i = 1, #self do
insert(output, tostring(self[i]))
end
return concat(output)
end
function Node:clone()
return require("Module:table").deepcopy(self, "keep", true)
end
function Node:new_class(class)
local t = inherit_metamethods({type = class}, self)
t.__index = t
classes[t] = class
return setmetatable(t, self)
end
function Node:new(t)
setmetatable(t, nil)
t.handler = nil
t.override = nil
t.route = nil
return setmetatable(t, self)
end
do
local Proxy = {}
function Proxy:__index(k)
return Proxy[k] or self.__chars[k]
end
function Proxy:__newindex(k, v)
local key = self.__keys[k]
if key then
self.__chars[k] = v
self.__parents[key] = v
elseif key == false then
error("Character is immutable.")
else
error("Invalid key.")
end
end
function Proxy:build(a, b, c)
local len = self.__len + 1
self.__chars[len] = a
self.__parents[len] = b
self.__keys[len] = c
self.__len = len
end
function Proxy:iter(i)
i = i + 1
local char = self.__chars[i]
if char then
return i, self[i], self, self.__parents[i], self.__keys[i]
end
end
function Node:new_proxy()
return setmetatable({
__node = self,
__chars = {},
__parents = {},
__keys = {},
__len = 0
}, Proxy)
end
end
------------------------------------------------------------------------------------
--
-- Parser
--
------------------------------------------------------------------------------------
local Parser = {}
Parser.__index = Parser
function Parser:read(delta)
return self.text[self.head + (delta or 0)] or ""
end
function Parser:advance(n)
self.head = self.head + (n or 1)
end
function Parser:layer(n)
if n then
return rawget(self, #self + n)
end
return self[-1]
end
function Parser:emit(a, b)
local layer = self[-1]
if b then
insert(layer, signed_index(layer, a), b)
else
rawset(layer, #layer + 1, a)
end
end
function Parser:emit_tokens(a, b)
local layer = self[-1]
if b then
a = signed_index(layer, a)
for i = 1, #b do
insert(layer, a + i - 1, b[i])
end
else
local len = #layer
for i = 1, #a do
len = len + 1
rawset(layer, len, a[i])
end
end
end
function Parser:remove(n)
local layer = self[-1]
if n then
return remove(layer, signed_index(layer, n))
end
local len = #layer
local token = layer[len]
layer[len] = nil
return token
end
function Parser:replace(a, b)
local layer = self[-1]
layer[signed_index(layer, a)] = b
end
-- Unlike default table.concat, this respects __tostring metamethods.
function Parser:concat(a, b, c)
if not a or a > 0 then
return self:concat(0, a, b)
end
local layer = self:layer(a)
local ret = {}
for i = signed_index(layer, b) or 1, signed_index(layer, c) or #layer do
insert(ret, tostring(layer[i]))
end
return concat(ret)
end
function Parser:emitted(delta)
delta = delta or -1
local i = 0
while true do
local layer = self:layer(i)
if not layer then
return nil
end
local layer_len = #layer
if -delta <= layer_len then
return rawget(layer, layer_len + delta + 1)
end
delta = delta + layer_len
i = i - 1
end
end
function Parser:push(route)
local layer = {
head = self.head,
route = route
}
self[#self + 1] = layer
self[-1] = layer
end
function Parser:push_sublayer(handler, inherit)
local sublayer = {
handler = handler,
sublayer = true
}
if inherit then
local layer = self[-1]
setmetatable(sublayer, inherit_metamethods({
__index = layer,
__newindex = layer
}, getmetatable(layer)))
end
self[#self + 1] = sublayer
self[-1] = sublayer
end
function Parser:pop()
local len, layer = #self
while true do
layer = self[len]
self[len] = nil
len = len - 1
self[-1] = self[len] or self
if not layer.sublayer then
break
end
self:emit_tokens(layer)
end
return layer
end
function Parser:pop_sublayer()
local len, layer = #self, self[-1]
self[len] = nil
self[-1] = self[len - 1] or self
return setmetatable(layer, nil)
end
function Parser:get(route, ...)
local failed_route = get_nested(self.failed_routes, self.head, route)
if failed_route then
return false, failed_route
end
self:push(route)
local layer = self[route](self, ...)
if layer == nil then
layer = self:traverse()
end
if layer.fail then
return false, layer
end
return true, layer
end
function Parser:consume(this, ...)
local layer = self[-1]
return (layer.override or layer.handler)(self, this or self:read(), ...)
end
function Parser:fail_route()
local layer = self:pop()
layer.fail = true
set_nested(self, "failed_routes", layer.head, layer.route, layer)
self.head = layer.head
return layer
end
function Parser:traverse()
while true do
local layer = self:consume()
if layer then
return layer
end
self:advance()
end
end
-- Converts a handler into a switch table the first time it's called, which avoids creating unnecessary objects, and prevents any scoping issues caused by parser methods being assigned to table keys before they've been declared.
-- false is used as the default key.
do
local Switch = {}
function Switch:__call(parser, this)
return (self[this] or self[false])(parser, this)
end
function Parser:switch(func, t)
local layer = self[-1]
-- Point handler to the new switch table if the calling function is the current handler.
if layer.handler == func then
layer.handler = t
end
return setmetatable(t, Switch)
end
end
-- Generate a new parser class object, which is used as the template for any parser objects. These should be customized with additional/modified methods as needed.
function Parser:new_class()
local t = inherit_metamethods({}, self)
t.__index = t
return setmetatable(t, self)
end
-- Generate a new parser object, which is used for a specific parse.
function Parser:new(text)
return setmetatable({
text = text,
head = 1
}, self)
end
function Parser:parse(data)
local parser = self:new(data.text)
local success, tokens = parser:get(unpack(data.route))
if #parser > 0 then
-- This shouldn't happen.
error("Parser exited with non-empty stack.")
elseif success then
local node = data.node
return true, node[1]:new(tokens, unpack(node, 2)), parser
elseif data.allow_fail then
return false, nil, parser
end
error("Parser exited with bad route.")
end
local export = {}
export.is_node = is_node
export.tostring = tostring
export.type_or_class = type_or_class
function export.new()
return Parser:new_class(), Node:new_class("node")
end
return export