Module:parser: Difference between revisions
Created page with "local m_table local concat = table.concat local getmetatable = getmetatable local insert = table.insert local next = next local rawget = rawget local rawset = rawset local remove = table.remove local select = select local setmetatable = setmetatable local type = type local unpack = unpack local classes = {} local metamethods = mw.loadData("Module:parser/data").metamethods ------------------------------------------------------------------------------------ -- -- Helper..." |
m 1 revision imported |
||
| (2 intermediate revisions by 2 users not shown) | |||
| Line 1: | Line 1: | ||
local | local export = {} | ||
local scribunto_metamethods_module = "Module:Scribunto/metamethods" | |||
local table_deep_copy_module = "Module:table/deepCopy" | |||
local table_get_nested_module = "Module:table/getNested" | |||
local table_set_nested_module = "Module:table/setNested" | |||
local concat = table.concat | local concat = table.concat | ||
local find = string.find | |||
local getmetatable = getmetatable | local getmetatable = getmetatable | ||
local insert = table.insert | local insert = table.insert | ||
| Line 8: | Line 14: | ||
local rawset = rawset | local rawset = rawset | ||
local remove = table.remove | local remove = table.remove | ||
local require = require | |||
local select = select | local select = select | ||
local setmetatable = setmetatable | local setmetatable = setmetatable | ||
local sub = string.sub | |||
local type = type | local type = type | ||
local unpack = unpack | local unpack = unpack or table.unpack -- Lua 5.2 compatibility | ||
local node_classes = {} | |||
local | local function deep_copy(...) | ||
local | deep_copy = require(table_deep_copy_module) | ||
return deep_copy(...) | |||
end | |||
local function get_nested(...) | |||
get_nested = require(table_get_nested_module) | |||
return get_nested(...) | |||
end | |||
local function set_nested(...) | |||
set_nested = require(table_set_nested_module) | |||
return set_nested(...) | |||
end | |||
--[==[ | |||
Loaders for objects. Each loader loads data (or some other object) into a variable, which can then be accessed as "foo or get_foo()": the function get_foo assigns the object to "foo" and then returns it. This ensures objects are only loaded when needed, and avoids the need to check for the existence of the object each time, since once "foo" has been set, "get_foo" will not be called again.]==] | |||
local metamethods | |||
local function get_metamethods() | |||
-- Use require, since lookup times are much slower with mw.loadData. | |||
metamethods, get_metamethods = require(scribunto_metamethods_module), nil | |||
return metamethods | |||
end | |||
------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------ | ||
| Line 21: | Line 52: | ||
-- | -- | ||
------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------ | ||
local function inherit_metamethods(child, parent) | local function inherit_metamethods(child, parent) | ||
if parent then | if parent then | ||
for method, value in next, parent do | for method, value in next, parent do | ||
if metamethods[method] then | if child[method] == nil and (metamethods or get_metamethods())[method] ~= nil then | ||
child[method] = value | child[method] = value | ||
end | end | ||
| Line 57: | Line 69: | ||
local function is_node(value) | local function is_node(value) | ||
return | if value == nil then | ||
return false | |||
end | |||
local mt = getmetatable(value) | |||
return not (mt == nil or node_classes[mt] == nil) | |||
end | |||
local function node_class(value) | |||
if value == nil then | |||
return nil | |||
end | |||
local mt = getmetatable(value) | |||
if mt == nil then | |||
return nil | |||
end | |||
return mt ~= nil and node_classes[mt] or nil | |||
end | |||
local function class_else_type(value) | |||
if value == nil then | |||
return type(value) | |||
end | |||
local mt = getmetatable(value) | |||
if mt == nil then | |||
return type(value) | |||
end | |||
local class = node_classes[mt] | |||
return class == nil and type(value) or class | |||
end | end | ||
-- Recursively calling tostring() adds to the C stack (limit: 200), whereas calling __tostring metamethods directly does not. Occasionally relevant when dealing with very deep nesting. | -- Recursively calling tostring() adds to the C stack (limit: 200), whereas | ||
-- calling __tostring metamethods directly does not. Occasionally relevant when | |||
-- dealing with very deep nesting. | |||
local tostring | local tostring | ||
do | do | ||
| Line 71: | Line 112: | ||
return _tostring(value) | return _tostring(value) | ||
end | end | ||
end | end | ||
| Line 88: | Line 125: | ||
function Node:next(i) | function Node:next(i) | ||
i = i + 1 | i = i + 1 | ||
return self[i], i | return self[i], self, i | ||
end | end | ||
--[==[ | |||
Implements recursive iteration over a node tree. | |||
By default, when a node is encountered (which may contain other nodes), it is returned on the first iteration, and then any child nodes are returned on each subsequent iteration; the same process is then followed if any of those children contain nodes themselves. Once a particular node has been fully traversed, the iterator then continues with any sibling nodes. The iterator will use the `next` method of each node to traverse it, which may differ depending on the node class. | |||
Each iteration returns three values: `value`, `node` and `key`. Together, these can be used to manipulate the node tree at any given point without needing to know the full structure. Note that when the input node is returned on the first iteration, `node` and `key` will be nil. | |||
The optional argument `test` can be used to limit the return values. This should be a function that returns a boolean value, where a return value of true means that the child will be returned by the iterator. If a node is not returned by the iterator, it will still be traversed, as it may contain children that should be returned. | |||
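The method `iterate_nodes` is provided as a special instance of `iterate`: with no arguments it uses `is_node` as the test; if one or more classes are given, the test instead checks that the value's node class is one of them.]==] | |||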
function Node:iterate(test) | |||
local node, k, n, nodes, keys, returned_self = self, 0, 0 | |||
-- Special case if `test` is `is_node`. | |||
local is_node_is_test = test == is_node | |||
function | return function() | ||
if not returned_self then | |||
returned_self = true | |||
if test == nil or test(self) then | |||
return self | |||
return self | |||
end | end | ||
end | end | ||
-- Get `v`, which is the value at the last-returned key of the current node; if `v` is a node, it will be iterated over (i.e. recursive iteration). By default, `v` will be the last-returned value, but looking it up here means that any modifications made to the node during the loop will be taken into account. This makes it possible to swap one node out for something else (e.g. another node), or to remove it entirely, without being locked into recursively iterating over the old node; instead, the new node (if any) will be iterated over. This means node trees can be modified on-the-fly during the course of a single loop. | |||
if is_node(v) then | local v, node_check = node[k], true | ||
while true do | |||
-- If `v` is a node, memoize the current node and key, then iterate over it. | |||
if node_check and is_node(v) then | |||
-- `n` is the current memo level. | |||
n = n + 1 | |||
if nodes then | |||
nodes[n], keys[n] = node, k | |||
else | |||
nodes, keys = {node}, {k} | |||
end | |||
node, k = v, 0 | |||
end | |||
v, node, k = node:next(k) | |||
-- If `v` is nil, the current node is exhausted: drop down one memo level (i.e. return to the parent node and key memoized at level `n`) and continue iterating from there, or terminate the loop if there are no memo levels left. | |||
if v == nil then | |||
if n == 0 then | |||
return nil | |||
end | |||
node, k, n = nodes[n], keys[n], n - 1 | |||
elseif test == nil or test(v) then | |||
return v, node, k | |||
-- If `test` is `is_node`, there's no point checking it again on the next loop. | |||
elseif node_check and is_node_is_test then | |||
node_check = false | |||
end | |||
end | end | ||
end | end | ||
end | |||
function Node:iterate_nodes(...) | |||
local | local args_n = select("#", ...) | ||
return self: | if args_n == 0 then | ||
return self:iterate(is_node) | |||
elseif args_n == 1 then | |||
local class = ... | |||
return self:iterate(function(value) | |||
return node_class(value) == class | |||
end) | |||
end | end | ||
local classes = {} | |||
for i = 1, args_n do | |||
classes[select(i, ...)] = true | |||
end | end | ||
return self:iterate(function(value) | |||
return classes[node_class(value)] | |||
end) | |||
end | end | ||
| Line 176: | Line 209: | ||
function Node:clone() | function Node:clone() | ||
return | return deep_copy(self, "keep", true) | ||
end | end | ||
function Node:new_class(class) | function Node:new_class(class) | ||
local t = | local t = {type = class} | ||
t.__index = t | t.__index = t | ||
t = inherit_metamethods(t, self) | |||
node_classes[t] = class | |||
return setmetatable(t, self) | return setmetatable(t, self) | ||
end | end | ||
function Node:new(t) | function Node:new(t) | ||
rawset(t, "_parse_data", nil) | |||
return setmetatable(t, self) | return setmetatable(t, self) | ||
end | end | ||
| Line 198: | Line 229: | ||
function Proxy:__index(k) | function Proxy:__index(k) | ||
local v = Proxy[k] | |||
if v ~= nil then | |||
return v | |||
end | |||
return self.__chars[k] | |||
end | end | ||
| Line 224: | Line 259: | ||
i = i + 1 | i = i + 1 | ||
local char = self.__chars[i] | local char = self.__chars[i] | ||
if char then | if char ~= nil then | ||
return i, self[i], self, self.__parents[i], self.__keys[i] | return i, self[i], self, self.__parents[i], self.__keys[i] | ||
end | end | ||
| Line 238: | Line 273: | ||
}, Proxy) | }, Proxy) | ||
end | end | ||
end | |||
function export.node() | |||
return Node:new_class("node") | |||
end | end | ||
| Line 249: | Line 288: | ||
Parser.__index = Parser | Parser.__index = Parser | ||
function Parser: | function Parser:get_layer(n) | ||
if n ~= nil then | |||
if n then | |||
return rawget(self, #self + n) | return rawget(self, #self + n) | ||
end | end | ||
return self | return self.current_layer | ||
end | end | ||
function Parser:emit(a, b) | function Parser:emit(a, b) | ||
local layer = self | local layer = self.current_layer | ||
if b then | if b ~= nil then | ||
insert(layer, signed_index(layer, a), b) | insert(layer, signed_index(layer, a), b) | ||
else | else | ||
| Line 274: | Line 305: | ||
function Parser:emit_tokens(a, b) | function Parser:emit_tokens(a, b) | ||
local layer = self | local layer = self.current_layer | ||
if b then | if b ~= nil then | ||
a = signed_index(layer, a) | a = signed_index(layer, a) | ||
for i = 1, #b do | for i = 1, #b do | ||
| Line 290: | Line 321: | ||
function Parser:remove(n) | function Parser:remove(n) | ||
local layer = self | local layer = self.current_layer | ||
if n then | if n ~= nil then | ||
return remove(layer, signed_index(layer, n)) | return remove(layer, signed_index(layer, n)) | ||
end | end | ||
| Line 301: | Line 332: | ||
function Parser:replace(a, b) | function Parser:replace(a, b) | ||
local layer = self | local layer = self.current_layer | ||
layer[signed_index(layer, a)] = b | layer[signed_index(layer, a)] = b | ||
end | end | ||
| Line 307: | Line 338: | ||
-- Unlike default table.concat, this respects __tostring metamethods. | -- Unlike default table.concat, this respects __tostring metamethods. | ||
function Parser:concat(a, b, c) | function Parser:concat(a, b, c) | ||
if | if a == nil or a > 0 then | ||
return self:concat(0, a, b) | return self:concat(0, a, b) | ||
end | end | ||
local layer = self: | local layer, ret, n = self:get_layer(a), {}, 0 | ||
for i = b and signed_index(layer, b) or 1, c and signed_index(layer, c) or #layer do | |||
for i = signed_index(layer, b) or 1, signed_index(layer, c) or #layer do | n = n + 1 | ||
ret[n] = tostring(layer[i]) | |||
end | end | ||
return concat(ret) | return concat(ret) | ||
| Line 319: | Line 350: | ||
function Parser:emitted(delta) | function Parser:emitted(delta) | ||
delta = delta | if delta == nil then | ||
local i = 0 | delta = -1 | ||
end | |||
local get_layer, i = self.get_layer, 0 | |||
while true do | while true do | ||
local layer = self | local layer = get_layer(self, i) | ||
if | if layer == nil then | ||
return nil | return nil | ||
end | end | ||
| Line 336: | Line 369: | ||
function Parser:push(route) | function Parser:push(route) | ||
local layer = { | local layer = {_parse_data = { | ||
head = self.head, | head = self.head, | ||
route = route | route = route, | ||
} | }} | ||
self[#self + 1] = layer | self[#self + 1] = layer | ||
self | self.current_layer = layer | ||
end | end | ||
function Parser:push_sublayer(handler, inherit) | function Parser:push_sublayer(handler, inherit) | ||
local pdata = { | |||
handler = handler, | |||
sublayer = true, | |||
} | |||
local sublayer = { | local sublayer = { | ||
_parse_data = pdata, | |||
} | } | ||
if inherit then | if inherit then | ||
local | local layer_parse_data = self.current_layer._parse_data | ||
setmetatable( | setmetatable(pdata, inherit_metamethods({ | ||
__index = | __index = layer_parse_data, | ||
__newindex = | __newindex = layer_parse_data | ||
}, getmetatable( | }, getmetatable(layer_parse_data))) | ||
end | end | ||
self[#self + 1] = sublayer | self[#self + 1] = sublayer | ||
self | self.current_layer = sublayer | ||
end | end | ||
| Line 366: | Line 402: | ||
self[len] = nil | self[len] = nil | ||
len = len - 1 | len = len - 1 | ||
self | if len == 0 then | ||
self.current_layer = self | |||
break | |||
elseif layer._parse_data.sublayer == nil then | |||
self.current_layer = self[len] | |||
break | break | ||
end | end | ||
self:emit_tokens(layer) | self:emit_tokens(layer) | ||
end | end | ||
return layer | return setmetatable(layer, nil) | ||
end | end | ||
function Parser:pop_sublayer() | function Parser:pop_sublayer() | ||
local len, layer = #self, self | local len, layer = #self, self.current_layer | ||
self[len] = nil | self[len] = nil | ||
self | self.current_layer = len == 1 and self or self[len - 1] | ||
return setmetatable(layer, nil) | return setmetatable(layer, nil) | ||
end | end | ||
function Parser:get(route, ...) | function Parser:get(route, ...) | ||
self:push(route) | self:push(route) | ||
local layer = | local layer = route(self, ...) | ||
if layer == nil then | if layer == nil then | ||
layer = self:traverse() | layer = self:traverse() | ||
end | end | ||
return layer | |||
end | end | ||
function Parser: | function Parser:try(route, ...) | ||
local | local failed_routes = self.failed_routes | ||
if failed_routes ~= nil then | |||
local failed_layer = get_nested(failed_routes, route, self.head) | |||
if failed_layer ~= nil then | |||
return false, failed_layer | |||
end | |||
end | |||
local layer = self:get(route, ...) | |||
return not layer._parse_data.fail, layer | |||
end | end | ||
function Parser:fail_route() | function Parser:fail_route() | ||
local layer = self:pop() | local layer = self:pop() | ||
layer.fail = true | local pdata = layer._parse_data | ||
set_nested(self, "failed_routes", | pdata.fail = true | ||
self.head = | local layer_head = pdata.head | ||
set_nested(self, layer, "failed_routes", pdata.route, layer_head) | |||
self.head = layer_head | |||
return layer | return layer | ||
end | end | ||
function Parser:traverse() | function Parser:traverse() | ||
local consume, advance = self.consume, self.advance | |||
while true do | while true do | ||
local layer = | local layer = consume(self) | ||
if layer then | if layer ~= nil then | ||
return layer | return layer | ||
end | end | ||
advance(self) | |||
end | end | ||
end | end | ||
| Line 431: | Line 473: | ||
function Parser:switch(func, t) | function Parser:switch(func, t) | ||
local | local pdata = self.current_layer._parse_data | ||
-- Point handler to the new switch table if the calling function is the current handler. | -- Point handler to the new switch table if the calling function is the current handler. | ||
if | if pdata.handler == func then | ||
pdata.handler = t | |||
end | end | ||
return setmetatable(t, Switch) | return setmetatable(t, Switch) | ||
| Line 442: | Line 484: | ||
-- Generate a new parser class object, which is used as the template for any parser objects. These should be customized with additional/modified methods as needed. | -- Generate a new parser class object, which is used as the template for any parser objects. These should be customized with additional/modified methods as needed. | ||
function Parser:new_class() | function Parser:new_class() | ||
local t = | local t = {} | ||
t.__index = t | t.__index = t | ||
return setmetatable(t, self) | return setmetatable(inherit_metamethods(t, self), self) | ||
end | end | ||
| Line 457: | Line 499: | ||
function Parser:parse(data) | function Parser:parse(data) | ||
local parser = self:new(data.text) | local parser = self:new(data.text) | ||
local success, tokens = parser: | local success, tokens = parser:try(unpack(data.route)) | ||
if #parser > 0 then | if #parser > 0 then | ||
-- This shouldn't happen. | -- This shouldn't happen. | ||
| Line 467: | Line 509: | ||
return false, nil, parser | return false, nil, parser | ||
end | end | ||
error("Parser exited with | error("Parser exited with failed route.") | ||
end | end | ||
export.class_else_type = class_else_type | |||
export.is_node = is_node | export.is_node = is_node | ||
export.tostring = tostring | export.tostring = tostring | ||
function export. | local ArrayParser = Parser:new_class() | ||
return Parser:new_class(), | |||
function ArrayParser:read(delta) | |||
local v = self.text[self.head + (delta or 0)] | |||
return v == nil and "" or v | |||
end | |||
function ArrayParser:advance(n) | |||
self.head = self.head + (n == nil and 1 or n) | |||
end | |||
function ArrayParser:jump(head) | |||
self.head = head | |||
end | |||
function ArrayParser:consume(this, ...) | |||
if this == nil then | |||
this = self:read() | |||
end | |||
local pdata = self.current_layer._parse_data | |||
return pdata.handler(self, this, ...) | |||
end | |||
function export.array_parser() | |||
return ArrayParser:new_class() | |||
end | |||
local StringParser = Parser:new_class() | |||
function StringParser:read(i, j) | |||
local head, i = self.head, i or 0 | |||
return sub(self.text, head + i, head + (j or i)) | |||
end | |||
function StringParser:advance(n) | |||
self.head = self.head + (n or self.current_layer._parse_data.step or 1) | |||
end | |||
function StringParser:jump(head) | |||
local pdata = self.current_layer._parse_data | |||
self.head, pdata.next, pdata.next_len = head, nil, nil | |||
end | |||
-- If `ignore_nonmatch` is set, any non-match segment before the match will be ignored. | |||
function StringParser:set_pattern(pattern, ignore_nonmatch) | |||
local pdata = self.current_layer._parse_data | |||
pdata.pattern, pdata.next, pdata.next_len = "(" .. pattern .. ")", nil, nil | |||
if ignore_nonmatch then | |||
pdata.ignore_nonmatch = true | |||
end | |||
end | |||
function StringParser:consume() | |||
local pdata = self.current_layer._parse_data | |||
local this = pdata.next | |||
-- Use `next` and `next_len` from the previous iteration, if available. | |||
if this then | |||
pdata.step, pdata.next, pdata.next_len = pdata.next_len, nil, nil | |||
return pdata.handler(self, this) | |||
end | |||
local text, head, loc1, loc2 = self.text, self.head | |||
loc1, loc2, this = find(text, pdata.pattern, head) | |||
-- If there was no match, default to find(text, "$", head), with `this` as | |||
-- the empty string. | |||
if not loc1 then | |||
this, loc1 = "", #text + 1 | |||
loc2 = loc1 - 1 -- zero-length matches cause loc2 to be less than loc1 | |||
end | |||
-- If `this` is at `head`, consume it. | |||
if loc1 == head then | |||
pdata.step = loc2 - loc1 + 1 | |||
-- If `ignore_nonmatch` is set, ignore everything before `this`, then | |||
-- consume it. | |||
elseif pdata.ignore_nonmatch then | |||
self.head, pdata.step = loc1, loc2 - loc1 + 1 | |||
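-- Otherwise, consume everything before `this`, and memoize the match and | |||
-- match length for the next call; the length is stored separately, since it | |||
-- cannot always be derived from `next`: it is 0 when `next` is an index rather | |||
-- than a string (e.g. if the pattern is the empty string, so that the wrapped | |||
-- pattern "()" is a position capture). | |||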
else | |||
this, pdata.step, pdata.next, pdata.next_len = sub(text, head, loc1 - 1), loc1 - head, this, loc2 - loc1 + 1 | |||
end | |||
return pdata.handler(self, this) | |||
end | |||
function export.string_parser() | |||
return StringParser:new_class() | |||
end | end | ||
return export | return export | ||