Module:parse interface: Difference between revisions

Jump to navigation Jump to search
no edit summary
(Created page with "local export = {} local string_utilities_module = "Module:string utilities" local parse_utilities_module = "Module:parse utilities" local table_module = "Module:table" --[==[ Loaders for functions in other modules, which overwrite themselves with the target function when called. This ensures modules are only loaded when needed, retains the speed/convenience of locally-declared pre-loaded functions, and has no overhead after the first call, since the target functions ar...")
 
No edit summary
 
Line 5: Line 5:
local table_module = "Module:table"
local table_module = "Module:table"


--[==[
--[=[
Loaders for functions in other modules, which overwrite themselves with the target function when called. This ensures modules are only loaded when needed, retains the speed/convenience of locally-declared pre-loaded functions, and has no overhead after the first call, since the target functions are called directly in any subsequent calls.]==]
Loaders for functions in other modules, which overwrite themselves with the target function when called. This ensures
modules are only loaded when needed, retains the speed/convenience of locally-declared pre-loaded functions, and has no
overhead after the first call, since the target functions are called directly in any subsequent calls.
]=]
local function rfind(...)
local function rfind(...)
rfind = require(string_utilities_module).find
rfind = require(string_utilities_module).find
Line 30: Line 33:
parse_inline_modifiers = require(parse_utilities_module).parse_inline_modifiers
parse_inline_modifiers = require(parse_utilities_module).parse_inline_modifiers
return parse_inline_modifiers(...)
return parse_inline_modifiers(...)
end
local function parse_term_with_lang(...)
	-- Lazy loader: fetch the real function from the parse utilities module on first use, rebind this local to
	-- it so that later calls go straight to the target, then forward the current arguments.
	local target = require(parse_utilities_module).parse_term_with_lang
	parse_term_with_lang = target
	return target(...)
end
end


Line 35: Line 43:
term_contains_top_level_html = require(parse_utilities_module).term_contains_top_level_html
term_contains_top_level_html = require(parse_utilities_module).term_contains_top_level_html
return term_contains_top_level_html(...)
return term_contains_top_level_html(...)
end
local function escape_comma_whitespace(...)
	-- Lazy loader: fetch the real function from the parse utilities module on first use, rebind this local to
	-- it so that later calls go straight to the target, then forward the current arguments.
	local target = require(parse_utilities_module).escape_comma_whitespace
	escape_comma_whitespace = target
	return target(...)
end
local function unescape_comma_whitespace(...)
	-- Lazy loader: fetch the real function from the parse utilities module on first use, rebind this local to
	-- it so that later calls go straight to the target, then forward the current arguments.
	local target = require(parse_utilities_module).unescape_comma_whitespace
	unescape_comma_whitespace = target
	return target(...)
end
end


Line 51: Line 69:
str_decode_entities = require(string_utilities_module).decode_entities
str_decode_entities = require(string_utilities_module).decode_entities
return str_decode_entities(...)
return str_decode_entities(...)
end
--[==[
Near drop-in replacement for split_on_comma() in [[Module:parse utilities]]. It is optimized so that
[[Module:parse utilities]] is only loaded, and its whole algorithm only run, when the input actually requires it;
otherwise the value is split directly on commas.
]==]
function export.split_on_comma(val)
	-- A comma followed by whitespace is not a split point, and neither is a backslash-escaped comma or a comma
	-- inside square or angle brackets. The checks below detect whether any of those cases could be present:
	-- either ",<whitespace>" occurs, or a comma occurs together with a backslash, "[" or "<" anywhere in the value.
	local needs_full_algorithm = val:find(",%s")
	if not needs_full_algorithm and val:find(",") then
		needs_full_algorithm = val:find("[\\%[<]")
	end
	if not needs_full_algorithm then
		-- None of the special cases can apply, so a direct split on commas is safe and avoids loading
		-- [[Module:parse utilities]] unnecessarily.
		return rsplit(val, ",")
	end
	-- Defer to the more sophisticated algorithm in [[Module:parse utilities]].
	return split_on_comma(val)
end
--[==[
Similar to parse_term_with_lang() in [[Module:parse utilities]], except that a term containing no colon followed by a
non-space character is returned as-is rather than being parsed into link/display format. Callers that need the
link/display arguments even when there is no language prefix should call [[Module:parse utilities]] directly.
]==]
function export.parse_term_with_lang(data)
	local term = data.term
	if not term:find(":[^ ]") then
		-- No colon + non-space, so there is nothing to parse; hand the term back unchanged. The explicit trailing
		-- nils keep the number of returned values fixed at four (presumably to mirror the full parser's return
		-- arity -- confirm against [[Module:parse utilities]]).
		return term, nil, nil, nil
	end
	return parse_term_with_lang(data)
end
end


Line 92: Line 141:
end
end
local retval
local retval
if splitchar == "," and not escape_fun and not unescape_fun then
if splitchar == "," and escape_fun == nil and unescape_fun == nil then
if val:find(",<") then
if val:find(",</") then
-- This happens when there's an embedded {{,}} template, as in [[MMR]], [[TMA]], [[DEI]], where an
-- This happens when there's an embedded {{,}} template, as in [[MMR]], [[TMA]], [[DEI]], where an
-- initialism expands to multiple terms; easiest not to try and parse the lemma spec as multiple lemmas.
-- initialism expands to multiple terms; easiest not to try and parse the lemma spec as multiple lemmas.
retval = {val}
retval = {val}
elseif val:find(",%s") or (val:find(",") and val:find("[\\%[<]")) then
-- Comma after whitespace not split; nor are backslash-escaped commas or commas inside of square or
-- angle brackets. If we see any of these, use the more sophisticated algorithm in
-- [[Module:parse utilities]]. Otherwise it's safe to just split on commas directly. This optimization
-- avoids loading [[Module:parse utilities]] unnecessarily.
retval = split_on_comma(val)
else
else
retval = rsplit(val, ",")
retval = export.split_on_comma(val)
end
end
for i, split in ipairs(retval) do
for i, split in ipairs(retval) do
Line 113: Line 156:
end
end
elseif rfind(val, splitchar) then
elseif rfind(val, splitchar) then
if val:find(",<") then
if val:find(",</") then
-- This happens when there's an embedded {{,}} template, as in [[MMR]], [[TMA]], [[DEI]], where an
-- This happens when there's an embedded {{,}} template, as in [[MMR]], [[TMA]], [[DEI]], where an
-- initialism expands to multiple terms; easiest not to try and parse the lemma spec as multiple lemmas.
-- initialism expands to multiple terms; easiest not to try and parse the lemma spec as multiple lemmas.
retval = {val}
retval = {val}
elseif escape_fun or unescape_fun or val:find(",%s") or val:find("[\\%[<]") then
elseif escape_fun or unescape_fun or val:find(",%s") or val:find("[\\%[<]") then
retval = split_escaping(val, splitchar, preserve_splitchar, escape_fun, unescape_fun)
local defaulted_escape_fun, defaulted_unescape_fun
if escape_fun == nil then
defaulted_escape_fun = escape_comma_whitespace
end
if unescape_fun == nil then
defaulted_unescape_fun = unescape_comma_whitespace
end
retval = split_escaping(val, splitchar, preserve_splitchar, defaulted_escape_fun, defaulted_unescape_fun)
elseif preserve_splitchar then
elseif preserve_splitchar then
retval = rsplit(val, "(" .. splitchar .. ")")
retval = rsplit(val, "(" .. splitchar .. ")")

Navigation menu