Module:parameter utilities: Difference between revisions

Jump to navigation Jump to search
no edit summary
(Created page with "local export = {} local dump = mw.dumpObject local parameters_module = "Module:parameters" local parse_utilities_module = "Module:parse utilities" local table_module = "Module:table" local function track(page, track_module) return require("Module:debug/track")((track_module or "parameter utilities") .. "/" .. page) end -- Throw an error prefixed with the words "Internal error" (and suffixed with a dumped version of `spec`, if provided). -- This is for logic errors in...")
 
No edit summary
Line 1: Line 1:
local export = {}
local export = {}


local dump = mw.dumpObject
local parameters_module = "Module:parameters"
local parameters_module = "Module:parameters"
local parse_interface_module = "Module:parse interface"
local parse_utilities_module = "Module:parse utilities"
local parse_utilities_module = "Module:parse utilities"
local table_module = "Module:table"
local table_module = "Module:table"


local function track(page, track_module)
local dump = mw.dumpObject
return require("Module:debug/track")((track_module or "parameter utilities") .. "/" .. page)
 
--[==[
Loaders for functions in other modules, which overwrite themselves with the target function when called. This ensures modules are only loaded when needed, retains the speed/convenience of locally-declared pre-loaded functions, and has no overhead after the first call, since the target functions are called directly in any subsequent calls.]==]
 
-- Lazy loader for `length` in the module named by `table_module`. On the first call it loads that module,
-- rebinds the local `length` to the real function (so later calls skip this stub), and forwards all arguments.
local function length(...)
	local tbl = require(table_module)
	length = tbl.length
	return length(...)
end
 
-- Lazy loader for `listToSet` in the module named by `table_module`. On the first call it loads that module,
-- rebinds the local `list_to_set` to the real function (so later calls skip this stub), and forwards all arguments.
local function list_to_set(...)
	local tbl = require(table_module)
	list_to_set = tbl.listToSet
	return list_to_set(...)
end
 
-- Lazy loader for `parse_term_with_lang` in the module named by `parse_utilities_module`. On the first call it
-- loads that module, rebinds the local `parse_term_with_lang` to the real function (so later calls skip this
-- stub), and forwards all arguments.
local function parse_term_with_lang(...)
	local parse_utils = require(parse_utilities_module)
	parse_term_with_lang = parse_utils.parse_term_with_lang
	return parse_term_with_lang(...)
end
 
-- Lazy loader for `parse_inline_modifiers` in the module named by `parse_interface_module`. On the first call it
-- loads that module, rebinds the local `parse_inline_modifiers` to the real function (so later calls skip this
-- stub), and forwards all arguments.
local function parse_inline_modifiers(...)
	local parse_interface = require(parse_interface_module)
	parse_inline_modifiers = parse_interface.parse_inline_modifiers
	return parse_inline_modifiers(...)
end
 
-- Lazy loader for `process` in the module named by `parameters_module`. On the first call it loads that module,
-- rebinds the local `process_params` to the real function (so later calls skip this stub), and forwards all
-- arguments.
local function process_params(...)
	local params = require(parameters_module)
	process_params = params.process
	return process_params(...)
end
 
-- Lazy loader for `shallowCopy` in the module named by `table_module`. On the first call it loads that module,
-- rebinds the local `shallow_copy` to the real function (so later calls skip this stub), and forwards all
-- arguments.
local function shallow_copy(...)
	local tbl = require(table_module)
	shallow_copy = tbl.shallowCopy
	return shallow_copy(...)
end
end
----------------- end loaders ----------------


-- Throw an error prefixed with the words "Internal error" (and suffixed with a dumped version of `spec`, if provided).
-- Throw an error prefixed with the words "Internal error" (and suffixed with a dumped version of `spec`, if provided).
Line 272: Line 306:


local function merge_param_mod_settings(orig, additions)
local function merge_param_mod_settings(orig, additions)
local merged = require(table_module).shallowcopy(orig)
local merged = shallow_copy(orig)
for k, v in pairs(additions) do
for k, v in pairs(additions) do
merged[k] = v
merged[k] = v
Line 454: Line 488:
local include_set
local include_set
if spec.include then
if spec.include then
include_set = require(table_module).listToSet(spec.include)
include_set = list_to_set(spec.include)
end
end
local exclude_set
local exclude_set
if spec.exclude then
if spec.exclude then
exclude_set = require(table_module).listToSet(spec.exclude)
exclude_set = list_to_set(spec.exclude)
end
end
for _, group in ipairs(groups) do
for _, group in ipairs(groups) do
Line 549: Line 583:
for param_mod, param_mod_spec in pairs(param_mods) do
for param_mod, param_mod_spec in pairs(param_mods) do
local has_extra_specs = false
local has_extra_specs = false
for k, _ in pairs(param_mod_spec) do
for k in pairs(param_mod_spec) do
if not param_mod_spec_key_is_builtin(k) then
if not param_mod_spec_key_is_builtin(k) then
has_extra_specs = true
has_extra_specs = true
Line 586: Line 620:
local function fetch_argument(args, index_or_value)
local function fetch_argument(args, index_or_value)
if type(index_or_value) == "string" then
if type(index_or_value) == "string" then
local index_without_default = index_or_value:match("^(.*)%.default$")
if index_or_value:sub(-8) == ".default" then
if index_without_default then
local index_without_default = index_or_value:sub(1, -9)
local arg_obj = fetch_argument(args, index_without_default)
local arg_obj = fetch_argument(args, index_without_default)
if type(arg_obj) ~= "table" then
if type(arg_obj) ~= "table" then
Line 595: Line 629:
return arg_obj.default
return arg_obj.default
end
end
if index_or_value:find("^[0-9]+$") then
if index_or_value:match("^%d+$") then
index_or_value = tonumber(index_or_value)
index_or_value = tonumber(index_or_value)
end
end
Line 670: Line 704:
   if specified, it is used to supply the default for the `sc` property of returned items if not otherwise set (e.g. by
   if specified, it is used to supply the default for the `sc` property of returned items if not otherwise set (e.g. by
   the {{para|sc<var>N</var>}} parameter or `<sc:...>` inline modifier).
   the {{para|sc<var>N</var>}} parameter or `<sc:...>` inline modifier).
* `disallow_custom_separators`: If specified, disallow specifying a bare semicolon as an item value to indicate that the
* `disallow_custom_separators`: If specified, disallow specifying custom separators (semicolon, underscore, tilde; see
  item's previous separator should be a semicolon. By default, the previous separator of each item is considered to be
  the internal `special_separators` table) as an item value to override the default separator. By default, the previous
  an empty string (for the first item) and otherwise a comma + space, unless either the preceding item is a bare
  separator of each item is considered to be an empty string (for the first item) and otherwise the value of
  semicolon (which causes the following item's previous separator to be a semicolon + space) or an item has an embedded
  `default_separator` (normally a comma + space), unless either the preceding item is one of the values listed in
  comma in it (which causes ''all'' items other than the first to have their previous separator be a semicolon + space).
  `special_separators`, such as a bare semicolon (which causes the following item's previous separator to be a semicolon
  The previous separator of each item is set on the item's `separator` property. Bare semicolons do not count when
  + space) or an item has an embedded comma in it (which causes ''all'' items other than the first to have their
  indexing items using separate parameters. For example, the following is correct:
  previous separator be a semicolon + space). The previous separator of each item is set on the item's `separator`
  property. Bare semicolons and other separator arguments do not count when indexing items using separate parameters.
  For example, the following is correct:
   ** {{tl|template|lang|item 1|q1=qualifier 1|;|item 2|q2=qualifier 2}}
   ** {{tl|template|lang|item 1|q1=qualifier 1|;|item 2|q2=qualifier 2}}
   If `disallow_custom_separators` is specified, however, the `separator` property is not set and bare semicolons do not
   If `disallow_custom_separators` is specified, however, the `separator` property is not set and separator arguments are
  get any special treatment.
  not recognized.
* `default_separator`: Override the default separator (normally {", "}).
* `dont_skip_items`: Normally, items that are completely unspecified (have no term and no properties) are skipped and
* `dont_skip_items`: Normally, items that are completely unspecified (have no term and no properties) are skipped and
   not inserted into the returned list of items. (Such items cannot occur if `disallow_holes = true` is set on the term
   not inserted into the returned list of items. (Such items cannot occur if `disallow_holes = true` is set on the term
Line 698: Line 735:
   currently being processed). This is used, for example, in [[Module:alternative forms]], where an unspecified item
   currently being processed). This is used, for example, in [[Module:alternative forms]], where an unspecified item
   signals the end of items and the start of labels.
   signals the end of items and the start of labels.
* `splitchar` is a Lua pattern. If specified, each user-specified argument can consist of multiple delimiter-separated
  subitems, each of which may be followed by inline modifiers. In this case, each element in the returned list of items
  is no longer an object describing an item, but instead an object with a `terms` field, whose value is a list
  describing the subitems (whose format is the same as the normal format of an item in the top-level list when
  `splitchar` is not specified). Each subitem object will have a `delimiter` field holding the actual delimiter
  occurring before the subitem, which is useful in the case where `splitchar` matches multiple possible characters. In
  this case, it is possible to specify that a given modifier can only occur after the last subitem and effectively
  modifies the whole collection of subitems by setting `overall = true` on the modifier. In this case, the modifier's
  value will be stored in the top-level object (the object with the `terms` field specifying the subitems). Likewise,
  any modifiers specified in the form of separate parameters will be treated as overall; if you want them to apply to
  the subitems, it is your responsibility to set the subitem properties appropriately. Note that splitting on delimiters
  will not happen in certain protected sequences (by default comma+whitespace; see below). In addition, the algorithm to
  split on delimiters is sensitive to inline modifier syntax and will not be confused by delimiters inside of inline
  modifiers or inside of square brackets, which do not trigger splitting (whether or not contained within protected
  sequences). Note that when `splitchar` is set, the code always sets `preserve_splitchar` in the call to
  `parse_inline_modifiers()`, meaning that the delimiter preceding the subitems is always available on the `delimiter`
  key of the corresponding objects.
* `escape_fun` and `unescape_fun` are as in split_escaping() and split_alternating_runs_escaping() in
  [[Module:parse utilities]] and control the protected sequences that won't be split when `splitchar` is specified (see
  previous item). By default, `escape_comma_whitespace` and `unescape_comma_whitespace` are used, so that
  comma+whitespace sequences won't be split.


Two values are returned, the list of items and the processed `args` structure. In each returned item, there will be one
Two values are returned, the list of items and the processed `args` structure. In each returned item, there will be one
Line 739: Line 797:
internal_error("There must be a spec in `data.params` corresponding to `data.termarg`")
internal_error("There must be a spec in `data.params` corresponding to `data.termarg`")
end
end
if not termarg_spec.list then
if termarg_spec == true or not termarg_spec.list then
internal_error("Term spec in `data.params` must have `list` set", termarg_spec)
internal_error("Term spec in `data.params` must have `list` set", termarg_spec)
end
end
if not termarg_spec.allow_holes and not termarg_spec.disallow_holes then
if termarg_spec == true or not (termarg_spec.allow_holes or termarg_spec.disallow_holes) then
internal_error("Term spec in `data.params` must have either `allow_holes` or `disallow_holes` set", termarg_spec)
internal_error("Term spec in `data.params` must have either `allow_holes` or `disallow_holes` set", termarg_spec)
end
end
export.augment_params_with_modifiers(data.params, data.param_mods)
export.augment_params_with_modifiers(data.params, data.param_mods)
args = require(parameters_module).process(data.raw_args, data.params)
args = process_params(data.raw_args, data.params)
else
else
args = data.processed_args or data.args
args = data.processed_args or data.args
Line 764: Line 822:
local term_args = args[data.termarg]
local term_args = args[data.termarg]
-- As a special case, the term args might not have a `maxindex` field because they might have
-- As a special case, the term args might not have a `maxindex` field because they might have
-- been declared with `disallow_holes = true`, so fall back to the actual length of the list.
-- been declared with `disallow_holes = true`, so fall back to the actual length of the list
local maxmaxindex = term_args.maxindex or #term_args
-- using the length function, since # can be unpredictable with arbitrary tables.
for k, v in pairs(args) do
local maxmaxindex = term_args.maxindex or length(term_args)
for _, v in pairs(args) do
if type(v) == "table" and v.maxindex and v.maxindex > maxmaxindex then
if type(v) == "table" and v.maxindex and v.maxindex > maxmaxindex then
maxmaxindex = v.maxindex
maxmaxindex = v.maxindex
Line 773: Line 832:




local items = {}
local items, lang_cache, use_semicolon = {}, {}
local ind = 0
local lang = fetch_argument(args, data.lang)
local lang = fetch_argument(args, data.lang)
local sc = fetch_argument(args, data.sc)
local lang_cache = {}
if lang then
if lang then
lang_cache[lang:getCode()] = lang
lang_cache[lang:getCode()] = lang
end
end
local use_semicolon = false
local sc = fetch_argument(args, data.sc)
local term_dest = data.term_dest or "term"
local term_dest = data.term_dest or "term"


-- FIXME: this is vulnerable to abusive inputs like 1000000=.
local itemno = 0
local itemno = 0
for i = 1, maxmaxindex do
for i = 1, maxmaxindex do
Line 818: Line 875:
-- If any of the params used for formatting this term is present, create a term and add it to the list.
-- If any of the params used for formatting this term is present, create a term and add it to the list.
if not data.dont_skip_items and not any_param_at_index then
if not data.dont_skip_items and not any_param_at_index then
track("skipped-term", data.track_module)
else
else
if not term then
track("missing-term", data.track_module)
end
local termobj = {
local termobj = {
itemno = itemno,
itemno = itemno,
Line 840: Line 893:
end
end


local function generate_obj(term, parse_err)
-- Add 1 because first term index starts at 2.
local paramname = data.termarg + i - 1
 
local function generate_subobj(termobj, term, parse_err)
if data.parse_lang_prefix and term:find(":") then
if data.parse_lang_prefix and term:find(":") then
local actual_term, termlangs = require(parse_utilities_module).parse_term_with_lang {
local actual_term, termlangs = parse_term_with_lang {
term = term,
term = term,
parse_err = parse_err,
parse_err = parse_err,
Line 868: Line 924:
end
end


-- Check for inline modifier, e.g. מרים<tr:Miryem>. But exclude top-level HTML entry with <span ...>,
local function generate_obj(term, parse_err)
-- <br/> or similar in it, often caused by wrapping an argument in {{m|...}} or similar.
return generate_subobj(data.splitchar and {} or termobj, term, parse_err)
if term and term:find("<") and not require(parse_utilities_module).term_contains_top_level_html(term) then
end
require(parse_utilities_module).parse_inline_modifiers(term, {
 
-- Add 1 because first term index starts at 2.
if term then
paramname = data.termarg + i - 1,
parse_inline_modifiers(term, {
paramname = paramname,
param_mods = data.param_mods,
param_mods = data.param_mods,
generate_obj = generate_obj,
generate_obj = generate_obj,
splitchar = data.splitchar,
preserve_splitchar = true,
escape_fun = data.escape_fun,
unescape_fun = data.unescape_fun,
outer_container = data.splitchar and termobj or nil,
})
})
elseif term then
generate_obj(term)
end
end
-- Set these after parsing inline modifiers, not in generate_obj(), otherwise we'll get an error in
-- parse_inline_modifiers() if we try to use <lang:...> or <sc:...> as inline modifiers.
termobj.lang = termobj.lang or lang
termobj.sc = termobj.sc or sc


if not data.disallow_custom_separators then
local function postprocess_termobj(termobj)
-- If the displayed term (from .term/etc. or .alt) has an embedded comma, use a semicolon to join
-- Set these after parsing inline modifiers, not in generate_obj(), otherwise we'll get an error in
-- the terms.
-- parse_inline_modifiers() if we try to use <lang:...> or <sc:...> as inline modifiers.
local term_text = termobj[term_dest] or termobj.alt
termobj.lang = termobj.lang or lang
if not use_semicolon and term_text then
termobj.sc = termobj.sc or sc
if term_text:find(",", 1, true) then
 
if not data.disallow_custom_separators and not use_semicolon then
if data.splitchar and termobj.delimiter == "," then
use_semicolon = true
use_semicolon = true
else
-- If the displayed term (from .term/etc. or .alt) has an embedded comma, use a semicolon to
-- join the terms.
local term_text = termobj[term_dest] or termobj.alt
if term_text and term_text:find(",") then
use_semicolon = true
end
end
end
end
end
end
if data.splitchar then
for _, subobj in ipairs(termobj.terms) do
postprocess_termobj(subobj)
end
else
postprocess_termobj(termobj)
end
end


Line 902: Line 975:


if not data.disallow_custom_separators then
if not data.disallow_custom_separators then
-- Set the default separator of all those items for which a separator wasn't explicitly given to comma
-- Set the default separator of all those items for which a separator wasn't explicitly given to the default
-- (or semicolon if any items have embedded commas).
-- separator, defaulting to comma + space; but if any items have embedded commas, set the separator to
for i, item in ipairs(items) do
-- semicolon + space.
for _, item in ipairs(items) do
if not item.separator then
if not item.separator then
item.separator = use_semicolon and "; " or ", "
item.separator = use_semicolon and "; " or data.default_separator or ", "
end
end
end
end

Navigation menu