48,404
edits
(Created page with "local export = {} local dump = mw.dumpObject local parameters_module = "Module:parameters" local parse_utilities_module = "Module:parse utilities" local table_module = "Module:table" local function track(page, track_module) return require("Module:debug/track")((track_module or "parameter utilities") .. "/" .. page) end -- Throw an error prefixed with the words "Internal error" (and suffixed with a dumped version of `spec`, if provided). -- This is for logic errors in...") |
No edit summary |
||
| Line 1: | Line 1: | ||
local export = {} | local export = {} | ||
local parameters_module = "Module:parameters" | local parameters_module = "Module:parameters" | ||
local parse_interface_module = "Module:parse interface" | |||
local parse_utilities_module = "Module:parse utilities" | local parse_utilities_module = "Module:parse utilities" | ||
local table_module = "Module:table" | local table_module = "Module:table" | ||
local function | local dump = mw.dumpObject | ||
return require( | |||
--[==[ | |||
Loaders for functions in other modules, which overwrite themselves with the target function when called. This ensures modules are only loaded when needed, retains the speed/convenience of locally-declared pre-loaded functions, and has no overhead after the first call, since the target functions are called directly in any subsequent calls.]==] | |||
local function length(...) | |||
length = require(table_module).length | |||
return length(...) | |||
end | |||
local function list_to_set(...) | |||
list_to_set = require(table_module).listToSet | |||
return list_to_set(...) | |||
end | |||
local function parse_term_with_lang(...) | |||
parse_term_with_lang = require(parse_utilities_module).parse_term_with_lang | |||
return parse_term_with_lang(...) | |||
end | |||
local function parse_inline_modifiers(...) | |||
parse_inline_modifiers = require(parse_interface_module).parse_inline_modifiers | |||
return parse_inline_modifiers(...) | |||
end | |||
local function process_params(...) | |||
process_params = require(parameters_module).process | |||
return process_params(...) | |||
end | |||
local function shallow_copy(...) | |||
shallow_copy = require(table_module).shallowCopy | |||
return shallow_copy(...) | |||
end | end | ||
----------------- end loaders ---------------- | |||
-- Throw an error prefixed with the words "Internal error" (and suffixed with a dumped version of `spec`, if provided). | -- Throw an error prefixed with the words "Internal error" (and suffixed with a dumped version of `spec`, if provided). | ||
| Line 272: | Line 306: | ||
local function merge_param_mod_settings(orig, additions) | local function merge_param_mod_settings(orig, additions) | ||
local merged = | local merged = shallow_copy(orig) | ||
for k, v in pairs(additions) do | for k, v in pairs(additions) do | ||
merged[k] = v | merged[k] = v | ||
| Line 454: | Line 488: | ||
local include_set | local include_set | ||
if spec.include then | if spec.include then | ||
include_set = | include_set = list_to_set(spec.include) | ||
end | end | ||
local exclude_set | local exclude_set | ||
if spec.exclude then | if spec.exclude then | ||
exclude_set = | exclude_set = list_to_set(spec.exclude) | ||
end | end | ||
for _, group in ipairs(groups) do | for _, group in ipairs(groups) do | ||
| Line 549: | Line 583: | ||
for param_mod, param_mod_spec in pairs(param_mods) do | for param_mod, param_mod_spec in pairs(param_mods) do | ||
local has_extra_specs = false | local has_extra_specs = false | ||
for k | for k in pairs(param_mod_spec) do | ||
if not param_mod_spec_key_is_builtin(k) then | if not param_mod_spec_key_is_builtin(k) then | ||
has_extra_specs = true | has_extra_specs = true | ||
| Line 586: | Line 620: | ||
local function fetch_argument(args, index_or_value) | local function fetch_argument(args, index_or_value) | ||
if type(index_or_value) == "string" then | if type(index_or_value) == "string" then | ||
if index_or_value:sub(-8) == ".default" then | |||
local index_without_default = index_or_value:sub(1, -9) | |||
local arg_obj = fetch_argument(args, index_without_default) | local arg_obj = fetch_argument(args, index_without_default) | ||
if type(arg_obj) ~= "table" then | if type(arg_obj) ~= "table" then | ||
| Line 595: | Line 629: | ||
return arg_obj.default | return arg_obj.default | ||
end | end | ||
if index_or_value: | if index_or_value:match("^%d+$") then | ||
index_or_value = tonumber(index_or_value) | index_or_value = tonumber(index_or_value) | ||
end | end | ||
| Line 670: | Line 704: | ||
if specified, it is used to supply the default for the `sc` property of returned items if not otherwise set (e.g. by | if specified, it is used to supply the default for the `sc` property of returned items if not otherwise set (e.g. by | ||
the {{para|sc<var>N</var>}} parameter or `<sc:...>` inline modifier). | the {{para|sc<var>N</var>}} parameter or `<sc:...>` inline modifier). | ||
* `disallow_custom_separators`: If specified, disallow specifying | * `disallow_custom_separators`: If specified, disallow specifying custom separators (semicolon, underscore, tilde; see | ||
the internal `special_separators` table) as an item value to override the default separator. By default, the previous | |||
separator of each item is considered to be an empty string (for the first item) and otherwise the value of | |||
`default_separator` (normally a comma + space), unless either the preceding item is one of the values listed in | |||
`special_separators`, such as a bare semicolon (which causes the following item's previous separator to be a semicolon | |||
+ space) or an item has an embedded comma in it (which causes ''all'' items other than the first to have their | |||
previous separator be a semicolon + space). The previous separator of each item is set on the item's `separator` | |||
property. Bare semicolons and other separator arguments do not count when indexing items using separate parameters. | |||
For example, the following is correct: | |||
** {{tl|template|lang|item 1|q1=qualifier 1|;|item 2|q2=qualifier 2}} | ** {{tl|template|lang|item 1|q1=qualifier 1|;|item 2|q2=qualifier 2}} | ||
If `disallow_custom_separators` is specified, however, the `separator` property is not set and | If `disallow_custom_separators` is specified, however, the `separator` property is not set and separator arguments are | ||
not recognized. | |||
* `default_separator`: Override the default separator (normally {", "}). | |||
* `dont_skip_items`: Normally, items that are completely unspecified (have no term and no properties) are skipped and | * `dont_skip_items`: Normally, items that are completely unspecified (have no term and no properties) are skipped and | ||
not inserted into the returned list of items. (Such items cannot occur if `disallow_holes = true` is set on the term | not inserted into the returned list of items. (Such items cannot occur if `disallow_holes = true` is set on the term | ||
| Line 698: | Line 735: | ||
currently being processed). This is used, for example, in [[Module:alternative forms]], where an unspecified item | currently being processed). This is used, for example, in [[Module:alternative forms]], where an unspecified item | ||
signals the end of items and the start of labels. | signals the end of items and the start of labels. | ||
* `splitchar` is a Lua pattern. If specified, each user-specified argument can consist of multiple delimiter-separated | |||
subitems, each of which may be followed by inline modifiers. In this case, each element in the returned list of items | |||
is no longer an object describing an item, but instead an object with a `terms` field, whose value is a list | |||
describing the subitems (whose format is the same as the normal format of an item in the top-level list when | |||
`splitchar` is not specified). Each subitem object will have a `delimiter` field holding the actual delimiter | |||
occurring before the subitem, which is useful in the case where `splitchar` matches multiple possible characters. In | |||
this case, it is possible to specify that a given modifier can only occur after the last subitem and effectively | |||
modifies the whole collection of subitems by setting `overall = true` on the modifier. In this case, the modifier's | |||
value will be stored in the top-level object (the object with the `terms` field specifying the subitems). Likewise, | |||
any modifiers specified in the form of separate parameters will be treated as overall; if you want them to apply to | |||
the subitems, it is your responsibility to set the subitem properties appropriately. Note that splitting on delimiters | |||
will not happen in certain protected sequences (by default comma+whitespace; see below). In addition, the algorithm to | |||
split on delimiters is sensitive to inline modifier syntax and will not be confused by delimiters inside of inline | |||
modifiers or inside of square brackets, which do not trigger splitting (whether or not contained within protected | |||
sequences). Note that when `splitchar` is set, the code always sets `preserve_splitchar` in the call to | |||
`parse_inline_modifiers()`, meaning that the delimiter preceding the subitems is always available on the `delimiter` | |||
key of the corresponding objects. | |||
* `escape_fun` and `unescape_fun` are as in split_escaping() and split_alternating_runs_escaping() in | |||
[[Module:parse utilities]] and control the protected sequences that won't be split when `splitchar` is specified (see | |||
previous item). By default, `escape_comma_whitespace` and `unescape_comma_whitespace` are used, so that | |||
comma+whitespace sequences won't be split. | |||
Two values are returned, the list of items and the processed `args` structure. In each returned item, there will be one | Two values are returned, the list of items and the processed `args` structure. In each returned item, there will be one | ||
| Line 739: | Line 797: | ||
internal_error("There must be a spec in `data.params` corresponding to `data.termarg`") | internal_error("There must be a spec in `data.params` corresponding to `data.termarg`") | ||
end | end | ||
if not termarg_spec.list then | if termarg_spec == true or not termarg_spec.list then | ||
internal_error("Term spec in `data.params` must have `list` set", termarg_spec) | internal_error("Term spec in `data.params` must have `list` set", termarg_spec) | ||
end | end | ||
if not termarg_spec.allow_holes | if termarg_spec == true or not (termarg_spec.allow_holes or termarg_spec.disallow_holes) then | ||
internal_error("Term spec in `data.params` must have either `allow_holes` or `disallow_holes` set", termarg_spec) | internal_error("Term spec in `data.params` must have either `allow_holes` or `disallow_holes` set", termarg_spec) | ||
end | end | ||
export.augment_params_with_modifiers(data.params, data.param_mods) | export.augment_params_with_modifiers(data.params, data.param_mods) | ||
args = | args = process_params(data.raw_args, data.params) | ||
else | else | ||
args = data.processed_args or data.args | args = data.processed_args or data.args | ||
| Line 764: | Line 822: | ||
local term_args = args[data.termarg] | local term_args = args[data.termarg] | ||
-- As a special case, the term args might not have a `maxindex` field because they might have | -- As a special case, the term args might not have a `maxindex` field because they might have | ||
-- been declared with `disallow_holes = true`, so fall back to the actual length of the list. | -- been declared with `disallow_holes = true`, so fall back to the actual length of the list | ||
local maxmaxindex = term_args.maxindex or | -- using the length function, since # can be unpredictable with arbitrary tables. | ||
for | local maxmaxindex = term_args.maxindex or length(term_args) | ||
for _, v in pairs(args) do | |||
if type(v) == "table" and v.maxindex and v.maxindex > maxmaxindex then | if type(v) == "table" and v.maxindex and v.maxindex > maxmaxindex then | ||
maxmaxindex = v.maxindex | maxmaxindex = v.maxindex | ||
| Line 773: | Line 832: | ||
local items = {} | local items, lang_cache, use_semicolon = {}, {} | ||
local lang = fetch_argument(args, data.lang) | local lang = fetch_argument(args, data.lang) | ||
if lang then | if lang then | ||
lang_cache[lang:getCode()] = lang | lang_cache[lang:getCode()] = lang | ||
end | end | ||
local | local sc = fetch_argument(args, data.sc) | ||
local term_dest = data.term_dest or "term" | local term_dest = data.term_dest or "term" | ||
-- FIXME: this is vulnerable to abusive inputs like 1000000=. | |||
local itemno = 0 | local itemno = 0 | ||
for i = 1, maxmaxindex do | for i = 1, maxmaxindex do | ||
| Line 818: | Line 875: | ||
-- If any of the params used for formatting this term is present, create a term and add it to the list. | -- If any of the params used for formatting this term is present, create a term and add it to the list. | ||
if not data.dont_skip_items and not any_param_at_index then | if not data.dont_skip_items and not any_param_at_index then | ||
else | else | ||
local termobj = { | local termobj = { | ||
itemno = itemno, | itemno = itemno, | ||
| Line 840: | Line 893: | ||
end | end | ||
local function | -- The parameter name for item `i` is offset from `data.termarg`, which names the first item (hence `i - 1`). | ||
local paramname = data.termarg + i - 1 | |||
local function generate_subobj(termobj, term, parse_err) | |||
if data.parse_lang_prefix and term:find(":") then | if data.parse_lang_prefix and term:find(":") then | ||
local actual_term, termlangs = | local actual_term, termlangs = parse_term_with_lang { | ||
term = term, | term = term, | ||
parse_err = parse_err, | parse_err = parse_err, | ||
| Line 868: | Line 924: | ||
end | end | ||
local function generate_obj(term, parse_err) | |||
return generate_subobj(data.splitchar and {} or termobj, term, parse_err) | |||
if term | end | ||
if term then | |||
paramname = | parse_inline_modifiers(term, { | ||
paramname = paramname, | |||
param_mods = data.param_mods, | param_mods = data.param_mods, | ||
generate_obj = generate_obj, | generate_obj = generate_obj, | ||
splitchar = data.splitchar, | |||
preserve_splitchar = true, | |||
escape_fun = data.escape_fun, | |||
unescape_fun = data.unescape_fun, | |||
outer_container = data.splitchar and termobj or nil, | |||
}) | }) | ||
end | end | ||
local function postprocess_termobj(termobj) | |||
-- | -- Set these after parsing inline modifiers, not in generate_obj(), otherwise we'll get an error in | ||
-- | -- parse_inline_modifiers() if we try to use <lang:...> or <sc:...> as inline modifiers. | ||
termobj.lang = termobj.lang or lang | |||
if not use_semicolon | termobj.sc = termobj.sc or sc | ||
if | |||
if not data.disallow_custom_separators and not use_semicolon then | |||
if data.splitchar and termobj.delimiter == "," then | |||
use_semicolon = true | use_semicolon = true | ||
else | |||
-- If the displayed term (from .term/etc. or .alt) has an embedded comma, use a semicolon to | |||
-- join the terms. | |||
local term_text = termobj[term_dest] or termobj.alt | |||
if term_text and term_text:find(",") then | |||
use_semicolon = true | |||
end | |||
end | end | ||
end | end | ||
end | |||
if data.splitchar then | |||
for _, subobj in ipairs(termobj.terms) do | |||
postprocess_termobj(subobj) | |||
end | |||
else | |||
postprocess_termobj(termobj) | |||
end | end | ||
| Line 902: | Line 975: | ||
if not data.disallow_custom_separators then | if not data.disallow_custom_separators then | ||
-- Set the default separator of all those items for which a separator wasn't explicitly given to | -- Set the default separator of all those items for which a separator wasn't explicitly given to the default | ||
-- | -- separator, defaulting to comma + space; but if any items have embedded commas, set the separator to | ||
for | -- semicolon + space. | ||
for _, item in ipairs(items) do | |||
if not item.separator then | if not item.separator then | ||
item.separator = use_semicolon and "; " or ", " | item.separator = use_semicolon and "; " or data.default_separator or ", " | ||
end | end | ||
end | end | ||