Module:parameter utilities: Difference between revisions

no edit summary
No edit summary
No edit summary
Line 1: Line 1:
local export = {}
local export = {}


local functions_module = "Module:fun"
local parameters_module = "Module:parameters"
local parameters_module = "Module:parameters"
local parse_interface_module = "Module:parse interface"
local parse_interface_module = "Module:parse interface"
Line 7: Line 8:


local dump = mw.dumpObject
local dump = mw.dumpObject
local error = error
local insert = table.insert
local ipairs = ipairs
local next = next
local pairs = pairs
local require = require
local tonumber = tonumber
local type = type


--[==[
--[==[
Loaders for functions in other modules, which overwrite themselves with the target function when called. This ensures modules are only loaded when needed, retains the speed/convenience of locally-declared pre-loaded functions, and has no overhead after the first call, since the target functions are called directly in any subsequent calls.]==]
Loaders for functions in other modules, which overwrite themselves with the target function when called. This ensures
modules are only loaded when needed, retains the speed/convenience of locally-declared pre-loaded functions, and has no
overhead after the first call, since the target functions are called directly in any subsequent calls.
]==]


local function length(...)
local function is_callable(...)
length = require(table_module).length
is_callable = require(functions_module).is_callable
return length(...)
return is_callable(...)
end
end


Line 39: Line 51:
shallow_copy = require(table_module).shallowCopy
shallow_copy = require(table_module).shallowCopy
return shallow_copy(...)
return shallow_copy(...)
end
-- Lazy loader: on the first call, fetch `length` from the table module, rebind the local `table_len` to it so that
-- later calls go straight to the target, then forward the current arguments.
local function table_len(...)
	local target = require(table_module).length
	table_len = target
	return target(...)
end
end


Line 57: Line 74:
["_"] = " ",
["_"] = " ",
["~"] = " ~ ",
["~"] = " ~ ",
["→"] = " → ",
}
}


--[==[ intro:
--[==[ intro:
The purpose of this module is to facilitate implementation of a template that takes a list of items with associated
The purpose of this module is to facilitate implementation of templates that can have arguments specified either through
properties, which can be specified either through separate parameters (e.g. {{para|t2}}, {{para|pos3}}) or inline
inline modifiers or separate parameters. There are two types of templates supported: those that take a list of items
modifiers (`<t:...>`, `<pos:...>`, etc.). Some examples of templates that work this way are {{tl|alter}}/{{tl|alt}};
with associated properties, which can be specified either through indexed separate parameters (e.g. {{para|t2}},
{{tl|synonyms}}/{{tl|syn}}, {{tl|antonyms}}/{{tl|ant}}, and other "nyms" templates; {{tl|col}}, {{tl|col2}},
{{para|pos3}}) or inline modifiers (`<t:...>`, `<pos:...>`, etc.); and those that take a single term, whose properties
{{tl|col3}}, {{tl|col4}} and other columns templates; {{tl|descendant}}/{{tl|desc}}; {{tl|affix}}/{{tl|af}},
can be specified through non-indexed separate parameters (e.g. {{para|t}} or {{para|pos}}) or inline modifiers. Both
{{tl|prefix}}/{{tl|pre}} and related *fix templates; {{tl|affixusex}}/{{tl|afex}} and related templates; {{tl|IPA}};
types of templates can optionally have subitems in the term parameter(s), where the subitems are typically (but not
{{tl|homophones}}; {{tl|rhymes}}; and several others. This module can be thought of as a combination of
necessarily) separated with commas and each subitem can have its own inline modifiers.
[[Module:parameters]] (which parses template parameters, and in particular handles the separate parameter versions of
 
the properties) and `parse_inline_modifiers()` in [[Module:parse utilities]] (which parses inline modifiers).
Some examples of templates that take a list of items are {{tl|alter}}/{{tl|alt}}; {{tl|synonyms}}/{{tl|syn}},
{{tl|antonyms}}/{{tl|ant}}, and other "nyms" templates; {{tl|col}}, {{tl|col2}}, {{tl|col3}}, {{tl|col4}} and other
column templates; {{tl|descendant}}/{{tl|desc}}; {{tl|affix}}/{{tl|af}}, {{tl|prefix}}/{{tl|pre}} and related *fix
templates; {{tl|affixusex}}/{{tl|afex}} and related templates; {{tl|IPA}}; {{tl|homophones}}; {{tl|rhymes}}; and several
others.
 
Examples of templates that take a single item are form-of templates ({{tl|inflection of}}/{{tl|infl of}},
{{tl|form of}}, and specific templates such as {{tl|alt form}}/{{tl|alternative form of}},
{{tl|abbr of}}/{{tl|abbreviation of}}, {{tl|clipping of}}, and many others); etymology templates
({{tl|bor}}/{{tl|borrowed}}, {{tl|der}}/{{tl|derived}}, etc. as well as `misc_variant` templates like {{tl|ellipsis}},
{{tl|abbrev}}, {{tl|clipping}}, {{tl|reduplication}} and the like); and other templates that take an argument structure
similar to {{tl|l}} or {{tl|m}}.
 
This module can be thought of as a combination of [[Module:parameters]] (which parses template parameters, and in
particular handles the separate parameter versions of the properties) and `parse_inline_modifiers()` in
[[Module:parse utilities]] (which parses inline modifiers).


The main entry point is `process_list_arguments()`, which takes an object specifying various properties and returns a
The two main entry points are `parse_list_with_inline_modifiers_and_separate_params()` (for templates that take a list
list of objects, one per item specified by the user, where the individual objects are much like the objects returned by
of items) and `parse_term_with_inline_modifiers_and_separate_params()` (for templates that take a single item). However,
`parse_inline_modifiers()`. However, there are other functions provided, in particular to initialize the `param_mods`
there are other functions provided, e.g. to initialize the `param_mods` structure that is passed to the two entry
structure that is passed to `process_list_arguments()`.
points.


The typical workflow for using this module looks as follows (a slightly simplified version of the code in
The typical workflow for using `parse_list_with_inline_modifiers_and_separate_params()` looks as follows (a slightly
[[Module:nyms]]):
simplified version of the code in [[Module:nyms]]):
{
{
local export = {}
local export = {}
Line 92: Line 125:
-- omit terms, typically by specifying the term using |altN= or <alt:...> so that they remain unlinked) or
-- omit terms, typically by specifying the term using |altN= or <alt:...> so that they remain unlinked) or
-- `disallow_holes = true` (if omitting terms is not allowed). (If neither `allow_holes` nor `disallow_holes` is
-- `disallow_holes = true` (if omitting terms is not allowed). (If neither `allow_holes` nor `disallow_holes` is
-- specified, an error is thrown in process_list_arguments().)
-- specified, an error is thrown in parse_list_with_inline_modifiers_and_separate_params().)
local params = {
local params = {
[1] = {required = true, type = "language", default = "und"},
[1] = {required = true, type = "language", default = "und"},
Line 98: Line 131:
}
}


    local m_param_utils = require(parameter_utilities_module)
local m_param_utils = require(parameter_utilities_module)


-- This constructs the `param_mods` structure by adding well-known groups of parameters (such as all the parameters
-- This constructs the `param_mods` structure by adding well-known groups of parameters (such as all the parameters
Line 120: Line 153:
-- This processes the raw arguments in `parent_args`, parses inline modifiers and creates corresponding objects
-- This processes the raw arguments in `parent_args`, parses inline modifiers and creates corresponding objects
-- containing the property values specified either through inline modifiers or separate parameters.
-- containing the property values specified either through inline modifiers or separate parameters.
local items, args = m_param_utils.process_list_arguments {
local items, args = m_param_utils.parse_list_with_inline_modifiers_and_separate_params {
params = params,
params = params,
param_mods = param_mods,
param_mods = param_mods,
Line 567: Line 600:
if overall_only then
if overall_only then
for param_mod, param_mod_spec in pairs(param_mods) do
for param_mod, param_mod_spec in pairs(param_mods) do
if param_mod_spec.separate_no_index then
if overall_only == "always" or param_mod_spec.separate_no_index then
local param_spec = {}
local param_spec = {}
for k, v in pairs(param_mod_spec) do
for k, v in pairs(param_mod_spec) do
if k ~= "separate_no_index" and not param_mod_spec_key_is_builtin(k) then
if k ~= "separate_no_index" and k ~= "require_index" and not param_mod_spec_key_is_builtin(k) then
param_spec[k] = v
param_spec[k] = v
end
end
Line 578: Line 611:
end
end
else
else
local list_with_holes = { list = true, allow_holes = true }
local list_with_holes
-- Add parameters for each term modifier.
-- Add parameters for each term modifier.
for param_mod, param_mod_spec in pairs(param_mods) do
for param_mod, param_mod_spec in pairs(param_mods) do
local has_extra_specs = false
local param_spec
for k in pairs(param_mod_spec) do
for k, v in pairs(param_mod_spec) do
if not param_mod_spec_key_is_builtin(k) then
if not param_mod_spec_key_is_builtin(k) then
has_extra_specs = true
if param_spec == nil then
break
param_spec = {list = true}
end
param_spec[k] = v
end
end
end
end
if not has_extra_specs then
if param_spec == nil then
params[param_mod] = list_with_holes
if list_with_holes == nil then
else
list_with_holes = {list = true, allow_holes = true}
local param_spec = mw.clone(list_with_holes)
for k, v in pairs(param_mod_spec) do
if not param_mod_spec_key_is_builtin(k) then
param_spec[k] = v
end
end
end
params[param_mod] = param_spec
param_spec = list_with_holes
elseif param_spec.alias_of == nil then
param_spec.allow_holes = true
end
end
params[param_mod] = param_spec
end
end
end
end
Line 607: Line 640:
Note that `lang` and `sc` are considered properties of the item, although `lang` is set when there's a language
Note that `lang` and `sc` are considered properties of the item, although `lang` is set when there's a language
prefix and both `lang` and `sc` may be set from default values specified in the `data` structure passed into
prefix and both `lang` and `sc` may be set from default values specified in the `data` structure passed into
`process_list_arguments()`. If you don't want these treated as property keys, you need to check for them yourself.
`parse_list_with_inline_modifiers_and_separate_params()` and `parse_term_with_inline_modifiers_and_separate_params()`.
If you don't want these treated as property keys, you need to check for them yourself.
]==]
]==]
function export.item_key_is_property(k)
function export.item_key_is_property(k)
Line 616: Line 650:
-- Fetch the argument in `args` corresponding to `index_or_value`, which may be a string of the form "foo.default"
-- Fetch the argument in `args` corresponding to `index_or_value`, which may be a string of the form "foo.default"
-- (requesting the value of `args["foo"].default`); a string or number (requesting the value at that key); a function of
-- (requesting the value of `args["foo"].default`); a string or number (requesting the value at that key); a function of
-- one argument (`args`), which returns the argument value; or the value itself.
-- one argument (`args`), which returns the argument value; or the value itself. Return the resulting value and the
-- parameter in `args` that the value came from, or nil if unknown (i.e. a function or direct value was specified).
local function fetch_argument(args, index_or_value)
local function fetch_argument(args, index_or_value)
if type(index_or_value) == "string" then
if not index_or_value then
return index_or_value, nil
end
local index_or_value_type = type(index_or_value)
if index_or_value_type == "string" then
if index_or_value:sub(-8) == ".default" then
if index_or_value:sub(-8) == ".default" then
local index_without_default = index_or_value:sub(1, -9)
local index_without_default = index_or_value:sub(1, -9)
Line 626: Line 665:
format(index_without_default), arg_obj)
format(index_without_default), arg_obj)
end
end
return arg_obj.default
return arg_obj.default, index_without_default
end
end
if index_or_value:match("^%d+$") then
if index_or_value:match("^%d+$") then
index_or_value = tonumber(index_or_value)
index_or_value = tonumber(index_or_value)
end
end
return args[index_or_value]
return args[index_or_value], index_or_value
elseif type(index_or_value) == "number" then
elseif index_or_value_type == "number" then
return args[index_or_value]
return args[index_or_value], index_or_value
elseif type(index_or_value) == "function" then
elseif is_callable(index_or_value) then
return index_or_value(args)
return index_or_value(args), nil
end
return index_or_value, nil
end
 
--[==[
Store the term in `data.term` into a term object, optionally parsing a language prefix off of it first. `data` is an
object with the following fields:
* `term`: The raw term string. It is indexed with `:find()` when `parse_lang_prefix` is set, so it must be a string in
  that case.
* `term_dest`: Key in the term object under which to store the term; defaults to {"term"}.
* `termobj`: Existing object to fill in; if omitted, a new table is created.
* `parse_lang_prefix`: If set and the term contains a colon, the term is passed to `parse_term_with_lang()` along with
  `parse_err`, `paramname`, `allow_bad_lang_prefix` (as `allow_bad`), `allow_multiple_lang_prefixes` (as
  `allow_multiple`) and `lang_cache`.

An empty term string is stored as {nil}. If `parse_term_with_lang()` returns a second value, it is stored in `termlangs`
(with its first element in `lang`) when `allow_multiple_lang_prefixes` is set, and otherwise in both `termlang` and
`lang`. Returns the term object.
]==]
function export.generate_obj_maybe_parsing_lang_prefix(data)
	local term = data.term
	local term_dest = data.term_dest or "term"
	local termobj = data.termobj or {}
	if data.parse_lang_prefix and term:find(":", nil, true) then
		local actual_term, termlangs = parse_term_with_lang {
			term = term,
			parse_err = data.parse_err,
			paramname = data.paramname,
			allow_bad = data.allow_bad_lang_prefix,
			allow_multiple = data.allow_multiple_lang_prefixes,
			lang_cache = data.lang_cache,
		}
		termobj[term_dest] = actual_term ~= "" and actual_term or nil
		if termlangs then
			-- If we couldn't parse a language code, don't overwrite an existing setting in `lang`
			-- that may have originated from a separate |langN= param.
			if data.allow_multiple_lang_prefixes then
				-- NOTE(review): presumably a list of language objects in this mode -- confirm against
				-- parse_term_with_lang().
				termobj.termlangs = termlangs
				termobj.lang = termlangs[1] or nil
			else
				termobj.termlang = termlangs
				termobj.lang = termlangs
			end
		end
	else
		termobj[term_dest] = term ~= "" and term or nil
	end
	return termobj
end
 
-- Shared sanity checks for parse_list_with_inline_modifiers_and_separate_params() and
-- parse_term_with_inline_modifiers_and_separate_params(). Verifies the argument-related fields of `data` that both
-- entry points accept, reporting each violation through internal_error():
--
-- * `termarg` and `param_mods` must be given;
-- * `subitem_param_handling`, if given, must be "only", "first" or "last" (a missing value counts as "only");
-- * exactly one of `raw_args` and `processed_args` must be given;
-- * with `raw_args`, `params` must be given and contain a spec for `termarg`;
-- * with `processed_args`, `params` must not be given.
local function validate_argument_related_fields(data)
	if not data.termarg then
		internal_error("`data.termarg` must be given, indicating which argument contains the terms to be parsed", data)
	end
	if not data.param_mods then
		internal_error("`data.param_mods` must be given, indicating the allowed inline modifiers and separate " ..
			"parameters to copy", data)
	end

	local handling = data.subitem_param_handling or "only"
	local recognized_handling = {only = true, first = true, last = true}
	if not recognized_handling[handling] then
		internal_error("Unrecognized value for `data.subitem_param_handling`, should be 'first', 'last' or 'only'",
			handling)
	end

	if not data.raw_args then
		-- The caller has already run the raw arguments through [[Module:parameters]].
		if not data.processed_args then
			internal_error("Either `data.raw_args` or `data.processed_args` must be specified", data)
		end
		if data.params then
			internal_error("When `data.processed_args` is specified, `data.params` should not be specified", data)
		end
		return
	end

	-- Raw arguments were supplied, so we need the specs required to parse them.
	if data.processed_args then
		internal_error("Only one of `data.raw_args` and `data.processed_args` can be specified", data)
	end
	if not data.params then
		internal_error("When `data.raw_args` is specified, so must `data.params`, so that the raw arguments " ..
			"can be parsed", data)
	end
	if data.params[data.termarg] == nil then
		internal_error("There must be a spec in `data.params` corresponding to `data.termarg`", data)
	end
end
 
-- Return true if `val` counts as "not specified": either nil or a table with no entries at all. Any other value
-- (including `false` and the empty string) counts as present.
local function argval_missing(val)
	if val == nil then
		return true
	end
	if type(val) ~= "table" then
		return false
	end
	return next(val) == nil
end
 
-- Subfunction of parse_list_with_inline_modifiers_and_separate_params() and
-- parse_term_with_inline_modifiers_and_separate_params(). After parsing inline modifiers, copy the separate parameters
-- to the generated object (or to the appropriate subobject if there are multiple). `data` contains the following
-- fields:
--
-- `args`: The separate-parameter argument structure.
-- `param_mods`: The structure describing the inline modifiers.
-- `itemno`: The logical item number of the term being processed, or nil if there's only a single term.
-- `termobj`: The object to store the inline modifiers into. If there are subitems, they are in the `terms` field;
--    otherwise the properties are stored directly into `termobj`.
-- `has_subitems`: True if there are subitems.
-- `lang`: Language object to store into all items that don't already have a `lang`.
-- `sc`: Script object to store into all items that don't already have an `sc`, or nil.
-- `subitem_param_handling`: "only", "first" or "last", indicating what to do if there are multiple subitems. If
--    omitted, "only" is assumed.
-- `allow_conflicting_inline_mods_and_separate_params`: If true, specifying a value for both an inline modifier and
--    corresponding separate parameter is allowed, and the inline modifier takes precedence. Otherwise, an error
--    occurs.
-- `postprocess_termobj`: Optional function called on all items at the end, to do any postprocessing. Called with two
--    arguments: the object to postprocess and this `data` structure.
local function copy_separate_params_to_termobj_and_postprocess(data)
	local args, param_mods, itemno, termobj = data.args, data.param_mods, data.itemno, data.termobj
	-- Treat a missing handling mode as "only". Without this, a nil value would skip the multiple-subitem check
	-- below and silently behave like "last".
	local subitem_param_handling = data.subitem_param_handling or "only"

	local function set_lang_and_sc(obj)
		-- Set these after parsing inline modifiers, not in generate_obj(), otherwise we'll get an error in
		-- parse_inline_modifiers() if we try to use <lang:...> or <sc:...> as inline modifiers.
		obj.lang = obj.lang or data.lang
		obj.sc = obj.sc or data.sc
	end

	-- Fetch the separate-parameter value for `paramkey`, indexing by `itemno` if there is one.
	local function fetch_separate_param(paramkey)
		local argval = args[paramkey]
		-- Careful with argument values that may be `false`.
		if argval and itemno then
			argval = argval[itemno]
		end
		return argval
	end

	-- Copy separate parameters to a given object. `fetch_destobj` is called with the modifier name and destination
	-- key, and returns the object to store the value into.
	local function copy_separate_params_to_termobj(fetch_destobj)
		for param_mod, param_mod_spec in pairs(param_mods) do
			local dest = param_mod_spec.item_dest or param_mod
			-- Don't do anything with the `sc` param, which will get overwritten below; we don't
			-- want it to cause an error if there are multiple subitems.
			if dest ~= "sc" then
				local argval = fetch_separate_param(param_mod)
				if not argval_missing(argval) then
					local destobj = fetch_destobj(param_mod, dest)
					-- Don't overwrite a value already set by an inline modifier.
					if argval_missing(destobj[dest]) then
						destobj[dest] = argval
					elseif not data.allow_conflicting_inline_mods_and_separate_params then
						error(("Can't specify a value for separate parameter %s%s= because there is " ..
							"already an inline modifier <%s:...> specifying a value for the term"):format(
							param_mod, itemno or "", param_mod))
					end
				end
			end
		end
	end

	if data.has_subitems then
		-- If there are any separate indexed parameters, we need to copy them to the first, last or only
		-- subitem, depending on the value of `data.subitem_param_handling` (which defaults to 'only',
		-- meaning it's an error if there are multiple subitems). Do this before setting the default .lang and
		-- .sc and before calling postprocess_termobj(), so that separate-parameter values are already in place.

		-- If there was no term, `termobj.terms` will not exist; make it exist to make the callers' lives easier.
		if not termobj.terms then
			termobj.terms = {}
		end
		-- Compute whether any of the separate indexed params exist for this index.
		local any_param_at_index
		for param_mod in pairs(param_mods) do
			if not argval_missing(fetch_separate_param(param_mod)) then
				any_param_at_index = true
				break
			end
		end

		-- If there was no term, but there's a separate parameter, we need to create an empty subitem.
		if any_param_at_index and not termobj.terms[1] then
			termobj.terms[1] = {}
		end

		local function fetch_destobj(param_mod, dest)
			if subitem_param_handling == "only" and termobj.terms[2] then
				error(("Can't specify a value for separate parameter %s%s= because there are " ..
					"multiple subitems (%s) in the term; use an inline modifier"):format(
					param_mod, itemno or "", #termobj.terms))
			end
			local termind
			-- q/a/l need to go at the beginning and qq/aa/ll/refs at the end, regardless; otherwise, respect
			-- `data.subitem_param_handling`.
			if dest == "q" or dest == "a" or dest == "l" then
				termind = 1
			elseif dest == "qq" or dest == "aa" or dest == "ll" or dest == "refs" then
				termind = #termobj.terms
			elseif subitem_param_handling == "only" or subitem_param_handling == "first" then
				termind = 1
			else
				termind = #termobj.terms
			end
			return termobj.terms[termind]
		end

		copy_separate_params_to_termobj(fetch_destobj)
		for _, subitem in ipairs(termobj.terms) do
			set_lang_and_sc(subitem)
			if data.postprocess_termobj then
				data.postprocess_termobj(subitem, data)
			end
		end
	else
		-- Copy all the parsed term-specific parameters into `termobj`.
		copy_separate_params_to_termobj(function(param_mod, dest) return termobj end)
		set_lang_and_sc(termobj)
		if data.postprocess_termobj then
			data.postprocess_termobj(termobj, data)
		end
	end
end
 
-- Decide whether items should be joined with a semicolon instead of the usual separator, recording the decision
-- in `data.use_semicolon`. Does nothing if `data.disallow_custom_separators` is set or the flag is already set.
-- The flag is turned on when `data.has_subitems` is set and the item's `delimiter` is a comma, or otherwise when
-- the displayed text (the field named by `data.term_dest`, falling back to `.alt`) contains a comma.
local function postprocess_termobj(item, data)
	if data.disallow_custom_separators or data.use_semicolon then
		return
	end
	if data.has_subitems and item.delimiter == "," then
		data.use_semicolon = true
		return
	end
	-- If the displayed term (from .term/etc. or .alt) has an embedded comma, use a semicolon to
	-- join the terms.
	local displayed = item[data.term_dest] or item.alt
	if displayed and displayed:find(",", nil, true) then
		data.use_semicolon = true
	end
end
end
end


--[==[
--[==[
Parse inline modifiers and create corresponding item objects containing the property values specified either through
Parse a list of terms, each of which may have properties specified using inline modifiers or separate parameters. This
inline modifiers or separate parameters. `data` is an object containing the following properties:
function is intended for parsing the arguments of templates like {{tl|syn}}, {{tl|ant}} and related ''*nym'' templates;
alternative-form templates {{tl|alt}}/{{tl|alter}}; affix templates like {{tl|af}}/{{tl|affix}},
{{tl|com}}/{{tl|compound}}, etc.; affix usex templates like {{tl|afex}}/{{tl|affixusex}}; name templates like
{{tl|name translit}}; column templates like {{tl|col}}; pronunciation templates like {{tl|rhyme}}/{{tl|rhymes}} and
{{tl|hmp}}/{{tl|homophones}}; etc. In these templates there are one or more terms specified using numeric parameters, and
associated separate parameters specifying per-term properties such as {{para|t1}}, {{para|t2}}, {{para|t3}}, ... for the
gloss of the first, second, third, ... term respectively. All such properties can also be specified through inline
modifiers attached directly to each term (`<t:...>`, `<pos:...>`, etc.). Normally it is an error if both an inline
modifier and separate parameter for the same value are given, but this can be overridden (in which case inline modifiers
take precedence over separate parameters when both occur).
 
For an example of a typical workflow involving this function, see the comment at the top of this file.
 
Some notable properties of this function:
# Processing of the raw frame parent args using `process()` in [[Module:parameters]] can occur either inside of this
  function (the usual workflow) or outside of this function (for more complex cases). In the former case the raw parent
  args are passed in along with a partially built `params` structure of the sort required by [[Module:parameters]],
  containing only the term list itself along with any other parameters that are '''not''' term properties (such as
  a language code in {{para|1}} and boolean flags like {{para|nocat}}, {{para|nocap}}, etc.). This structure is
  ''augmented'' with list parameters, one for each per-term property, and [[Module:parameters]] is invoked. In the
  latter case where raw argument processing is done by the caller, they must build the partial `params` structure;
  augment it themselves using `augment_params_with_modifiers()`; call [[Module:parameters]] themselves; and pass in the
  processed arguments. In both cases, the return value of this function contains two values, a list of objects, one per
  term, specifying the term and all properties; and the processed arguments structure, so that the non-term-property
  arguments can be processed as appropriate.
# Optionally, each term can consist of a number of ''subitems'' separated by delimiters (usually a comma, but the
  possible delimiter or delimiters are controllable). Each subitem can have its own inline modifiers. This functionality
  is used, for example, by {{tl|col}} and variants, which allow each row to have comma-separated or tilde-separated
  subitems. When this feature is invoked, the format of the per-term object changes; instead of directly being an object
  describing the term and its properties, it is an object with a `terms` field containing a list of per-subitem objects
  along with other top-level fields describing per-term properties. By default, if there are separate parameters
  specified along with multiple subitems, an error occurs, but this is controllable; currently, you can request that the
  parameters be assigned to the first or last subitem.
# By default, special ''separator'' arguments may be present, mixed in among regular term arguments. Examples of such
  separator arguments are (by default; this can be overridden) a bare semicolon, specifying that the terms on either
  side should be separated by a semicolon instead of a comma (indicating a higher-level grouping); a bare tilde,
  replacing the comma separator with a tilde (indicating that the terms on either side are alternants); and a bare
  underscore, replacing the comma separator with a space. Separator arguments are ignored when numbering the separate
  parameters. You can disable the separator argument handling entirely if it doesn't make sense to have this (e.g. in
  {{tl|af}}/{{tl|affix}}, where the separator is always a {{cd|+}} sign).
 
`data` is an object containing several possible fields.
 
1. Fields that are required or recommended (usually related to argument processing):
* `raw_args` ('''required''' unless `processed_args` is specified): The raw arguments, normally fetched from
* `raw_args` ('''required''' unless `processed_args` is specified): The raw arguments, normally fetched from
   {frame:getParent().args}. They are parsed using `process()` in [[Module:parameters]].
   {frame:getParent().args}. They are parsed using `process()` in [[Module:parameters]]. Most callers pass in raw
  arguments.
* `processed_args`: The object of parsed arguments returned by `process()` in [[Module:parameters]]. One (but not both)
* `processed_args`: The object of parsed arguments returned by `process()` in [[Module:parameters]]. One (but not both)
   of `raw_args` and `processed_args` must be set.
   of `raw_args` and `processed_args` must be set.
Line 664: Line 958:
   missing items with corresponding properties as well as missing items without corresponding properties (which are
   missing items with corresponding properties as well as missing items without corresponding properties (which are
   skipped entirely). To find out the exact properties tracked and the name of the tracking pages, read the code.
   skipped entirely). To find out the exact properties tracked and the name of the tracking pages, read the code.
* `lang` ('''recommended'''): The language object for the language of the items, or the name of the argument to fetch
  the object from. It is not strictly necessary to specify this, as this function only initializes items based on inline
  modifiers and separate arguments and doesn't actually format the resulting items. However, if specified, it is used
  for certain purposes:
  *# It specifies the default for the `lang` property of returned objects if not otherwise set (e.g. by a language
prefix).
  *# It is used to initialize an internal cache for speeding up language-code parsing (primarily useful if the same
language code may appear in several items, such as with {{tl|col}} and related templates).
  The value of `lang` can be any of the following:
  * If a string of the form "foo.default", it is assumed to be requesting the value of `args["foo"].default`.
  * Otherwise, if a string or number, it is assumed to be requesting the value of `args` at that key. Note that if the
string is in the form of a number (e.g. "3"), it is normalized to a number prior to fetching (this also happens with
a spec like "2.default").
  * Otherwise, if a function, it is assumed to be a function to return the argument value given `args`, which is passed
to the function as its only argument.
  * Otherwise, it is used directly.
* `sc` ('''recommended'''): The script object for the items, or the name of the argument to fetch the object from. The
  possible values and their handling are the same as with `lang`. In general, as with `lang`, it is not strictly
  necessary to specify this. However, if specified, it is used to supply the default for the `sc` property of returned
  items if not otherwise set (e.g. by the {{para|sc<var>N</var>}} parameter or `<sc:...>` inline modifier). The most
  common value is {"sc.default"}.
2. Other argument-related fields:
* `process_args_before_parsing`: An optional function to apply further processing to the processed `args` structure
* `process_args_before_parsing`: An optional function to apply further processing to the processed `args` structure
   returned by [[Module:parameters]], before parsing inline modifiers. This is passed one argument, the processed
   returned by [[Module:parameters]], before parsing inline modifiers. This is passed one argument, the processed
Line 669: Line 986:
* `term_dest`: The field to store the value of the item itself into, after inline modifiers and (if allowed) language
* `term_dest`: The field to store the value of the item itself into, after inline modifiers and (if allowed) language
   prefixes are stripped off. Defaults to {"term"}.
   prefixes are stripped off. Defaults to {"term"}.
* `pre_normalize_modifiers`: As in `parse_inline_modifiers()`.
* `allow_conflicting_inline_mods_and_separate_params`: If specified, don't throw an error if a value is specified for
  a given property using both an inline modifier and separate param; in this case, the inline modifier takes precedence.
3. Fields related to language prefixes:
* `parse_lang_prefix`: If true, allow and parse off a language code prefix attached to items followed by a colon, such
* `parse_lang_prefix`: If true, allow and parse off a language code prefix attached to items followed by a colon, such
   as {la:minūtia} or {grc:[[σκῶρ|σκατός]]}. Etymology-only languages are allowed. Inline modifiers can be attached to
   as {la:minūtia} or {grc:[[σκῶρ|σκατός]]}. Etymology-only languages are allowed. Inline modifiers can be attached to
   such items. The exact syntax allowed is as specified in the `parse_term_with_lang()` function in
   such items. The exact syntax allowed is as specified in the `parse_term_with_lang()` function in
   [[Module:parse utilities]]. If `allow_multiple_lang_prefixes` is given, a comma-separated list of language prefixes
   [[Module:parse utilities]]. If `allow_multiple_lang_prefixes` is given, a {{cd|+}}-sign-separated list of language
   can be attached to an item. The resulting language object is stored into the `termlang` field, and also into the
   prefixes can be attached to an item. The resulting language object is stored into the `termlang` field, and also into
   `lang` field (or in the case of `allow_multiple_lang_prefixes`, the list of language objects is stored into the
   the `lang` field (or in the case of `allow_multiple_lang_prefixes`, the list of language objects is stored into the
   `termlangs` field, and the first specified object is stored in the `lang` field).
   `termlangs` field, and the first specified object is stored in the `lang` field).
* `allow_multiple_lang_prefixes`: If given in conjunction with `parse_lang_prefix`, multiple comma-separated language
* `allow_multiple_lang_prefixes`: If given in conjunction with `parse_lang_prefix`, multiple language code prefixes can
  code prefixes can be given. See `parse_lang_prefix` above.
  be given, separated by a {{cd|+}} sign. See `parse_lang_prefix` above.
* `allow_bad_lang_prefixes`: If given in conjunction with `parse_lang_prefix`, unrecognized language prefixes do not
* `allow_bad_lang_prefix`: If given in conjunction with `parse_lang_prefix`, unrecognized language prefixes do not
   trigger an error, but are simply ignored (and not stripped off the item). Note that, regardless of whether this is
   trigger an error, but are simply ignored (and not stripped off the item). Note that, regardless of whether this is
   given, prefixes before a colon do not trigger an error if they do not have the form of a language prefix or if a space
   given, prefixes before a colon do not trigger an error if they do not have the form of a language prefix or if a space
   follows the colon. It is not recommended that this be given because typos in language prefixes will not trigger an
   follows the colon. It is not recommended that this be given because typos in language prefixes will not trigger an
   error and will tend to remain unfixed.
   error and will tend to remain unfixed.
* `lang`: The language object for the language of the items, or the name of the argument to fetch the object from. In
 
  general it is not necessary to specify this as `process_list_arguments()` only initializes items based on inline
4. Fields related to custom/special separators:
  modifiers and separate arguments and doesn't actually format the resulting items. However, if specified, it is used
  for certain purposes:
  *# It specifies the default for the `lang` property of returned objects if not otherwise set (e.g. by a language
    prefix).
  *# It is used to initialize an internal cache for speeding up language-code parsing (primarily useful if the same
    language code may appear in several items, such as with {{tl|col}} and related templates).
  The value of `lang` can be any of the following:
  * If a string of the form "foo.default", it is assumed to be requesting the value of `args["foo"].default`.
  * Otherwise, if a string or number, it is assumed to be requesting the value of `args` at that key. Note that if the
    string is in the form of a number (e.g. "3"), it is normalized to a number prior to fetching (this also happens with
a spec like "2.default").
  * Otherwise, if a function, it is assumed to be a function to return the argument value given `args`, which is passed
    to the function as its only argument.
  * Otherwise, it is used directly.
* `sc`: The script object for the items, or the name of the argument to fetch the object from. The possible values and
  their handling are the same as with `lang`. In general, as with `lang`,  it is not necessary to specify this. However,
  if specified, it is used to supply the default for the `sc` property of returned items if not otherwise set (e.g. by
  the {{para|sc<var>N</var>}} parameter or `<sc:...>` inline modifier).
* `disallow_custom_separators`: If specified, disallow specifying custom separators (semicolon, underscore, tilde; see
* `disallow_custom_separators`: If specified, disallow specifying custom separators (semicolon, underscore, tilde; see
   the internal `special_separators` table) as an item value to override the default separator. By default, the previous
   the internal `default_special_separators` table, or the `special_separators` field) as an item value to override the
  separator of each item is considered to be an empty string (for the first item) and otherwise the value of
  default separator. By default, the previous separator of each item is considered to be an empty string (for the first
  `default_separator` (normally a comma + space), unless either the preceding item is one of the values listed in
  item) and otherwise the value of the field `default_separator` (normally a comma + space), unless either the preceding
  `special_separators`, such as a bare semicolon (which causes the following item's previous separator to be a semicolon
  item is one of the values listed in `special_separators`, such as a bare semicolon (which causes the following item's
  + space) or an item has an embedded comma in it (which causes ''all'' items other than the first to have their
  previous separator to be a semicolon + space) or an item has an embedded comma in it (which causes ''all'' items other
  previous separator be a semicolon + space). The previous separator of each item is set on the item's `separator`
  than the first to have their previous separator be a semicolon + space). The previous separator of each item is set on
  property. Bare semicolons and other separator arguments do not count when indexing items using separate parameters.
  the item's `separator` property. Bare semicolons and other separator arguments do not count when indexing items using
  separate parameters.
   For example, the following is correct:
   For example, the following is correct:
   ** {{tl|template|lang|item 1|q1=qualifier 1|;|item 2|q2=qualifier 2}}
   ** {{tl|template|lang|item 1|q1=qualifier 1|;|item 2|q2=qualifier 2}}
Line 716: Line 1,021:
   not recognized.
   not recognized.
* `default_separator`: Override the default separator (normally {", "}).
* `default_separator`: Override the default separator (normally {", "}).
* `special_separators`: Table giving the special/custom separators that can be given, and how they should display. If
  not specified, the default in `default_special_separators` is used. This is a table mapping separator values (such as
  {"~"}) to the corresponding display string (such as {" ~ "}).
5. Fields related to multiple subitems in a given term:
* `splitchar`: A Lua pattern. If specified, each user-specified argument can consist of multiple delimiter-separated
  subitems, each of which may be followed by inline modifiers. In this case, each element in the returned list of items
  is no longer an object describing an item, but instead an object with a `terms` field, whose value is a list
  describing the subitems (whose format is the same as the normal format of an item in the top-level list when
  `splitchar` is not specified). Each subitem object will have a `delimiter` field holding the actual delimiter
  occurring before the subitem, which is useful in the case where `splitchar` matches multiple possible characters. In
  this case, it is possible to specify that a given modifier can only occur after the last subitem and effectively
  modifies the whole collection of subitems by setting `overall = true` on the modifier. In this case, the modifier's
  value will be stored in the top-level object (the object with the `terms` field specifying the subitems). Note that
  splitting on delimiters will not happen in certain protected sequences (by default comma+whitespace; see below). In
  addition, the algorithm to split on delimiters is sensitive to inline modifier syntax and will not be confused by
  delimiters inside of inline modifiers or inside of square brackets, which do not trigger splitting (whether or not
  contained within protected sequences).
* `escape_fun` and `unescape_fun`: As in `split_escaping()` and `split_alternating_runs_escaping()` in
  [[Module:parse utilities]]. They control the protected sequences that won't be split when `splitchar` is specified
  (see previous item). By default, `escape_comma_whitespace` and `unescape_comma_whitespace` are used, so that
  comma+whitespace sequences won't be split.
* `subitem_param_handling`: How to handle separate parameters that are specified in the presence of multiple subitems.
  The possible values are {"only"} (only allow separate parameters if there aren't any subitems, otherwise throw an
  error), {"first"} (store the separate parameters in the first subitem) and {"last"} (store the separate parameters
  in the last subitem). The default is {"only"}. As a special case, an {{para|scN}} separate parameter will be stored
  into all subitems.
6. Other fields:
* `dont_skip_items`: Normally, items that are completely unspecified (have no term and no properties) are skipped and
* `dont_skip_items`: Normally, items that are completely unspecified (have no term and no properties) are skipped and
   not inserted into the returned list of items. (Such items cannot occur if `disallow_holes = true` is set on the term
   not inserted into the returned list of items. (Such items cannot occur if `disallow_holes = true` is set on the term
Line 725: Line 1,059:
   argument, an object containing the following fields:
   argument, an object containing the following fields:
   ** `term`: The raw term, prior to parsing off language prefixes and inline modifiers (since the processing of
   ** `term`: The raw term, prior to parsing off language prefixes and inline modifiers (since the processing of
    `stop_when` happens before parsing the term).
`stop_when` happens before parsing the term).
   ** `any_param_at_index`: True if any separate property parameters exist for this item.
   ** `any_param_at_index`: True if any separate property parameters exist for this item.
   ** `orig_index`: Same as `orig_index` below.
   ** `orig_index`: Same as `orig_index` below.
   ** `itemno`: Same as `itemno` below.
   ** `itemno`: Same as `itemno` below.
   ** `stored_itemno`: The index where this item will be stored into the returned items table. This may differ from
   ** `stored_itemno`: The index where this item will be stored into the returned items table. This may differ from
    `itemno` due to skipped items (it will never be different if `dont_skip_items` is set).
`itemno` due to skipped items (it will never be different if `dont_skip_items` is set).
   The function should return true to stop processing items and return the ones processed so far (not including the item
   The function should return true to stop processing items and return the ones processed so far (not including the item
   currently being processed). This is used, for example, in [[Module:alternative forms]], where an unspecified item
   currently being processed). This is used, for example, in [[Module:alternative forms]], where an unspecified item
   signal the end of items and the start of labels.
   signals the end of items and the start of labels.
* `splitchar` is a Lua pattern. If specified, each user-specified argument can consist of multiple delimiter-separated
  subitems, each of which may be followed by inline modifiers. In this case, each element in the returned list of items
  is no longer an object describing an item, but instead an object with a `terms` field, whose value is a list
  describing the subitems (whose format is the same as the normal format of an item in the top-level list when
  `splitchar` is not specified). Each subitem object will have a `delimiter` field holding the actual delimiter
  occurring before the subitem, which is useful in the case where `splitchar` matches multiple possible characters. In
  this case, it is possible to specify that a given modifier can only occur after the last subitem and effectively
  modifies the whole collection of subitems by setting `overall = true` on the modifier. In this case, the modifier's
  value will be stored in the top-level object (the object with the `terms` field specifying the subitems). Likewise,
  any modifiers specified in the form of separate parameters will be treated as overall; if you want them to apply to
  the subitems, it is your responsibility to set the subitem properties appropriately. Note that splitting on delimiters
  will not happen in certain protected sequences (by default comma+whitespace; see below). In addition, the algorithm to
  split on delimiters is sensitive to inline modifier syntax and will not be confused by delimiters inside of inline
  modifiers or inside of square brackets, which do not trigger splitting (whether or not contained within protected
  sequences). Note that when `splitchar` is set, the code always sets `preserve_splitchar` in the call to
  `parse_inline_modifiers()`, meaning that the delimiter preceding the subitems is always available on the `delimiter`
  key of the corresponding objects.
* `escape_fun` and `unescape_fun` are as in split_escaping() and split_alternating_runs_escaping() in
  [[Module:parse utilities]] and control the protected sequences that won't be split when `splitchar` is specified (see
  previous item). By default, `escape_comma_whitespace` and `unescape_comma_whitespace` are used, so that
  comma+whitespace sequences won't be split.


Two values are returned, the list of items and the processed `args` structure. In each returned item, there will be one
Two values are returned, the list of items and the processed `args` structure. In each returned item, there will be one
field set for each specified property (either through inline modifiers or separate parameters). In addition, the
field set for each specified property (either through inline modifiers or separate parameters). If subitems are not
following fields may be set:
allowed, each item directly has fields set on it for the specified properties. If subitems ''are'' allowed, each item
contains a `terms` field, which is a list of subitem objects, each of which has fields set on it for the specified
properties of that subitem. In addition, the following fields may be set on each item or subitem:
* `term`: The term portion of the item (minus inline modifiers and language prefixes). {nil} if no term was given.
* `term`: The term portion of the item (minus inline modifiers and language prefixes). {nil} if no term was given.
* `orig_index`: The original index into the item in the items table returned by `process()` in [[Module:parameters]].
* `orig_index`: The original index into the item in the items table returned by `process()` in [[Module:parameters]].
Line 764: Line 1,079:
* `itemno`: The logical index of the item. The index of separate parameters corresponds to this index. This may be
* `itemno`: The logical index of the item. The index of separate parameters corresponds to this index. This may be
   different from `orig_index` in the presence of raw semicolons; see above.
   different from `orig_index` in the presence of raw semicolons; see above.
* `separator`: The separator to display before the term. Always set unless `disallow_custom_separators` is given, in
  which case it is not set.
* `termlang`: If there is a language prefix, the corresponding language object is stored here (only if
* `termlang`: If there is a language prefix, the corresponding language object is stored here (only if
   `parse_lang_prefix` is set and `allow_multiple_lang_prefixes` is not set).
   `parse_lang_prefix` is set and `allow_multiple_lang_prefixes` is not set).
Line 775: Line 1,088:
* `sc`: The script object of the item. This is set when either (a) the `sc` property is allowed and specified; (b)
* `sc`: The script object of the item. This is set when either (a) the `sc` property is allowed and specified; (b)
   `sc` isn't otherwise set and the `sc` field of the overall `data` object is set, providing a default value.
   `sc` isn't otherwise set and the `sc` field of the overall `data` object is set, providing a default value.
* `delimiter`: If subitems are allowed, this specifies the delimiter used prior to the given subitem (e.g. {","}).
In addition, regardless of whether subitems are allowed, the top-level item will have a `separator` field set if
`disallow_custom_separators` is not given, specifying the separator to display before the item.
]==]
]==]
function export.process_list_arguments(data)
function export.parse_list_with_inline_modifiers_and_separate_params(data)
local args
validate_argument_related_fields(data)
if not data.termarg then
local raw_args, termarg, param_mods, args = data.raw_args, data.termarg, data.param_mods
internal_error("Required value `data.termarg` not specified")
if raw_args then
end
local params = data.params
if not data.param_mods then
local termarg_spec = params[termarg]
internal_error("Required value `data.param_mods` not specified")
end
if data.raw_args then
-- FIXME, remove support for `data.args` in favor of `data.processed_args`
if data.processed_args or data.args then
internal_error("Only one of `data.raw_args` and `data.processed_args` can be specified")
end
if not data.params then
internal_error("When `data.raw_args` is specified, so must `data.params`, so that the raw arguments can be parsed")
end
local termarg_spec = data.params[data.termarg]
if not termarg_spec then
internal_error("There must be a spec in `data.params` corresponding to `data.termarg`")
end
if termarg_spec == true or not termarg_spec.list then
if termarg_spec == true or not termarg_spec.list then
internal_error("Term spec in `data.params` must have `list` set", termarg_spec)
internal_error("Term spec in `data.params` must have `list` set", termarg_spec)
end
end
if termarg_spec == true or not (termarg_spec.allow_holes or termarg_spec.disallow_holes) then
if termarg_spec == true or not (termarg_spec.allow_holes or termarg_spec.disallow_holes) then
internal_error("Term spec in `data.params` must have either `allow_holes` or `disallow_holes` set", termarg_spec)
internal_error("Term spec in `data.params` must have either `allow_holes` or `disallow_holes` set",
termarg_spec)
end
end
export.augment_params_with_modifiers(data.params, data.param_mods)
export.augment_params_with_modifiers(params, param_mods)
args = process_params(data.raw_args, data.params)
args = process_params(raw_args, params)
else
else
args = data.processed_args or data.args
args = data.processed_args
if not args then
internal_error("Either `data.raw_args` or `data.processed_args` must be specified")
end
if data.params then
internal_error("When `data.processed_args` is specified, `data.params` should not be specified")
end
end
end


if data.process_args_before_parsing then
local process_args_before_parsing = data.process_args_before_parsing
data.process_args_before_parsing(args)
if process_args_before_parsing then
process_args_before_parsing(args)
end
end


-- Find the maximum index among any of the list parameters.
-- Find the maximum index among any of the list parameters.
local term_args = args[data.termarg]
local term_args = args[termarg]
-- As a special case, the term args might not have a `maxindex` field because they might have
-- As a special case, the term args might not have a `maxindex` field because they might have
-- been declared with `disallow_holes = true`, so fall back to the actual length of the list
-- been declared with `disallow_holes = true`, so fall back to the actual length of the list
-- using the length function, since # can be unpredictable with arbitrary tables.
-- using the table_len function, since # can be unpredictable with arbitrary tables.
local maxmaxindex = term_args.maxindex or length(term_args)
local maxmaxindex = term_args.maxindex or table_len(term_args)
for _, v in pairs(args) do
for _, v in pairs(args) do
if type(v) == "table" and v.maxindex and v.maxindex > maxmaxindex then
if type(v) == "table" and v.maxindex and v.maxindex > maxmaxindex then
Line 829: Line 1,127:
end
end
end
end


local special_separators = data.special_separators or export.default_special_separators
local special_separators = data.special_separators or export.default_special_separators
local items, lang_cache, use_semicolon = {}, {}
local items, lang_cache, use_semicolon = {}, data.lang_cache or {}
local lang = fetch_argument(args, data.lang)
local lang = fetch_argument(args, data.lang)
if lang then
if lang then
Line 848: Line 1,145:


-- Compute whether any of the separate indexed params exist for this index.
-- Compute whether any of the separate indexed params exist for this index.
local any_param_at_index = term ~= nil
local any_param_at_index
if not any_param_at_index then
for param_mod in pairs(param_mods) do
for k, v in pairs(args) do
local argval = args[param_mod]
-- Look for named list parameters. We check:
-- Careful with argument values that may be `false`.
-- (1) key is a string (excludes the term param, which is a number);
if argval then
-- (2) value is a table, i.e. a list;
argval = argval[itemno]
-- (3) v.maxindex is set (i.e. allow_holes was used);
end
-- (4) the value has an entry at index `itemno` (the current logical index).
if not argval_missing(argval) then
if type(k) == "string" and type(v) == "table" and v.maxindex and v[itemno] then
any_param_at_index = true
any_param_at_index = true
break
break
end
end
end
end
end


if data.stop_when and data.stop_when {
if data.stop_when and data.stop_when{
term = term,
term = term,
any_param_at_index = any_param_at_index,
-- FIXME, we should just pass in `any_param_at_index` directly.
any_param_at_index = term ~= nil or any_param_at_index,
orig_index = i,
orig_index = i,
itemno = itemno,
itemno = itemno,
Line 874: Line 1,170:


-- If any of the params used for formatting this term is present, create a term and add it to the list.
-- If any of the params used for formatting this term is present, create a term and add it to the list.
if not data.dont_skip_items and not any_param_at_index then
if not data.dont_skip_items and term == nil and not any_param_at_index then
else
else
local termobj = {
local termobj = {
Line 882: Line 1,178:
if not data.disallow_custom_separators then
if not data.disallow_custom_separators then
termobj.separator = i == 1 and "" or special_separators[term_args[i - 1]]
termobj.separator = i == 1 and "" or special_separators[term_args[i - 1]]
end
-- Parse all the term-specific parameters and store in `termobj`.
for param_mod, param_mod_spec in pairs(data.param_mods) do
local dest = param_mod_spec.item_dest or param_mod
local arg = args[param_mod] and args[param_mod][itemno]
if arg then
termobj[dest] = arg
end
end
end


-- Add 1 because first term index starts at 2.
-- Add 1 because first term index starts at 2.
local paramname = data.termarg + i - 1
local paramname = termarg + i - 1


local function generate_subobj(termobj, term, parse_err)
if term then
if data.parse_lang_prefix and term:find(":") then
local function generate_obj(term, parse_err)
local actual_term, termlangs = parse_term_with_lang {
return export.generate_obj_maybe_parsing_lang_prefix {
term = term,
term = term,
termobj = data.splitchar and {} or termobj,
term_dest = term_dest,
paramname = paramname,
parse_lang_prefix = data.parse_lang_prefix,
parse_err = parse_err,
parse_err = parse_err,
paramname = paramname,
allow_bad_lang_prefix = data.allow_bad_lang_prefix,
allow_bad = data.allow_bad_lang_prefix,
allow_multiple_lang_prefixes = data.allow_multiple_lang_prefixes,
allow_multiple = data.allow_multiple_lang_prefixes,
lang_cache = lang_cache,
lang_cache = lang_cache,
}
}
termobj[term_dest] = actual_term ~= "" and actual_term or nil
if termlangs then
-- If we couldn't parse a language code, don't overwrite an existing setting in `lang`
-- that may have originated from a separate |langN= param.
if data.allow_multiple_lang_prefixes then
termobj.termlangs = termlangs
termobj.lang = termlangs and termlangs[1] or nil
else
termobj.termlang = termlangs
termobj.lang = termlangs
end
end
else
termobj[term_dest] = term ~= "" and term or nil
end
end
return termobj
end


local function generate_obj(term, parse_err)
return generate_subobj(data.splitchar and {} or termobj, term, parse_err)
end
if term then
parse_inline_modifiers(term, {
parse_inline_modifiers(term, {
paramname = paramname,
paramname = paramname,
param_mods = data.param_mods,
param_mods = param_mods,
generate_obj = generate_obj,
generate_obj = generate_obj,
splitchar = data.splitchar,
splitchar = data.splitchar,
Line 938: Line 1,207:
unescape_fun = data.unescape_fun,
unescape_fun = data.unescape_fun,
outer_container = data.splitchar and termobj or nil,
outer_container = data.splitchar and termobj or nil,
pre_normalize_modifiers = data.pre_normalize_modifiers,
})
})
end
end


local function postprocess_termobj(termobj)
local term_data = {
-- Set these after parsing inline modifiers, not in generate_obj(), otherwise we'll get an error in
args = args,
-- parse_inline_modifiers() if we try to use <lang:...> or <sc:...> as inline modifiers.
param_mods = param_mods,
termobj.lang = termobj.lang or lang
itemno = itemno,
termobj.sc = termobj.sc or sc
termobj = termobj,
term_dest = term_dest,
has_subitems = not not data.splitchar,
lang = lang,
-- As a special case, if the caller defined a scN= separate param, set it on all subitems if there
-- are multiple, falling back to the overall sc= param.
sc = args.sc and args.sc[itemno] or sc,
subitem_param_handling = data.subitem_param_handling,
allow_conflicting_inline_mods_and_separate_params =
data.allow_conflicting_inline_mods_and_separate_params,
postprocess_termobj = postprocess_termobj,
disallow_custom_separators = data.disallow_custom_separators,
use_semicolon = use_semicolon,
}


if not data.disallow_custom_separators and not use_semicolon then
copy_separate_params_to_termobj_and_postprocess(term_data)
if data.splitchar and termobj.delimiter == "," then
use_semicolon = term_data.use_semicolon
use_semicolon = true
else
-- If the displayed term (from .term/etc. or .alt) has an embedded comma, use a semicolon to
-- join the terms.
local term_text = termobj[term_dest] or termobj.alt
if term_text and term_text:find(",") then
use_semicolon = true
end
end
end
end


if data.splitchar then
insert(items, termobj)
for _, subobj in ipairs(termobj.terms) do
postprocess_termobj(subobj)
end
else
postprocess_termobj(termobj)
end
 
table.insert(items, termobj)
end
end
end
end
Line 986: Line 1,250:


return items, args
return items, args
end
--[==[
Parse a single term that may have properties specified through inline modifiers or separate parameters. This differs
from `parse_list_with_inline_modifiers_and_separate_params()` in that the latter is for parsing a list of terms, each of
which may have properties specified through inline modifiers or separate parameters. Both functions optionally support
having multiple subitems in a single term. This function is used e.g. for form-of templates
({{tl|inflection of}}/{{tl|infl of}}, {{tl|form of}}, and specific templates such as
{{tl|alt form}}/{{tl|alternative form of}}, {{tl|abbr of}}/{{tl|abbreviation of}}, {{tl|clipping of}}, and many others);
for etymology templates ({{tl|bor}}/{{tl|borrowed}}, {{tl|der}}/{{tl|derived}}, etc. as well as `misc_variant` templates
like {{tl|ellipsis}}, {{tl|abbrev}}, {{tl|clipping}}, {{tl|reduplication}} and the like); and for other templates with
an argument structure similar to {{tl|l}} or {{tl|m}}. In these templates there is a term specified using a numeric
parameter and associated separate parameters specifying term properties such as {{para|t}} for the gloss or {{para|tr}}
for manual transliteration. All such properties can also be specified through inline modifiers attached directly to each
term (`<t:...>`, `<tr:...>`, etc.). Normally it is an error if both an inline modifier and separate parameter for the
same value are given, but this can be overridden (in which case inline modifiers take precedence over separate
parameters when both occur).
Some notable properties of this function:
# Processing of the raw frame parent args using `process()` in [[Module:parameters]] can occur either inside of this
  function (the usual workflow) or outside of this function (for more complex cases). In the former case the raw parent
  args are passed in along with a partially built `params` structure of the sort required by [[Module:parameters]],
  containing only the term list itself along with any other parameters that are '''not''' term properties (such as
  a language code in {{para|1}} and boolean flags like {{para|nocat}}, {{para|nocap}}, etc.). This structure is
  ''augmented'' with parameters, one for each per-term property, and [[Module:parameters]] is invoked. In the latter
  case where raw argument processing is done by the caller, they must build the partial `params` structure; augment it
  themselves using `augment_params_with_modifiers()`; call [[Module:parameters]] themselves; and pass in the processed
  arguments. In both cases, the return value of this function contains two values, an object specifying the term and all
  properties; and the processed arguments structure, so that the non-term-property arguments can be processed as
  appropriate.
# Optionally, the term can consist of a number of ''subitems'' separated by delimiters (usually a comma, but the
  possible delimiter or delimiters are controllable). Each subitem can have its own inline modifiers. This functionality
  is used, for example, by form-of templates. When this feature is invoked, the format of the term object changes;
  instead of directly being an object describing the term and its properties, it is an object with a `terms` field
  containing a list of per-subitem objects along with other top-level fields describing per-term properties. By default,
  if there are separate parameters specified along with multiple subitems, an error occurs, but this is controllable;
  currently, you can request that the parameters be assigned to the first or last subitem.
`data` is an object containing several possible fields.
1. Fields that are required or recommended (usually related to argument processing):
* `raw_args` ('''required''' unless `processed_args` is specified): The raw arguments, normally fetched from
  {frame:getParent().args}. They are parsed using `process()` in [[Module:parameters]]. Most callers pass in raw
  arguments.
* `processed_args`: The object of parsed arguments returned by `process()` in [[Module:parameters]]. One (but not both)
  of `raw_args` and `processed_args` must be set.
* `param_mods` ('''required'''): A structure describing the possible inline modifiers and their properties. See the
  introductory comment above. Most often, this is generated using `construct_param_mods()` rather than specified
  manually.
* `params` ('''required''' unless `processed_args` is specified): A structure describing the possible parameters,
  '''other than''' the ones that are separate-parameter equivalents of inline modifiers. This is automatically
  "augmented" with the separate-parameter equivalents of the inline modifiers described in `param_mods` prior to parsing
  the raw arguments with [[Module:parameters]]. '''WARNING:''' This structure is destructively modified, both by the
  "augmentation" process of adding separate-parameter equivalents of inline modifiers, and by the processing done by
  [[Module:parameters]] itself. (Nonetheless, substructures can safely be shared in this structure, and will be
  correctly handled.)
* `termarg` ('''required'''): The argument containing the item with attached inline modifiers to be parsed. Usually a
  numeric value such as {1} or {2}.
* `track_module` ('''recommended'''): The name of the calling module, for use in adding tracking pages that are used
  internally to track pages containing template invocations with certain properties.
* `lang` ('''recommended'''): The language object for the language of the item or subitems, or the name of the argument
  to fetch the object from. It is not strictly necessary to specify this, as this function only initializes items based
  on inline modifiers and separate arguments and doesn't actually format the resulting items. However, if specified, it
  is used for certain purposes:
  *# It specifies the default for the `lang` property of returned objects if not otherwise set (e.g. by a language
    prefix).
  *# It is used to initialize an internal cache for speeding up language-code parsing (primarily useful if the same
    language code may appear in several subitems).
  The value of `lang` can be any of the following:
  * If a string or number, it is assumed to be requesting the value of `args` at that key. Note that if the string is in
    the form of a number (e.g. "3"), it is normalized to a number prior to fetching.
  * Otherwise, if a function, it is assumed to be a function to return the argument value given `args`, which is passed
    to the function as its only argument.
  * Otherwise, it is used directly.
* `sc` ('''recommended'''): The script object for the item or subitems, or the name of the argument to fetch the object
  from. The possible values and their handling are the same as with `lang`. In general, as with `lang`, it is not
  strictly necessary to specify this. However, if specified, it is used to supply the default for the `sc` property of
  returned items if not otherwise set (e.g. by the {{para|sc}} parameter or `<sc:...>` inline modifier). The most common
  value is {"sc"}.
* `make_separate_g_into_list`: Set this to {true} if separate gender parameters exist and are specified using
  {{para|g}}, {{para|g2}}, etc. instead of using a single comma-separated {{para|g}} field.
2. Other argument-related fields:
* `adjust_params_before_arg_processing`: An optional function to further adjust the `params` structure prior to
  calling `process()` in [[Module:parameters]]. This should be used when there are mismatches between the format of a
  given property as an inline modifier and the corresponding property as a separate parameter (as with the {{para|g}}
  parameter and {{cd|<g:...>}} modifier, but this particular case is handled by the `make_separate_g_into_list` field).
* `process_args_before_parsing`: An optional function to apply further processing to the processed `args` structure
  returned by [[Module:parameters]], before parsing inline modifiers. This is passed one argument, the processed
  arguments. It should make modifications in-place.
* `term_dest`: The field to store the value of the item itself into, after inline modifiers and (if allowed) language
  prefixes are stripped off. Defaults to {"term"}.
* `pre_normalize_modifiers`: As in `parse_inline_modifiers()`.
* `allow_conflicting_inline_mods_and_separate_params`: If specified, don't throw an error if a value is specified for
  a given property using both an inline modifier and separate param; in this case, the inline modifier takes precedence.
3. Fields related to language prefixes:
* `parse_lang_prefix`: If true, allow and parse off a language code prefix attached to items followed by a colon, such
  as {la:minūtia} or {grc:[[σκῶρ|σκατός]]}. Etymology-only languages are allowed. Inline modifiers can be attached to
  such items. The exact syntax allowed is as specified in the `parse_term_with_lang()` function in
  [[Module:parse utilities]]. If `allow_multiple_lang_prefixes` is given, a {{cd|+}}-sign-separated list of language
  prefixes can be attached to an item. The resulting language object is stored into the `termlang` field, and also into
  the `lang` field (or in the case of `allow_multiple_lang_prefixes`, the list of language objects is stored into the
  `termlangs` field, and the first specified object is stored in the `lang` field).
* `allow_multiple_lang_prefixes`: If given in conjunction with `parse_lang_prefix`, multiple language code prefixes can
  be given, separated by a {{cd|+}} sign. See `parse_lang_prefix` above.
* `allow_bad_lang_prefix`: If given in conjunction with `parse_lang_prefix`, unrecognized language prefixes do not
  trigger an error, but are simply ignored (and not stripped off the item). Note that, regardless of whether this is
  given, prefixes before a colon do not trigger an error if they do not have the form of a language prefix or if a space
  follows the colon. It is not recommended that this be given because typos in language prefixes will not trigger an
  error and will tend to remain unfixed.
4. Fields related to multiple subitems in the term:
* `splitchar`: A Lua pattern. If specified, the user-specified argument can consist of multiple delimiter-separated
  subitems, each of which may be followed by inline modifiers. In this case, the first returned value is no longer an
  object describing the item, but instead an object with a `terms` field, whose value is a list describing the subitems
  (whose format is the same as the normal format of the item when `splitchar` is not specified). Each subitem object
  will have a `delimiter` field holding the actual delimiter occurring before the subitem, which is useful in the case
  where `splitchar` matches multiple possible characters. In this case, it is possible to specify that a given modifier
  can only occur after the last subitem and effectively modifies the whole collection of subitems by setting
  `overall = true` on the modifier. In this case, the modifier's value will be stored in the top-level object (the
  object with the `terms` field specifying the subitems). Note that splitting on delimiters will not happen in certain
  protected sequences (by default comma+whitespace; see below). In addition, the algorithm to split on delimiters is
  sensitive to inline modifier syntax and will not be confused by delimiters inside of inline modifiers or inside of
  square brackets, which do not trigger splitting (whether or not contained within protected sequences).
* `escape_fun` and `unescape_fun`: As in `split_escaping()` and `split_alternating_runs_escaping()` in
  [[Module:parse utilities]]. They control the protected sequences that won't be split when `splitchar` is specified
  (see previous item). By default, `escape_comma_whitespace` and `unescape_comma_whitespace` are used, so that
  comma+whitespace sequences won't be split.
* `subitem_param_handling`: How to handle separate parameters that are specified in the presence of multiple subitems.
  The possible values are {"only"} (only allow separate parameters if there aren't any subitems, otherwise throw an
  error), {"first"} (store the separate parameters in the first subitem) and {"last"} (store the separate parameters
  in the last subitem). The default is {"only"}. As a special case, an {{para|scN}} separate parameter will be stored
  into all subitems.
Two values are returned, an object describing the item (or subitems) and the processed `args` structure. In the returned
item, there will be one field set for each specified property (either through inline modifiers or separate parameters).
If subitems are not allowed, the item directly has fields set on it for the specified properties. If subitems ''are''
allowed, the item contains a `terms` field, which is a list of subitem objects, each of which has fields set on it for
the specified properties of that subitem. In addition, the following fields may be set on the item or each subitem:
* `term`: The term portion of the item (minus inline modifiers and language prefixes). {nil} if no term was given.
* `termlang`: If there is a language prefix, the corresponding language object is stored here (only if
  `parse_lang_prefix` is set and `allow_multiple_lang_prefixes` is not set).
* `termlangs`: If there are language prefixes and both `parse_lang_prefix` and `allow_multiple_lang_prefixes` are
  set, the list of corresponding language objects is stored here.
* `lang`: The language object of the item. This is set when either (a) there is a language prefix parsed off (if
  multiple prefixes are allowed, this corresponds to the first one); (b) the `lang` property is allowed and specified;
  (c) neither (a) nor (b) apply and the `lang` field of the overall `data` object is set, providing a default value.
* `sc`: The script object of the item. This is set when either (a) the `sc` property is allowed and specified; (b)
  `sc` isn't otherwise set and the `sc` field of the overall `data` object is set, providing a default value.
* `delimiter`: If subitems are allowed, this specifies the delimiter used prior to the given subitem (e.g. {","}).
]==]
function export.parse_term_with_inline_modifiers_and_separate_params(data)
	validate_argument_related_fields(data)

	local raw_args = data.raw_args
	local termarg = data.termarg
	local param_mods = data.param_mods
	local args

	if not raw_args then
		-- No raw arguments supplied: use the already-processed arguments handed in by the caller.
		args = data.processed_args
	else
		local params = data.params
		-- This function parses a single term, so a list-valued spec for the term argument is reported as an
		-- internal error.
		local termarg_spec = params[termarg]
		if type(termarg_spec) == "table" and termarg_spec.list then
			internal_error("Term spec in `data.params` must not have `list` set", termarg_spec)
		end
		-- Add the separate-parameter equivalents of the inline modifiers to `params` (modified in place).
		export.augment_params_with_modifiers(params, param_mods, "always")
		if data.make_separate_g_into_list then
			-- HACK: g= is a list for compatibility, but sublist as an inline parameter.
			params.g = {list = true, item_dest = "genders"}
		end
		-- Give the caller a last chance to tweak `params` before the raw arguments are processed.
		local adjust_params = data.adjust_params_before_arg_processing
		if adjust_params then
			adjust_params(params)
		end
		args = process_params(raw_args, params)
	end

	-- Optional caller hook, invoked with the processed arguments before any inline-modifier parsing.
	local preprocess_args = data.process_args_before_parsing
	if preprocess_args then
		preprocess_args(args)
	end

	local term = args[termarg]
	local lang_cache = data.lang_cache
	local lang = fetch_argument(args, data.lang)
	if lang and lang_cache then
		-- Seed the caller-provided cache with the default language object, keyed by its code.
		lang_cache[lang:getCode()] = lang
	end
	local sc = fetch_argument(args, data.sc)
	local term_dest = data.term_dest or "term"
	local splitchar = data.splitchar
	local termobj = {}

	if term then
		-- Callback for parse_inline_modifiers(). With `splitchar` set, each call gets a fresh object;
		-- otherwise the single top-level `termobj` is filled in directly.
		local function generate_obj(term_text, parse_err)
			local target
			if splitchar then
				target = {}
			else
				target = termobj
			end
			return export.generate_obj_maybe_parsing_lang_prefix {
				term = term_text,
				termobj = target,
				term_dest = term_dest,
				paramname = termarg,
				parse_lang_prefix = data.parse_lang_prefix,
				parse_err = parse_err,
				allow_bad_lang_prefix = data.allow_bad_lang_prefix,
				allow_multiple_lang_prefixes = data.allow_multiple_lang_prefixes,
				lang_cache = lang_cache,
			}
		end

		-- `termobj` is passed as the outer container only when `splitchar` is set.
		local outer_container
		if splitchar then
			outer_container = termobj
		end

		local inline_modifier_opts = {
			paramname = termarg,
			param_mods = param_mods,
			generate_obj = generate_obj,
			splitchar = splitchar,
			preserve_splitchar = true,
			escape_fun = data.escape_fun,
			unescape_fun = data.unescape_fun,
			outer_container = outer_container,
			pre_normalize_modifiers = data.pre_normalize_modifiers,
		}
		parse_inline_modifiers(term, inline_modifier_opts)
	end

	-- This call is made whether or not a term was given, so separate parameters are still handed to the
	-- helper along with the (possibly empty) `termobj`.
	local postprocess_spec = {
		args = args,
		param_mods = param_mods,
		termobj = termobj,
		has_subitems = splitchar and true or false,
		lang = lang,
		sc = sc,
		subitem_param_handling = data.subitem_param_handling,
		allow_conflicting_inline_mods_and_separate_params = data.allow_conflicting_inline_mods_and_separate_params,
	}
	copy_separate_params_to_termobj_and_postprocess(postprocess_spec)

	return termobj, args
end
end




return export
return export