45,647
edits
No edit summary |
No edit summary |
||
Line 1: | Line 1: | ||
local | local insert = table.insert | ||
local m_form_of = require("Module:form of") | local m_form_of = require("Module:form of") | ||
local | local m_params = require("Module:parameters") | ||
local put_module = "Module:parse utilities" | |||
local rfind = mw.ustring.find | local rfind = mw.ustring.find | ||
local rmatch = mw.ustring.match | local rmatch = mw.ustring.match | ||
local rsplit = mw.text.split | local rsplit = mw.text.split | ||
local rgsplit = mw.text.gsplit | |||
local export = {} | |||
--[==[ intro: | |||
This module contains code that directly implements {{tl|form of}}, {{tl|inflection of}}, and the various other | |||
[[:Category:Form-of templates|form-of templates]]. It is meant to be called directly from templates. See also | |||
[[Module:form of]], which contains the underlying implementing code and is meant to be called from other modules. | |||
]==] | |||
-- Append every element of the sequence `new_items` to the end of the sequence `list`, modifying `list` in place
-- (the counterpart of Python's list.extend()). Nothing is returned.
local function extend_list(list, new_items)
	for _, element in ipairs(new_items) do
		list[#list + 1] = element
	end
end
-- Return a freshly built table of the template-level (parent argument) parameter specs that the form-of template
-- functions in this module start from. A new table is created on every call, so callers are free to add further
-- parameter specs to the result.
local function get_common_template_params()
	local params = {}

	-- Named params not controlling link display
	params["cat"] = {list = true}
	params["notext"] = {type = "boolean"}
	params["sort"] = {}
	params["enclitic"] = {}
	-- FIXME! nocap= should only be available when withcap=1 is given in the invocation args. Before doing that, all
	-- uses of nocap= in other circumstances need to be removed.
	params["nocap"] = {type = "boolean"}
	-- FIXME! nodot= should only be available when withdot=1 is given in the invocation args. Before doing that, all
	-- uses of nodot= in other circumstances need to be removed.
	params["nodot"] = {type = "boolean"}
	params["pagename"] = {} -- for testing, etc.

	return params
end
--[=[ | --[=[ | ||
Line 26: | Line 58: | ||
and the values should be boolean true. | and the values should be boolean true. | ||
]=]-- | ]=]-- | ||
local function process_parent_args(template, parent_args, params, defaults, ignorespecs, tracked_params) | local function process_parent_args(template, parent_args, params, defaults, ignorespecs, tracked_params, function_name) | ||
if #defaults > 0 or #ignorespecs > 0 then | if #defaults > 0 or #ignorespecs > 0 then | ||
local new_parent_args = {} | local new_parent_args = {} | ||
Line 36: | Line 68: | ||
new_parent_args[defparam] = defval | new_parent_args[defparam] = defval | ||
end | end | ||
local params_to_ignore = {} | local params_to_ignore = {} | ||
local numbered_list_params_to_ignore = {} | local numbered_list_params_to_ignore = {} | ||
Line 42: | Line 74: | ||
for _, ignorespec in ipairs(ignorespecs) do | for _, ignorespec in ipairs(ignorespecs) do | ||
for | for ignore in rgsplit(ignorespec, ",") do | ||
local param = rmatch(ignore, "^(.*):list$") | local param = rmatch(ignore, "^(.*):list$") | ||
if param then | if param then | ||
if rfind(param, "^[0-9]+$") then | if rfind(param, "^[0-9]+$") then | ||
insert(numbered_list_params_to_ignore, tonumber(param)) | |||
else | else | ||
insert(named_list_params_to_ignore, | |||
"^" .. require("Module:utilities").pattern_escape(param) .. "[0-9]*$") | "^" .. require("Module:string utilities").pattern_escape(param) .. "[0-9]*$") | ||
end | end | ||
else | else | ||
Line 86: | Line 118: | ||
end | end | ||
local args = | local args = m_params.process(parent_args, params, nil, "form of/templates", function_name) | ||
return args | return args | ||
Line 100: | Line 132: | ||
local inflection_tags = {} | local inflection_tags = {} | ||
for _, tagspec in ipairs(tagspecs) do | for _, tagspec in ipairs(tagspecs) do | ||
for | for tag in rgsplit(tagspec, split_regex) do | ||
insert(inflection_tags, tag) | |||
end | end | ||
end | end | ||
Line 107: | Line 139: | ||
end | end | ||
-- Specs for the inline modifiers (e.g. <tr:...>, <t:...>) recognized on lemma-like terms. This table is handed to
-- parse_inline_modifiers() in [[Module:parse utilities]] as its `param_mods` setting.
local term_param_mods = {}

-- Modifiers whose spec is empty.
for _, mod in ipairs { "gloss", "tr", "ts", "id", "alt", "q", "qq", "lit", "pos" } do
	term_param_mods[mod] = {}
end

-- [[Module:links]] expects the gloss in the "gloss" key.
term_param_mods.t = { item_dest = "gloss" }
-- [[Module:links]] expects genders in the "genders" key.
term_param_mods.g = { item_dest = "genders", sublist = true }
term_param_mods.sc = { type = "script" }
-- Convert VAL, the raw value of the parameter named PARAMNAME, into a list of term objects. Each object starts out as
-- {lang = LANG, term = TERM}. When VAL contains inline modifiers, the parsing (including any further fields on the
-- objects) is delegated to parse_inline_modifiers() in [[Module:parse utilities]].
local function parse_terms_with_inline_modifiers(paramname, val, lang)
	local function make_term_obj(term)
		return {lang = lang, term = term}
	end

	-- Check for an inline modifier, e.g. מרים<tr:Miryem>. But exclude HTML with <span ...>, <i ...>, <br/> or similar
	-- in it, caused by wrapping an argument in {{l|...}}, {{af|...}} or similar. All tags of the sort parsed here
	-- consist of a less-than sign, plus letters, plus a colon, e.g. <tr:...>, so if the first tag on the outer level
	-- isn't in this format, we don't try to parse it. The restriction to the outer level allows generated HTML inside
	-- of e.g. qualifier tags, such as foo<q:similar to {{m|fr|bar}}>.
	if val:find("<") and not val:find("^[^<]*<[a-z]*[^a-z:]") then
		return require(put_module).parse_inline_modifiers(val, {
			paramname = paramname,
			param_mods = term_param_mods,
			generate_obj = make_term_obj,
			splitchar = ",",
		})
	end

	local pieces
	if val:find(",<") then
		-- This happens when there's an embedded {{,}} template, as in [[MMR]], [[TMA]], [[DEI]], where an initialism
		-- expands to multiple terms; easiest not to try and parse the lemma spec as multiple lemmas.
		pieces = {val}
	elseif val:find(",%s") or (val:find(",") and val:find("[\\%[<]")) then
		-- A comma followed by whitespace is present, or a comma occurs together with a backslash, square bracket or
		-- angle bracket. Hand these cases to the more sophisticated splitter in [[Module:parse utilities]]. In all
		-- other cases a direct split on commas is used below, which avoids loading [[Module:parse utilities]]
		-- unnecessarily.
		pieces = require(put_module).split_on_comma(val)
	else
		pieces = rsplit(val, ",")
	end

	-- Replace each split-off string with its term object.
	for i, piece in ipairs(pieces) do
		pieces[i] = make_term_obj(piece)
	end
	return pieces
end
-- Names of the parameters that describe a link to a term, plus a set view of the same names for membership tests.
local link_params = { "term", "alt", "t", "gloss", "sc", "tr", "ts", "pos", "id", "lit" }
local link_param_set = {}
for i = 1, #link_params do
	link_param_set[link_params[i]] = true
end
-- Modify PARAMS in-place by adding parameters that control the link to the | -- Modify PARAMS in-place by adding parameters that control the link to the | ||
Line 112: | Line 214: | ||
-- entry itself; TERM_PARAM + 1 will be the display text, and TERM_PARAM + 2 | -- entry itself; TERM_PARAM + 1 will be the display text, and TERM_PARAM + 2 | ||
-- will be the gloss, unless NO_NUMBERED_GLOSS is given. | -- will be the gloss, unless NO_NUMBERED_GLOSS is given. | ||
local function add_link_params(parent_args, params, term_param, no_numbered_gloss)
	-- Refuse the old separate-parameter style for multiple lemmas: any string-keyed argument consisting of a link
	-- parameter name followed by digits (e.g. tr2=) raises an error.
	for name in pairs(parent_args) do
		if type(name) == "string" then
			local base = name:match("^([a-z]+)[0-9]+$")
			if base and link_param_set[base] then
				error("Support for the separate-parameter style of multiple lemmas in form-of templates is going away; use a comma-separated lemma param with inline modifiers")
			end
		end
	end

	-- Numbered params controlling link display
	params[term_param] = {}
	params[term_param + 1] = {alias_of = "alt"}
	if not no_numbered_gloss then
		params[term_param + 2] = {alias_of = "t"}
	end

	-- Named params controlling link display
	params["gloss"] = {alias_of = "t"}
	params["g"] = {list = true}
	params["sc"] = {type = "script"}

	-- Remaining link params get an empty spec unless one was set above. Index 1 ("term") is skipped.
	for i = 2, #link_params do
		local param = link_params[i]
		params[param] = params[param] or {}
	end
end
-- Look up the language-specific base lemma parameters (if any) for the language of this template call, add an empty
-- parameter spec to PARAMS for each of them, and return the list of base lemma parameter objects. Returns nil when
-- no such parameters are found.
--
-- PARENT_ARGS are the raw (unprocessed) template arguments, IARGS the processed invocation arguments, and COMPAT is
-- true if the language code is found in parent_args["lang"] instead of parent_args[1].
local function add_base_lemma_params(parent_args, iargs, params, compat)
	-- Need to do what [[Module:parameters]] does to string arguments from parent_args as we're running this
	-- before calling [[Module:parameters]] on parent_args.
	local function ine(arg)
		if not arg then
			return nil
		end
		arg = mw.text.trim(arg)
		return arg ~= "" and arg or nil
	end

	-- Load the language-specific data for CODE and register its base lemma params in PARAMS. Returns the list of
	-- base lemma param objects, or nil if CODE has no language-specific data or that data defines no such params.
	local function register_base_lemma_params(code)
		-- Only languages listed in langs_with_lang_specific_tags are loaded; this guard keeps mw.loadData() from
		-- being called on a data module that may not exist.
		if not m_form_of.langs_with_lang_specific_tags[code] then
			return nil
		end
		local langdata = mw.loadData(m_form_of.form_of_lang_data_module_prefix .. code)
		local base_lemma_params = langdata.base_lemma_params
		if not base_lemma_params then
			return nil
		end
		for _, param in ipairs(base_lemma_params) do
			params[param.param] = {}
		end
		return base_lemma_params
	end

	-- Check the language-specific data for additional base lemma params. But if there's no language-specific data and
	-- the language is an etym-only language, fall back to the corresponding full language and check again.
	local langcode = ine(parent_args[compat and "lang" or 1]) or iargs["lang"] or "und"
	if m_form_of.langs_with_lang_specific_tags[langcode] then
		return register_base_lemma_params(langcode)
	end

	local lang = require("Module:languages").getByCode(langcode, nil, "allow etym")
	if lang and lang:getCode() ~= lang:getFullCode() then
		return register_base_lemma_params(lang:getFullCode())
	end
	return nil
end
-- Given processed invocation arguments IARGS and processed parent arguments | --[=[ | ||
Given processed invocation arguments IARGS and processed parent arguments ARGS, as well as TERM_PARAM (the parent | |||
argument specifying the first main entry/lemma) and COMPAT (true if the language code is found in args["lang"] instead | |||
of args[1]), return an object as follows: | |||
{ | |||
lang = LANG, | |||
lemmas = {LEMMA_OBJ, LEMMA_OBJ, ...}, | |||
enclitics = {ENCLITIC_OBJ, ENCLITIC_OBJ, ...}, | |||
base_lemmas = {BASE_LEMMA_OBJ, BASE_LEMMA_OBJ, ...}, | |||
categories = {"CATEGORY", "CATEGORY", ...}, | |||
} | |||
where | |||
local function | |||
local lang = args[compat and "lang" or 1] | * LANG is the language code; | ||
* LEMMAS is a sequence of objects specifying the main entries/lemmas, as passed to full_link in [[Module:links]]; | |||
however, if the invocation argument linktext= is given, it will be a string consisting of that text, and if the | |||
invocation argument nolink= is given, it will be nil; | |||
* ENCLITICS is nil or a sequence of objects specifying the enclitics, as passed to full_link in [[Module:links]]; | |||
* BASE_LEMMAS is a sequence of objects specifying the base lemma(s), which are used when the lemma is itself a
form of another lemma (the base lemma), e.g. a comparative, superlative or participle; each object (BASE_LEMMA_OBJ)
is of the form { paramobj = PARAM_OBJ, lemmas = {LEMMA_OBJ, LEMMA_OBJ, ...} } where PARAM_OBJ describes the properties
of the base lemma parameter (i.e. the relationship between the intermediate and base lemmas) and LEMMA_OBJ is of the
same format as ENCLITIC_OBJ, i.e. an object suitable to be passed to full_link in [[Module:links]]; PARAM_OBJ is of the
format { param = "PARAM", tags = {"TAG", "TAG", ...} } where PARAM is the name of the parameter to {{inflection of}} | |||
etc. that holds the base lemma(s) of the specified relationship and the tags describe the relationship, such as | |||
{"comd"} or {"past", "part"}; | |||
* CATEGORIES is the categories to add the page to (consisting of any categories specified in the invocation or | |||
parent args and any tracking categories, but not any additional lang-specific categories that may be added by | |||
{{inflection of}} or similar templates). | |||
This is a subfunction of construct_form_of_text(). | |||
]=] | |||
local function get_lemmas_and_categories(iargs, args, term_param, compat, base_lemma_params) | |||
local lang = args[compat and "lang" or 1] | |||
-- Determine categories for the page, including tracking categories | -- Determine categories for the page, including tracking categories | ||
Line 158: | Line 325: | ||
if not args["nocat"] then | if not args["nocat"] then | ||
for _, cat in ipairs(iargs["cat"]) do | for _, cat in ipairs(iargs["cat"]) do | ||
insert(categories, lang:getFullName() .. " " .. cat) | |||
end | end | ||
end | end | ||
for _, cat in ipairs(args["cat"]) do | for _, cat in ipairs(args["cat"]) do | ||
insert(categories, lang:getFullName() .. " " .. cat) | |||
end | end | ||
-- Format the link, preceding text and categories | -- Format the link, preceding text and categories | ||
local | local function add_term_tracking_categories(term) | ||
-- maybe add tracking category if primary entry doesn't exist (this is an | |||
-- expensive call so we don't do it by default) | |||
if iargs["noprimaryentrycat"] and term and mw.title.getCurrentTitle().nsText == "" | |||
and not mw.title.new(term).exists then | |||
insert(categories, lang:getFullName() .. " " .. iargs["noprimaryentrycat"]) | |||
end | |||
end | |||
local lemmas | |||
if iargs["nolink"] then | if iargs["nolink"] then | ||
lemmas = nil | |||
elseif iargs["linktext"] then | elseif iargs["linktext"] then | ||
lemmas = iargs["linktext"] | |||
else | else | ||
local term = args[term_param] | local term = args[term_param] | ||
Line 183: | Line 359: | ||
end | end | ||
end | end | ||
if term then | |||
lemmas = parse_terms_with_inline_modifiers(term_param, term, lang) | |||
for _, lemma in ipairs(lemmas) do | |||
add_term_tracking_categories(lemma.term) | |||
end | |||
else | |||
lemmas = {{ lang = lang }} | |||
end | end | ||
-- | |||
-- sc= but not invocation arg sc= should override inline modifier sc=. | |||
if | local sc | ||
if args["sc"] then | |||
lemmas[1].sc = args["sc"] | |||
elseif not lemmas[1].sc and iargs["sc"] then | |||
lemmas[1].sc = iargs["sc"] | |||
end | end | ||
local | if #args["g"] > 0 then | ||
local genders = {} | |||
for _, g in ipairs(args["g"]) do | |||
extend_list(genders, rsplit(g, ",")) | |||
end | |||
lemmas[1].genders = genders | |||
end | |||
if args["t"] then | |||
lemmas[1].gloss = args["t"] | |||
end | |||
for _, param in ipairs(link_params) do | |||
if param ~= "sc" and param ~= "term" and param ~= "g" and param ~= "gloss" and param ~= "t" and | |||
args[param] then | |||
lemmas[1][param] = args[param] | |||
end | |||
end | |||
end | |||
local enclitics | |||
if args.enclitic then | |||
enclitics = parse_terms_with_inline_modifiers("enclitic", args.enclitic, lang) | |||
end | end | ||
local base_lemmas = {} | |||
return lang, | if base_lemma_params then | ||
for _, base_lemma_param_obj in ipairs(base_lemma_params) do | |||
local param = base_lemma_param_obj.param | |||
if args[param] then | |||
insert(base_lemmas, { | |||
paramobj = base_lemma_param_obj, | |||
lemmas = parse_terms_with_inline_modifiers(param, args[param], lang), | |||
}) | |||
end | |||
end | |||
end | |||
return { | |||
lang = lang, | |||
lemmas = lemmas, | |||
enclitics = enclitics, | |||
base_lemmas = base_lemmas, | |||
categories = categories, | |||
} | |||
end | end | ||
-- Construct and return the full definition line for a form-of-type template invocation.
--
-- Arguments:
--   IARGS: processed invocation arguments.
--   ARGS: processed parent (template) arguments.
--   TERM_PARAM: the parent argument specifying the main entry.
--   COMPAT: true if the language code is found in args["lang"] instead of args[1].
--   BASE_LEMMA_PARAMS: passed through unchanged to get_lemmas_and_categories().
--   DO_FORM_OF: a function that returns the actual definition-line text and any language-specific categories.
--
-- DO_FORM_OF takes one argument, the return value of get_lemmas_and_categories() (an object describing the lemmas,
-- enclitics, base lemmas and categories fetched), and should return two values:
--
-- (1) The actual definition-line text, marked up appropriately with <span>...</span> but without any terminating
--     period/dot.
-- (2) Any extra categories to add the page to (other than those that can be derived from parameters specified to the
--     invocation or parent arguments, which will automatically be added to the page).
--
-- The terminating period/dot is added as appropriate, the categories returned by DO_FORM_OF are appended to those
-- requested by the invocation or parent args, and then the whole thing is formatted.
local function construct_form_of_text(iargs, args, term_param, compat, base_lemma_params, do_form_of)
	local lemma_data = get_lemmas_and_categories(iargs, args, term_param, compat, base_lemma_params)

	local form_of_text, lang_cats = do_form_of(lemma_data)
	local categories = lemma_data.categories
	extend_list(categories, lang_cats)

	-- nodot= suppresses the final punctuation; otherwise dot= supplies it; otherwise invocation arg withdot= requests
	-- a period.
	local final_punct
	if args["nodot"] then
		final_punct = ""
	elseif args["dot"] then
		final_punct = args["dot"]
	elseif iargs["withdot"] then
		final_punct = "."
	else
		final_punct = ""
	end
	local text = form_of_text .. final_punct

	if #categories == 0 then
		return text
	end

	-- The lemmas field is only indexed when it is a table.
	local lemmas_are_objects = type(lemma_data.lemmas) == "table"
	-- If lemma_is_sort_key is given, supply the first lemma term as the sort base if possible. If sort= is given,
	-- it will override the base; otherwise, the base will be converted appropriately to a sort key using the
	-- same algorithm applied to pagenames.
	local sort_base = iargs.lemma_is_sort_key and lemmas_are_objects and lemma_data.lemmas[1].term
	-- Supply the first lemma's script for sort key computation.
	local sc = lemmas_are_objects and lemma_data.lemmas[1].sc

	return text .. require("Module:utilities").format_categories(categories, lemma_data.lang, args["sort"],
		sort_base, m_form_of.force_cat, sc)
end
-- | -- Invocation parameters shared between form_of_t(), tagged_form_of_t() and inflection_of_t(). | ||
local function get_common_invocation_params() | |||
return { | |||
parameters | |||
function | |||
["term_param"] = {type = "number"}, | ["term_param"] = {type = "number"}, | ||
["lang"] = {}, | ["lang"] = {}, -- To be used as the default code in params. | ||
["sc"] = {}, | ["sc"] = {type = "script"}, | ||
["cat"] = {list = true}, | ["cat"] = {list = true}, | ||
["ignore"] = {list = true}, | ["ignore"] = {list = true}, | ||
Line 338: | Line 474: | ||
["posttext"] = {}, | ["posttext"] = {}, | ||
["noprimaryentrycat"] = {}, | ["noprimaryentrycat"] = {}, | ||
["lemma_is_sort_key"] = {}, | |||
} | } | ||
end | |||
local iargs = | |||
--[==[ | |||
Function that implements {{tl|form of}} and the various more specific form-of templates (but not {{tl|inflection of}} | |||
or templates that take tagged inflection parameters). | |||
Invocation params: | |||
; {{para|1|req=1}} | |||
: Text to display before the link. | |||
; {{para|term_param}} | |||
: Numbered param holding the term linked to. Other numbered params come after. Defaults to 1 if invocation or template | |||
param {{para|lang}} is present, otherwise 2. | |||
; {{para|lang}} | |||
: Default language code for language-specific templates. If specified, no language code needs to be specified, and if | |||
specified it needs to be set using {{para|lang}}, not {{para|1}}. | |||
; {{para|sc}} | |||
: Default script code for language-specific templates. The script code can still be overridden using template param | |||
{{para|sc}}. | |||
; {{para|cat}}, {{para|cat2}}, ...: | |||
: Categories to place the page into. The language name will automatically be prepended. Note that there is also a | |||
template param {{para|cat}} to specify categories at the template level. Use of {{para|nocat}} disables categorization | |||
of categories specified using invocation param {{para|cat}}, but not using template param {{para|cat}}. | |||
; {{para|ignore}}, {{para|ignore2}}, ...: | |||
: One or more template params to silently accept and ignore. Useful e.g. when the template takes additional parameters | |||
such as {{para|from}} or {{para|POS}}. Each value is a comma-separated list of either bare parameter names or | |||
specifications of the form `PARAM:list` to specify that the parameter is a list parameter. | |||
; {{para|def}}, {{para|def2}}, ...: | |||
: One or more default values to supply for template args. For example, specifying {{para|def|2=tr=-}} causes the default | |||
for template param {{para|tr}} to be `-`. Actual template params override these defaults. | |||
; {{para|withcap}} | |||
: Capitalize the first character of the text preceding the link, unless template param {{para|nocap}} is given. | |||
; {{para|withdot}} | |||
: Add a final period after the link, unless template param {{para|nodot}} is given to suppress the period, or | |||
{{para|dot}} is given to specify an alternative punctuation character. | |||
; {{para|nolink}} | |||
: Suppress the display of the link. If specified, none of the template params that control the link | |||
({{para|<var>term_param</var>}}, {{para|<var>term_param</var> + 1}}, {{para|<var>term_param</var> + 2}}, {{para|t}}, | |||
{{para|gloss}}, {{para|sc}}, {{para|tr}}, {{para|ts}}, {{para|pos}}, {{para|g}}, {{para|id}}, {{para|lit}}) will be | |||
available. If the calling template uses any of these parameters, they must be ignored using {{para|ignore}}. | |||
; {{para|linktext}}
: Override the display of the link with the specified text. This is useful if a custom template is available to format | |||
the link (e.g. in Hebrew, Chinese and Japanese). If specified, none of the template params that control the link | |||
({{para|<var>term_param</var>}}, {{para|<var>term_param</var> + 1}}, {{para|<var>term_param</var> + 2}}, {{para|t}}, | |||
{{para|gloss}}, {{para|sc}}, {{para|tr}}, {{para|ts}}, {{para|pos}}, {{para|g}}, {{para|id}}, {{para|lit}}) will be | |||
available. If the calling template uses any of these parameters, they must be ignored using {{para|ignore}}. | |||
; {{para|posttext}} | |||
: Additional text to display directly after the formatted link, before any terminating period/dot and inside of | |||
`<span class='use-with-mention'>`. | |||
; {{para|noprimaryentrycat}} | |||
: Category to add the page to if the primary entry linked to doesn't exist. The language name will automatically be | |||
prepended. | |||
; {{para|lemma_is_sort_key}} | |||
: If the user didn't specify a sort key, use the lemma as the sort key (instead of the page itself). | |||
]==] | |||
function export.form_of_t(frame) | |||
local iparams = get_common_invocation_params() | |||
iparams[1] = {required = true} | |||
local iargs = m_params.process(frame.args, iparams) | |||
local parent_args = frame:getParent().args | local parent_args = frame:getParent().args | ||
local term_param = iargs["term_param"] | local term_param = iargs["term_param"] | ||
local compat = iargs["lang"] or parent_args["lang"] | local compat = iargs["lang"] or parent_args["lang"] | ||
term_param = term_param or compat and 1 or 2 | term_param = term_param or compat and 1 or 2 | ||
local params = | local params = get_common_template_params() | ||
-- Numbered params | |||
params[compat and "lang" or 1] = { | |||
required = not iargs["lang"], | |||
type = "language", | |||
default = iargs["lang"] or "und" | |||
} | } | ||
local base_lemma_params | |||
if not iargs["nolink"] and not iargs["linktext"] then | if not iargs["nolink"] and not iargs["linktext"] then | ||
add_link_params(params, term_param) | add_link_params(parent_args, params, term_param) | ||
base_lemma_params = add_base_lemma_params(parent_args, iargs, params, compat) | |||
end | end | ||
Line 388: | Line 576: | ||
local args = process_parent_args("form-of-t", parent_args, params, iargs["def"], | local args = process_parent_args("form-of-t", parent_args, params, iargs["def"], | ||
iargs["ignore"], ignored_params) | iargs["ignore"], ignored_params, "form_of_t") | ||
local text = args["notext"] and "" or iargs[1] | local text = args["notext"] and "" or iargs[1] | ||
if args["cap"] or iargs["withcap"] and not args["nocap"] then | if args["cap"] or iargs["withcap"] and not args["nocap"] then | ||
Line 395: | Line 583: | ||
end | end | ||
return construct_form_of_text(iargs, args, term_param, compat, | return construct_form_of_text(iargs, args, term_param, compat, base_lemma_params, | ||
function( | function(lemma_data) | ||
return m_form_of.format_form_of {text = text, | return m_form_of.format_form_of {text = text, lemmas = lemma_data.lemmas, enclitics = lemma_data.enclitics, | ||
base_lemmas = lemma_data.base_lemmas, lemma_face = "term", posttext = iargs["posttext"]}, {} | |||
end | end | ||
) | ) | ||
Line 404: | Line 592: | ||
-- Construct and return the full definition line for a form-of-type template | --[=[ | ||
Construct and return the full definition line for a form-of-type template invocation that is based on inflection tags. | |||
This is a wrapper around construct_form_of_text() and takes the following arguments: processed invocation arguments | |||
IARGS, processed parent arguments ARGS, TERM_PARAM (the parent argument specifying the main entry), COMPAT (true if the | |||
language code is found in args["lang"] instead of args[1]), and TAGS, the list of (non-canonicalized) inflection tags. | |||
It returns the actual definition-line text including terminating period/full-stop, formatted categories, etc. and
should be directly returned as the template function's return value. JOINER is the optional strategy to join multipart | |||
tags for display; currently accepted values are "and", "slash", "en-dash"; defaults to "slash". | |||
]=] | |||
local function construct_tagged_form_of_text(iargs, args, term_param, compat, base_lemma_params, tags, joiner) | |||
return construct_form_of_text(iargs, args, term_param, compat, base_lemma_params, | |||
local function construct_tagged_form_of_text(iargs, args, term_param, compat, tags, joiner) | function(lemma_data) | ||
return construct_form_of_text(iargs, args, term_param, compat, | -- NOTE: tagged_inflections returns two values, so we do too. | ||
function( | |||
return m_form_of.tagged_inflections { | return m_form_of.tagged_inflections { | ||
lang = lemma_data.lang, | |||
tags = tags, | tags = tags, | ||
lemmas = lemma_data.lemmas, | |||
enclitics = lemma_data.enclitics, | |||
base_lemmas = lemma_data.base_lemmas, | |||
lemma_face = "term", | |||
POS = args["p"], | |||
pagename = args["pagename"], | |||
-- Set no_format_categories because we do it ourselves in construct_form_of_text(). | |||
no_format_categories = true, | |||
nocat = args["nocat"], | |||
notext = args["notext"], | notext = args["notext"], | ||
capfirst = args["cap"] or iargs["withcap"] and not args["nocap"], | capfirst = args["cap"] or iargs["withcap"] and not args["nocap"], | ||
posttext = iargs["posttext"], | posttext = iargs["posttext"], | ||
joiner = joiner | joiner = joiner, | ||
} | } | ||
end | end | ||
) | ) | ||
Line 434: | Line 627: | ||
--[=[ | --[==[ | ||
Function that implements form-of templates that are defined by specific tagged | Function that implements form-of templates that are defined by specific tagged inflections (typically a template | ||
inflections (typically a template referring to a non-lemma inflection, | referring to a non-lemma inflection, such as {{tl|plural of}}). This works exactly like {form_of_t()} except that the | ||
such as {{ | "form of" text displayed before the link is based off of a pre-specified set of inflection tags (which will be | ||
that the "form of" text displayed before the link is based off of a | appropriately linked to the glossary) instead of arbitrary text. From the user's perspective, there is no difference | ||
pre-specified set of inflection tags (which will be appropriately linked to | between templates implemented using {form_of_t()} and {tagged_form_of_t()}; they accept exactly the same parameters and | ||
the glossary) instead of arbitrary text. From the user's perspective, there | work the same. See also {inflection_of_t()} below, which is intended for templates with user-specified inflection tags. | ||
is no difference between templates implemented using form_of_t() and | |||
tagged_form_of_t(); they accept exactly the same parameters and work the same. | |||
See also inflection_of_t() below, which is intended for templates with | |||
user-specified inflection tags. | |||
Invocation params: | Invocation params: | ||
1=, 2 | ; {{para|1|req=1}}, {{para|2}}, ... | ||
: One or more inflection tags describing the inflection in question. | |||
split_tags | ; {{para|split_tags}} | ||
: If specified, character to split specified inflection tags on. This allows multiple tags to be included in a single | |||
argument, simplifying template code. | |||
; {{para|term_param}} | |||
term_param | ; {{para|lang}} | ||
lang | ; {{para|sc}} | ||
sc | ; {{para|cat}}, {{para|cat2}}, ... | ||
cat | ; {{para|ignore}}, {{para|ignore2}}, ... | ||
ignore | ; {{para|def}}, {{para|def2}}, ... | ||
def | ; {{para|withcap}} | ||
withcap | ; {{para|withdot}} | ||
withdot | ; {{para|nolink}} | ||
nolink | ; {{para|linktext}} | ||
linktext | ; {{para|posttext}} | ||
posttext | ; {{para|noprimaryentrycat}} | ||
noprimaryentrycat | ; {{para|lemma_is_sort_key}} | ||
: All of these are the same as in {form_of_t()}. | |||
]=] | ]==] | ||
function export.tagged_form_of_t(frame) | function export.tagged_form_of_t(frame) | ||
local iparams = | local iparams = get_common_invocation_params() | ||
iparams[1] = {list = true, required = true} | |||
iparams["split_tags"] = {} | |||
local iargs = m_params.process(frame.args, iparams) | |||
local iargs = | |||
local parent_args = frame:getParent().args | local parent_args = frame:getParent().args | ||
Line 494: | Line 670: | ||
term_param = term_param or compat and 1 or 2 | term_param = term_param or compat and 1 or 2 | ||
local params = | local params = get_common_template_params() | ||
-- Numbered params | |||
params[compat and "lang" or 1] = { | |||
required = not iargs["lang"], | |||
type = "language", | |||
default = iargs["lang"] or "und" | |||
} | |||
-- Always included because lang-specific categories may be added | |||
params["nocat"] = {type = "boolean"} | |||
params["p"] = {} | |||
params["POS"] = {alias_of = "p"} | |||
local base_lemma_params | |||
if not iargs["nolink"] and not iargs["linktext"] then | if not iargs["nolink"] and not iargs["linktext"] then | ||
add_link_params(params, term_param) | add_link_params(parent_args, params, term_param) | ||
base_lemma_params = add_base_lemma_params(parent_args, iargs, params, compat) | |||
end | end | ||
Line 534: | Line 703: | ||
local args = process_parent_args("tagged-form-of-t", parent_args, | local args = process_parent_args("tagged-form-of-t", parent_args, | ||
params, iargs["def"], iargs["ignore"], ignored_params) | params, iargs["def"], iargs["ignore"], ignored_params, "tagged_form_of_t") | ||
return construct_tagged_form_of_text(iargs, args, term_param, compat, | return construct_tagged_form_of_text(iargs, args, term_param, compat, base_lemma_params, | ||
split_inflection_tags(iargs[1], iargs["split_tags"]) | split_inflection_tags(iargs[1], iargs["split_tags"])) | ||
end | end | ||
--[=[ | --[==[ | ||
Function that implements {{inflection of}} and certain semi-specific variants, | Function that implements {{tl|inflection of}} and certain semi-specific variants, such as {{tl|participle of}} and | ||
such as {{participle of}} and {{past participle form of}}. This function is | {{tl|past participle form of}}. This function is intended for templates that allow the user to specify a set of | ||
intended for templates that allow the user to specify a set of inflection tags. | inflection tags. | ||
It works similarly to form_of_t() and tagged_form_of_t() except that the | |||
calling convention for the calling template is | It works similarly to {form_of_t()} and {tagged_form_of_t()} except that the calling convention for the calling | ||
template is | |||
: { {{TEMPLATE|LANG|MAIN_ENTRY_LINK|MAIN_ENTRY_DISPLAY_TEXT|TAG|TAG|...}}} | |||
instead of | instead of | ||
: { {{TEMPLATE|LANG|MAIN_ENTRY_LINK|MAIN_ENTRY_DISPLAY_TEXT|GLOSS}}} | |||
Note that there isn't a numbered parameter for the gloss, but it can still | |||
be specified using t | Note that there isn't a numbered parameter for the gloss, but it can still be specified using {{para|t}} or | ||
{{para|gloss}}. | |||
Invocation params: | Invocation params: | ||
preinfl | ; {{para|preinfl}}, {{para|preinfl2}}, ... | ||
: Extra inflection tags to automatically prepend to the tags specified by the template. | |||
; {{para|postinfl}}, {{para|postinfl2}}, ... | |||
postinfl | : Extra inflection tags to automatically append to the tags specified by the template. Used for example by | ||
{{tl|past participle form of}} to add the tags `of the|past|p` onto the user-specified tags, which indicate which | |||
past participle form the page refers to. | |||
; {{para|split_tags}} | |||
: If specified, character to split specified inflection tags on. This allows multiple tags to be included in a single | |||
split_tags | argument, simplifying template code. Note that this applies *ONLY* to inflection tags specified in the invocation | ||
arguments using {{para|preinfl}} or {{para|postinfl}}, not to user-specified inflection tags. | |||
; {{para|term_param}} | |||
; {{para|lang}} | |||
; {{para|sc}} | |||
; {{para|cat}}, {{para|cat2}}, ... | |||
term_param | ; {{para|ignore}}, {{para|ignore2}}, ... | ||
lang | ; {{para|def}}, {{para|def2}}, ... | ||
sc | ; {{para|withcap}} | ||
cat | ; {{para|withdot}} | ||
ignore | ; {{para|nolink}} | ||
def | ; {{para|linktext}} | ||
withcap | ; {{para|posttext}} | ||
withdot | ; {{para|noprimaryentrycat}} | ||
nolink | ; {{para|lemma_is_sort_key}} | ||
linktext | : All of these are the same as in {form_of_t()}. | ||
posttext | ]==] | ||
noprimaryentrycat | |||
]=] | |||
function export.inflection_of_t(frame) | function export.inflection_of_t(frame) | ||
local iparams = | local iparams = get_common_invocation_params() | ||
iparams["preinfl"] = {list = true} | |||
iparams["postinfl"] = {list = true} | |||
iparams["split_tags"] = {} | |||
local iargs = | local iargs = m_params.process(frame.args, iparams) | ||
local parent_args = frame:getParent().args | local parent_args = frame:getParent().args | ||
local term_param = iargs["term_param"] | local term_param = iargs["term_param"] | ||
local compat = iargs["lang"] or parent_args["lang"] | local compat = iargs["lang"] or parent_args["lang"] | ||
term_param = term_param or compat and 1 or 2 | term_param = term_param or compat and 1 or 2 | ||
local tagsind = term_param + 2 | |||
local params = | local params = get_common_template_params() | ||
-- Numbered params | |||
params[compat and "lang" or 1] = { | |||
required = not iargs["lang"], | |||
type = "language", | |||
default = iargs["lang"] or "und" | |||
} | } | ||
params[tagsind] = {list = true, | |||
-- at least one inflection tag is required unless preinfl or postinfl tags are given | |||
required = #iargs["preinfl"] == 0 and #iargs["postinfl"] == 0} | |||
-- Named params not controlling link display | |||
-- Always included because lang-specific categories may be added | |||
params["nocat"] = {type = "boolean"} | |||
params["p"] = {} | |||
params["POS"] = {alias_of = "p"} | |||
-- Temporary, allows multipart joiner to be controlled on a template-by-template basis. | |||
params["joiner"] = {} | |||
local base_lemma_params | |||
if not iargs["nolink"] and not iargs["linktext"] then | if not iargs["nolink"] and not iargs["linktext"] then | ||
add_link_params(params, term_param, "no-numbered-gloss") | add_link_params(parent_args, params, term_param, "no-numbered-gloss") | ||
base_lemma_params = add_base_lemma_params(parent_args, iargs, params, compat) | |||
end | end | ||
Line 656: | Line 806: | ||
local args = process_parent_args("inflection-of-t", parent_args, | local args = process_parent_args("inflection-of-t", parent_args, | ||
params, iargs["def"], iargs["ignore"], ignored_params) | params, iargs["def"], iargs["ignore"], ignored_params, "inflection_of_t") | ||
local infls | local infls | ||
if not next(iargs["preinfl"]) and not next(iargs["postinfl"]) then | if not next(iargs["preinfl"]) and not next(iargs["postinfl"]) then | ||
infls = args[ | -- If no preinfl or postinfl tags, just use the user-specified tags directly. | ||
infls = args[tagsind] | |||
else | else | ||
-- Otherwise, we need to prepend the preinfl tags and postpend the postinfl tags. If there's only one tag set | |||
-- (no semicolon), it's easier. Since this is common, we optimize for it. | |||
infls = {} | infls = {} | ||
for _, infl in ipairs( | local saw_semicolon = false | ||
for _, infl in ipairs(args[tagsind]) do | |||
if infl == ";" then | |||
saw_semicolon = true | |||
break | |||
end | |||
end | end | ||
local split_preinfl = split_inflection_tags(iargs["preinfl"], iargs["split_tags"]) | |||
local split_postinfl = split_inflection_tags(iargs["postinfl"], iargs["split_tags"]) | |||
if not saw_semicolon then | |||
extend_list(infls, split_preinfl) | |||
extend_list(infls, args[tagsind]) | |||
extend_list(infls, split_postinfl) | |||
else | |||
local groups = m_form_of.split_tags_into_tag_sets(args[tagsind]) | |||
for _, group in ipairs(groups) do | |||
if #infls > 0 then | |||
insert(infls, ";") | |||
end | |||
extend_list(infls, split_preinfl) | |||
extend_list(infls, group) | |||
extend_list(infls, split_postinfl) | |||
end | |||
end | end | ||
end | end | ||
return construct_tagged_form_of_text(iargs, args, term_param, compat, infls, | return construct_tagged_form_of_text(iargs, args, term_param, compat, base_lemma_params, infls, | ||
parent_args["joiner"]) | parent_args["joiner"]) | ||
end | end | ||
--[=[ | --[==[ | ||
Normalize a part-of-speech tag given a possible abbreviation | Normalize a part-of-speech tag given a possible abbreviation (passed in as {{para|1}} of the invocation args). If the | ||
(passed in as 1 | abbreviation isn't recognized, the original POS tag is returned. If no POS tag is passed in, return the value of | ||
isn't recognized, the original POS tag is returned. If no POS | invocation arg {{para|default}}. | ||
tag is passed in, return the value of invocation arg default | ]==] | ||
]=] | |||
function export.normalize_pos(frame) | function export.normalize_pos(frame) | ||
local iparams = { | local iparams = { | ||
Line 689: | Line 856: | ||
["default"] = {}, | ["default"] = {}, | ||
} | } | ||
local iargs = | local iargs = m_params.process(frame.args, iparams) | ||
if not iargs[1] and not iargs["default"] then | if not iargs[1] and not iargs["default"] then | ||
error("Either 1= or default= must be given in the invocation args") | error("Either 1= or default= must be given in the invocation args") | ||
end | end | ||
if not iargs[1] then | |||
return iargs["default"] | |||
end | |||
return mw.loadData(m_form_of.form_of_pos_module)[iargs[1]] or iargs[1] | |||
end | end | ||
return export | return export |