Module:form of/templates: Difference between revisions

no edit summary
No edit summary
No edit summary
Line 1: Line 1:
local export = {}
local form_of_module = "Module:form of"
local languages_module = "Module:languages"
local load_module = "Module:load"
local parameters_module = "Module:parameters"
local parse_interface_module = "Module:parse interface"
local parse_utilities_module = "Module:parse utilities"
local string_utilities_module = "Module:string utilities"
local utilities_module = "Module:utilities"
local insert = table.insert
local insert = table.insert
local m_form_of = require("Module:form of")
local ipairs = ipairs
local m_params = require("Module:parameters")
local pairs = pairs
local put_module = "Module:parse utilities"
local require = require
local rfind = mw.ustring.find
 
local rmatch = mw.ustring.match
--[==[
local rsplit = mw.text.split
Loaders for functions in other modules, which overwrite themselves with the target function when called. This ensures modules are only loaded when needed, retains the speed/convenience of locally-declared pre-loaded functions, and has no overhead after the first call, since the target functions are called directly in any subsequent calls.]==]
local rgsplit = mw.text.gsplit
 
-- Lazy loader: the first call requires the string utilities module, rebinds this local to that module's
-- `decode_entities`, and forwards all arguments to it; subsequent calls reach the target directly.
local function decode_entities(...)
	local m_string_utilities = require(string_utilities_module)
	decode_entities = m_string_utilities.decode_entities
	return decode_entities(...)
end
 
-- Lazy loader: the first call requires the utilities module, rebinds this local to that module's
-- `format_categories`, and forwards all arguments to it; subsequent calls reach the target directly.
local function format_categories(...)
	local m_utilities = require(utilities_module)
	format_categories = m_utilities.format_categories
	return format_categories(...)
end
 
-- Lazy loader: the first call requires the form-of module, rebinds this local to that module's
-- `format_form_of`, and forwards all arguments to it; subsequent calls reach the target directly.
local function format_form_of(...)
	local m_form = require(form_of_module)
	format_form_of = m_form.format_form_of
	return format_form_of(...)
end
 
-- Lazy loader: the first call requires the languages module, rebinds this local to that module's
-- `getByCode`, and forwards all arguments to it; subsequent calls reach the target directly.
local function get_lang(...)
	local m_languages = require(languages_module)
	get_lang = m_languages.getByCode
	return get_lang(...)
end
 
-- Lazy loader: the first call requires the string utilities module, rebinds this local to that module's
-- `gsplit`, and forwards all arguments to it; subsequent calls reach the target directly.
local function gsplit(...)
	local m_string_utilities = require(string_utilities_module)
	gsplit = m_string_utilities.gsplit
	return gsplit(...)
end
 
-- Lazy loader: the first call requires the load module, rebinds this local to that module's
-- `load_data`, and forwards all arguments to it; subsequent calls reach the target directly.
local function load_data(...)
	local m_load = require(load_module)
	load_data = m_load.load_data
	return load_data(...)
end
 
-- Lazy loader: the first call requires the parse interface module, rebinds this local to that module's
-- `parse_inline_modifiers`, and forwards all arguments to it; subsequent calls reach the target directly.
local function parse_inline_modifiers(...)
	local m_parse_interface = require(parse_interface_module)
	parse_inline_modifiers = m_parse_interface.parse_inline_modifiers
	return parse_inline_modifiers(...)
end
 
-- Lazy loader: the first call requires the string utilities module, rebinds this local to that module's
-- `pattern_escape`, and forwards all arguments to it; subsequent calls reach the target directly.
local function pattern_escape(...)
	local m_string_utilities = require(string_utilities_module)
	pattern_escape = m_string_utilities.pattern_escape
	return pattern_escape(...)
end
 
-- Lazy loader: the first call requires the parameters module, rebinds this local to that module's
-- `process`, and forwards all arguments to it; subsequent calls reach the target directly.
local function process_params(...)
	local m_parameters = require(parameters_module)
	process_params = m_parameters.process
	return process_params(...)
end
 
-- Lazy loader: the first call requires the load module, rebinds this local to that module's
-- `safe_load_data`, and forwards all arguments to it; subsequent calls reach the target directly.
local function safe_load_data(...)
	local m_load = require(load_module)
	safe_load_data = m_load.safe_load_data
	return safe_load_data(...)
end
 
-- Lazy loader: the first call requires the string utilities module, rebinds this local to that module's
-- `split`, and forwards all arguments to it; subsequent calls reach the target directly.
local function split(...)
	local m_string_utilities = require(string_utilities_module)
	split = m_string_utilities.split
	return split(...)
end
 
-- Lazy loader: the first call requires the form-of module, rebinds this local to that module's
-- `split_tag_set`, and forwards all arguments to it; subsequent calls reach the target directly.
local function split_tag_set(...)
	local m_form = require(form_of_module)
	split_tag_set = m_form.split_tag_set
	return split_tag_set(...)
end
 
-- Lazy loader: the first call requires the form-of module, rebinds this local to that module's
-- `tagged_inflections`, and forwards all arguments to it; subsequent calls reach the target directly.
-- The call is a tail call, so every value returned by the target is passed back to the caller.
local function tagged_inflections(...)
	local m_form = require(form_of_module)
	tagged_inflections = m_form.tagged_inflections
	return tagged_inflections(...)
end
 
-- Lazy loader: the first call requires the string utilities module, rebinds this local to that module's
-- `trim`, and forwards all arguments to it; subsequent calls reach the target directly.
local function trim(...)
	local m_string_utilities = require(string_utilities_module)
	trim = m_string_utilities.trim
	return trim(...)
end
 
-- Lazy loader: the first call requires the string utilities module, rebinds this local to that module's
-- `ucfirst`, and forwards all arguments to it; subsequent calls reach the target directly.
local function ucfirst(...)
	local m_string_utilities = require(string_utilities_module)
	ucfirst = m_string_utilities.ucfirst
	return ucfirst(...)
end
 
--[==[
Loaders for objects, which load data (or some other object) into some variable, which can then be accessed as "foo or get_foo()", where the function get_foo sets the object to "foo" and then returns it. This ensures they are only loaded when needed, and avoids the need to check for the existence of the object each time, since once "foo" has been set to a truthy value, "get_foo" will not be called again. Note that this only holds for truthy objects: if the loaded value can be false or nil, "foo or get_foo()" calls the getter on every access.]==]
-- Cached copy of the `force_cat` field of the form-of module; read by callers as `force_cat or get_force_cat()`.
local force_cat
-- Loads `force_cat` from the form-of module into the cache and returns it.
--
-- Unlike a getter for a table or string, this one must stay callable after its first use. `force_cat` is
-- passed on as a flag and may be false or nil (NOTE(review): confirm against [[Module:form of]]). In that
-- case `force_cat or get_force_cat()` invokes the getter again on every access, and a getter that had set
-- itself to nil would raise "attempt to call a nil value" on the second access. `require` caches loaded
-- modules, so the repeated call only costs a table lookup.
local function get_force_cat()
	force_cat = require(form_of_module).force_cat
	return force_cat
end
 
-- Cached data loaded from the module named by the form-of module's `form_of_pos_module` field; read by
-- callers as `m_form_of_pos or get_m_form_of_pos()`.
local m_form_of_pos
-- One-shot getter: loads the data into `m_form_of_pos`, discards itself, and returns the loaded data.
local function get_m_form_of_pos()
	local pos_module_name = require(form_of_module).form_of_pos_module
	m_form_of_pos = load_data(pos_module_name)
	get_m_form_of_pos = nil
	return m_form_of_pos
end


local export = {}
-- Cached copy of the form-of module's `form_of_lang_data_module_prefix` field; read by callers as
-- `module_prefix or get_module_prefix()`.
local module_prefix
-- One-shot getter: stores the prefix in `module_prefix`, discards itself, and returns the prefix.
local function get_module_prefix()
	local m_form = require(form_of_module)
	module_prefix = m_form.form_of_lang_data_module_prefix
	get_module_prefix = nil
	return module_prefix
end


--[==[ intro:
--[==[ intro:
Line 15: Line 118:
[[Module:form of]], which contains the underlying implementing code and is meant to be called from other modules.
[[Module:form of]], which contains the underlying implementing code and is meant to be called from other modules.
]==]
]==]




Line 62: Line 166:
local new_parent_args = {}
local new_parent_args = {}
for _, default in ipairs(defaults) do
for _, default in ipairs(defaults) do
local defparam, defval = rmatch(default, "^(.-)=(.*)$")
local defparam, defval = default:match("^(.-)=(.*)$")
if not defparam then
if not defparam then
error("Bad default spec " .. default)
error("Bad default spec " .. default)
Line 74: Line 178:


for _, ignorespec in ipairs(ignorespecs) do
for _, ignorespec in ipairs(ignorespecs) do
for ignore in rgsplit(ignorespec, ",") do
for ignore in gsplit(ignorespec, ",") do
local param = rmatch(ignore, "^(.*):list$")
local param = ignore:match("^(.*):list$")
if param then
if param then
if rfind(param, "^[0-9]+$") then
if param:match("^%d+$") then
insert(numbered_list_params_to_ignore, tonumber(param))
insert(numbered_list_params_to_ignore, tonumber(param))
else
else
insert(named_list_params_to_ignore,
insert(named_list_params_to_ignore, "^" .. pattern_escape(param) .. "%d*$")
"^" .. require("Module:string utilities").pattern_escape(param) .. "[0-9]*$")
end
end
else
else
if rfind(ignore, "^[0-9]+$") then
if ignore:match("^%d+$") then
ignore = tonumber(ignore)
ignore = tonumber(ignore)
end
end
Line 104: Line 207:
else
else
for _, lparam in ipairs(named_list_params_to_ignore) do
for _, lparam in ipairs(named_list_params_to_ignore) do
if rfind(k, lparam) then
if k:match(lparam) then
ignore_me = true
ignore_me = true
break
break
Line 118: Line 221:
end
end


local args = m_params.process(parent_args, params, nil, "form of/templates", function_name)
local args = process_params(parent_args, params, nil, "form of/templates", function_name)


return args
return args
Line 132: Line 235:
local inflection_tags = {}
local inflection_tags = {}
for _, tagspec in ipairs(tagspecs) do
for _, tagspec in ipairs(tagspecs) do
for tag in rgsplit(tagspec, split_regex) do
for tag in gsplit(tagspec, split_regex) do
insert(inflection_tags, tag)
insert(inflection_tags, tag)
end
end
Line 165: Line 268:
local function parse_terms_with_inline_modifiers(paramname, val, lang)
local function parse_terms_with_inline_modifiers(paramname, val, lang)
local function generate_obj(term)
local function generate_obj(term)
return {lang = lang, term = term}
return {lang = lang, term = decode_entities(term)}
end
end


local retval
return parse_inline_modifiers(val, {
-- Check for inline modifier, e.g. מרים<tr:Miryem>. But exclude HTML entry with <span ...>, <i ...>, <br/> or
paramname = paramname,
-- similar in it, caused by wrapping an argument in {{l|...}}, {{af|...}} or similar. Basically, all tags of
param_mods = term_param_mods,
-- the sort we parse here should consist of a less-than sign, plus letters, plus a colon, e.g. <tr:...>, so if
generate_obj = generate_obj,
-- we see a tag on the outer level that isn't in this format, we don't try to parse it. The restriction to the
splitchar = ",",
-- outer level is to allow generated HTML inside of e.g. qualifier tags, such as foo<q:similar to {{m|fr|bar}}>.
})
if val:find("<") and not val:find("^[^<]*<[a-z]*[^a-z:]") then
retval = require(put_module).parse_inline_modifiers(val, {
paramname = paramname,
param_mods = term_param_mods,
generate_obj = generate_obj,
splitchar = ",",
})
else
if val:find(",<") then
-- this happens when there's an embedded {{,}} template, as in [[MMR]], [[TMA]], [[DEI]], where an initialism
-- expands to multiple terms; easiest not to try and parse the lemma spec as multiple lemmas
retval = {val}
elseif val:find(",%s") or (val:find(",") and val:find("[\\%[<]")) then
-- Comma after whitespace not split; nor are backslash-escaped commas or commas inside of square or
-- angle brackets. If we see any of these, use the more sophisticated algorithm in
-- [[Module:parse utilities]]. Otherwise it's safe to just split on commas directly. This optimization avoids
-- loading [[Module:parse utilities]] unnecessarily.
retval = require(put_module).split_on_comma(val)
else
retval = rsplit(val, ",")
end
for i, split in ipairs(retval) do
retval[i] = generate_obj(split)
end
end
 
return retval
end
end


Line 215: Line 291:
-- will be the gloss, unless NO_NUMBERED_GLOSS is given.
-- will be the gloss, unless NO_NUMBERED_GLOSS is given.
local function add_link_params(parent_args, params, term_param, no_numbered_gloss)
local function add_link_params(parent_args, params, term_param, no_numbered_gloss)
for k, v in pairs(parent_args) do
for k in pairs(parent_args) do
if type(k) == "string" then
if type(k) == "string" then
local base, num = k:match("^([a-z]+)([0-9]+)$")
local base = k:match("^(%l+)(%d+)$")
if base and link_param_set[base] then
if base and link_param_set[base] then
error("Support for the separate-parameter style of multiple lemmas in form-of templates is going away; use a comma-separated lemma param with inline modifiers")
error("Support for the separate-parameter style of multiple lemmas in form-of templates is going away; use a comma-separated lemma param with inline modifiers")
Line 244: Line 320:
end
end


-- Need to do what [[Module:parameters]] does to string arguments from parent_args as we're running this
-- before calling [[Module:parameters]] on parent_args.
-- "If not empty": returns `arg` trimmed via `trim`, or nil when `arg` is nil/false or trims to the
-- empty string.
local function ine(arg)
	if arg then
		local trimmed = trim(arg)
		if trimmed ~= "" then
			return trimmed
		end
	end
	return nil
end


local function add_base_lemma_params(parent_args, iargs, params, compat)
local function add_base_lemma_params(parent_args, iargs, params, compat)
-- Need to do what [[Module:parameters]] does to string arguments from parent_args as we're running this
-- Check the language-specific data for additional base lemma params. But if there's no language-specific data,
-- before calling [[Module:parameters]] on parent_args.
-- attempt any parent varieties as well (i.e. superordinate varieties).
local function ine(arg)
local lang = get_lang(ine(parent_args[compat and "lang" or 1]) or ine(iargs["lang"]) or "und", nil, true)
if not arg then
while lang do
return nil
local langdata = safe_load_data((module_prefix or get_module_prefix()) .. lang:getCode())
end
if langdata then
arg = mw.text.trim(arg)
local base_lemma_params = langdata.base_lemma_params
return arg ~= "" and arg or nil
if base_lemma_params then
end
for _, param in ipairs(base_lemma_params) do
 
-- Check the language-specific data for additional base lemma params. But if there's no language-specific data and
-- the language is an etym-only language, fall back to the corresponding full language and check again.
local langcode = ine(parent_args[compat and "lang" or 1]) or iargs["lang"] or "und"
if m_form_of.langs_with_lang_specific_tags[langcode] then
local langdata = mw.loadData(m_form_of.form_of_lang_data_module_prefix .. langcode)
if langdata.base_lemma_params then
for _, param in ipairs(langdata.base_lemma_params) do
params[param.param] = {}
end
return langdata.base_lemma_params
end
else
local lang = require("Module:languages").getByCode(langcode, nil, "allow etym")
if lang and lang:getCode() ~= lang:getFullCode() then
local full_code = lang:getFullCode()
local langdata = mw.loadData(m_form_of.form_of_lang_data_module_prefix .. full_code)
if langdata.base_lemma_params then
for _, param in ipairs(langdata.base_lemma_params) do
params[param.param] = {}
params[param.param] = {}
end
end
return langdata.base_lemma_params
return base_lemma_params
end
end
end
end
lang = lang:getParent()
end
end
end
end
Line 330: Line 397:
for _, cat in ipairs(args["cat"]) do
for _, cat in ipairs(args["cat"]) do
insert(categories, lang:getFullName() .. " " .. cat)
insert(categories, lang:getFullName() .. " " .. cat)
end
-- Format the link, preceding text and categories
local function add_term_tracking_categories(term)
-- maybe add tracking category if primary entry doesn't exist (this is an
-- expensive call so we don't do it by default)
if iargs["noprimaryentrycat"] and term and mw.title.getCurrentTitle().nsText == ""
and not mw.title.new(term).exists then
insert(categories, lang:getFullName() .. " " .. iargs["noprimaryentrycat"])
end
end
end


Line 362: Line 418:
if term then
if term then
lemmas = parse_terms_with_inline_modifiers(term_param, term, lang)
lemmas = parse_terms_with_inline_modifiers(term_param, term, lang)
for _, lemma in ipairs(lemmas) do
add_term_tracking_categories(lemma.term)
end
else
else
lemmas = {{ lang = lang }}
lemmas = {{ lang = lang }}
Line 370: Line 423:


-- sc= but not invocation arg sc= should override inline modifier sc=.
-- sc= but not invocation arg sc= should override inline modifier sc=.
local sc
if args["sc"] then
if args["sc"] then
lemmas[1].sc = args["sc"]
lemmas[1].sc = args["sc"]
Line 380: Line 432:
local genders = {}
local genders = {}
for _, g in ipairs(args["g"]) do
for _, g in ipairs(args["g"]) do
extend_list(genders, rsplit(g, ","))
extend_list(genders, split(g, ","))
end
end
lemmas[1].genders = genders
lemmas[1].genders = genders
Line 449: Line 501:
return text
return text
end
end
return text .. require("Module:utilities").format_categories(lemma_data.categories, lemma_data.lang, args["sort"],
return text .. format_categories(lemma_data.categories, lemma_data.lang, args["sort"],
-- If lemma_is_sort_key is given, supply the first lemma term as the sort base if possible. If sort= is given,
-- If lemma_is_sort_key is given, supply the first lemma term as the sort base if possible. If sort= is given,
-- it will override the base; otherwise, the base will be converted appropriately to a sort key using the
-- it will override the base; otherwise, the base will be converted appropriately to a sort key using the
Line 455: Line 507:
iargs.lemma_is_sort_key and type(lemma_data.lemmas) == "table" and lemma_data.lemmas[1].term,
iargs.lemma_is_sort_key and type(lemma_data.lemmas) == "table" and lemma_data.lemmas[1].term,
-- Supply the first lemma's script for sort key computation.
-- Supply the first lemma's script for sort key computation.
m_form_of.force_cat, type(lemma_data.lemmas) == "table" and lemma_data.lemmas[1].sc)
force_cat or get_force_cat(), type(lemma_data.lemmas) == "table" and lemma_data.lemmas[1].sc)
end
end


Line 535: Line 587:
local iparams = get_common_invocation_params()
local iparams = get_common_invocation_params()
iparams[1] = {required = true}
iparams[1] = {required = true}
local iargs = m_params.process(frame.args, iparams)
local iargs = process_params(frame.args, iparams)
local parent_args = frame:getParent().args
local parent_args = frame:getParent().args


Line 580: Line 632:
local text = args["notext"] and "" or iargs[1]
local text = args["notext"] and "" or iargs[1]
if args["cap"] or iargs["withcap"] and not args["nocap"] then
if args["cap"] or iargs["withcap"] and not args["nocap"] then
text = require("Module:string utilities").ucfirst(text)
text = ucfirst(text)
end
end


return construct_form_of_text(iargs, args, term_param, compat, base_lemma_params,
return construct_form_of_text(iargs, args, term_param, compat, base_lemma_params,
function(lemma_data)
function(lemma_data)
return m_form_of.format_form_of {text = text, lemmas = lemma_data.lemmas, enclitics = lemma_data.enclitics,
return format_form_of{text = text, lemmas = lemma_data.lemmas, enclitics = lemma_data.enclitics,
base_lemmas = lemma_data.base_lemmas, lemma_face = "term", posttext = iargs["posttext"]}, {}
base_lemmas = lemma_data.base_lemmas, lemma_face = "term", posttext = iargs["posttext"]}, {}
end
end
Line 605: Line 657:
function(lemma_data)
function(lemma_data)
-- NOTE: tagged_inflections returns two values, so we do too.
-- NOTE: tagged_inflections returns two values, so we do too.
return m_form_of.tagged_inflections {
return tagged_inflections{
lang = lemma_data.lang,
lang = lemma_data.lang,
tags = tags,
tags = tags,
Line 662: Line 714:
iparams["split_tags"] = {}
iparams["split_tags"] = {}


local iargs = m_params.process(frame.args, iparams)
local iargs = process_params(frame.args, iparams)
local parent_args = frame:getParent().args
local parent_args = frame:getParent().args


Line 757: Line 809:
iparams["split_tags"] = {}
iparams["split_tags"] = {}


local iargs = m_params.process(frame.args, iparams)
local iargs = process_params(frame.args, iparams)
local parent_args = frame:getParent().args
local parent_args = frame:getParent().args


Line 830: Line 882:
extend_list(infls, split_postinfl)
extend_list(infls, split_postinfl)
else
else
local groups = m_form_of.split_tags_into_tag_sets(args[tagsind])
local groups = split_tag_set(args[tagsind])
for _, group in ipairs(groups) do
for _, group in ipairs(groups) do
if #infls > 0 then
if #infls > 0 then
Line 856: Line 908:
["default"] = {},
["default"] = {},
}
}
local iargs = m_params.process(frame.args, iparams)
local iargs = process_params(frame.args, iparams)
if not iargs[1] and not iargs["default"] then
if not iargs[1] and not iargs["default"] then
error("Either 1= or default= must be given in the invocation args")
error("Either 1= or default= must be given in the invocation args")
Line 863: Line 915:
return iargs["default"]
return iargs["default"]
end
end
return mw.loadData(m_form_of.form_of_pos_module)[iargs[1]] or iargs[1]
return (m_form_of_pos or get_m_form_of_pos())[iargs[1]] or iargs[1]
end
end


return export
return export