Module:form of: Difference between revisions

Jump to navigation Jump to search
no edit summary
No edit summary
No edit summary
Line 3: Line 3:
export.force_cat = false -- for testing; set to true to display categories even on non-mainspace pages
export.force_cat = false -- for testing; set to true to display categories even on non-mainspace pages


local m_links = require("Module:links")
local form_of_cats_module = "Module:form of/cats"
local m_string_utils = require("Module:string utilities")
local form_of_data_module = "Module:form of/data"
local m_table = require("Module:table")
local form_of_data1_module = "Module:form of/data/1"
local form_of_data2_module = "Module:form of/data/2"
local form_of_functions_module = "Module:form of/functions"
local form_of_lang_data_module_prefix = "Module:form of/lang-data/"
local form_of_pos_module = "Module:form of/pos"
local function_module = "Module:fun"
local headword_data_module = "Module:headword/data"
local json_module = "Module:JSON"
local labels_module = "Module:labels"
local links_module = "Module:links"
local load_module = "Module:load"
local parse_utilities_module = "Module:parse utilities"
local parse_utilities_module = "Module:parse utilities"
local labels_module = "Module:labels"
local string_utilities_module = "Module:string utilities"
local table_module = "Module:table"
local utilities_module = "Module:utilities"
local utilities_module = "Module:utilities"
export.form_of_pos_module = "Module:form of/pos"
export.form_of_functions_module = "Module:form of/functions"
export.form_of_cats_module = "Module:form of/cats"
export.form_of_lang_data_module_prefix = "Module:form of/lang-data/"
export.form_of_data_module = "Module:form of/data"
export.form_of_data2_module = "Module:form of/data2"


local ulen = m_string_utils.len
local anchor_encode = mw.uri.anchorEncode
local rsubn = m_string_utils.gsub
local concat = table.concat
local rmatch = m_string_utils.match
local dump = mw.dumpObject
local rsplit = m_string_utils.split
local fetch_categories_and_labels -- Defined below.
local format_form_of -- Defined below.
local get_tag_display_form -- Defined below.
local get_tag_set_display_form -- Defined below.
local insert = table.insert
local ipairs = ipairs
local is_link_or_html -- Defined below.
local list_to_text = mw.text.listToText
local lookup_shortcut -- Defined below.
local lookup_tag -- Defined below.
local normalize_tag_set -- Defined below.
local parse_tag_set_properties -- Defined below.
local require = require
local sort = table.sort
local split_tag_set -- Defined below.
local tagged_inflections -- Defined below.
local type = type
 
--[==[
Loaders for functions in other modules, which overwrite themselves with the target function when called. This ensures modules are only loaded when needed, retains the speed/convenience of locally-declared pre-loaded functions, and has no overhead after the first call, since the target functions are called directly in any subsequent calls.]==]
-- Lazy loader stub: the first call loads the module named by `table_module`, rebinds the local `append` to that
-- module's `append` function (so later calls skip this stub), and forwards all arguments to it.
local function append(...)
	local loaded_module = require(table_module)
	append = loaded_module.append
	return append(...)
end
 
-- Lazy loader stub: the first call loads the module named by `table_module`, rebinds the local `contains` to that
-- module's `contains` function (so later calls skip this stub), and forwards all arguments to it.
local function contains(...)
	local loaded_module = require(table_module)
	contains = loaded_module.contains
	return contains(...)
end
 
-- Lazy loader stub: the first call loads the module named by `table_module`, rebinds the local `deep_copy` to that
-- module's `deepCopy` function (so later calls skip this stub), and forwards all arguments to it.
local function deep_copy(...)
	local loaded_module = require(table_module)
	deep_copy = loaded_module.deepCopy
	return deep_copy(...)
end
 
-- Lazy loader stub: the first call loads the module named by `table_module`, rebinds the local `deep_equals` to that
-- module's `deepEquals` function (so later calls skip this stub), and forwards all arguments to it.
local function deep_equals(...)
	local loaded_module = require(table_module)
	deep_equals = loaded_module.deepEquals
	return deep_equals(...)
end
 
-- Lazy loader stub: the first call loads the module named by `table_module`, rebinds the local `extend` to that
-- module's `extend` function (so later calls skip this stub), and forwards all arguments to it.
local function extend(...)
	local loaded_module = require(table_module)
	extend = loaded_module.extend
	return extend(...)
end
 
-- Lazy loader stub: the first call loads the module named by `utilities_module`, rebinds the local
-- `format_categories` to that module's `format_categories` function (so later calls skip this stub), and forwards
-- all arguments to it.
local function format_categories(...)
	local loaded_module = require(utilities_module)
	format_categories = loaded_module.format_categories
	return format_categories(...)
end
 
-- Lazy loader stub: the first call loads the module named by `links_module`, rebinds the local `full_link` to that
-- module's `full_link` function (so later calls skip this stub), and forwards all arguments to it.
local function full_link(...)
	local loaded_module = require(links_module)
	full_link = loaded_module.full_link
	return full_link(...)
end
 
-- Lazy loader stub: the first call loads the module named by `table_module`, rebinds the local `insert_if_not` to
-- that module's `insertIfNot` function (so later calls skip this stub), and forwards all arguments to it.
local function insert_if_not(...)
	local loaded_module = require(table_module)
	insert_if_not = loaded_module.insertIfNot
	return insert_if_not(...)
end
 
-- Lazy loader stub: the first call loads the module named by `table_module`, rebinds the local `is_subset_list` to
-- that module's `isSubsetList` function (so later calls skip this stub), and forwards all arguments to it.
local function is_subset_list(...)
	local loaded_module = require(table_module)
	is_subset_list = loaded_module.isSubsetList
	return is_subset_list(...)
end
 
-- Lazy loader stub: the first call loads the module named by `function_module`, rebinds the local `iterate_from` to
-- that module's `iterateFrom` function (so later calls skip this stub), and forwards all arguments to it.
local function iterate_from(...)
	local loaded_module = require(function_module)
	iterate_from = loaded_module.iterateFrom
	return iterate_from(...)
end
 
-- Lazy loader stub: the first call loads the module named by `form_of_functions_module`, rebinds the local
-- `join_multiparts` to that module's `join_multiparts` function (so later calls skip this stub), and forwards all
-- arguments to it.
local function join_multiparts(...)
	local loaded_module = require(form_of_functions_module)
	join_multiparts = loaded_module.join_multiparts
	return join_multiparts(...)
end
 
-- Lazy loader stub: the first call loads the module named by `load_module`, rebinds the local `load_data` to that
-- module's `load_data` function (so later calls skip this stub), and forwards all arguments to it.
local function load_data(...)
	local loaded_module = require(load_module)
	load_data = loaded_module.load_data
	return load_data(...)
end
 
-- Lazy loader stub: the first call loads the module named by `parse_utilities_module`, rebinds the local
-- `parse_inline_modifiers` to that module's `parse_inline_modifiers` function (so later calls skip this stub), and
-- forwards all arguments to it.
local function parse_inline_modifiers(...)
	local loaded_module = require(parse_utilities_module)
	parse_inline_modifiers = loaded_module.parse_inline_modifiers
	return parse_inline_modifiers(...)
end
 
-- Lazy loader stub: the first call loads the module named by `load_module`, rebinds the local `safe_load_data` to
-- that module's `safe_load_data` function (so later calls skip this stub), and forwards all arguments to it.
local function safe_load_data(...)
	local loaded_module = require(load_module)
	safe_load_data = loaded_module.safe_load_data
	return safe_load_data(...)
end
 
-- Lazy loader stub: the first call loads the module named by `load_module`, rebinds the local `safe_require` to that
-- module's `safe_require` function (so later calls skip this stub), and forwards all arguments to it.
local function safe_require(...)
	local loaded_module = require(load_module)
	safe_require = loaded_module.safe_require
	return safe_require(...)
end
 
-- Lazy loader stub: the first call loads the module named by `table_module`, rebinds the local `serial_comma_join`
-- to that module's `serialCommaJoin` function (so later calls skip this stub), and forwards all arguments to it.
local function serial_comma_join(...)
	local loaded_module = require(table_module)
	serial_comma_join = loaded_module.serialCommaJoin
	return serial_comma_join(...)
end
 
-- Lazy loader stub: the first call loads the module named by `table_module`, rebinds the local `shallow_copy` to
-- that module's `shallowCopy` function (so later calls skip this stub), and forwards all arguments to it.
local function shallow_copy(...)
	local loaded_module = require(table_module)
	shallow_copy = loaded_module.shallowCopy
	return shallow_copy(...)
end


export.TAG_TYPE = 1
local function show_labels(...)
export.GLOSSARY = 2
show_labels = require(labels_module).show_labels
export.SHORTCUTS = 3
return show_labels(...)
export.WIKIDATA = 4
end


export.APPENDIX = true
local function slice(...)
export.WP = false
slice = require(table_module).slice
export.WIKT = 0
return slice(...)
end
 
-- Lazy loader stub: the first call loads the module named by `string_utilities_module`, rebinds the local `split` to
-- that module's `split` function (so later calls skip this stub), and forwards all arguments to it.
local function split(...)
	local loaded_module = require(string_utilities_module)
	split = loaded_module.split
	return split(...)
end
 
-- Lazy loader stub: the first call loads the module named by `string_utilities_module`, rebinds the local `ucfirst`
-- to that module's `ucfirst` function (so later calls skip this stub), and forwards all arguments to it.
local function ucfirst(...)
	local loaded_module = require(string_utilities_module)
	ucfirst = loaded_module.ucfirst
	return ucfirst(...)
end


--[==[
--[==[
Set listing the languages with lang-specific tags. If a language isn't listed here, the tags for that language won't be
Loaders for objects, which load data (or some other object) into some variable, which can then be accessed as "foo or get_foo()", where the function get_foo sets the object to "foo" and then returns it. This ensures they are only loaded when needed, and avoids the need to check for the existence of the object each time, since once "foo" has been set, "get_foo" will not be called again.]==]
recognized.
local cat_functions
]==]
local function get_cat_functions()
export.langs_with_lang_specific_tags = {
cat_functions, get_cat_functions = require(form_of_functions_module).cat_functions, nil
["en"] = true,
return cat_functions
["got"] = true,
end
["lt"] = true,
 
["lv"] = true,
local default_pagename
["nl"] = true,
local function get_default_pagename()
["pi"] = true,
default_pagename, get_default_pagename = load_data(headword_data_module).pagename, nil
["sw"] = true,
return default_pagename
["ttj"] = true,
end
}
 
-- Cached `display_handlers` object; nil until get_display_handlers() has run. Intended access pattern:
-- `display_handlers or get_display_handlers()`.
local display_handlers
-- One-shot getter: stores the `display_handlers` field of the module named by `form_of_functions_module` in the
-- local above, clears itself (sets `get_display_handlers` to nil), and returns the stored value.
local function get_display_handlers()
	local handlers = require(form_of_functions_module).display_handlers
	display_handlers = handlers
	get_display_handlers = nil
	return display_handlers
end
 
-- Cached data from the module named by `form_of_cats_module`; nil until get_m_cats_data() has run. Intended access
-- pattern: `m_cats_data or get_m_cats_data()`.
local m_cats_data
-- One-shot getter: loads the data via load_data(), stores it in the local above, clears itself (sets
-- `get_m_cats_data` to nil), and returns the stored value.
local function get_m_cats_data()
	local loaded = load_data(form_of_cats_module)
	m_cats_data = loaded
	get_m_cats_data = nil
	return m_cats_data
end
 
-- Cached module named by `form_of_data_module`; nil until get_m_data() has run. Intended access pattern:
-- `m_data or get_m_data()`.
local m_data
-- One-shot getter: loads the module, stores it in the local above, clears itself (sets `get_m_data` to nil), and
-- returns the stored value.
local function get_m_data()
	-- Needs require (unlike the sibling getters, which go through load_data()).
	local loaded = require(form_of_data_module)
	m_data = loaded
	get_m_data = nil
	return m_data
end
 
-- Cached data from the module named by `form_of_data1_module`; nil until get_m_data1() has run. Intended access
-- pattern: `m_data1 or get_m_data1()`.
local m_data1
-- One-shot getter: loads the data via load_data(), stores it in the local above, clears itself (sets `get_m_data1`
-- to nil), and returns the stored value.
local function get_m_data1()
	local loaded = load_data(form_of_data1_module)
	m_data1 = loaded
	get_m_data1 = nil
	return m_data1
end
 
-- Cached data from the module named by `form_of_data2_module`; nil until get_m_data2() has run. Intended access
-- pattern: `m_data2 or get_m_data2()`.
local m_data2
-- One-shot getter: loads the data via load_data(), stores it in the local above, clears itself (sets `get_m_data2`
-- to nil), and returns the stored value.
local function get_m_data2()
	local loaded = load_data(form_of_data2_module)
	m_data2 = loaded
	get_m_data2 = nil
	return m_data2
end
 
-- Cached data from the module named by `form_of_pos_module`; nil until get_m_pos_data() has run. Intended access
-- pattern: `m_pos_data or get_m_pos_data()`.
local m_pos_data
-- One-shot getter: loads the data via load_data(), stores it in the local above, clears itself (sets
-- `get_m_pos_data` to nil), and returns the stored value.
local function get_m_pos_data()
	local loaded = load_data(form_of_pos_module)
	m_pos_data = loaded
	get_m_pos_data = nil
	return m_pos_data
end


--[==[ intro:
--[==[ intro:
Line 52: Line 207:


* [[Module:form of/templates]] contains the majority of the logic that implements the templates themselves.
* [[Module:form of/templates]] contains the majority of the logic that implements the templates themselves.
* [[Module:form of/data]] is a data-only file containing information on the more common inflection tags, listing the
* [[Module:form of/data/1]] is a data-only file containing information on the more common inflection tags, listing the
   tags, their shortcuts, the category they belong to (tense-aspect, case, gender, voice-valence, etc.), the appropriate
   tags, their shortcuts, the category they belong to (tense-aspect, case, gender, voice-valence, etc.), the appropriate
   glossary link and the wikidata ID.
   glossary link and the wikidata ID.
* [[Module:form of/data2]] is a data-only file containing information on the less common inflection tags, in the same
* [[Module:form of/data/2]] is a data-only file containing information on the less common inflection tags, in the same
   format as [[Module:form of/data]].
   format as [[Module:form of/data/1]].
* [[Module:form of/lang-data/LANGCODE]] is a data-only file containing information on the language-specific inflection
* [[Module:form of/lang-data/LANGCODE]] is a data-only file containing information on the language-specific inflection
   tags for the language with code LANGCODE, in the same format as [[Module:form of/data]]. Language-specific tags
   tags for the language with code LANGCODE, in the same format as [[Module:form of/data/1]]. Language-specific tags
   override general tags.
   override general tags.
* [[Module:form of/cats]] is a data-only file listing the language-specific categories that are added when the
* [[Module:form of/cats]] is a data-only file listing the language-specific categories that are added when the
Line 64: Line 219:
* [[Module:form of/pos]] is a data-only file listing the recognized parts of speech and their abbreviations, used for
* [[Module:form of/pos]] is a data-only file listing the recognized parts of speech and their abbreviations, used for
   categorization. FIXME: This should be unified with the parts of speech listed in [[Module:links]].
   categorization. FIXME: This should be unified with the parts of speech listed in [[Module:links]].
* [[Module:form of/functions]] contains functions for use with [[Module:form of/data]] and [[Module:form of/cats]].
* [[Module:form of/functions]] contains functions for use with [[Module:form of/data/1]] and [[Module:form of/cats]].
   They are contained in this module because data-only modules can't contain code. The functions in this file are of two
   They are contained in this module because data-only modules can't contain code. The functions in this file are of two
   types:
   types:
Line 119: Line 274:
   sets). For example, the English language-specific shortcut `ed-form` expands to `spast|;|past|part`, expressing the
   sets). For example, the English language-specific shortcut `ed-form` expands to `spast|;|past|part`, expressing the
   common syncretism between simple past and past participle in English (and in this case, `spast` is itself a list
   common syncretism between simple past and past participle in English (and in this case, `spast` is itself a list
   shortcut that expands to `simple|past`).
   shortcut that expands to `simple|past`).]==]
]==]
 
-- Single-result wrapper around rsubn(): passes the three arguments straight through and returns only the first
-- value that rsubn() produces, dropping any others.
local function rsub(term, foo, bar)
	-- The extra parentheses truncate the call's results to exactly one value.
	return (rsubn(term, foo, bar))
end
 
 
-- Convert a possibly-negative list index into a position counted from the start of `list`.
-- Negative indices count back from the end (-1 is the last element, i.e. #list); zero and positive indices are
-- returned unchanged. No range checking is done, so the result may lie outside 1..#list.
local function normalize_index(list, index)
	if not (index < 0) then
		return index
	end
	return #list + index + 1
end
 
-- FIXME, consider moving to [[Module:table]]
-- Set-style subset test on two lists: returns true when every element of `tags1` also occurs in `tags2`, false
-- otherwise. Both lists are first converted with m_table.listToSet(), so duplicates and ordering are ignored.
local function is_subset(tags1, tags2)
	local candidate_set = m_table.listToSet(tags1)
	local container_set = m_table.listToSet(tags2)
	for member in pairs(candidate_set) do
		if not container_set[member] then
			return false
		end
	end
	return true
end
 
 
-- FIXME, move to [[Module:table]]
-- Return a new list holding the elements of `list` from position `i` through position `j`, inclusive.
-- `i` defaults to 1 and `j` defaults to -1 (the last element); negative values of either are resolved relative to
-- the end of the list by normalize_index(). If the resolved start lies after the resolved end, the result is empty.
-- Argument type checking (checkType on `list`, `i`, `j`) is currently disabled.
local function slice(list, i, j)
	local first = i == nil and 1 or normalize_index(list, i)
	local last = normalize_index(list, j or -1)

	local result = {}
	for position = first, last do
		-- Output slots are numbered consecutively from 1, mirroring the offset from `first`.
		result[position - first + 1] = list[position]
	end
	return result
end
 


local function wrap_in_span(text, classes)
local function wrap_in_span(text, classes)
return ("<span class='%s'>%s</span>"):format(classes, text)
return ("<span class='%s'>%s</span>"):format(classes, text)
end
end


--[==[
--[==[
Line 203: Line 306:
   for the tag text and lemma links, and additionally {"form-of-definition-link"} specifically for the lemma links.
   for the tag text and lemma links, and additionally {"form-of-definition-link"} specifically for the lemma links.
   (FIXME: Should separate out the lemma links into their own field.)
   (FIXME: Should separate out the lemma links into their own field.)
* `.posttext`: Additional text to display after the lemma links.
* `.posttext`: Additional text to display after the lemma links.]==]
]==]
function export.format_form_of(data)
function export.format_form_of(data)
if type(data) ~= "table" then
if type(data) ~= "table" then
Line 212: Line 314:
local lemma_classes = data.text_classes or "form-of-definition-link"
local lemma_classes = data.text_classes or "form-of-definition-link"
local parts = {}
local parts = {}
local function ins(text)
insert(parts, "<span class='" .. text_classes .. "'>")
table.insert(parts, text)
insert(parts, data.text)
end
ins("<span class='" .. text_classes .. "'>")
ins(data.text)
if data.text ~= "" and data.lemmas then
if data.text ~= "" and data.lemmas then
ins(" ")
insert(parts, " ")
end
end
if data.lemmas then
if data.lemmas then
if type(data.lemmas) == "string" then
if type(data.lemmas) == "string" then
ins(wrap_in_span(data.lemmas, lemma_classes))
insert(parts, wrap_in_span(data.lemmas, lemma_classes))
else
else
local formatted_terms = {}
local formatted_terms = {}
for _, lemma in ipairs(data.lemmas) do
for _, lemma in ipairs(data.lemmas) do
table.insert(formatted_terms, wrap_in_span(
insert(formatted_terms, wrap_in_span(
m_links.full_link(lemma, data.lemma_face), lemma_classes
full_link(lemma, data.lemma_face), lemma_classes
))
))
end
end
ins(m_table.serialCommaJoin(formatted_terms))
insert(parts, serial_comma_join(formatted_terms))
end
end
end
end
Line 236: Line 335:
-- The outer parens need to be outside of the text_classes span so they show in upright instead of italic, or
-- The outer parens need to be outside of the text_classes span so they show in upright instead of italic, or
-- they will clash with upright parens generated by link annotations such as transliterations and pos=.
-- they will clash with upright parens generated by link annotations such as transliterations and pos=.
ins("</span>")
insert(parts, "</span>")
local formatted_terms = {}
local formatted_terms = {}
for _, enclitic in ipairs(data.enclitics) do
for _, enclitic in ipairs(data.enclitics) do
-- FIXME, should we have separate clitic face and/or classes?
-- FIXME, should we have separate clitic face and/or classes?
table.insert(formatted_terms, wrap_in_span(
insert(formatted_terms, wrap_in_span(
m_links.full_link(enclitic, data.lemma_face, nil, "show qualifiers"), lemma_classes
full_link(enclitic, data.lemma_face, nil, "show qualifiers"), lemma_classes
))
))
end
end
ins(" (")
insert(parts, " (")
ins(wrap_in_span("with enclitic" .. (#data.enclitics > 1 and "s" or "") .. " ", text_classes))
insert(parts, wrap_in_span("with enclitic" .. (#data.enclitics > 1 and "s" or "") .. " ", text_classes))
ins(m_table.serialCommaJoin(formatted_terms))
insert(parts, serial_comma_join(formatted_terms))
ins(")")
insert(parts, ")")
ins("<span class='" .. text_classes .. "'>")
insert(parts, "<span class='" .. text_classes .. "'>")
end
end
if data.base_lemmas and #data.base_lemmas > 0 then
if data.base_lemmas and #data.base_lemmas > 0 then
for _, base_lemma in ipairs(data.base_lemmas) do
for _, base_lemma in ipairs(data.base_lemmas) do
ins(", the </span>")
insert(parts, ", the </span>")
ins(export.tagged_inflections {
insert(parts, (tagged_inflections{
lang = base_lemma.lemmas[1].lang,
lang = base_lemma.lemmas[1].lang,
tags = base_lemma.paramobj.tags,
tags = base_lemma.paramobj.tags,
Line 261: Line 360:
nocat = true,
nocat = true,
text_classes = data.text_classes,
text_classes = data.text_classes,
})
}))
ins("<span class='" .. text_classes .. "'>")
insert(parts, "<span class='" .. text_classes .. "'>")
end
end
end
end
Line 268: Line 367:
-- final colon when there are multiple tag sets in tagged_inflections().
-- final colon when there are multiple tag sets in tagged_inflections().
if data.posttext then
if data.posttext then
ins(data.posttext)
insert(parts, data.posttext)
end
end
ins("</span>")
insert(parts, "</span>")
return table.concat(parts)
return concat(parts)
end
end
 
format_form_of = export.format_form_of


--[==[
--[==[
Return true if `tag` contains an internal link or HTML.
Return true if `tag` contains an internal link or HTML.]==]
]==]
function export.is_link_or_html(tag)
function export.is_link_or_html(tag)
return tag:find("[[", nil, true) or tag:find("|", nil, true) or tag:find("<", nil, true)
return tag:find("[[", nil, true) or tag:find("|", nil, true) or tag:find("<", nil, true)
end
end
 
is_link_or_html = export.is_link_or_html


--[==[
--[==[
Line 298: Line 396:


This function first looks up in the lang-specific data module [[Module:form of/lang-data/LANGCODE]], then in
This function first looks up in the lang-specific data module [[Module:form of/lang-data/LANGCODE]], then in
[[Module:form of/data]] (which includes more common non-lang-specific tags) and finally (only if the tag is not
[[Module:form of/data/1]] (which includes more common non-lang-specific tags) and finally (only if the tag is not
recognized as a shortcut or canonical tag, and is not of types 1-4 above) in [[Module:form of/data2]].
recognized as a shortcut or canonical tag, and is not of types 1-4 above) in [[Module:form of/data/2]].


If the expansion is a string and is different from the tag, track it if `do_track` is true.
If the expansion is a string and is different from the tag, track it if `do_track` is true.]==]
]==]
function export.lookup_shortcut(tag, lang, do_track)
function export.lookup_shortcut(tag, lang, do_track)
-- If there is HTML or a link in the tag, return it directly; don't try
-- If there is HTML or a link in the tag, return it directly; don't try
-- to look it up, which will fail.
-- to look it up, which will fail.
if tag == ";" or tag:find("//", nil, true) or export.is_link_or_html(tag) then
if tag == ";" or tag:find("//", nil, true) or is_link_or_html(tag) then
return tag
return tag
end
end
local expansion
local expansion
local langcode = lang and lang:getCode()
while lang do
if langcode and export.langs_with_lang_specific_tags[langcode] then
local langdata = safe_load_data(form_of_lang_data_module_prefix .. lang:getCode())
local langdata = mw.loadData(export.form_of_lang_data_module_prefix .. langcode)
-- If this is a canonical long-form tag, just return it, and don't check for shortcuts. This is an
-- If this is a canonical long-form tag, just return it, and don't check for shortcuts. This is an
-- optimization; see below.
-- optimization; see below.
if langdata.tags[tag] then
if langdata then
return tag
end
expansion = langdata.shortcuts[tag]
end
if not expansion and lang then
-- If the lang we're dealing with is an etym-only lang, try again with the corresponding full language.
local full_langcode = lang:getFullCode()
if full_langcode ~= langcode and export.langs_with_lang_specific_tags[full_langcode] then
local langdata = mw.loadData(export.form_of_lang_data_module_prefix .. full_langcode)
-- If this is a canonical long-form tag, just return it, and don't check for shortcuts. This is an
-- optimization; see below.
if langdata.tags[tag] then
if langdata.tags[tag] then
return tag
return tag
end
end
expansion = langdata.shortcuts[tag]
expansion = langdata.shortcuts[tag]
if expansion then
break
end
end
end
-- If the language has a parent (i.e. a superordinate variety), try again with that.
lang = lang:getParent()
end
end
if not expansion then
if not expansion then
local m_data = mw.loadData(export.form_of_data_module)
-- If this is a canonical long-form tag, just return it, and don't check for shortcuts (which will cause
-- If this is a canonical long-form tag, just return it, and don't check for shortcuts (which will cause
-- [[Module:form of/data2]] to be loaded, because there won't be a shortcut entry in [[Module:form of/data]] --
-- [[Module:form of/data/2]] to be loaded, because there won't be a shortcut entry in [[Module:form of/data/1]] --
-- or, for that matter, in [[Module:form of/data2]]). This is an optimization; the code will still work without
-- or, for that matter, in [[Module:form of/data/2]]). This is an optimization; the code will still work without
-- it, but will use up more memory.
-- it, but will use up more memory.
if m_data.tags[tag] then
if (m_data1 or get_m_data1()).tags[tag] then
return tag
return tag
end
end
expansion = m_data.shortcuts[tag]
expansion = m_data1.shortcuts[tag]
end
end
if not expansion then
if not expansion then
local m_data2 = mw.loadData(export.form_of_data2_module)
expansion = (m_data2 or get_m_data2()).shortcuts[tag]
expansion = m_data2.shortcuts[tag]
end
end
if not expansion then
if not expansion then
return tag
return tag
end
end
 
return expansion
return expansion
end
end
 
lookup_shortcut = export.lookup_shortcut


--[==[
--[==[
Look up a normalized/canonicalized tag and return the data object associated with it. If the tag isn't found, return
Look up a normalized/canonicalized tag and return the data object associated with it. If the tag isn't found, return
nil. This first looks up in the lang-specific data module [[Module:form of/lang-data/LANGCODE]], then in
nil. This first looks up in the lang-specific data module [[Module:form of/lang-data/LANGCODE]], then in
[[Module:form of/data]] (which includes more common non-lang-specific tags) and then finally in
[[Module:form of/data/1]] (which includes more common non-lang-specific tags) and then finally in
[[Module:form of/data2]].
[[Module:form of/data/2]].]==]
]==]
function export.lookup_tag(tag, lang)
function export.lookup_tag(tag, lang)
local langcode = lang and lang:getCode()
while lang do
if langcode and export.langs_with_lang_specific_tags[langcode] then
local langdata = safe_load_data(form_of_lang_data_module_prefix .. lang:getCode())
local langdata = mw.loadData(export.form_of_lang_data_module_prefix .. langcode)
local tag = langdata and langdata.tags[tag]
if langdata.tags[tag] then
if tag then
return langdata.tags[tag]
return tag
end
end
local full_langcode = lang and lang:getFullCode()
if full_langcode and full_langcode ~= langcode and export.langs_with_lang_specific_tags[full_langcode] then
-- If the lang we're dealing with is an etym-only lang, try again with the corresponding full language.
local langdata = mw.loadData(export.form_of_lang_data_module_prefix .. full_langcode)
if langdata.tags[tag] then
return langdata.tags[tag]
end
end
-- If the language has a parent (i.e. a superordinate variety), try again with that.
lang = lang:getParent()
end
end
local m_data = mw.loadData(export.form_of_data_module)
local tagobj = (m_data1 or get_m_data1()).tags[tag]
local tagobj = m_data.tags[tag]
if tagobj then
if tagobj then
return tagobj
return tagobj
end
end
local m_data2 = mw.loadData(export.form_of_data2_module)
local tagobj2 = (m_data2 or get_m_data2()).tags[tag]
local tagobj2 = m_data2.tags[tag]
if tagobj2 then
if tagobj2 then
return tagobj2
return tagobj2
Line 390: Line 469:
return nil
return nil
end
end
 
lookup_tag = export.lookup_tag


-- Normalize a single tag, which may be a shortcut but should not be a multipart tag, a multipart shortcut or a list
-- Normalize a single tag, which may be a shortcut but should not be a multipart tag, a multipart shortcut or a list
-- shortcut.
-- shortcut.
local function normalize_single_tag(tag, lang, do_track)
local function normalize_single_tag(tag, lang, do_track)
local expansion = export.lookup_shortcut(tag, lang, do_track)
local expansion = lookup_shortcut(tag, lang, do_track)
if type(expansion) ~= "string" then
if type(expansion) ~= "string" then
error("Tag '" .. tag .. "' is a list shortcut, which is not allowed here")
error("Tag '" .. tag .. "' is a list shortcut, which is not allowed here")
Line 402: Line 481:
return tag
return tag
end
end


--[=[
--[=[
Line 413: Line 491:
-- and we don't want these things parsed. Note that we don't do this check before splitting on //, which we don't
-- and we don't want these things parsed. Note that we don't do this check before splitting on //, which we don't
-- expect to occur in links or HTML; see comment in normalize_tag().
-- expect to occur in links or HTML; see comment in normalize_tag().
if export.is_link_or_html(tag) then
if is_link_or_html(tag) then
return tag
return tag
end
end
local components = rsplit(tag, ":", true)
local components = split(tag, ":", true)
if #components == 1 then
if #components == 1 then
-- We allow list-tag shortcuts inside of multipart tags, e.g.
-- We allow list-tag shortcuts inside of multipart tags, e.g.
-- '1s//3p'. Check for this now.
-- '1s//3p'. Check for this now.
tag = export.lookup_shortcut(tag, lang, do_track)
tag = lookup_shortcut(tag, lang, do_track)
if type(tag) == "table" then
if type(tag) == "table" then
-- We found a list-tag shortcut; treat as if colon-separated.
-- We found a list-tag shortcut; treat as if colon-separated.
Line 429: Line 507:
end
end
local normtags = {}
local normtags = {}
for _, component in ipairs(components) do
for _, component in ipairs(components) do
table.insert(normtags, normalize_single_tag(component, lang, do_track))
insert(normtags, normalize_single_tag(component, lang, do_track))
end
end


return normtags
return normtags
end
end


--[=[
--[=[
Line 450: Line 528:
return tag
return tag
end
end
local split_tags = rsplit(tag, "//", true)
local split_tags = split(tag, "//", true)
if #split_tags == 1 then
if #split_tags == 1 then
local retval = normalize_multipart_component(tag, lang, do_track)
local retval = normalize_multipart_component(tag, lang, do_track)
Line 462: Line 540:
local normtags = {}
local normtags = {}
for _, single_tag in ipairs(split_tags) do
for _, single_tag in ipairs(split_tags) do
table.insert(normtags, normalize_multipart_component(single_tag, lang, do_track))
 
insert(normtags, normalize_multipart_component(single_tag, lang, do_track))
end
end
return normtags
return normtags
end
end


--[==[
--[==[
Line 486: Line 564:
Example 3:
Example 3:


{normalize_tag_set({"archaic", "ed-form"}, ENGLISH)} = { {{"archaic", "simple", "past"}, {"archaic", "past", "participle"}}}
{normalize_tag_set({"archaic", "ed-form"}, ENGLISH)} = { {{"archaic", "simple", "past"}, {"archaic", "past", "participle"}}}]==]
]==]
function export.normalize_tag_set(tag_set, lang, do_track)
function export.normalize_tag_set(tag_set, lang, do_track)
-- We track usage of shortcuts, normalized forms and (in the case of multipart tags or list tags) intermediate
-- forms. For example, if the tags 1s|mn|gen|indefinite are passed in, we track the following:
-- [[Wiktionary:Tracking/inflection of/tag/1s]]
-- [[Wiktionary:Tracking/inflection of/tag/1]]
-- [[Wiktionary:Tracking/inflection of/tag/s]]
-- [[Wiktionary:Tracking/inflection of/tag/first-person]]
-- [[Wiktionary:Tracking/inflection of/tag/singular]]
-- [[Wiktionary:Tracking/inflection of/tag/mn]]
-- [[Wiktionary:Tracking/inflection of/tag/m//n]]
-- [[Wiktionary:Tracking/inflection of/tag/m]]
-- [[Wiktionary:Tracking/inflection of/tag/n]]
-- [[Wiktionary:Tracking/inflection of/tag/masculine]]
-- [[Wiktionary:Tracking/inflection of/tag/neuter]]
-- [[Wiktionary:Tracking/inflection of/tag/gen]]
-- [[Wiktionary:Tracking/inflection of/tag/genitive]]
-- [[Wiktionary:Tracking/inflection of/tag/indefinite]]
local output_tag_set = {}
local output_tag_set = {}
local saw_semicolon = false
local saw_semicolon = false


for _, tag in ipairs(tag_set) do
for _, tag in ipairs(tag_set) do
-- Expand the tag, which may generate a new tag (either a fully canonicalized tag, a multipart tag, or a list
-- Expand the tag, which may generate a new tag (either a fully canonicalized tag, a multipart tag, or a list
-- of tags).
-- of tags).
tag = export.lookup_shortcut(tag, lang, do_track)
tag = lookup_shortcut(tag, lang, do_track)
if type(tag) == "table" then
if type(tag) == "table" then
saw_semicolon = m_table.contains(tag, ";")
if contains(tag, ";") then
if saw_semicolon then
-- If we saw a conjoined shortcut, we need to use a more general algorithm that can expand a single
-- If we saw a conjoined shortcut, we need to use a more general algorithm that can expand a single
-- tag set into multiple.
-- tag set into multiple.
saw_semicolon = true
break
break
end
end


for _, t in ipairs(tag) do
for _, t in ipairs(tag) do
table.insert(output_tag_set, normalize_tag(t, lang, do_track))
 
insert(output_tag_set, normalize_tag(t, lang, do_track))
end
end
else
else
table.insert(output_tag_set, normalize_tag(tag, lang, do_track))
insert(output_tag_set, normalize_tag(tag, lang, do_track))
end
end
end
end
Line 518: Line 612:


-- Use a more general algorithm that handles conjoined shortcuts.
-- Use a more general algorithm that handles conjoined shortcuts.
local output_tag_set = {}
output_tag_set = {}
for i, tag in ipairs(tag_set) do
for i, tag in ipairs(tag_set) do
-- Expand the tag, which may generate a new tag (either a fully canonicalized tag, a multipart tag, or a list
-- Expand the tag, which may generate a new tag (either a fully canonicalized tag, a multipart tag, or a list
-- of tags).
-- of tags).
tag = export.lookup_shortcut(tag, lang, do_track)
tag = lookup_shortcut(tag, lang, do_track)
if type(tag) == "table" then
if type(tag) == "table" then
local output_tag_sets = {}
local output_tag_sets = {}
local shortcut_tag_sets = export.split_tag_set(tag)
local shortcut_tag_sets = split_tag_set(tag)
local normalized_shortcut_tag_sets = {}
local normalized_shortcut_tag_sets = {}
for _, shortcut_tag_set in ipairs(shortcut_tag_sets) do
for _, shortcut_tag_set in ipairs(shortcut_tag_sets) do
m_table.extendList(normalized_shortcut_tag_sets,
extend(normalized_shortcut_tag_sets,
export.normalize_tag_set(shortcut_tag_set, lang, do_track))
normalize_tag_set(shortcut_tag_set, lang, do_track))
end
end
local after_tags = slice(tag_set, i + 1)
local after_tags = slice(tag_set, i + 1)
local normalized_after_tags_sets = export.normalize_tag_set(after_tags, lang, do_track)
local normalized_after_tags_sets = normalize_tag_set(after_tags, lang, do_track)
for _, normalized_shortcut_tag_set in ipairs(normalized_shortcut_tag_sets) do
for _, normalized_shortcut_tag_set in ipairs(normalized_shortcut_tag_sets) do
for _, normalized_after_tags_set in ipairs(normalized_after_tags_sets) do
for _, normalized_after_tags_set in ipairs(normalized_after_tags_sets) do
table.insert(output_tag_sets, m_table.append(output_tag_set, normalized_shortcut_tag_set,
insert(output_tag_sets, append(output_tag_set, normalized_shortcut_tag_set,
normalized_after_tags_set))
normalized_after_tags_set))
end
end
Line 541: Line 636:
return output_tag_sets
return output_tag_sets
else
else
table.insert(output_tag_set, normalize_tag(tag, lang, do_track))
insert(output_tag_set, normalize_tag(tag, lang, do_track))
end
end
end
end
Line 547: Line 642:
error("Internal error: Should not get here")
error("Internal error: Should not get here")
end
end
 
normalize_tag_set = export.normalize_tag_set
 
--[==[
Collapse list-form multipart tags in `tag_set` back into strings, modifying `tag_set` (and its nested lists) in place
and returning it. Each top-level entry that is a list is replaced by its parts joined with `//`; before that, any
part that is itself a list is replaced by its elements joined with `:`. String entries are left untouched.]==]
function export.combine_multipart_tags(tag_set)
	for set_pos, multipart in ipairs(tag_set) do
		if type(multipart) == "table" then
			-- First flatten any second-level lists into colon-joined strings.
			for part_pos, part in ipairs(multipart) do
				if type(part) == "table" then
					multipart[part_pos] = table.concat(part, ":")
				end
			end
			-- Then join the (now all-string) parts with the multipart separator.
			tag_set[set_pos] = table.concat(multipart, "//")
		end
	end

	return tag_set
end
 
 
--[==[
Normalize the tag list `tags` for language `lang` by calling {export.normalize_tag_set()} (passing `do_track`
through). If `recombine_multitags` is falsy, the resulting list of tag sets is returned as-is. Otherwise each tag set
is first passed through {export.combine_multipart_tags()} and the whole list is then handed to
{export.combine_tag_sets()}, whose result is returned.]==]
function export.normalize_tags(tags, lang, recombine_multitags, do_track)
	local normalized_sets = export.normalize_tag_set(tags, lang, do_track)
	if not recombine_multitags then
		return normalized_sets
	end
	for set_pos, normalized_set in ipairs(normalized_sets) do
		normalized_sets[set_pos] = export.combine_multipart_tags(normalized_set)
	end
	return export.combine_tag_sets(normalized_sets)
end
 


--[==[
--[==[
Split a tag set containing two-level multipart tags into one or more tag sets not containing such tags.
Split a tag set that may consist of multiple semicolon-separated tag sets into the component tag sets.]==]
Single-level multipart tags are left alone. (If we need to, a slight modification of the following code
will also split single-level multipart tags.) This assumes that multipart tags are represented as lists
and two-level multipart tags are represented as lists of lists, as is output by {normalize_tag_set()}.
NOTE: We have to be careful to properly handle imbalanced two-level multipart tags such as
`def:s//p` (or the reverse, `s//def:p`).
]==]
-- Expand the first two-level multipart tag found in `tag_set` into one tag set per alternant, recursing on each
-- expansion so that later two-level tags are expanded as well. Returns a list of tag sets; if no two-level
-- multipart tag is present, the list contains just `tag_set` itself.
function export.split_two_level_multipart_tag_set(tag_set)
	-- Does this multipart tag (a list) contain at least one part that is itself a list?
	local function is_two_level(multipart_tag)
		for _, part in ipairs(multipart_tag) do
			if type(part) == "table" then
				return true
			end
		end
		return false
	end

	for index, tag in ipairs(tag_set) do
		if type(tag) == "table" and is_two_level(tag) then
			-- Tags on either side of the two-level multipart tag are shared by every alternant.
			local before = slice(tag_set, 1, index - 1)
			local after = slice(tag_set, index + 1)
			local expanded_sets = {}
			for _, alternant in ipairs(tag) do
				-- Build before + alternant + after; an alternant may be a single string or a list of tags.
				local candidate = {}
				m_table.extendList(candidate, before)
				if type(alternant) == "table" then
					m_table.extendList(candidate, alternant)
				else
					table.insert(candidate, alternant)
				end
				m_table.extendList(candidate, after)
				-- The candidate may still hold further two-level tags among the following tags, so recurse.
				m_table.extendList(expanded_sets, export.split_two_level_multipart_tag_set(candidate))
			end
			return expanded_sets
		end
	end

	return {tag_set}
end
 
 
--[==[
Split a tag set that may consist of multiple semicolon-separated tag sets into the component tag sets.
]==]
function export.split_tag_set(tag_set)
function export.split_tag_set(tag_set)
local split_tag_sets = {}
local split_tag_sets = {}
Line 636: Line 652:
if tag == ";" then
if tag == ";" then
if #cur_tag_set > 0 then
if #cur_tag_set > 0 then
table.insert(split_tag_sets, cur_tag_set)
insert(split_tag_sets, cur_tag_set)
end
end
cur_tag_set = {}
cur_tag_set = {}
else
else
table.insert(cur_tag_set, tag)
insert(cur_tag_set, tag)
end
end
end
end
if #cur_tag_set > 0 then
if #cur_tag_set > 0 then
table.insert(split_tag_sets, cur_tag_set)
insert(split_tag_sets, cur_tag_set)
end
end
return split_tag_sets
return split_tag_sets
end
end
 
split_tag_set = export.split_tag_set
export.split_tags_into_tag_sets = export.split_tag_set
 
 
--[==[
Combine multiple tag sets in a tag set group into a simple tag set, with logical tag sets separated by semicolons.
This is the opposite of {split_tag_set()}.

`tag_sets` is a list of tag sets (each a list of tags). If it contains exactly one tag set, that tag set is returned
as-is (the same table, not a copy); otherwise a new list is built and returned. An empty `tag_sets` yields an empty
list.
]==]
function export.combine_tag_sets(tag_sets)
	if #tag_sets == 1 then
		return tag_sets[1]
	end
	local combined_tag_set = {}
	for _, tag_set in ipairs(tag_sets) do
		-- Separate this tag set from whatever has already been accumulated.
		if #combined_tag_set > 0 then
			table.insert(combined_tag_set, ";")
		end
		m_table.extendList(combined_tag_set, tag_set)
	end
	-- Return the list that was just built (the previous code returned the undefined name `tags`, i.e. nil).
	return combined_tag_set
end
 


local tag_set_param_mods = {
local tag_set_param_mods = {
Line 675: Line 670:
item_dest = "labels",
item_dest = "labels",
convert = function(arg, parse_err)
convert = function(arg, parse_err)
return rsplit(arg, "//", true)
return split(arg, "//", true)
end,
end,
}
}
}
}


--[==[
--[==[
Parse tag set properties from a tag set (list of tags). Currently no per-tag properties are recognized, and the only
Parse tag set properties from a tag set (list of tags). Currently no per-tag properties are recognized, and the only
per-tag-set property recognized is `<lb:...>` for specifying label(s) for the tag set. Per-tag-set properties must be
per-tag-set property recognized is `<lb:...>` for specifying label(s) for the tag set. Per-tag-set properties must be
attached to the last tag.
attached to the last tag.]==]
]==]
function export.parse_tag_set_properties(tag_set)
function export.parse_tag_set_properties(tag_set)
local function generate_tag_set_obj(last_tag)
local function generate_tag_set_obj(last_tag)
Line 697: Line 690:
-- we see a tag on the outer level that isn't in this format, we don't try to parse it. The restriction to the
-- we see a tag on the outer level that isn't in this format, we don't try to parse it. The restriction to the
-- outer level is to allow generated HTML inside of e.g. qualifier tags, such as foo<q:similar to {{m|fr|bar}}>.
-- outer level is to allow generated HTML inside of e.g. qualifier tags, such as foo<q:similar to {{m|fr|bar}}>.
if last_tag:find("<") and not last_tag:find("^[^<]*<[a-z]*[^a-z:]") then
if last_tag:find("<", nil, true) and not last_tag:find("^[^<]*<%l*[^%l:]") then
return require(parse_utilities_module).parse_inline_modifiers(last_tag, {
return parse_inline_modifiers(last_tag, {
param_mods = tag_set_param_mods,
param_mods = tag_set_param_mods,
generate_obj = generate_tag_set_obj,
generate_obj = generate_tag_set_obj,
Line 706: Line 699:
end
end
end
end
parse_tag_set_properties = export.parse_tag_set_properties


 
local function normalize_pos(pos)
function export.normalize_pos(pos)
if not pos then
if not pos then
return nil
return nil
end
end
return mw.loadData(export.form_of_pos_module)[pos] or pos
return (m_pos_data or get_m_pos_data())[pos] or pos
end
end


-- Return the display form of a single canonical-form tag. The value
-- Return the display form of a single canonical-form tag. The value
Line 720: Line 712:
-- multipart tag). To handle multipart tags, use get_tag_display_form().
-- multipart tag). To handle multipart tags, use get_tag_display_form().
local function get_single_tag_display_form(normtag, lang)
local function get_single_tag_display_form(normtag, lang)
local data = export.lookup_tag(normtag, lang)
local data = lookup_tag(normtag, lang)
local display = normtag
local display = normtag


Line 729: Line 721:


-- If there is a nonempty glossary index, then show a link to it
-- If there is a nonempty glossary index, then show a link to it
local glossary = data and data[export.GLOSSARY]
local glossary = data and data[(m_data or get_m_data()).GLOSSARY]
if glossary ~= nil then
if glossary ~= nil then
if glossary == export.WIKT then
if glossary == m_data.WIKT then
display = "[[" .. normtag .. "|" .. display .. "]]"
display = "[[" .. normtag .. "|" .. display .. "]]"
elseif glossary == export.WP then
elseif glossary == m_data.WP then
display = "[[w:" .. normtag .. "|" .. display .. "]]"
display = "[[w:" .. normtag .. "|" .. display .. "]]"
elseif glossary == export.APPENDIX then
elseif glossary == m_data.APPENDIX then
display = "[[Appendix:Glossary#" .. mw.uri.anchorEncode(normtag) .. "|" .. display .. "]]"
display = "[[Appendix:Glossary#" .. anchor_encode(normtag) .. "|" .. display .. "]]"
elseif type(glossary) ~= "string" then
elseif type(glossary) ~= "string" then
error(("Internal error: Wrong type %s for glossary value %s for tag %s"):format(
error(("Internal error: Wrong type %s for glossary value %s for tag %s"):format(
type(glossary), mw.dumpObject(glossary), normtag))
type(glossary), dump(glossary), normtag))
else
else
local link = rmatch(glossary, "^wikt:(.*)")
local link = glossary:match("^wikt:(.*)")
if link then
if link then
display = "[[" .. link .. "|" .. display .. "]]"
display = "[[" .. link .. "|" .. display .. "]]"
end
end
if not link then
if not link then
link = rmatch(glossary, "^w:(.*)")
link = glossary:match("^w:(.*)")
if link then
if link then
display = "[[w:" .. link .. "|" .. display .. "]]"
display = "[[w:" .. link .. "|" .. display .. "]]"
Line 752: Line 744:
end
end
if not link then
if not link then
display = "[[Appendix:Glossary#" .. mw.uri.anchorEncode(glossary) .. "|" .. display .. "]]"
display = "[[Appendix:Glossary#" .. anchor_encode(glossary) .. "|" .. display .. "]]"
end
end
end
end
Line 758: Line 750:
return display
return display
end
end


--[==[
--[==[
Line 767: Line 758:
more), {"slash"} ("foo/bar"), {"en-dash"} ("foo–bar") or {nil}, which uses the global default found in
more), {"slash"} ("foo/bar"), {"en-dash"} ("foo–bar") or {nil}, which uses the global default found in
{multipart_join_strategy()} in [[Module:form of/functions]]. (NOTE: The global default is {"slash"} and this seems
{multipart_join_strategy()} in [[Module:form of/functions]]. (NOTE: The global default is {"slash"} and this seems
unlikely to change.)
unlikely to change.)]==]
]==]
function export.get_tag_display_form(tagspec, lang, joiner)
function export.get_tag_display_form(tagspec, lang, joiner)
if type(tagspec) == "string" then
if type(tagspec) == "string" then
Line 774: Line 764:
end
end
-- We have a multipart tag. See if there's a display handler to display them specially.
-- We have a multipart tag. See if there's a display handler to display them specially.
for _, handler in ipairs(require(export.form_of_functions_module).display_handlers) do
for _, handler in ipairs(display_handlers or get_display_handlers()) do
local displayval = handler(tagspec, joiner)
local displayval = handler(tagspec, joiner)
if displayval then
if displayval then
Line 784: Line 774:
for _, first_level_tag in ipairs(tagspec) do
for _, first_level_tag in ipairs(tagspec) do
if type(first_level_tag) == "string" then
if type(first_level_tag) == "string" then
table.insert(displayed_tags, get_single_tag_display_form(first_level_tag, lang))
insert(displayed_tags, get_single_tag_display_form(first_level_tag, lang))
else
else
-- A first-level element of a two-level multipart tag. Currently we just separate the individual components
-- A first-level element of a two-level multipart tag. Currently we just separate the individual components
Line 790: Line 780:
local components = {}
local components = {}
for _, component in ipairs(first_level_tag) do
for _, component in ipairs(first_level_tag) do
table.insert(components, get_single_tag_display_form(component, lang))
insert(components, get_single_tag_display_form(component, lang))
end
end
table.insert(displayed_tags, table.concat(components, " "))
insert(displayed_tags, concat(components, " "))
end
end
end
end
return require(export.form_of_functions_module).join_multiparts(displayed_tags, joiner)
return join_multiparts(displayed_tags, joiner)
end
end
 
get_tag_display_form = export.get_tag_display_form


--[==[
--[==[
Given a normalized tag set (i.e. as output by {normalize_tag_set()}; all tags are in canonical form, multipart tags are
Given a normalized tag set (i.e. as output by {normalize_tag_set()}; all tags are in canonical form, multipart tags are
represented as lists, and two-level multipart tags as lists of lists), convert to displayed form (a string). See
represented as lists, and two-level multipart tags as lists of lists), convert to displayed form (a string). See
{get_tag_display_form()} for the meaning of `joiner`.
{get_tag_display_form()} for the meaning of `joiner`.]==]
]==]
function export.get_tag_set_display_form(normalized_tag_set, lang, joiner)
function export.get_tag_set_display_form(normalized_tag_set, lang, joiner)
local parts = {}
local parts = {}


for _, tagspec in ipairs(normalized_tag_set) do
for _, tagspec in ipairs(normalized_tag_set) do
local to_insert = export.get_tag_display_form(tagspec, lang, joiner)
local to_insert = get_tag_display_form(tagspec, lang, joiner)
-- Maybe insert a space before inserting the display form of the tag. We insert a space if
-- Maybe insert a space before inserting the display form of the tag. We insert a space if
-- (a) we're not the first tag; and
-- (a) we're not the first tag; and
Line 817: Line 806:
-- (2) all tags with either of the above properties set are single-character tags.
-- (2) all tags with either of the above properties set are single-character tags.
-- The second property is an optimization to avoid looking up display forms resulting from multipart tags,
-- The second property is an optimization to avoid looking up display forms resulting from multipart tags,
-- which won't be found and which will trigger loading of [[Module:form of/data2]]. If multichar punctuation is
-- which won't be found and which will trigger loading of [[Module:form of/data/2]]. If multichar punctuation is
-- added in the future, it's ok to change the == 1 below to <= 2 or <= 3.
-- added in the future, it's ok to change the == 1 below to <= 2 or <= 3.
--
--
Line 823: Line 812:
-- (including the previous one) as well as the display form. This would also avoid the need for the == 1 check.
-- (including the previous one) as well as the display form. This would also avoid the need for the == 1 check.
if #parts > 0 then
if #parts > 0 then
local most_recent_tagobj = ulen(parts[#parts]) == 1 and export.lookup_tag(parts[#parts], lang)
local most_recent_tagobj = parts[#parts]:match("^.[\128-\191]*$") and lookup_tag(parts[#parts], lang)
local to_insert_tagobj = ulen(to_insert) == 1 and export.lookup_tag(to_insert, lang)
local to_insert_tagobj = to_insert:match("^.[\128-\191]*$") and lookup_tag(to_insert, lang)
if (
if (
(not most_recent_tagobj or not most_recent_tagobj.no_space_on_right) and
(not most_recent_tagobj or not most_recent_tagobj.no_space_on_right) and
(not to_insert_tagobj or not to_insert_tagobj.no_space_on_left)
(not to_insert_tagobj or not to_insert_tagobj.no_space_on_left)
) then
) then
table.insert(parts, " ")
insert(parts, " ")
end
end
end
end
table.insert(parts, to_insert)
insert(parts, to_insert)
end
end


return table.concat(parts)
return concat(parts)
end
end
 
get_tag_set_display_form = export.get_tag_set_display_form


--[==[
--[==[
Given a normalized tag set (i.e. as output by {normalize_tag_set()}; all tags are in canonical form, multipart tags are
Split a tag set containing two-level multipart tags into one or more tag sets not containing such tags.
represented as lists, and two-level multipart tags as lists of lists), fetch the associated categories and labels.
Single-level multipart tags are left alone. (If we need to, a slight modification of the following code
Return two values, a list of categories and a list of labels. `lang` is the language of term represented by the tag set,
will also split single-level multipart tags.) This assumes that multipart tags are represented as lists
and `POS` is the user-provided part of speech (which may be {nil}).
and two-level multipart tags are represented as lists of lists, as is output by {normalize_tag_set()}.
]==]
NOTE: We have to be careful to properly handle imbalanced two-level multipart tags such as
function export.fetch_categories_and_labels(normalized_tag_set, lang, POS, pagename, lemmas)
`def:s//p` (or the reverse, `s//def:p`).]==]
local m_cats = mw.loadData(export.form_of_cats_module)
local function split_two_level_multipart_tag_set(tag_set)
local categories = {}
for i, tag in ipairs(tag_set) do
local labels = {}
if type(tag) == "table" then
 
-- We saw a multipart tag. Check if any of the parts are two-level.
POS = export.normalize_pos(POS)
local saw_two_level_tag = false
-- First split any two-level multipart tags into multiple sets, to make our life easier.
for _, first_level_tag in ipairs(tag) do
for _, tag_set in ipairs(export.split_two_level_multipart_tag_set(normalized_tag_set)) do
if type(first_level_tag) == "table" then
-- Call a named function, either from the lang-specific data in
saw_two_level_tag = true
-- [[Module:form of/lang-specific/LANGCODE/functions]] or in [[Module:form of/functions]].
break
local function call_named_function(name, funtype)
local data = {
pagename = pagename or mw.title.getCurrentTitle().subpageText,
lemmas = lemmas,
tag_set = normalized_tag_set,
lang = lang,
POS = POS
}
local modules_tried = {}
local function try_lang_specific_module(langcode)
if export.langs_with_lang_specific_tags[langcode] then
local lang_specific_module = export.form_of_lang_data_module_prefix .. langcode .. "/functions"
local langdata = require(utilities_module).safe_require(lang_specific_module)
if langdata then
table.insert(modules_tried, lang_specific_module)
if langdata.cat_functions then
local fn = langdata.cat_functions[name]
if fn then
return fn(data), true
end
end
end
end
end
return nil, false
end
end
-- First try lang-specific.
if saw_two_level_tag then
local langcode = lang and lang:getCode()
-- We found a two-level multipart tag.
if langcode then
-- (1) Extract the preceding tags.
local retval, found_it = try_lang_specific_module(langcode)
local pre_tags = slice(tag_set, 1, i - 1)
if found_it then
-- (2) Extract the following tags.
return retval
local post_tags = slice(tag_set, i + 1)
-- (3) Loop over each tag set alternant in the two-level multipart tag.
-- For each alternant, form the tag set consisting of pre_tags + alternant + post_tags,
-- and recursively split that tag set.
local resulting_tag_sets = {}
for _, first_level_tag_set in ipairs(tag) do
local expanded_tag_set = {}
extend(expanded_tag_set, pre_tags)
-- The second level may have a string or a list.
if type(first_level_tag_set) == "table" then
extend(expanded_tag_set, first_level_tag_set)
else
insert(expanded_tag_set, first_level_tag_set)
end
extend(expanded_tag_set, post_tags)
extend(resulting_tag_sets, split_two_level_multipart_tag_set(expanded_tag_set))
end
end
return resulting_tag_sets
end
end
-- If the lang we're dealing with is an etym-only lang, try again with the corresponding full language.
end
local full_langcode = lang and lang:getFullCode()
end
if full_langcode and full_langcode ~= langcode then
return {tag_set}
local retval, found_it = try_lang_specific_module(full_langcode)
end
if found_it then
 
return retval
local function try_lang_specific_module(langcode, modules_tried, name, data)
end
local lang_specific_module = form_of_lang_data_module_prefix .. langcode .. "/functions"
end
local langdata = safe_require(lang_specific_module)
-- Try lang-independent.
if langdata then
table.insert(modules_tried, export.form_of_functions_module)
insert(modules_tried, lang_specific_module)
local fn = require(export.form_of_functions_module).cat_functions[name]
if langdata.cat_functions then
local fn = langdata.cat_functions[name]
if fn then
if fn then
return fn(data)
return fn(data), true
end
end
for i, modname in ipairs(modules_tried) do
modules_tried[i] = "[[" .. modname .. "]]"
end
error(("No %s function named '%s' in %s"):format(funtype, name, lang_specific_part,
m_table.serialCommaJoin(modules_tried, {conj = "or", dontTag = true})))
end
end
end
return nil, false
end
-- Call a named function, either from the lang-specific data in
-- [[Module:form of/lang-specific/LANGCODE/functions]] or in [[Module:form of/functions]].
local function call_named_function(name, funtype, normalized_tag_set, lang, POS, pagename, lemmas)
local data = {
pagename = pagename or default_pagename or get_default_pagename(),
lemmas = lemmas,
tag_set = normalized_tag_set,
lang = lang,
POS = POS
}
local modules_tried = {}
-- First try lang-specific.
while lang do
local retval, found_it = try_lang_specific_module(lang:getCode(), modules_tried, name, data)
if found_it then
return retval
end
-- If the language has a parent (i.e. a superordinate variety), try again with that.
lang = lang:getParent()
end
-- Try lang-independent.
insert(modules_tried, form_of_functions_module)
local fn = (cat_functions or get_cat_functions())[name]
if fn then
return fn(data)
end
for i, modname in ipairs(modules_tried) do
modules_tried[i] = "[[" .. modname .. "]]"
end
error(("No %s function named '%s' in %s"):format(funtype, name, list_to_text(modules_tried, nil, " or ")))
end


-- Given a tag from the current tag set (which may be a list in case of a multipart tag),
-- Given a tag from the current tag set (which may be a list in case of a multipart tag),
-- and a tag from a categorization spec, check that the two match.
-- and a tag from a categorization spec, check that the two match.
-- (1) If both are strings, we just check for equality.
-- (1) If both are strings, we just check for equality.
-- (2) If the spec tag is a string and the tag set tag is a list (i.e. it originates from a
-- (2) If the spec tag is a string and the tag set tag is a list (i.e. it originates from a
-- multipart tag), we check that the spec tag is in the list. This is because we want to treat
-- multipart tag), we check that the spec tag is in the list. This is because we want to treat
-- multipart tags in user-specified tag sets as if the user had specified multiple tag sets.
-- multipart tags in user-specified tag sets as if the user had specified multiple tag sets.
-- For example, if the user said "1//3|s|pres|ind" and the categorization spec says {"has", "1"},
-- For example, if the user said "1//3|s|pres|ind" and the categorization spec says {"has", "1"},
-- we want this to match, because "1//3|s|pres|ind" should be treated equivalently to two tag
-- we want this to match, because "1//3|s|pres|ind" should be treated equivalently to two tag
-- sets "1|s|pres|ind" and "3|s|pres|ind", and the former matches the categorization spec.
-- sets "1|s|pres|ind" and "3|s|pres|ind", and the former matches the categorization spec.
-- (3) If the spec tag is a list (i.e. it originates from a multipart tag), we check that the
-- (3) If the spec tag is a list (i.e. it originates from a multipart tag), we check that the
-- tag set tag is also a list and is a superset of the spec tag. For example, if the categorization
-- tag set tag is also a list and is a superset of the spec tag. For example, if the categorization
-- spec says {"has", "1//3"}, then the tag set tag must be a multipart tag that has both "1" and "3"
-- spec says {"has", "1//3"}, then the tag set tag must be a multipart tag that has both "1" and "3"
-- in it. "1//3" works, as does "1//2//3".
-- in it. "1//3" works, as does "1//2//3".
local function tag_set_tag_matches_spec_tag(tag_set_tag, spec_tag)
local function tag_set_tag_matches_spec_tag(tag_set_tag, spec_tag)
if type(spec_tag) == "table" then
if type(spec_tag) == "table" then
if type(tag_set_tag) == "table" and is_subset(spec_tag, tag_set_tag) then
if type(tag_set_tag) == "table" and is_subset_list(spec_tag, tag_set_tag) then
return true
return true
end
end
elseif type(tag_set_tag) == "table" then
elseif type(tag_set_tag) == "table" then
if m_table.contains(tag_set_tag, spec_tag) then
if contains(tag_set_tag, spec_tag) then
return true
return true
end
elseif tag_set_tag == spec_tag then
return true
end
return false
end
end
elseif tag_set_tag == spec_tag then
return true
end
return false
end


-- Check that the current tag set matches the given spec tag. This means that any of the tags
-- Check that the current tag set matches the given spec tag. This means that any of the tags
-- in the current tag set match, according to tag_set_tag_matches_spec_tag(); see above. If the
-- in the current tag set match, according to tag_set_tag_matches_spec_tag(); see above. If the
-- current tag set contains only string tags (i.e. no multipart tags), and the spec tag is a
-- current tag set contains only string tags (i.e. no multipart tags), and the spec tag is a
-- string (i.e. not a multipart tag), this boils down to list containment, but it gets more
-- string (i.e. not a multipart tag), this boils down to list containment, but it gets more
-- complex when multipart tags are present.
-- complex when multipart tags are present.
local function tag_set_matches_spec_tag(spec_tag)
local function tag_set_matches_spec_tag(spec_tag, tag_set, lang)
spec_tag = normalize_tag(spec_tag, lang)
spec_tag = normalize_tag(spec_tag, lang)
for _, tag_set_tag in ipairs(tag_set) do
for _, tag_set_tag in ipairs(tag_set) do
if tag_set_tag_matches_spec_tag(tag_set_tag, spec_tag) then
if tag_set_tag_matches_spec_tag(tag_set_tag, spec_tag) then
return true
return true
end
end
return false
end
end
end
return false
end


-- Check whether the given spec matches the current tag set. Two values are returned:
-- Check whether the given spec matches the current tag set. Two values are returned:
-- (1) whether the spec matches the tag set; (2) the index of the category to add if
-- (1) whether the spec matches the tag set; (2) the index of the category to add if
-- the spec matches.
-- the spec matches.
local function check_condition(spec)
local function check_condition(spec, tag_set, normalized_tag_set, lang, POS, pagename, lemmas)
if type(spec) == "boolean" then
if type(spec) == "boolean" then
return spec
return spec
elseif type(spec) ~= "table" then
elseif type(spec) ~= "table" then
error("Wrong type of condition " .. spec .. ": " .. type(spec))
error("Wrong type of condition " .. spec .. ": " .. type(spec))
end
local predicate = spec[1]
if predicate == "has" then
return tag_set_matches_spec_tag(spec[2], tag_set, lang), 3
elseif predicate == "hasall" then
for _, tag in ipairs(spec[2]) do
if not tag_set_matches_spec_tag(tag, tag_set, lang) then
return false, 3
end
end
local predicate = spec[1]
end
if predicate == "has" then
return true, 3
return tag_set_matches_spec_tag(spec[2]), 3
elseif predicate == "hasany" then
elseif predicate == "hasall" then
for _, tag in ipairs(spec[2]) do
for _, tag in ipairs(spec[2]) do
if tag_set_matches_spec_tag(tag, tag_set, lang) then
if not tag_set_matches_spec_tag(tag) then
return false, 3
end
end
return true, 3
return true, 3
elseif predicate == "hasany" then
end
for _, tag in ipairs(spec[2]) do
end
if tag_set_matches_spec_tag(tag) then
return false, 3
return true, 3
elseif predicate == "tags=" then
local normalized_spec_tag_sets = normalize_tag_set(spec[2], lang)
if #normalized_spec_tag_sets > 1 then
error("Internal error: No support for conjoined shortcuts in category/label specs in "
.. "[[Module:form of/cats]] when processing spec tag set " .. concat(spec[2], "|"))
end
local normalized_spec_tag_set = normalized_spec_tag_sets[1]
-- Check for and disallow two-level multipart tags in the specs. FIXME: Remove this when we remove
-- support for two-level multipart tags.
for _, tag in ipairs(normalized_spec_tag_set) do
if type(tag) == "table" then
for _, subtag in ipairs(tag) do
if type(subtag) == "table" then
error("Internal error: No support for two-level multipart tags in category/label specs"
.. "[[Module:form of/cats]] when processing spec tag set "
.. concat(spec[2], "|"))
end
end
end
end
end
end
-- Allow tags to be in different orders, and multipart tags to be in different orders. To handle this,
-- we first check that both tag set tags and spec tags have the same length. If so, we sort the
-- multipart tags in the tag set tags and spec tags, and then check that all tags in the spec tags are
-- in the tag set tags.
if #tag_set ~= #normalized_spec_tag_set then
return false, 3
end
local tag_set_tags = deep_copy(tag_set)
for i=1,#tag_set_tags do
if type(tag_set_tags[i]) == "table" then
sort(tag_set_tags[i])
end
if type(normalized_spec_tag_set[i]) == "table" then
sort(normalized_spec_tag_set[i])
end
end
for i=1,#tag_set_tags do
if not contains(tag_set_tags, normalized_spec_tag_set[i]) then
return false, 3
return false, 3
elseif predicate == "tags=" then
end
local normalized_spec_tag_sets = export.normalize_tag_set(spec[2], lang)
end
if #normalized_spec_tag_sets > 1 then
return true, 3
error("Internal error: No support for conjoined shortcuts in category/label specs in "
elseif predicate == "p=" then
.. "[[Module:form of/cats]] when processing spec tag set " .. table.concat(spec[2], "|"))
return POS == normalize_pos(spec[2]), 3
end
elseif predicate == "pany" then
local normalized_spec_tag_set = normalized_spec_tag_sets[1]
for _, specpos in ipairs(spec[2]) do
-- Check for and disallow two-level multipart tags in the specs. FIXME: Remove this when we remove
if POS == normalize_pos(specpos) then
-- support for two-level multipart tags.
for _, tag in ipairs(normalized_spec_tag_set) do
if type(tag) == "table" then
for _, subtag in ipairs(tag) do
if type(subtag) == "table" then
error("Internal error: No support for two-level multipart tags in category/label specs"
.. "[[Module:form of/cats]] when processing spec tag set "
.. table.concat(spec[2], "|"))
end
end
end
end
-- Allow tags to be in different orders, and multipart tags to be in different orders. To handle this,
-- we first check that both tag set tags and spec tags have the same length. If so, we sort the
-- multipart tags in the tag set tags and spec tags, and then check that all tags in the spec tags are
-- in the tag set tags.
if #tag_set ~= #normalized_spec_tag_set then
return false, 3
end
local tag_set_tags = m_table.deepcopy(tag_set)
for i=1,#tag_set_tags do
if type(tag_set_tags[i]) == "table" then
table.sort(tag_set_tags[i])
end
if type(normalized_spec_tag_set[i]) == "table" then
table.sort(normalized_spec_tag_set[i])
end
end
for i=1,#tag_set_tags do
if not m_table.contains(tag_set_tags, normalized_spec_tag_set[i]) then
return false, 3
end
end
return true, 3
return true, 3
elseif predicate == "p=" then
return POS == export.normalize_pos(spec[2]), 3
elseif predicate == "pany" then
for _, specpos in ipairs(spec[2]) do
if POS == export.normalize_pos(specpos) then
return true, 3
end
end
return false, 3
elseif predicate == "pexists" then
return POS ~= nil, 2
elseif predicate == "not" then
local condval = check_condition(spec[2])
return not condval, 3
elseif predicate == "and" then
local condval = check_condition(spec[2])
if condval then
condval = check_condition(spec[3])
end
return condval, 4
elseif predicate == "or" then
local condval = check_condition(spec[2])
if not condval then
condval = check_condition(spec[3])
end
return condval, 4
elseif predicate == "call" then
return fn(call_named_function(spec[2], "condition")), 3
else
error("Unrecognized predicate: " .. predicate)
end
end
end
end
return false, 3
elseif predicate == "pexists" then
return POS ~= nil, 2
elseif predicate == "not" then
local condval = check_condition(spec[2], tag_set, normalized_tag_set, lang, POS, pagename, lemmas)
return not condval, 3
elseif predicate == "and" then
local condval = check_condition(spec[2], tag_set, normalized_tag_set, lang, POS, pagename, lemmas)
if condval then
condval = check_condition(spec[3], tag_set, normalized_tag_set, lang, POS, pagename, lemmas)
end
return condval, 4
elseif predicate == "or" then
local condval = check_condition(spec[2], tag_set, normalized_tag_set, lang, POS, pagename, lemmas)
if not condval then
condval = check_condition(spec[3], tag_set, normalized_tag_set, lang, POS, pagename, lemmas)
end
return condval, 4
elseif predicate == "call" then
return call_named_function(spec[2], "condition", normalized_tag_set, lang, POS, pagename, lemmas), 3
else
error("Unrecognized predicate: " .. predicate)
end
end


-- Process a given spec. This checks any conditions in the spec against the
-- Process a given spec. This checks any conditions in the spec against the
-- tag set, and insert any resulting categories into `categories`. Return value
-- tag set, and insert any resulting categories into `categories`. Return value
-- is true if the outermost condition evaluated to true and a category was inserted
-- is true if the outermost condition evaluated to true and a category was inserted
-- (this is used in {"cond" ...} conditions, which stop when a subcondition evaluates
-- (this is used in {"cond" ...} conditions, which stop when a subcondition evaluates
-- to true).
-- to true).
local function process_spec(spec)
local function process_spec(spec, tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels)
if not spec then
if not spec then
return false
return false
elseif type(spec) == "string" then
elseif type(spec) == "string" then
-- A category. Substitute POS request with user-specified part of speech or default.
-- A category. Substitute POS request with user-specified part of speech or default.
spec = rsub(spec, "<<p=(.-)>>", function(default)
spec = spec:gsub("<<p=(.-)>>", function(default)
return POS or export.normalize_pos(default)
return POS or normalize_pos(default)
end)
end)
table.insert(categories, lang:getFullName() .. " " .. spec)
insert(categories, lang:getFullName() .. " " .. spec)
return true
elseif type(spec) == "table" and spec.labels then
-- A label spec.
for _, label in ipairs(spec.labels) do
insert_if_not(labels, label)
end
return true
elseif type(spec) ~= "table" then
error("Wrong type of specification " .. spec .. ": " .. type(spec))
end
local predicate = spec[1]
if predicate == "multi" then
for _, sp in iterate_from(2, ipairs(spec)) do -- Iterate from 2.
process_spec(sp, tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels)
end
return true
elseif predicate == "cond" then
for _, sp in iterate_from(2, ipairs(spec)) do -- Iterate from 2.
if process_spec(sp, tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels) then
return true
return true
elseif type(spec) == "table" and spec.labels then
-- A label spec.
for _, label in ipairs(spec.labels) do
m_table.insertIfNot(labels, label)
end
return true
elseif type(spec) ~= "table" then
error("Wrong type of specification " .. spec .. ": " .. type(spec))
end
local predicate = spec[1]
if predicate == "multi" then
-- WARNING! #spec doesn't work for objects loaded from loadData()
for i, sp in ipairs(spec) do
if i > 1 then
process_spec(sp)
end
end
return true
elseif predicate == "cond" then
-- WARNING! #spec doesn't work for objects loaded from loadData()
for i, sp in ipairs(spec) do
if i > 1 and process_spec(sp) then
return true
end
end
return false
elseif predicate == "call" then
return process_spec(call_named_function(spec[2], "spec"))
else
local condval, ifspec = check_condition(spec)
if condval then
process_spec(spec[ifspec])
return true
else
process_spec(spec[ifspec + 1])
-- FIXME: Are we sure this is correct?
return false
end
end
end
end
end
return false
elseif predicate == "call" then
return process_spec(
call_named_function(spec[2], "spec", normalized_tag_set, lang, POS, pagename, lemmas),
tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels
)
else
local condval, ifspec = check_condition(spec, tag_set, normalized_tag_set, lang, POS, pagename, lemmas)
if condval then
process_spec(spec[ifspec], tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels)
return true
else
process_spec(spec[ifspec + 1], tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels)
-- FIXME: Are we sure this is correct?
return false
end
end
end


--[==[
Given a normalized tag set (i.e. as output by {normalize_tag_set()}; all tags are in canonical form, multipart tags are
represented as lists, and two-level multipart tags as lists of lists), fetch the associated categories and labels.
Return two values, a list of categories and a list of labels. `lang` is the language of term represented by the tag set,
and `POS` is the user-provided part of speech (which may be {nil}). `pagename` and `lemmas` are not inspected here; they
are passed through unchanged to {process_spec()} along with the other values.

Category specs are looked up under up to three keys, in this order: the language's code, its full code (only if that
differs from the code), and the fallback key {"und"} (only if the full code is not itself {"und"}).]==]
function export.fetch_categories_and_labels(normalized_tag_set, lang, POS, pagename, lemmas)
	local categories, labels = {}, {}
	POS = normalize_pos(POS)

	-- Run every category spec registered under `code` (if any) against `tag_set`. process_spec() accumulates its
	-- results directly into `categories` and `labels`.
	local function apply_specs(code, tag_set)
		local langspecs = (m_cats_data or get_m_cats_data())[code]
		if langspecs then
			for _, spec in ipairs(langspecs) do
				process_spec(spec, tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels)
			end
		end
	end

	-- First split any two-level multipart tags into multiple sets, to make our life easier.
	for _, tag_set in ipairs(split_two_level_multipart_tag_set(normalized_tag_set)) do
		local langcode = lang:getCode()
		apply_specs(langcode, tag_set)
		local full_code = lang:getFullCode()
		if full_code ~= langcode then
			apply_specs(full_code, tag_set)
		end
		if full_code ~= "und" then
			apply_specs("und", tag_set)
		end
	end
	return categories, labels
end
-- Bind the exported function to the bare name `fetch_categories_and_labels` as well.
-- NOTE(review): presumably this name is a forward-declared local near the top of the module; if it is not, this
-- assignment creates a global -- confirm.
fetch_categories_and_labels = export.fetch_categories_and_labels


-- Format a list of labels for display in front of a form-of definition.
--
-- `labels` is the list of labels (may be nil or empty, in which case the empty string is returned). `data` supplies
-- the `lang`, `sort` and `nocat` fields forwarded to show_labels(), plus `pretext`. The formatted labels are followed
-- by a single space, unless `notext` is truthy and `data.pretext` is nil or empty (i.e. nothing will follow the
-- labels), in which case no separator is appended.
local function format_labels(labels, data, notext)
	if not labels or #labels == 0 then
		return ""
	end
	local formatted = show_labels{
		labels = labels,
		lang = data.lang,
		sort = data.sort,
		nocat = data.nocat
	}
	local separator = " "
	if notext and (data.pretext or "") == "" then
		separator = ""
	end
	return formatted .. separator
end


--[==[
--[==[
Line 1,206: Line 1,240:
`data.no_format_categories` is set to {true}; but they can be suppressed by setting `data.nocat` = {true} (which also
`data.no_format_categories` is set to {true}; but they can be suppressed by setting `data.nocat` = {true} (which also
suppresses the first type of categories, those derived directly from tag sets, even if `data.no_format_categories` is
suppresses the first type of categories, those derived directly from tag sets, even if `data.no_format_categories` is
set to {true}).
set to {true}).]==]
]==]
function export.tagged_inflections(data)
function export.tagged_inflections(data)
if not data.tags and not data.tag_sets then
if not data.tags and not data.tag_sets then
Line 1,217: Line 1,250:
local tag_sets = data.tag_sets
local tag_sets = data.tag_sets
if not tag_sets then
if not tag_sets then
tag_sets = export.split_tag_set(data.tags)
tag_sets = split_tag_set(data.tags)
for i, tag_set in ipairs(tag_sets) do
for i, tag_set in ipairs(tag_sets) do
tag_sets[i] = export.parse_tag_set_properties(tag_set)
tag_sets[i] = parse_tag_set_properties(tag_set)
end
end
end
end
Line 1,226: Line 1,259:
local categories = {}
local categories = {}
for _, tag_set in ipairs(tag_sets) do
for _, tag_set in ipairs(tag_sets) do
local normalized_tag_sets = export.normalize_tag_set(tag_set.tags, data.lang, "do-track")
local normalized_tag_sets = normalize_tag_set(tag_set.tags, data.lang, "do-track")


for _, normalized_tag_set in ipairs(normalized_tag_sets) do
for _, normalized_tag_set in ipairs(normalized_tag_sets) do
local cur_infl = {}
local this_categories, this_labels = fetch_categories_and_labels(normalized_tag_set, data.lang,
local this_categories, this_labels = export.fetch_categories_and_labels(normalized_tag_set, data.lang,
data.POS, data.pagename, type(data.lemmas) == "table" and data.lemmas or nil)
data.POS, data.pagename, type(data.lemmas) == "table" and data.lemmas or nil)
if not data.nocat then
if not data.nocat then
m_table.extendList(categories, this_categories)
extend(categories, this_categories)
end
end
local cur_infl = export.get_tag_set_display_form(normalized_tag_set, data.lang, data.joiner)
local cur_infl = get_tag_set_display_form(normalized_tag_set, data.lang, data.joiner)
if #cur_infl > 0 then
if #cur_infl > 0 then
if tag_set.labels then
if tag_set.labels then
this_labels = m_table.append(tag_set.labels, this_labels)
this_labels = append(tag_set.labels, this_labels)
end
end
table.insert(inflections, {infl_text = cur_infl, labels = this_labels})
insert(inflections, {infl_text = cur_infl, labels = this_labels})
end
end
end
end
Line 1,249: Line 1,281:
if overall_labels == nil then
if overall_labels == nil then
overall_labels = inflection.labels
overall_labels = inflection.labels
elseif not m_table.deepEquals(overall_labels, inflection.labels) then
elseif not deep_equals(overall_labels, inflection.labels) then
need_per_tag_set_labels = true
need_per_tag_set_labels = true
overall_labels = nil
overall_labels = nil
Line 1,262: Line 1,294:
end
end


local format_data = m_table.shallowcopy(data)
local format_data = shallow_copy(data)
 
local function format_labels(labels, notext)
if labels and #labels > 0 then
return require(labels_module).show_labels { labels = labels, lang = data.lang, sort = data.sort, nocat = data.nocat } ..
(notext and (data.pretext or "") == "" and "" or " ")
else
return ""
end
end


local of_text = data.lemmas and " of" or ""
local of_text = data.lemmas and " of" or ""
Line 1,279: Line 1,302:
error("Internal error: need_per_tag_set_labels should not be set with one inflection")
error("Internal error: need_per_tag_set_labels should not be set with one inflection")
end
end
format_data.text = format_labels(overall_labels, data.notext) .. (data.pretext or "") .. (data.notext and "" or
format_data.text = format_labels(overall_labels, data, data.notext) .. (data.pretext or "") .. (data.notext and "" or
((data.capfirst and require("Module:string utilities").ucfirst(inflections[1].infl_text) or inflections[1].infl_text) .. of_text))
((data.capfirst and ucfirst(inflections[1].infl_text) or inflections[1].infl_text) .. of_text))
formatted_text = export.format_form_of(format_data)
formatted_text = format_form_of(format_data)
else
else
format_data.text = format_labels(overall_labels, data.notext) .. (data.pretext or "") .. (data.notext and "" or
format_data.text = format_labels(overall_labels, data, data.notext) .. (data.pretext or "") .. (data.notext and "" or
((data.capfirst and "Inflection" or "inflection") .. of_text))
((data.capfirst and "Inflection" or "inflection") .. of_text))
format_data.posttext = (data.posttext or "") .. ":"
format_data.posttext = (data.posttext or "") .. ":"
local link = export.format_form_of(format_data)
local link = format_form_of(format_data)
local text_classes = data.text_classes or "form-of-definition use-with-mention"
local text_classes = data.text_classes or "form-of-definition use-with-mention"
for i, inflection in ipairs(inflections) do
for i, inflection in ipairs(inflections) do
inflections[i] = "\n## " .. format_labels(inflection.labels, false) ..
inflections[i] = "\n## " .. format_labels(inflection.labels, data, false) ..
"<span class='" .. text_classes .. "'>" .. inflection.infl_text .. "</span>"
"<span class='" .. text_classes .. "'>" .. inflection.infl_text .. "</span>"
end
end
formatted_text = link .. table.concat(inflections)
formatted_text = link .. concat(inflections)
end
end


if not data.no_format_categories then
if not data.no_format_categories then
if #categories > 0 then
if #categories > 0 then
formatted_text = formatted_text .. require("Module:utilities").format_categories(categories, data.lang,
formatted_text = formatted_text .. format_categories(categories, data.lang,
data.sort, nil, export.force_cat)
data.sort, nil, export.force_cat)
end
end
Line 1,304: Line 1,327:
return formatted_text, categories
return formatted_text, categories
end
end
 
-- Bind the exported function to the bare name `tagged_inflections` as well.
-- NOTE(review): presumably this name is a forward-declared local near the top of the module; if it is not, this
-- assignment creates a global -- confirm.
tagged_inflections = export.tagged_inflections
 
--[==[
Given a tag set, return a flattened list of the Wikidata IDs (strings of the form {"Q..."}) of all tags in the tag set.
The tag set is first normalized with {normalize_tag_set()}; multipart tags and two-level multipart tags are flattened
in order. If a tag has no Wikidata ID, an error is thrown unless `skip_tags_without_ids` is set, in which case the tag
is silently skipped. FIXME: Only used in a debugging function in [[Module:se-verbs]]; move there.
]==]
function export.to_Wikidata_IDs(tag_set, lang, skip_tags_without_ids)
	local ids = {}

	-- Look up a single tag and append its Wikidata ID to `ids`, or throw/skip if it has none.
	local function add_id(tag)
		local tag_data = export.lookup_tag(tag, lang)
		local qid = tag_data and tag_data[export.WIKIDATA]
		if qid then
			ids[#ids + 1] = ("Q%s"):format(qid)
		elseif not skip_tags_without_ids then
			error('The tag "' .. tag .. '" does not have a Wikidata ID defined in the form-of data modules')
		end
	end

	for _, normalized in ipairs(export.normalize_tag_set(tag_set, lang)) do
		for _, tag in ipairs(normalized) do
			if type(tag) ~= "table" then
				add_id(tag)
			else
				-- multipart tag
				for _, subtag in ipairs(tag) do
					if type(subtag) ~= "table" then
						add_id(subtag)
					else
						-- two-level multipart tag; FIXME: delete support for this
						for _, subsubtag in ipairs(subtag) do
							add_id(subsubtag)
						end
					end
				end
			end
		end
	end

	return ids
end
 


-- Load the two form-of data modules and return them serialized as JSON, as a two-element list (data module 1 first,
-- then data module 2). `frame` is accepted but not used.
function export.dump_form_of_data(frame)
	local data = {
		require(form_of_data1_module),
		require(form_of_data2_module)
	}
	return require(json_module).toJSON(data)
end
 
 
-- Register the shortcuts of every tag in `tags` into the `shortcuts` table, which maps shortcut -> tag name and is
-- modified in place. The shortcuts of a tag are read from its `export.SHORTCUTS` field, which may be a single string
-- or a list of strings. Throws an error if a shortcut is already registered in `shortcuts`, or if it collides with
-- the name of a tag in `tags`. Returns nothing.
function export.finalize_tag_data(tags, shortcuts)
	local function process_shortcut(name, shortcut)
		-- If the shortcut is already in the list, then there is a duplicate.
		if shortcuts[shortcut] then
			error("The shortcut \"" .. shortcut .. "\" (for the inflection tag \"" .. name .. "\") conflicts with an existing shortcut for the tag \"" .. shortcuts[shortcut] .. "\".")
		elseif tags[shortcut] then
			error("The shortcut \"" .. shortcut .. "\" (for the inflection tag \"" .. name .. "\") conflicts with an existing tag with that name.")
		end

		shortcuts[shortcut] = name
	end

	for name, data in pairs(tags) do
		local data_shortcuts = data[export.SHORTCUTS]
		if data_shortcuts then
			if type(data_shortcuts) == "string" then
				-- A lone shortcut given directly as a string.
				process_shortcut(name, data_shortcuts)
			else
				for _, shortcut in ipairs(data_shortcuts) do
					process_shortcut(name, shortcut)
				end
			end
		end
	end
end


-- Expose the module-name values held in the file-level locals of the same names as fields of the export table.
export.form_of_cats_module = form_of_cats_module
export.form_of_data1_module = form_of_data1_module
export.form_of_data2_module = form_of_data2_module
export.form_of_functions_module = form_of_functions_module
export.form_of_lang_data_module_prefix = form_of_lang_data_module_prefix
export.form_of_pos_module = form_of_pos_module


return export
return export

Navigation menu