Module:form of: Difference between revisions

(19 intermediate revisions by the same user not shown)

Line 1:

-- Table of functions and settings exported by this module.
local export = {}

export.force_cat = false -- for testing; set to true to display categories even on non-mainspace pages

local debug_track_module = "Module:debug/track"

local etymology_module = "Module:etymology"

local form_of_cats_module = "Module:form of/cats"

local form_of_data_module = "Module:form of/data"

local form_of_data1_module = "Module:form of/data/1"

local form_of_data2_module = "Module:form of/data/2"

local form_of_functions_module = "Module:form of/functions"

local form_of_lang_data_module_prefix = "Module:form of/lang-data/"

local function_module = "Module:fun"

local headword_data_module = "Module:headword/data"

local json_module = "Module:JSON"

local labels_module = "Module:labels"

local links_module = "Module:links"

local load_module = "Module:load"

local parse_utilities_module = "Module:parse utilities"

local string_utilities_module = "Module:string utilities"

local table_module = "Module:table"

local table_deep_equals_module = "Module:table/deepEquals"

local utilities_module = "Module:utilities"

local anchor_encode = mw.uri.anchorEncode

local concat = table.concat

local dump = mw.dumpObject

local fetch_categories_and_labels -- Defined below.

local format_form_of -- Defined below.

local get_tag_display_form -- Defined below.

local get_tag_set_display_form -- Defined below.

local insert = table.insert

local ipairs = ipairs

local is_link_or_html -- Defined below.

local list_to_text = mw.text.listToText

local lookup_shortcut -- Defined below.

local lookup_tag -- Defined below.

local normalize_tag_set -- Defined below.

local parse_tag_set_properties -- Defined below.

local require = require

local sort = table.sort

local split_tag_set -- Defined below.

local type = type

--[==[

Loaders for functions in other modules, which overwrite themselves with the target function when called. This ensures modules are only loaded when needed, retains the speed/convenience of locally-declared pre-loaded functions, and has no overhead after the first call, since the target functions are called directly in any subsequent calls.]==]

-- Lazy stub: on first call, rebinds `append` to the function exported by `table_module`, then forwards the call.
local function append(...)
	local target = require(table_module).append
	append = target
	return target(...)
end

-- Lazy stub: on first call, rebinds `contains` to the function exported by `table_module`, then forwards the call.
local function contains(...)
	local target = require(table_module).contains
	contains = target
	return target(...)
end

-- Lazy stub: the module named by `debug_track_module` is itself the callable; bind it on first use and forward.
local function debug_track(...)
	local target = require(debug_track_module)
	debug_track = target
	return target(...)
end

-- Lazy stub: on first call, rebinds `deep_copy` to `deepCopy` from `table_module`, then forwards the call.
local function deep_copy(...)
	local target = require(table_module).deepCopy
	deep_copy = target
	return target(...)
end

-- Lazy stub: the module named by `table_deep_equals_module` is itself the callable; bind it on first use and forward.
local function deep_equals(...)
	local target = require(table_deep_equals_module)
	deep_equals = target
	return target(...)
end

-- Lazy stub: on first call, rebinds `extend` to the function exported by `table_module`, then forwards the call.
local function extend(...)
	local target = require(table_module).extend
	extend = target
	return target(...)
end

-- Lazy stub: on first call, rebinds `format_categories` to the function exported by `utilities_module`, then forwards.
local function format_categories(...)
	local target = require(utilities_module).format_categories
	format_categories = target
	return target(...)
end

-- Lazy stub: on first call, rebinds `full_link` to the function exported by `links_module`, then forwards the call.
local function full_link(...)
	local target = require(links_module).full_link
	full_link = target
	return target(...)
end

-- Lazy stub: on first call, rebinds `insert_if_not` to `insertIfNot` from `table_module`, then forwards the call.
local function insert_if_not(...)
	local target = require(table_module).insertIfNot
	insert_if_not = target
	return target(...)
end

-- Lazy stub: on first call, rebinds `is_subset_list` to `isSubsetList` from `table_module`, then forwards the call.
local function is_subset_list(...)
	local target = require(table_module).isSubsetList
	is_subset_list = target
	return target(...)
end

-- Lazy stub: on first call, rebinds `iterate_from` to `iterateFrom` from `function_module`, then forwards the call.
local function iterate_from(...)
	local target = require(function_module).iterateFrom
	iterate_from = target
	return target(...)
end

-- Lazy stub: on first call, rebinds `join_multiparts` to the function exported by `form_of_functions_module`, then
-- forwards the call.
local function join_multiparts(...)
	local target = require(form_of_functions_module).join_multiparts
	join_multiparts = target
	return target(...)
end

-- Lazy stub: on first call, rebinds `load_data` to the function exported by `load_module`, then forwards the call.
local function load_data(...)
	local target = require(load_module).load_data
	load_data = target
	return target(...)
end

-- Lazy stub: on first call, rebinds `parse_inline_modifiers` to the function exported by `parse_utilities_module`,
-- then forwards the call.
local function parse_inline_modifiers(...)
	local target = require(parse_utilities_module).parse_inline_modifiers
	parse_inline_modifiers = target
	return target(...)
end

-- Lazy stub: on first call, rebinds `remove_links` to the function exported by `links_module`, then forwards the call.
local function remove_links(...)
	local target = require(links_module).remove_links
	remove_links = target
	return target(...)
end

-- Lazy stub: on first call, rebinds `safe_load_data` to the function exported by `load_module`, then forwards the call.
local function safe_load_data(...)
	local target = require(load_module).safe_load_data
	safe_load_data = target
	return target(...)
end

-- Lazy stub: on first call, rebinds `safe_require` to the function exported by `load_module`, then forwards the call.
local function safe_require(...)
	local target = require(load_module).safe_require
	safe_require = target
	return target(...)
end

-- Lazy stub: on first call, rebinds `serial_comma_join` to `serialCommaJoin` from `table_module`, then forwards.
local function serial_comma_join(...)
	local target = require(table_module).serialCommaJoin
	serial_comma_join = target
	return target(...)
end

-- Lazy stub: on first call, rebinds `shallow_copy` to `shallowCopy` from `table_module`, then forwards the call.
local function shallow_copy(...)
	local target = require(table_module).shallowCopy
	shallow_copy = target
	return target(...)
end

-- Lazy stub: on first call, rebinds `show_labels` to the function exported by `labels_module`, then forwards the call.
local function show_labels(...)
	local target = require(labels_module).show_labels
	show_labels = target
	return target(...)
end

-- Lazy stub: on first call, rebinds `slice` to the function exported by `table_module`, then forwards the call.
local function slice(...)
	local target = require(table_module).slice
	slice = target
	return target(...)
end

-- Lazy stub: on first call, rebinds `split` to the function exported by `string_utilities_module`, then forwards.
local function split(...)
	local target = require(string_utilities_module).split
	split = target
	return target(...)
end

-- Lazy stub: on first call, rebinds `ucfirst` to the function exported by `string_utilities_module`, then forwards.
local function ucfirst(...)
	local target = require(string_utilities_module).ucfirst
	ucfirst = target
	return target(...)
end

--[==[

Loaders for objects, which load data (or some other object) into some variable, which can then be accessed as "foo or get_foo()", where the function get_foo sets the object to "foo" and then returns it. This ensures they are only loaded when needed, and avoids the need to check for the existence of the object each time, since once "foo" has been set, "get_foo" will not be called again.]==]

-- `cat_functions` is filled in on demand; intended access pattern is `cat_functions or get_cat_functions()`.
local cat_functions
local function get_cat_functions()
	local loaded = require(form_of_functions_module).cat_functions
	cat_functions = loaded
	-- The getter is single-use: drop it once the value has been stored.
	get_cat_functions = nil
	return loaded
end

-- `default_pagename` is filled in on demand from the `pagename` field of the data loaded from
-- `headword_data_module`; intended access pattern is `default_pagename or get_default_pagename()`.
local default_pagename
local function get_default_pagename()
	local loaded = load_data(headword_data_module).pagename
	default_pagename = loaded
	-- The getter is single-use: drop it once the value has been stored.
	get_default_pagename = nil
	return loaded
end

-- `display_handlers` is filled in on demand; intended access pattern is
-- `display_handlers or get_display_handlers()`.
local display_handlers
local function get_display_handlers()
	local loaded = require(form_of_functions_module).display_handlers
	display_handlers = loaded
	-- The getter is single-use: drop it once the value has been stored.
	get_display_handlers = nil
	return loaded
end

-- `m_cats_data` is filled in on demand with the data loaded from `form_of_cats_module`; intended access pattern is
-- `m_cats_data or get_m_cats_data()`.
local m_cats_data
local function get_m_cats_data()
	local loaded = load_data(form_of_cats_module)
	m_cats_data = loaded
	-- The getter is single-use: drop it once the value has been stored.
	get_m_cats_data = nil
	return loaded
end

-- `m_data` is filled in on demand with the module named by `form_of_data_module`; intended access pattern is
-- `m_data or get_m_data()`.
local m_data
local function get_m_data()
	-- Needs require. (Unlike the sibling data getters, this one does not go through load_data; the reason is not
	-- visible in this file.)
	m_data, get_m_data = require(form_of_data_module), nil
	return m_data
end

-- `m_data1` is filled in on demand with the data loaded from `form_of_data1_module`; intended access pattern is
-- `m_data1 or get_m_data1()`.
local m_data1
local function get_m_data1()
	local loaded = load_data(form_of_data1_module)
	m_data1 = loaded
	-- The getter is single-use: drop it once the value has been stored.
	get_m_data1 = nil
	return loaded
end

-- `m_data2` is filled in on demand with the data loaded from `form_of_data2_module`; intended access pattern is
-- `m_data2 or get_m_data2()`.
local m_data2
local function get_m_data2()
	local loaded = load_data(form_of_data2_module)
	m_data2 = loaded
	-- The getter is single-use: drop it once the value has been stored.
	get_m_data2 = nil
	return loaded
end

-- `m_pos_data` is filled in on demand from the `pos_aliases` field of the data loaded from `headword_data_module`;
-- intended access pattern is `m_pos_data or get_m_pos_data()`.
local m_pos_data
local function get_m_pos_data()
	local loaded = load_data(headword_data_module).pos_aliases
	m_pos_data = loaded
	-- The getter is single-use: drop it once the value has been stored.
	get_m_pos_data = nil
	return loaded
end

--[==[ intro:
This module implements the underlying processing of {{tl|form of}}, {{tl|inflection of}} and specific variants such as
{{tl|past participle of}} and {{tl|alternative spelling of}}. Most of the logic in this file is to handle tags in
{{tl|inflection of}}. Other related files:

* [[Module:form of/templates]] contains the majority of the logic that implements the templates themselves.
* [[Module:form of/data/1]] is a data-only file containing information on the more common inflection tags, listing the
  tags, their shortcuts, the category they belong to (tense-aspect, case, gender, voice-valence, etc.), the appropriate
  glossary link and the wikidata ID.
* [[Module:form of/data/2]] is a data-only file containing information on the less common inflection tags, in the same
  format as [[Module:form of/data/1]].
* [[Module:form of/lang-data/LANGCODE]] is a data-only file containing information on the language-specific inflection
  tags for the language with code LANGCODE, in the same format as [[Module:form of/data/1]]. Language-specific tags
  override general tags.
* [[Module:form of/cats]] is a data-only file listing the language-specific categories that are added when the
  appropriate combinations of tags are seen for a given language.
* [[Module:form of/functions]] contains functions for use with [[Module:form of/data/1]] and [[Module:form of/cats]].
  They are contained in this module because data-only modules can't contain code. The functions in this file are of two
  types:
*# Display handlers allow for customization of the display of multipart tags (see below). Currently there is only
   one such handler, for handling multipart person tags such as `1//2//3`.
*# Cat functions allow for more complex categorization logic, and are referred to by name in [[Module:form of/cats]].
   Currently no such functions exist.

The following terminology is used in conjunction with {{tl|inflection of}}:


* A ''tag'' is a single grammatical item, as specified in a single numbered parameter of {{tl|inflection of}}. Examples

are `masculine`, `nominative`, or `first-person`. Tags may be abbreviated, e.g. `m` for `masculine`, `nom` for

`nominative`, or `1` for `first-person`. Such abbreviations are called ''aliases'', and some tags have multiple

equivalent aliases (e.g. `p` or `pl` for `plural`). The full, non-abbreviated form of a tag is called its

''canonical form''.

* The ''display form'' of a tag is the way it's displayed to the user. Usually the displayed text of the tag is the same

as its canonical form, and it normally functions as a link to a glossary entry explaining the tag. Usually the link is

to an entry in [[Appendix:Glossary]], but sometimes the tag is linked to an individual dictionary entry or to a

Wikipedia entry. Occasionally, the display text differs from the canonical form of the tag. An example is the tag

`comparative case`, which has the display text read as simply `comparative`. Normally, tags referring to cases don't

have the word "case" in them, but in this case the tag `comparative` was already used as an alias for the tag

`comparative degree`, so the tag was named `comparative case` to avoid clashing. A similar situation occurs with

`adverbial case` vs. the grammar tag `adverbial` (as in `adverbial participle`).

* A ''tag set'' is an ordered list of tags, which together express a single inflection, for example, `1|s|pres|ind`,

which can be expanded to canonical-form tags as `first-person|singular|present|indicative`.

* A ''conjoined tag set'' is a tag set that consists of multiple individual tag sets separated by a semicolon, e.g.

`1|s|pres|ind|;|2|s|imp`, which specifies two tag sets, `1|s|pres|ind` as above and `2|s|imp` (in canonical form,

`second-person|singular|imperative`). Multiple tag sets specified in a single call to {{tl|inflection of}} are

specified in this fashion. Conjoined tag sets can also occur in list-tag shortcuts.

* A ''multipart tag'' is a tag that embeds multiple tags within it, such as `f//n` or `nom//acc//voc`. These are used in

the case of [[syncretism]], when the same form applies to multiple inflections. Examples are the Spanish present

subjunctive, where the first-person and third-person singular have the same form (e.g. {{m|es|siga}} from

{{m|es|seguir|t=to follow}}), or Latin third-declension adjectives, where the dative and ablative plural of all

genders have the same form (e.g. {{m|la|omnibus}} from {{m|la|omnis|t=all}}). These would be expressed respectively as

`1//3|s|pres|sub` and `dat//abl|m//f//n|p`, where the use of the multipart tag compactly encodes the syncretism and

avoids the need to individually list out all of the inflections. Multipart tags currently display as a list separated

by a slash, e.g. ''dative/ablative'' or ''masculine/feminine/neuter'' where each individual word is linked

appropriately. As a special case, multipart tags involving persons display specially; for example, the multipart tag

`1//2//3` displays as ''first-, second- and third-person'', with the word "person" occurring only once.

* A ''two-level multipart tag'' is a special type of multipart tag that joins two or more tag sets instead of joining

individual tags. The tags within the tag set are joined by a colon, e.g. `1:s//3:p`, which is displayed as

''first-person singular and third-person plural'', e.g. for use with the form {{m|grc|μέλλον}} of the verb

{{m|grc|μέλλω|t=to intend}}, which uses the tag set `1:s//3:p|impf|actv|indc|unaugmented` to express the syncretism

between the first singular and third plural forms of the imperfect active indicative unaugmented conjugation.

Two-level multipart tags should be used sparingly; if in doubt, list out the inflections separately. [FIXME: Make

two-level multipart tags obsolete.]

* A ''shortcut'' is a tag that expands to any type of tag described above, or to any type of tag set described above.

Aliases are a particular type of shortcut whose expansion is a single non-multipart tag.

* A ''multipart shortcut'' is a shortcut that expands into a multipart tag, for example `123`, which expands to the

multipart tag `1//2//3`. Only the most common such combinations exist as shortcuts.

* A ''list shortcut'' is a special type of shortcut that expands to a list of tags instead of a single tag. For example,

the shortcut `1s` expands to `1|s` (first-person singular). Only the most common such combinations exist as shortcuts.

* A ''conjoined shortcut'' is a special type of list shortcut that consists of a conjoined tag set (multiple logical tag

sets). For example, the English language-specific shortcut `ed-form` expands to `spast|;|past|part`, expressing the

common syncretism between simple past and past participle in English (and in this case, `spast` is itself a list

shortcut that expands to `simple|past`).]==]

-- Add tracking category for PAGE when called from {{inflection of}} or
-- similar TEMPLATE. The tracking category linked to is
-- [[Wiktionary:Tracking/inflection of/PAGE]].
local function track(page)
	debug_track("inflection of/" ..
		-- avoid including links in pages (may cause error)
		-- (`..` takes only the first result of the final gsub, so the replacement count is discarded)
		page:gsub("%[", "("):gsub("%]", ")"):gsub("|", "!")
	)
end

-- Wrap `text` in an HTML span whose class attribute is `classes`.
-- If `classes` is nil or false, `text` is returned unchanged (no wrapping).
-- NOTE(review): the span markup was absent from the reviewed text (the format string had a single `%s`, which
-- returned `classes` and dropped `text`); the exact attribute quoting is reconstructed — confirm.
local function wrap_in_span(text, classes)
	if not classes then
		return text
	end
	return ('<span class="%s">%s</span>'):format(classes, text)
end

-- Format a single term object as a link, optionally wrapped in a span.
--
-- `data` fields read here:
-- * `.termobj`: the term object handed to full_link(); must have a `.lang`.
-- * `.face`: second argument to full_link().
-- * `.span_classes`: classes for the span around the finished link (nil/false for none).
-- * `.ok_to_destructively_modify`: if set, `.termobj` is modified in place instead of being shallow-copied.
-- * `.overall_lang`: if given and its code differs from the term's language code, the display text returned by
--   insert_source_cat_get_display() in the etymology module is placed before the term via `termobj.pretext`.
-- * `.text_classes`: classes for the span around that language text.
--
-- Returns the formatted link and a list of categories (empty unless the languages differ).
local function show_linked_term(data)
	local termobj, face, span_classes, ok_to_destructively_modify, overall_lang, text_classes =
		data.termobj, data.face, data.span_classes, data.ok_to_destructively_modify, data.overall_lang,
		data.text_classes
	local pretext_lang
	local categories = {}
	if overall_lang and overall_lang:getCode() ~= termobj.lang:getCode() then
		local lang_display
		lang_display, categories = require(etymology_module).insert_source_cat_get_display {
			lang = overall_lang,
			source = termobj.lang,
		}
		pretext_lang = wrap_in_span(lang_display .. " ", text_classes)
	end

	local need_to_show_qualifiers = termobj.q or termobj.qq or termobj.a or termobj.aa or termobj.l or termobj.ll or
		termobj.refs
	-- Only copy when we are about to write to the term object and the caller has not allowed in-place changes.
	local need_to_copy = not ok_to_destructively_modify and (pretext_lang or need_to_show_qualifiers)
	if need_to_copy then
		termobj = shallow_copy(termobj)
	end
	if pretext_lang then
		termobj.pretext = pretext_lang
	end
	if need_to_show_qualifiers then
		termobj.show_qualifiers = true
	end

	return wrap_in_span(full_link(termobj, face), span_classes), categories
end

--[==[

Lowest-level implementation of form-of templates, including the general {{tl|form of}} as well as those that deal with

inflection tags, such as the general {{tl|inflection of}}, semi-specific variants such as {{tl|participle of}}, and

specific variants such as {{tl|past participle of}}. `data` contains all the information controlling the display, with

the following fields:

* `.lang`: Overall language of the form-of template. If specified, any lemmas, enclitics or base lemmas that are of a

different language will have that language displayed before the term in question.

* `.text`: Text to insert before the lemmas. Wrapped in the value of `.text_classes`, or its default; see below.

* `.lemmas`: List of objects describing the lemma(s) of which the term in question is a non-lemma form. These are passed

directly to {full_link()} in [[Module:links]]. Each object should have at minimum a `.lang` field containing the

language of the lemma and a `.term` field containing the lemma itself. Each object is formatted using {full_link()}

and then if there are more than one, they are joined using {serialCommaJoin()} in [[Module:table]]. Alternatively,

`.lemmas` can be a string, which is displayed directly, or omitted, to show no lemma links and omit the connecting

text.

* `.lemma_face`: "Face" to use when displaying the lemma objects. Usually should be set to {"term"}.

* `.conj`: Conjunction or separator to use when joining multiple lemma objects. If {nil}, defaults to {"and"}. If this

has the value {false}, the lemmas are preceded with the `.separator` field in each lemma.

* `.enclitics`: List of enclitics to display after the lemmas, in parens.

* `.enclitic_conj`: Conjunction or separator to use when joining multiple enclitics. Defaults to {"and"}.

* `.base_lemmas`: List of base lemmas to display after the lemmas, in the case where the lemmas in `.lemmas` are

themselves forms of another lemma (the base lemma), e.g. a comparative, superlative or participle. Each object is of

the form { { paramobj = PARAM_OBJ, lemmas = {LEMMA_OBJ, LEMMA_OBJ, ...}, conj = "CONJ" }} where PARAM_OBJ describes

the properties of the base lemma parameter (i.e. the relationship between the intermediate and base lemmas);

LEMMA_OBJ is an object suitable to be passed to {full_link()} in [[Module:links]]; and CONJ is the conjunction to

join multiple lemmas with, defaulting to {"and"}. PARAM_OBJ is of the format

{ { param = "PARAM", tags = {"TAG", "TAG", ...} }} where PARAM is the name of the parameter to {{tl|inflection of}}

etc. that holds the base lemma(s) of the specified relationship and the tags describe the relationship, such as

{ {"comd"}} or { {"past", "part"}}.

* `.text_classes`: CSS classes used to wrap the tag text and lemma links. Default is

{"form-of-definition use-with-mention"}. Use `false` for no wrapping.

* `.lemma_classes`: Additional CSS classes used to wrap the lemma links. Default is {"form-of-definition-link"}.

Use `false` for no wrapping.

* `.posttext`: Additional text to display after the lemma links.

* `.ok_to_destructively_modify`: If set, data structures (including the nested lemma structures) can be modified

in-place to save memory; otherwise they will be copied before modifying.

Returns two values, the formatted string and any categories to add the page to (which will arise if `.lang` is

specified and a language other than `.lang` is given in one of the lemmas in `.lemmas` or enclitics in `.enclitics`).

]==]

function export.format_form_of(data)
	if type(data) ~= "table" then
		error("Internal error: First argument must now be a table of arguments")
	end

	-- nil selects the default classes (unless `.nowrap` is set); false disables wrapping.
	local text_classes = data.text_classes
	if text_classes == nil and not data.nowrap then
		text_classes = "form-of-definition use-with-mention"
	end
	local lemma_classes = data.lemma_classes
	if lemma_classes == nil then
		lemma_classes = "form-of-definition-link"
	end

	local parts = {}
	local categories = {}

	-- Open/close the span that carries `text_classes`. The span is closed and reopened around pieces that must
	-- not inherit its styling (see the enclitics comment below).
	-- NOTE(review): the literal markup was missing from the reviewed text (the strings were empty); reconstructed
	-- as <span class="...">...</span> — confirm against the live module.
	local function open_text_span()
		if text_classes then
			insert(parts, '<span class="' .. text_classes .. '">')
		end
	end
	local function close_text_span()
		if text_classes then
			insert(parts, "</span>")
		end
	end

	-- Format one lemma/enclitic object and accumulate any categories it produces.
	local function link_term(termobj)
		local linked_term, this_categories = show_linked_term {
			termobj = termobj,
			face = data.lemma_face,
			span_classes = lemma_classes,
			ok_to_destructively_modify = data.ok_to_destructively_modify,
			overall_lang = data.lang,
			text_classes = text_classes,
		}
		if this_categories[1] then
			extend(categories, this_categories)
		end
		return linked_term
	end

	open_text_span()
	insert(parts, data.text)
	if data.text ~= "" and data.lemmas then
		insert(parts, " ")
	end

	if data.lemmas then
		if type(data.lemmas) == "string" then
			insert(parts, wrap_in_span(data.lemmas, lemma_classes))
		else
			local formatted_terms = {}
			for _, lemma in ipairs(data.lemmas) do
				local linked_term = link_term(lemma)
				-- With conj == false the caller supplies an explicit separator on each lemma.
				if data.conj == false and lemma.separator then
					insert(formatted_terms, lemma.separator)
				end
				insert(formatted_terms, linked_term)
			end
			if data.conj == false then
				insert(parts, concat(formatted_terms))
			else
				insert(parts, serial_comma_join(formatted_terms, {conj = data.conj or "and"}))
			end
		end
		-- NOTE(review): the nesting of this block relative to the branches above could not be determined from the
		-- reviewed text; placed so that it applies to both string and list lemmas — confirm.
		if data.lit then
			insert(parts, ", literally " .. require(links_module).mark(data.lit, "gloss"))
		end
	end

	if data.enclitics and #data.enclitics > 0 then
		-- The outer parens need to be outside of the text_classes span so they show in upright instead of italic, or
		-- they will clash with upright parens generated by link annotations such as transliterations and pos=.
		close_text_span()
		local formatted_terms = {}
		for _, enclitic in ipairs(data.enclitics) do
			-- FIXME, should we have separate clitic face and/or classes?
			insert(formatted_terms, link_term(enclitic))
		end
		insert(parts, " (")
		insert(parts, wrap_in_span("with enclitic" .. (#data.enclitics > 1 and "s" or "") .. " ", text_classes))
		insert(parts, serial_comma_join(formatted_terms, {conj = data.enclitic_conj or "and"}))
		insert(parts, ")")
		open_text_span()
	end

	if data.base_lemmas and #data.base_lemmas > 0 then
		for _, base_lemma in ipairs(data.base_lemmas) do
			insert(parts, ", the ")
			-- tagged_inflections() is passed text_classes itself, so step outside our span around its output.
			close_text_span()
			-- The extra parentheses keep only the first return value.
			insert(parts, (export.tagged_inflections {
				lang = data.lang or base_lemma.lemmas[1].lang,
				tags = base_lemma.paramobj.tags,
				lemmas = base_lemma.lemmas,
				conj = base_lemma.conj or "and",
				lemma_face = data.lemma_face,
				no_format_categories = true,
				nocat = true,
				text_classes = data.text_classes,
				-- Was a bare `ok_to_destructively_modify`, which is not a variable in this function (always nil).
				ok_to_destructively_modify = data.ok_to_destructively_modify,
			}))
			open_text_span()
		end
	end

	-- FIXME, should posttext go before enclitics? If so we need to have separate handling for the
	-- final colon when there are multiple tag sets in tagged_inflections().
	if data.posttext then
		insert(parts, data.posttext)
	end
	close_text_span()

	return concat(parts), categories
end

format_form_of = export.format_form_of

--[==[

Return true if `tag` contains an internal link or HTML.]==]

-- Plain-text (non-pattern) search for, in order, "[[", "|" and "<". Returns the start index of the first of
-- these that occurs in `tag`, or nil if none does, so the result is usable as a truth value.
function export.is_link_or_html(tag)
	local pos = tag:find("[[", nil, true)
	if not pos then
		pos = tag:find("|", nil, true)
	end
	if not pos then
		pos = tag:find("<", nil, true)
	end
	return pos
end

is_link_or_html = export.is_link_or_html

--[==[
Look up a tag (either a shortcut of any sort or a canonical long-form tag) and return its expansion. The expansion
will be a string unless the shortcut is a list-tag shortcut such as `1s`; in that case, the expansion will be a
list. The caller must handle both cases. Only one level of expansion happens; hence, `acc` expands to {"accusative"},
`1s` expands to { {"1", "s"}} (not to { {"first", "singular"}}) and `123` expands to {"1//2//3"}. The expansion will be
the same as the passed-in tag in the following circumstances:
# The tag is `;` (this is special-cased, and no lookup is done).
# The tag is a multipart tag such as `nom//acc` (this is special-cased, and no lookup is done).
# The tag contains a raw link (this is special-cased, and no lookup is done).
# The tag contains HTML (this is special-cased, and no lookup is done).
# The tag is already a canonical long-form tag.
# The tag is unrecognized.

This function first looks up in the lang-specific data module [[Module:form of/lang-data/LANGCODE]], then in
[[Module:form of/data/1]] (which includes more common non-lang-specific tags) and finally (only if the tag is not
recognized as a shortcut or canonical tag, and is not of types 1-4 above) in [[Module:form of/data/2]].

If the expansion is a string and is different from the tag, track it if `do_track` is true.]==]

~~-- and return its expansion. The expansion will be a string unless the~~

function export.lookup_shortcut(tag, lang, do_track)

~~-- shortcut is a list-tag shortcut such as "1s"; in that case, the expansion~~

~~-- will be a list. The caller must handle both cases. Only one level of~~

~~-- expansion happens; hence, "acc" expands to "accusative", "1s" expands to~~

~~-- {"1", "s"} (not to {"first", "singular"}) and "123" expands to "1//2//3".~~

~~-- The expansion will be the same as the passed-in tag in the following~~

~~-- circumstances:~~

--

~~-- 1. The tag is ";" (this is special-cased, and no lookup is done).~~

~~-- 2. The tag is a multipart tag such as "nom//acc" (this is special-cased,~~

~~-- and no lookup is done).~~

~~-- 3. The tag contains a raw link (this is special-cased, and no lookup is~~

~~-- done).~~

~~-- 4. The tag contains HTML (this is special-cased, and no lookup is done).~~

~~-- 5. The tag is already a canonical long-form tag.~~

~~-- 6. The tag is unrecognized.~~

--

~~-- This function first looks up in [[Module:form of/data]] (which includes~~

~~-- more common tags) and then (only if the tag is not recognized as a~~

~~-- shortcut or canonical tag, and is not of types 1-4 above) in~~

~~-- [[Module:form of/data2]].~~

--

-- If the expansion is a string and is different from the tag, track it if

~~-- DO_TRACK~~ is true.

function export.lookup_shortcut(tag)

-- If there is HTML or a link in the tag, return it directly; don't try

-- to look it up, which will fail.

Line 188:

Line 521:

return tag

end

local ~~m_data~~ = mw.~~loadData~~(~~"Module:form of/data"~~)

local expansion

-- If this is a canonical long-form tag, just return it, and don't

while lang do

-- check for shortcuts (which will cause [[Module:form of/~~data2~~]] to be

local langdata = safe_load_data(form_of_lang_data_module_prefix .. lang:getCode())

-- ~~loaded~~).

-- If this is a canonical long-form tag, just return it, and don't check for shortcuts. This is an

if ~~m_data~~.tags[tag] then

-- optimization; see below.

~~return~~ tag

if langdata then

if langdata.tags[tag] then

return tag

end

expansion = langdata.shortcuts[tag]

if expansion then

break

end

-- If the language has a parent (i.e. a superordinate variety), try again with that.

lang = lang:getParent()

end

if not expansion then

-- If this is a canonical long-form tag, just return it, and don't check for shortcuts (which will cause

-- [[Module:form of/data/2]] to be loaded, because there won't be a shortcut entry in [[Module:form of/data/1]] --

-- or, for that matter, in [[Module:form of/data/2]]). This is an optimization; the code will still work without

-- it, but will use up more memory.

if (m_data1 or get_m_data1()).tags[tag] then

return tag

end

expansion = m_data1.shortcuts[tag]

end

~~local expansion = m_data.shortcuts[tag]~~

if not expansion then

~~local~~ m_data2 ~~= mw.loadData~~(~~"Module:form of/data2"~~)

expansion = (m_data2 or get_m_data2()).shortcuts[tag]

~~expansion = m_data2~~.shortcuts[tag]

end

if not expansion then

return tag

end

-- Maybe track the expansion if it's not the same as the raw tag.

if do_track and expansion ~= tag and type(expansion) == "string" then

track("tag/" .. tag)

end

return expansion

end

lookup_shortcut = export.lookup_shortcut

--[==[

-- Look up a normalized/canonicalized tag and return the data object

Look up a normalized/canonicalized tag and return the data object associated with it. If the tag isn't found, return

-- associated with it. If the tag isn't found, return nil. This first looks up

nil. This first looks up in the lang-specific data module [[Module:form of/lang-data/LANGCODE]], then in

-- in [[Module:form of/data]] (which includes more common tags) and then in

[[Module:form of/data/1]] (which includes more common non-lang-specific tags) and then finally in

-- [[Module:form of/~~data2~~]].

[[Module:form of/data/2]].]==]

function export.lookup_tag(tag)

function export.lookup_tag(tag, lang)

local ~~m_data~~ = mw.~~loadData~~(~~"Module~~:~~form of/data"~~)

while lang do

local tagobj = ~~m_data~~.tags[tag]

local langdata = safe_load_data(form_of_lang_data_module_prefix .. lang:getCode())

local tag = langdata and langdata.tags[tag]

if tag then

return tag

end

-- If the language has a parent (i.e. a superordinate variety), try again with that.

lang = lang:getParent()

end

local tagobj = (m_data1 or get_m_data1()).tags[tag]

if tagobj then

return tagobj

end

local m_data2 ~~= mw.loadData~~(~~"Module:form of/data2"~~)

local tagobj2 = (m_data2 or get_m_data2()).tags[tag]

~~local tagobj2 = m_data2~~.tags[tag]

if tagobj2 then

return tagobj2

Line 224:

Line 587:

return nil

end

lookup_tag = export.lookup_tag

-- Normalize a single tag, which may be a shortcut but should not be a multipart tag, a multipart shortcut or a list

-- Normalize a single tag, which may be a shortcut but should not be a

-- shortcut.

-- multipart tag, a multipart~~-tag~~ shortcut or a list-~~tag~~ shortcut.

local function normalize_single_tag(tag, lang, do_track)

local function normalize_single_tag(tag)

local expansion = lookup_shortcut(tag, lang, do_track)

local expansion = ~~export.~~lookup_shortcut(tag)

if type(expansion) ~= "string" then

error("Tag '" .. tag .. "' is a list~~-tag~~ shortcut, which is not allowed here")

error("Tag '" .. tag .. "' is a list shortcut, which is not allowed here")

end

tag = expansion

if not lookup_tag(tag, lang) and do_track then

-- If after all expansions and normalizations we don't recognize the canonical tag, track it.

track("unknown")

track("unknown/" .. tag)

end

return tag

end

--[=[

-- Normalize a component of a multipart tag. This should not have any // in it,

Normalize a component of a multipart tag. This should not have any // in it, but may join multiple individual tags with

-- but may join multiple individual tags with a colon, and may be a single

a colon, and may be a single list-tag shortcut, which is treated as if colon-separated. The return value may be a list

-- list-tag shortcut, which is ~~treates~~ as if colon-separated. If

of tags.

~~-- RECOMBINE_TAGS isn't given, the~~ return value may be a list of tags;

]=]

~~-- otherwise, it will always be a string, and multiple tags will be~~

local function normalize_multipart_component(tag, lang, do_track)

~~-- represented as canonical-form tags joined by ":".~~

-- If there is HTML or a link in the tag, don't try to split on colon. A colon may legitimately occur in either one,

local function normalize_multipart_component(tag, ~~recombine_tags~~)

-- and we don't want these things parsed. Note that we don't do this check before splitting on //, which we don't

-- If there is HTML or a link in the tag, don't try to split on colon.

-- expect to occur in links or HTML; see comment in normalize_tag().

-- A colon may legitimately occur in either one, and we don't want

-- these things parsed. Note that we don't do this check before splitting

-- on //, which we don't expect to occur in links or HTML; see comment

-- in normalize_tag().

if is_link_or_html(tag) then

return tag

end

local components = ~~rsplit~~(tag, ":", true)

local components = split(tag, ":", true)

if #components == 1 then

-- We allow list-tag shortcuts inside of multipart tags, e.g.

-- '1s//3p'. Check for this now.

tag = ~~export.~~lookup_shortcut(tag)

tag = lookup_shortcut(tag, lang, do_track)

if type(tag) == "table" then

-- Temporary tracking as we will disallow this.

track("list-tag-inside-of-multipart")

-- We found a list-tag shortcut; treat as if colon-separated.

components = tag

else

return normalize_single_tag(tag)

return normalize_single_tag(tag, lang, do_track)

end

local normtags = {}

-- Temporary tracking as we will disallow this.

track("two-level-multipart")

for _, component in ipairs(components) do

~~table~~.insert(normtags, normalize_single_tag(component))

if do_track then

-- There are multiple components; track each of the individual

-- raw tags.

track("tag/" .. component)

end

insert(normtags, normalize_single_tag(component, lang, do_track))

end

~~if recombine_tags then~~

return normtags

return ~~table.concat(~~normtags~~, ":")~~

~~else~~

~~return normtags~~

~~end~~

end

--[=[

-- Normalize a single tag. ~~If RECOMBINE_TAGS isn't given, the~~ return value

Normalize a single tag. The return value may be a list (in the case of multipart tags), which will contain nested lists

-- may be a list (in the case of multipart tags), which will contain nested

in the case of two-level multipart tags.

~~-- lists~~ in the case of two-level multipart tags~~; otherwise, it will always~~

]=]

~~-- be a string, and multipart tags will be represented as canonical-form tags~~

local function normalize_tag(tag, lang, do_track)

~~-- joined by "//" and/or ":".~~

-- We don't check for links or HTML before splitting on //, which we don't expect to occur in links or HTML. Doing

local function normalize_tag(tag, ~~recombine_multitags~~)

-- it this way allows for a tag like '{{lb|grc|Epic}}//{{lb|grc|Ionic}}' to function correctly (the template calls

-- We don't check for links or HTML before splitting on //, which we

-- will be expanded before we process the tag, and will contain links and HTML). The only check we do is for a URL,

-- don't expect to occur in links or HTML. Doing it this way allows for

-- which shouldn't normally occur, but might if the user tries to put an external link into the tag. URL's with //

-- a tag like '{{lb|grc|Epic}}//{{lb|grc|Ionic}}' to function correctly

-- normally have the sequence ://, which should never normally occur when // and : are used in their normal ways.

-- ~~(the template calls~~ will be expanded before we process the tag, and

-- will contain links and HTML). The only check we do is for a URL,

-- which shouldn't normally occur, but might if the user tries to put

-- an external link into the tag. URL's with // ~~normally have the~~

-- sequence ://, which should never normally occur when // and : are

-- used in their normal ways.

if tag:find("://", nil, true) then

return tag

end

local split_tags = ~~rsplit~~(tag, "//", true)

local split_tags = split(tag, "//", true)

if #split_tags == 1 then

local retval = normalize_multipart_component(tag, ~~recombine_multitags~~)

local retval = normalize_multipart_component(tag, lang, do_track)

if type(retval) == "table" then

-- The user gave a tag like '1:s', i.e. with colon but without

-- The user gave a tag like '1:s', i.e. with colon but without //. Allow this, but we need to return a

-- //. Allow this, but we need to return a ~~nested list. Note,~~

-- nested list.

-- ~~this will never happen when RECOMBINE_TAGS is given~~.

return {retval}

end

Line 310:

Line 671:

local normtags = {}

for _, single_tag in ipairs(split_tags) do

~~table.insert(normtags, normalize_multipart_component(single_tag,~~

if do_track then

~~recombine_multitags))~~

-- If the tag was a multipart tag, track each of the individual raw tags.

~~end~~

track("tag/" .. single_tag)

if ~~recombine_multitags~~ then

end

~~return table~~.~~concat~~(~~normtags,~~ "//")

insert(normtags, normalize_multipart_component(single_tag, lang, do_track))

~~else~~

~~return~~ normtags

end

return normtags

end

--[==[

Normalize a tag set (a list of tags) into its canonical-form tags. The return value is a list of normalized tag sets

(a list because there may be conjoined shortcuts among the input tags). A normalized tag set is a list of tag

elements, where each element is either a string (the canonical form of a tag), a list of such strings (in the case of

multipart tags) or a list of lists of such strings (in the case of two-level multipart tags). For example, the multipart

tag `nom//acc//voc` will be represented in canonical form as { {"nominative", "accusative", "vocative"}}, and the

two-level multipart tag `1:s//3:p` will be represented as { {{"first-person", "singular"}, {"third-person", "plural"}}}.

~~-- Normalize a tag set~~ (~~a list of tags~~) ~~into a list of canonical~~-form ~~tags~~

Example 1:

-- (~~which~~ -~~- may be larger due to the possibility of list-tag shortcuts~~).

-- ~~If RECOMBINE_TAGS isn't given~~, the ~~return~~ list ~~may itself contains lists;~~

{normalize_tag_set({"nom//acc//voc", "n", "p"})} = { {{{"nominative", "accusative", "vocative"}, "masculine", "plural"}}}

-- ~~in particular~~, ~~multipart~~ tags ~~will be represented as lists. Specifically~~,

-- ~~the list will consist~~ of ~~the elements~~ of ~~the multipart~~ tag~~, which will~~

Example 2:

-- ~~either be canonical~~-~~form strings or (in the case~~ of ~~two~~-~~level multipart~~

-- ~~tags) nested lists~~ of ~~canonical~~-~~form strings. For example, the multipart~~

{normalize_tag_set({"ed-form"}, ENGLISH)} = { {{"simple", "past"}, {"past", "participle"}}}

-- tag ~~''nom~~//~~acc~~//~~voc'' will expand to~~

-- ~~{"nominative", "accusative", "vocative"}~~

Example 3:

-- ~~and the two-level multipart tag ''1~~:s//~~3:p'' will expand to~~

-- ~~{{"first~~-~~person", "singular"}, {"third~~-~~person", "plural"}}.~~

{normalize_tag_set({"archaic", "ed-form"}, ENGLISH)} = { {{"archaic", "simple", "past"}, {"archaic", "past", "participle"}}}]==]

-- ~~If RECOMBINE_TAGS is given, multipart tags will be represented in string~~

function export.normalize_tag_set(tag_set, lang, do_track)

-- ~~form, i.e. as canonical-form tags joined by "~~//~~" and~~/~~or ":".~~

-- We track usage of shortcuts, normalized forms and (in the case of multipart tags or list tags) intermediate

~~function export.normalize_tags(tags, recombine_multitags)~~

-- forms. For example, if the tags 1s|mn|gen|indefinite are passed in, we track the following:

local ~~ntags~~ = {}

-- [[Wiktionary:Tracking/inflection of/tag/1s]]

for _, tag in ipairs(~~tags~~) do

-- [[Wiktionary:Tracking/inflection of/tag/1]]

-- Expand the tag, which may generate a new tag (either a

-- [[Wiktionary:Tracking/inflection of/tag/s]]

-- fully canonicalized tag, a multipart tag, or a list of tags).

-- [[Wiktionary:Tracking/inflection of/tag/first-person]]

tag = ~~export.~~lookup_shortcut(tag)

-- [[Wiktionary:Tracking/inflection of/tag/singular]]

-- [[Wiktionary:Tracking/inflection of/tag/mn]]

-- [[Wiktionary:Tracking/inflection of/tag/m//n]]

-- [[Wiktionary:Tracking/inflection of/tag/m]]

-- [[Wiktionary:Tracking/inflection of/tag/n]]

-- [[Wiktionary:Tracking/inflection of/tag/masculine]]

-- [[Wiktionary:Tracking/inflection of/tag/neuter]]

-- [[Wiktionary:Tracking/inflection of/tag/gen]]

-- [[Wiktionary:Tracking/inflection of/tag/genitive]]

-- [[Wiktionary:Tracking/inflection of/tag/indefinite]]

local output_tag_set = {}

local saw_semicolon = false

for _, tag in ipairs(tag_set) do

if do_track then

-- Track the raw tag.

track("tag/" .. tag)

end

-- Expand the tag, which may generate a new tag (either a fully canonicalized tag, a multipart tag, or a list

-- of tags).

tag = lookup_shortcut(tag, lang, do_track)

if type(tag) == "table" then

if contains(tag, ";") then

-- If we saw a conjoined shortcut, we need to use a more general algorithm that can expand a single

-- tag set into multiple.

saw_semicolon = true

break

end

for _, t in ipairs(tag) do

~~table~~.insert(~~ntags~~, normalize_tag(t, ~~recombine_multitags~~))

if do_track then

-- If the tag expands to a list of raw tags, track each of those.

track("tag/" .. t)

end

insert(output_tag_set, normalize_tag(t, lang, do_track))

end

else

~~table.~~insert(~~ntags~~, normalize_tag(tag, ~~recombine_multitags~~))

insert(output_tag_set, normalize_tag(tag, lang, do_track))

end

~~return ntags~~

~~end~~

if not saw_semicolon then

return {output_tag_set}

end

-- ~~Split~~ a ~~tag set containing two-level multipart tags into one or~~ more ~~tag sets not containing such tags.~~

-- Use a more general algorithm that handles conjoined shortcuts.

~~-- Single-level multipart tags are left alone. (If we need to, a slight modification of the following code~~

output_tag_set = {}

~~-- will also split single-level multipart tags.) This assumes~~ that ~~multipart tags are represented as lists~~

~~-- and two-level multipart tags are represented as lists of lists, as is output by normalize_tags()~~.

~~-- NOTE: We have to be careful to properly handle imbalanced two-level multipart tags such as~~

~~-- <code>def:s//p</code> (or the reverse, <code>s//def:p</code>).~~

~~function export.split_two_level_multipart_tag_set(tag_set)~~

~~-- This would be a whole lot easier in Python, with built-in support for~~

~~-- slicing and array concatenation.~~

for i, tag in ipairs(tag_set) do

if do_track then

-- Track the raw tag.

track("tag/" .. tag)

end

-- Expand the tag, which may generate a new tag (either a fully canonicalized tag, a multipart tag, or a list

-- of tags).

tag = lookup_shortcut(tag, lang, do_track)

if type(tag) == "table" then

~~-- We saw a multipart~~ tag~~. Check if any of the parts are two-level.~~

local output_tag_sets = {}

local ~~saw_two_level_tag~~ = ~~false~~

local shortcut_tag_sets = split_tag_set(tag)

for _, ~~first_level_tag~~ in ipairs(~~tag~~) do

local normalized_shortcut_tag_sets = {}

~~if type~~(~~first_level_tag) == "table" then~~

for _, shortcut_tag_set in ipairs(shortcut_tag_sets) do

~~saw_two_level_tag = true~~

extend(normalized_shortcut_tag_sets,

~~break~~

normalize_tag_set(shortcut_tag_set, lang, do_track))

~~end~~

end

~~if saw_two_level_tag then~~

local after_tags = slice(tag_set, i + 1)

~~-- We found a two-level multipart tag.~~

local normalized_after_tags_sets = normalize_tag_set(after_tags, lang, do_track)

-- (1) ~~Extract the preceding tags.~~

for _, normalized_shortcut_tag_set in ipairs(normalized_shortcut_tag_sets) do

local ~~pre_tags~~ = {}

for _, normalized_after_tags_set in ipairs(normalized_after_tags_sets) do

for ~~j=1~~,~~i-1~~ do

insert(output_tag_sets, append(output_tag_set, normalized_shortcut_tag_set,

~~table.~~insert(~~pre_tags~~, ~~tag_set[j]~~)

normalized_after_tags_set))

end

~~-- (2) Extract the following tags.~~

~~local post_tags = {}~~

~~for j=i+1,#tag_set do~~

~~table.insert(post_tags, tag_set[j])~~

~~end~~

~~-- (3) Loop over each tag set alternant in the two-level multipart tag.~~

~~-- For each alternant, form the tag set consisting of pre_tags + alternant + post_tags,~~

~~-- and recursively split that tag set.~~

~~local resulting_tag_sets = {}~~

~~for _, first_level_tag_set in ipairs(tag) do~~

~~local expanded_tag_set = {}~~

~~for _, pre_tag in ipairs(pre_tags) do~~

~~table.insert(expanded_tag_set, pre_tag)~~

~~end~~

~~-- The second level may have a string or a list.~~

~~if type(first_level_tag_set) == "table" then~~

~~for _, second_level_tag in ipairs(first_level_tag_set) do~~

~~table.insert(expanded_tag_set, second_level_tag)~~

~~end~~

~~else~~

~~table.insert(expanded_tag_set, first_level_tag_set)~~

~~end~~

~~for _, post_tag in ipairs(post_tags) do~~

~~table.insert(expanded_tag_set, post_tag)~~

~~end~~

~~for _, split_tag_set in ipairs(export.split_two_level_multipart_tag_set(expanded_tag_set)) do~~

~~table.insert(resulting_tag_sets, split_tag_set)~~

~~end~~

~~return resulting_tag_sets~~

end

return output_tag_sets

else

insert(output_tag_set, normalize_tag(tag, lang, do_track))

end

~~return {tag_set}~~

error("Internal error: Should not get here")

end

normalize_tag_set = export.normalize_tag_set

--[==[

-- ~~Given~~ a ~~list~~ of ~~tags, split~~ into tag sets ~~(separated by semicolons in the initial list of tags)~~.

Split a tag set that may consist of multiple semicolon-separated tag sets into the component tag sets.]==]

function export.~~split_tags_into_tag_sets~~(~~tags~~)

function export.split_tag_set(tag_set)

local ~~tag_set_group~~ = {}

local split_tag_sets = {}

local cur_tag_set = {}

for _, tag in ipairs(~~tags~~) do

for _, tag in ipairs(tag_set) do

if tag == ";" then

if #cur_tag_set > 0 then

~~table.~~insert(~~tag_set_group~~, cur_tag_set)

insert(split_tag_sets, cur_tag_set)

end

cur_tag_set = {}

else

~~table.~~insert(cur_tag_set, tag)

insert(cur_tag_set, tag)

end

if #cur_tag_set > 0 then

~~table.~~insert(~~tag_set_group~~, cur_tag_set)

insert(split_tag_sets, cur_tag_set)

end

return ~~tag_set_group~~

return split_tag_sets

end

split_tag_set = export.split_tag_set

local tag_set_param_mods = {

lb = {

item_dest = "labels",

convert = function(arg, parse_err)

return split(arg, "//", true)

end,

}

-- ~~Given~~ a ~~list of tags, split into~~ tag ~~sets~~ (~~separated by semicolons in the initial~~ list of tags).

--[==[

-- ~~Then, potentially split each~~ tag set ~~into multiple~~ tag ~~sets if there are any two~~-~~level multipart~~

Parse tag set properties from a tag set (list of tags). Currently no per-tag properties are recognized, and the only

~~-- tags in those~~ tag ~~sets~~.

per-tag-set property recognized is `<lb:...>` for specifying label(s) for the tag set. Per-tag-set properties must be

function export.~~split_tags_into_tag_sets_and_expand_two_level_multipart_tags~~(tags)

attached to the last tag.]==]

-- ~~First~~, ~~split into tag sets~~.

function export.parse_tag_set_properties(tag_set)

~~local tag_sets = export~~.~~split_tags_into_tag_sets(~~tags)

local function generate_tag_set_obj(last_tag)

-- ~~Now split any two~~-level ~~multipart tags~~.

tag_set[#tag_set] = last_tag

~~local resulting_tag_sets =~~ {}

return {tags = tag_set}

~~for _~~, ~~tag_set in ipairs~~(~~tag_sets~~) do

end

~~for _~~, ~~resulting_tag_set in ipairs(export.split_two_level_multipart_tag_set(tag_set)) do~~

local last_tag = tag_set[#tag_set]

~~table.insert(resulting_tag_sets~~, ~~resulting_tag_set~~)

-- Check for inline modifier, e.g. מרים<tr:Miryem>. But exclude HTML entry with <span ...>, <i ...>, <br/> or

~~end~~

-- similar in it, caused by wrapping an argument in {{l|...}}, {{af|...}} or similar. Basically, all tags of

-- the sort we parse here should consist of a less-than sign, plus letters, plus a colon, e.g. <lb:...>, so if

-- we see a tag on the outer level that isn't in this format, we don't try to parse it. The restriction to the

-- outer level is to allow generated HTML inside of e.g. qualifier tags, such as foo<q:similar to {{m|fr|bar}}>.

if last_tag:find("<", nil, true) and not last_tag:find("^[^<]*<%l*[^%l:]") then

return parse_inline_modifiers(last_tag, {

param_mods = tag_set_param_mods,

generate_obj = generate_tag_set_obj,

})

else

return generate_tag_set_obj(last_tag)

end

~~return resulting_tag_sets~~

end

parse_tag_set_properties = export.parse_tag_set_properties

local function normalize_pos(pos)

function ~~export.~~normalize_pos(pos)

if not pos then

return ~~m_pos~~[pos] or pos

return nil

end

return (m_pos_data or get_m_pos_data())[pos] or pos

end

-- Return the display form of a single canonical-form tag. The value

-- passed in must be a string (i.e. it cannot be a list describing a

-- multipart tag). To handle multipart tags, use get_tag_display_form().

local function get_single_tag_display_form(normtag)

-- A truthy `nolink` suppresses linking.

local data = ~~export.~~lookup_tag(normtag)

local function get_single_tag_display_form(normtag, lang, nolink)

local data = lookup_tag(normtag, lang)

local display = normtag

-- If the tag has a special display form, use it

if data and data.display then

~~normtag~~ = data.display

display = data.display

if nolink then

display = remove_links(display)

end

-- If there is a nonempty glossary index, then show a link to it

if data and data.glossary then

if not nolink then

if ~~data~~.~~glossary_type =~~= "wikt" then

local glossary = data and data[(m_data or get_m_data()).GLOSSARY]

~~normtag~~ = "[[" .. ~~data~~.glossary .. "|" .. ~~normtag~~ .. "]]"

if glossary ~= nil then

elseif ~~data~~.~~glossary_type~~ == "wp" then

if glossary == m_data.WIKT then

~~normtag~~ = "[[w:" .. ~~data.glossary~~ .. "|" .. ~~normtag~~ .. "]]"

display = "[[wikt:" .. normtag .. "|" .. display .. "]]"

~~else~~

elseif glossary == m_data.WP then

~~normtag~~ = "[[wikt:Appendix:Glossary#" .. ~~mw.uri.anchorEncode~~(~~data.~~glossary) .. "|" .. ~~normtag~~ .. "]]"

display = "[[w:" .. normtag .. "|" .. display .. "]]"

elseif glossary == m_data.APPENDIX then

display = "[[wikt:Appendix:Glossary#" .. anchor_encode(normtag) .. "|" .. display .. "]]"

elseif type(glossary) ~= "string" then

error(("Internal error: Wrong type %s for glossary value %s for tag %s"):format(

type(glossary), dump(glossary), normtag))

else

local link = glossary:match("^(wikt:.*)")

if link then

display = "[[" .. link .. "|" .. display .. "]]"

end

if not link then

link = glossary:match("^w:(.*)")

if link then

display = "[[w:" .. link .. "|" .. display .. "]]"

end

if not link then

display = "[[wikt:Appendix:Glossary#" .. anchor_encode(glossary) .. "|" .. display .. "]]"

end

return ~~normtag~~

return display

end

--[==[

-- Turn a canonicalized tag spec (which describes a single, possibly

Turn a canonicalized tag spec (which describes a single, possibly multipart tag) into the displayed form. The tag spec

-- multipart tag) into the displayed form. The tag spec may be a string

may be a string (a canonical-form tag); a list of canonical-form tags (in the case of a simple multipart tag); or a

-- (a canonical-form tag)~~, or~~ a list of canonical-form tags (in the

list of mixed canonical-form tags and lists of such tags (in the case of a two-level multipart tag). `joiner` indicates

-- case of a simple multipart tag), or a list of mixed canonical-form

how to join the parts of a multipart tag, and can be either {"and"} ("foo and bar", or "foo, bar and baz" for 3 or

-- tags and lists of such tags (in the case of a two-level multipart tag).

more), {"slash"} ("foo/bar"), {"en-dash"} ("foo–bar") or {nil}, which uses the global default found in

~~-- JOINER indicates~~ how to join the parts of a multipart tag, and can

{multipart_join_strategy()} in [[Module:form of/functions]]. (NOTE: The global default is {"slash"} and this seems

-- be either "and" ("foo and bar", or "foo, bar and baz" for 3 or more),

unlikely to change.) A truthy `nolink` suppresses linking.]==]

-- "slash" ("foo/bar"), "en-dash" ("foo–bar") or nil, which uses the

function export.get_tag_display_form(tagspec, lang, joiner, nolink)

-- global default found in multipart_join_strategy() in

-- [[Module:form of/functions]].

function export.get_tag_display_form(tagspec, joiner)

if type(tagspec) == "string" then

return get_single_tag_display_form(tagspec)

return get_single_tag_display_form(tagspec, lang, nolink)

end

-- We have a multipart tag. See if there's a display handler to

-- We have a multipart tag. See if there's a display handler to display them specially.

-- display them specially.

for _, handler in ipairs(display_handlers or get_display_handlers()) do

for _, handler in ipairs(~~m_functions.~~display_handlers) do

local displayval = handler(tagspec, joiner)

if displayval then

if nolink then

displayval = remove_links(displayval)

end

return displayval

end

Line 510:

Line 926:

for _, first_level_tag in ipairs(tagspec) do

if type(first_level_tag) == "string" then

~~table.~~insert(displayed_tags, get_single_tag_display_form(first_level_tag))

insert(displayed_tags, get_single_tag_display_form(first_level_tag, lang, nolink))

else

-- A first-level element of a two-level multipart tag.

-- A first-level element of a two-level multipart tag. Currently we just separate the individual components

-- Currently we just separate the individual components

-- with spaces, but other ways are possible, e.g. using an underscore, colon, parens or braces.

-- with spaces, but other ways are possible, e.g. using

-- an underscore, colon, parens or braces.

local components = {}

for _, component in ipairs(first_level_tag) do

~~table.~~insert(components, get_single_tag_display_form(component))

insert(components, get_single_tag_display_form(component, lang, nolink))

end

~~table.~~insert(displayed_tags, ~~table.~~concat(components, " "))

insert(displayed_tags, concat(components, " "))

end

return ~~m_functions.~~join_multiparts(displayed_tags, joiner)

return join_multiparts(displayed_tags, joiner)

end

get_tag_display_form = export.get_tag_display_form

--[==[

Given a normalized tag set (i.e. as output by {normalize_tag_set()}; all tags are in canonical form, multipart tags are

represented as lists, and two-level multipart tags as lists of lists), convert to displayed form (a string). See

{get_tag_display_form()} for the meaning of `joiner`. A truthy `nolink` suppresses linking.]==]

function export.get_tag_set_display_form(normalized_tag_set, lang, joiner, nolink)

local parts = {}

-- ~~Return true~~ if the ~~list `tags1`~~, ~~treated as a~~ set, is ~~a subset of~~ the ~~list `tags2`~~, ~~also~~

for _, tagspec in ipairs(normalized_tag_set) do

-- ~~treated~~ as ~~a set~~.

local to_insert = get_tag_display_form(tagspec, lang, joiner, nolink)

local ~~function is_subset~~(~~tags1~~, ~~tags2~~)

-- Maybe insert a space before inserting the display form of the tag. We insert a space if

~~tags1~~ = ~~m_table~~.~~listToSet~~(~~tags1~~)

-- (a) we're not the first tag; and

~~tags2 = m_table~~.~~listToSet(tags2~~)

-- (b) the tag we're about to insert doesn't have the "no_space_on_left" property; and

~~for tag, _ in pairs~~(~~tags1~~) do

-- (c) the preceding tag doesn't have the "no_space_on_right" property.

~~if not tags2[tag]~~ then

-- NOTE: We depend here on the fact that

~~return false~~

-- (1) all tags with either of the above properties set have the same display form as canonical form, and

-- (2) all tags with either of the above properties set are single-character tags.

-- The second property is an optimization to avoid looking up display forms resulting from multipart tags,

-- which won't be found and which will trigger loading of [[Module:form of/data/2]]. If multichar punctuation is

-- added in the future, it's ok to change the == 1 below to <= 2 or <= 3.

--

-- If the first property above fails to hold in the future, we need to track the canonical form of each tag

-- (including the previous one) as well as the display form. This would also avoid the need for the == 1 check.

if #parts > 0 then

local most_recent_tagobj = parts[#parts]:match("^.[\128-\191]*$") and lookup_tag(parts[#parts], lang)

local to_insert_tagobj = to_insert:match("^.[\128-\191]*$") and lookup_tag(to_insert, lang)

if (

(not most_recent_tagobj or not most_recent_tagobj.no_space_on_right) and

(not to_insert_tagobj or not to_insert_tagobj.no_space_on_left)

) then

insert(parts, " ")

end

insert(parts, to_insert)

end

return ~~true~~

return concat(parts)

end

get_tag_set_display_form = export.get_tag_set_display_form

--[==[
Split a tag set containing two-level multipart tags into one or more tag sets not containing such tags.
Single-level multipart tags are left alone. (If we need to, a slight modification of the following code
will also split single-level multipart tags.) This assumes that multipart tags are represented as lists
and two-level multipart tags are represented as lists of lists, as is output by {normalize_tag_set()}.

The return value is always a list of tag sets; if no two-level multipart tag is present, it is the
single-element list `{tag_set}`.

NOTE: We have to be careful to properly handle imbalanced two-level multipart tags such as
`def:s//p` (or the reverse, `s//def:p`).]==]
local function split_two_level_multipart_tag_set(tag_set)
	for i, tag in ipairs(tag_set) do
		if type(tag) == "table" then
			-- We saw a multipart tag. Check if any of the parts are two-level.
			local saw_two_level_tag = false
			for _, first_level_tag in ipairs(tag) do
				if type(first_level_tag) == "table" then
					saw_two_level_tag = true
					break
				end
			end
			if saw_two_level_tag then
				-- We found a two-level multipart tag.
				-- (1) Extract the preceding tags.
				local pre_tags = slice(tag_set, 1, i - 1)
				-- (2) Extract the following tags.
				local post_tags = slice(tag_set, i + 1)
				-- (3) Loop over each tag set alternant in the two-level multipart tag.
				-- For each alternant, form the tag set consisting of pre_tags + alternant + post_tags,
				-- and recursively split that tag set (later tags may themselves be two-level).
				local resulting_tag_sets = {}
				for _, first_level_tag_set in ipairs(tag) do
					local expanded_tag_set = {}
					extend(expanded_tag_set, pre_tags)
					-- The second level may have a string or a list; this is what makes imbalanced
					-- tags such as `def:s//p` work.
					if type(first_level_tag_set) == "table" then
						extend(expanded_tag_set, first_level_tag_set)
					else
						insert(expanded_tag_set, first_level_tag_set)
					end
					extend(expanded_tag_set, post_tags)
					extend(resulting_tag_sets, split_two_level_multipart_tag_set(expanded_tag_set))
				end
				-- Only the first two-level tag is handled at this level; the recursion above
				-- takes care of any that follow it.
				return resulting_tag_sets
			end
		end
	end
	-- No two-level multipart tags anywhere: the tag set is returned unchanged, wrapped in a list.
	return {tag_set}
end

~~-- Compute and return the appropriate categories for the tags in `tags`~~ (~~user-specified tags~~,

local function try_lang_specific_module(langcode, modules_tried, name, data)

~~-- which may consist of multiple tag sets separated by semicolons) and the language in `lang`~~.

local lang_specific_module = form_of_lang_data_module_prefix .. langcode .. "/functions"

~~-- This checks both language-specific and language-agnostic category specs in [[Module:form of/cats]].~~

local langdata = safe_require(lang_specific_module)

~~-- `POS` is the user-specified part of speech,~~ if ~~any, and `terminfo` is currently unused.~~

if langdata then

~~function export.fetch_lang_categories~~(~~lang, tags, terminfo~~, ~~POS~~)

insert(modules_tried, lang_specific_module)

local ~~m_cats~~ = mw.~~loadData~~(~~"Module:form of/cats"~~)

if langdata.cat_functions then

local fn = langdata.cat_functions[name]

if fn then

return fn(data), true

end

return nil, false

end

local ~~categories~~ = {}

-- Look up the function called `name` and call it with a single data object holding `pagename`, `lemmas`,
-- `tag_set` (= `normalized_tag_set`), `lang` and `POS`. The lookup first tries the language-specific module
-- [[Module:form of/lang-data/LANGCODE/functions]] for `lang` and then for each of its parents in turn, and
-- finally falls back to the language-independent functions in [[Module:form of/functions]]. The result of the
-- first function found is returned. If no function is found, an error is thrown listing every module that was
-- consulted; `funtype` is used only to describe the kind of function in that error message.
local function call_named_function(name, funtype, normalized_tag_set, lang, POS, pagename, lemmas)
	local data = {
		pagename = pagename or default_pagename or get_default_pagename(),
		lemmas = lemmas,
		tag_set = normalized_tag_set,
		lang = lang,
		POS = POS
	}
	local modules_tried = {}

	-- Language-specific lookup: walk from the language itself up through its parents (superordinate
	-- varieties), stopping at the first module that defines the function.
	local candidate = lang
	while candidate do
		local retval, found_it = try_lang_specific_module(candidate:getCode(), modules_tried, name, data)
		if found_it then
			return retval
		end
		candidate = candidate:getParent()
	end

	-- Language-independent fallback.
	insert(modules_tried, form_of_functions_module)
	local generic_fn = (cat_functions or get_cat_functions())[name]
	if generic_fn then
		return generic_fn(data)
	end

	-- Nothing found: turn the module names into wikilinks for the error message.
	for idx = 1, #modules_tried do
		modules_tried[idx] = "[[" .. modules_tried[idx] .. "]]"
	end
	local tried_text = list_to_text(modules_tried, nil, " or ")
	error(("No %s function named '%s' in %s"):format(funtype, name, tried_text))
end

~~local normalized_tags = export~~.~~normalize_tags~~(tags)

-- Given a tag from the current tag set (which may be a list in case of a multipart tag),

~~local split_tag_sets~~ = ~~export.split_tags_into_tag_sets_and_expand_two_level_multipart_tags~~(~~normalized_tags~~)

-- and a tag from a categorization spec, check that the two match.

~~POS~~ = ~~export.normalize_pos~~(~~POS~~)

-- (1) If both are strings, we just check for equality.

-- (2) If the spec tag is a string and the tag set tag is a list (i.e. it originates from a

-- multipart tag), we check that the spec tag is in the list. This is because we want to treat

-- multipart tags in user-specified tag sets as if the user had specified multiple tag sets.

-- For example, if the user said "1//3|s|pres|ind" and the categorization spec says {"has", "1"},

-- we want this to match, because "1//3|s|pres|ind" should be treated equivalently to the two tag

-- sets "1|s|pres|ind" and "3|s|pres|ind".

-- (3) If the spec tag is a list (i.e. it originates from a multipart tag), we check that the

-- tag set tag is also a list and is a superset of the spec tag. For example, if the categorization

-- spec says {"has", "1//3"}, then the tag set tag must be a multipart tag that has both "1" and "3"

-- in it. "1//3" works, as does "1//2//3".

local function tag_set_tag_matches_spec_tag(tag_set_tag, spec_tag)
	local tag_is_multipart = type(tag_set_tag) == "table"

	if type(spec_tag) == "table" then
		-- Multipart spec tag: the tag set tag must also be multipart and contain every part of the spec.
		if not tag_is_multipart then
			return false
		end
		return is_subset_list(spec_tag, tag_set_tag) and true or false
	end

	if tag_is_multipart then
		-- String spec tag against a multipart tag set tag: match if the spec tag is one of the parts.
		return contains(tag_set_tag, spec_tag) and true or false
	end

	-- Both are plain strings.
	return tag_set_tag == spec_tag
end

-- ~~Loop over each~~ tag set and ~~compute categories for each one~~.

-- Check that the current tag set matches the given spec tag. This means that any of the tags

for _, ~~tag_set~~ in ipairs(~~split_tag_sets~~) do

-- in the current tag set match, according to tag_set_tag_matches_spec_tag(); see above. If the

~~local function make_function_table~~()

-- current tag set contains only string tags (i.e. no multipart tags), and the spec tag is a

return {

-- string (i.e. not a multipart tag), this boils down to list containment, but it gets more

~~lang=lang,~~

-- complex when multipart tags are present.

~~tags=normalized_tags,~~

local function tag_set_matches_spec_tag(spec_tag, tag_set, lang)

~~term=term,~~

spec_tag = normalize_tag(spec_tag, lang)

~~p=POS~~

for _, tag_set_tag in ipairs(tag_set) do

}

if tag_set_tag_matches_spec_tag(tag_set_tag, spec_tag) then

return true

end

return false

end

-- ~~Given a tag from~~ the current tag set ~~(which may be a list in case of a multipart tag),~~

-- Check whether the given spec matches the current tag set. Two values are returned:

~~-- and a tag from a categorization spec, check that the two match~~.

-- (1) whether the spec matches the tag set; (2) the index of the category to add if

-- (1) ~~If both are strings, we just check for equality.~~

-- the spec matches.

~~-- (2) If~~ the spec ~~tag is a string and~~ the tag set ~~tag is a list~~ (~~i.e. it originates from a~~

local function check_condition(spec, tag_set, normalized_tag_set, lang, POS, pagename, lemmas)

~~-- multipart tag~~)~~, we check that~~ the ~~spec tag is in~~ the ~~list. This is because we want~~ to ~~treat~~

if type(spec) == "boolean" then

-- ~~multipart tags in user~~-~~specified tag sets as if~~ the ~~user had specified multiple tag sets~~.

return spec

~~-- For example~~, if ~~the user said "1//3|s|pres|ind" and the categorization~~ spec ~~says {~~"~~has~~"~~, "1"},~~

elseif type(spec) ~= "table" then

~~-- we want this to match, because~~ "~~1//3|s|pres|ind~~" ~~should be treated equivalently to two tag~~

error("Wrong type of condition " .. spec .. ": " .. type(spec))

end

-- (3) ~~If the spec tag is a list (i.e. it originates from a multipart tag~~)~~, we check that the~~

local predicate = spec[1]

~~-- tag set tag is also a list and is a superset of the~~ spec ~~tag. For example,~~ if ~~the categorization~~

if predicate == "has" then

~~-- spec says {~~"has"~~, "1//3"},~~ then ~~the tag set tag must be a multipart tag that has both "1" and "3"~~

return tag_set_matches_spec_tag(spec[2], tag_set, lang), 3

~~-- in it. "1//3" works~~, ~~as does "1//2//3".~~

elseif predicate == "hasall" then

~~local function tag_set_tag_matches_spec_tag(tag_set_tag~~, ~~spec_tag~~)

for _, tag in ipairs(spec[2]) do

~~if type(spec_tag)~~ == "~~table~~" then

if not tag_set_matches_spec_tag(tag, tag_set, lang) then

~~if type~~(~~tag_set_tag~~) ~~== "table" and is_subset~~(~~spec_tag~~, ~~tag_set_tag~~) then

return false, 3

return ~~true~~

end

elseif ~~type(tag_set_tag)~~ == "~~table~~" then

return true, 3

if ~~m_table.contains~~(~~tag_set_tag~~, ~~spec_tag~~) ~~then~~

elseif predicate == "hasany" then

~~return true~~

for _, tag in ipairs(spec[2]) do

~~end~~

if tag_set_matches_spec_tag(tag, tag_set, lang) then

~~elseif tag_set_tag == spec_tag~~ then

return true, 3

return true

end

~~return false~~

end

return false, 3

~~-- Check that the current tag set matches the given~~ spec ~~tag. This means that any of the tags~~

elseif predicate == "tags=" then

-- in ~~the current~~ tag set ~~match~~, ~~according to tag_set_tag_matches_spec_tag(~~)~~; see above. If the~~

local normalized_spec_tag_sets = normalize_tag_set(spec[2], lang)

-- ~~current tag set contains only string tags (i.e. no~~ multipart tags~~), and~~ the ~~spec tag is a~~

if #normalized_spec_tag_sets > 1 then

~~-- string (i.e~~. ~~not a multipart tag),~~ this ~~boils down to list containment, but it gets more~~

error("Internal error: No support for conjoined shortcuts in category/label specs in "

-- ~~complex when~~ multipart tags ~~are present~~.

.. "[[Module:form of/cats]] when processing spec tag set " .. concat(spec[2], "|"))

~~local function tag_set_matches_spec_tag~~(~~spec_tag~~)

end

~~spec_tag = normalize_tag~~(~~spec_tag~~)

local normalized_spec_tag_set = normalized_spec_tag_sets[1]

for _, ~~tag_set_tag~~ in ipairs(~~tag_set~~) do

-- Check for and disallow two-level multipart tags in the specs. FIXME: Remove this when we remove

if ~~tag_set_tag_matches_spec_tag~~(~~tag_set_tag~~, ~~spec_tag~~) ~~then~~

-- support for two-level multipart tags.

~~return true~~

for _, tag in ipairs(normalized_spec_tag_set) do

if type(tag) == "table" then

for _, subtag in ipairs(tag) do

if type(subtag) == "table" then

error("Internal error: No support for two-level multipart tags in category/label specs"

.. "[[Module:form of/cats]] when processing spec tag set "

.. concat(spec[2], "|"))

end

~~return false~~

end

-- Allow tags to be in different orders, and multipart tags to be in different orders. To handle this,

-- ~~Check whether the given~~ spec ~~matches~~ the ~~current tag set~~. ~~Two values are returned:~~

-- we first check that both tag set tags and spec tags have the same length. If so, we sort the

-- ~~(1) whether the spec matches~~ the tag set~~; (2)~~ the ~~index of the category to add if~~

-- multipart tags in the tag set tags and spec tags, and then check that all tags in the spec tags are

-- the ~~spec matches~~.

-- in the tag set tags.

local ~~function check_condition~~(~~spec~~)

if #tag_set ~= #normalized_spec_tag_set then

if type(~~spec~~) == "~~boolean~~" then

return false, 3

~~return spec~~

end

~~elseif~~ type(~~spec~~) ~= "table" then

local tag_set_tags = deep_copy(tag_set)

~~error("Wrong type of condition " .. spec .. ": " .. type~~(~~spec)~~)

for i=1,#tag_set_tags do

if type(tag_set_tags[i]) == "table" then

sort(tag_set_tags[i])

end

if type(normalized_spec_tag_set[i]) == "table" then

sort(normalized_spec_tag_set[i])

end

~~local predicate~~ = ~~spec[~~1]

end

~~if predicate == "has" then~~

for i=1,#tag_set_tags do

~~return tag_set_matches_spec_tag(spec[2])~~, 3

if not contains(tag_set_tags, normalized_spec_tag_set[i]) then

~~elseif predicate == "hasall" then~~

~~for _, tag in ipairs(spec[2]) do~~

if not ~~tag_set_matches_spec_tag~~(~~tag) then~~

~~return false~~, 3

~~end~~

~~return true, 3~~

~~elseif predicate == "hasany" then~~

~~for _, tag in ipairs(spec~~[2]~~) do~~

~~if tag_set_matches_spec_tag(tag~~) then

~~return true, 3~~

~~end~~

return false, 3

elseif predicate == "~~tags~~=" then

end

~~local normalized_spec_tags~~ = ~~export.normalize_tags~~(spec[2])

end

~~-- Allow tags to be in different orders, and multipart tags to~~

return true, 3

~~-- be in different orders. To handle this, we first check that~~

elseif predicate == "p=" then

~~-- both tag set tags and spec tags have the same length. If so,~~

return POS == normalize_pos(spec[2]), 3

~~-- we sort the multipart tags in the tag set tags and spec tags,~~

elseif predicate == "pany" then

~~-- and then check that all tags in the spec tags are in the~~

for _, specpos in ipairs(spec[2]) do

~~-- tag set tags.~~

if POS == normalize_pos(specpos) then

~~if #tag_set ~= #normalized_spec_tags then~~

~~return false~~, 3

~~end~~

~~local tag_set_tags = m_table.deepcopy(tag_set)~~

~~for i=1,#tag_set_tags do~~

~~if type(tag_set_tags[i])~~ == "~~table~~" then

~~table.sort~~(~~tag_set_tags~~[i])

~~end~~

if ~~type(normalized_spec_tags[i])~~ == ~~"table" then~~

~~table.sort~~(~~normalized_spec_tags[i])~~

~~end~~

~~for i=1,#tag_set_tags do~~

~~if not m_table.contains(tag_set_tags, normalized_spec_tags[i], "deepCompare"~~) then

~~return false, 3~~

~~end~~

return true, 3

~~elseif predicate == "p=" then~~

~~return POS == export.normalize_pos(spec[2]), 3~~

~~elseif predicate == "pany" then~~

~~for _, specpos in ipairs(spec[2]) do~~

~~if POS == export.normalize_pos(specpos) then~~

~~return true, 3~~

~~end~~

~~return false, 3~~

~~elseif predicate == "pexists" then~~

~~return POS ~= nil, 2~~

~~elseif predicate == "not" then~~

~~local condval = check_condition(spec[2])~~

~~return not condval, 3~~

~~elseif predicate == "and" then~~

~~local condval = check_condition(spec[2])~~

~~if condval then~~

~~condval = check_condition(spec[3])~~

~~end~~

~~return condval, 4~~

~~elseif predicate == "or" then~~

~~local condval = check_condition(spec[2])~~

~~if not condval then~~

~~condval = check_condition(spec[3])~~

~~end~~

~~return condval, 4~~

~~elseif predication == "call" then~~

~~local fn = m_functions.cat_functions[spec[2]]~~

~~if not fn then~~

~~error("No condition function named '" .. spec[2] .. "'")~~

~~end~~

~~return fn(make_function_table()), 3~~

~~else~~

~~error("Unrecognized predicate: " .. predicate)~~

end

return false, 3

elseif predicate == "pexists" then

return POS ~= nil, 2

elseif predicate == "not" then

local condval = check_condition(spec[2], tag_set, normalized_tag_set, lang, POS, pagename, lemmas)

return not condval, 3

elseif predicate == "and" then

local condval = check_condition(spec[2], tag_set, normalized_tag_set, lang, POS, pagename, lemmas)

if condval then

condval = check_condition(spec[3], tag_set, normalized_tag_set, lang, POS, pagename, lemmas)

end

return condval, 4

elseif predicate == "or" then

local condval = check_condition(spec[2], tag_set, normalized_tag_set, lang, POS, pagename, lemmas)

if not condval then

condval = check_condition(spec[3], tag_set, normalized_tag_set, lang, POS, pagename, lemmas)

end

return condval, 4

elseif predicate == "call" then

return call_named_function(spec[2], "condition", normalized_tag_set, lang, POS, pagename, lemmas), 3

else

error("Unrecognized predicate: " .. predicate)

end

-- Process a given spec. This checks any conditions in the spec against the

-- tag set, and insert any resulting categories into `categories`. Return value

-- is true if the outermost condition evaluated to true and a category was inserted

-- (this is used in {"cond" ...} conditions, which stop when a subcondition evaluates

-- to true).

local function process_spec(spec)

local function process_spec(spec, tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels)

if not spec then

return false

elseif type(spec) == "string" then

-- Substitute POS request with user-specified part of speech

-- A category. Substitute POS request with user-specified part of speech or default.

-- or default

spec = spec:gsub("<<p=(.-)>>", function(default)

spec = ~~rsub~~(~~spec,~~ "<<p=(.-)>>", function(default)

return POS or normalize_pos(default)

return POS or ~~export.~~normalize_pos(default)

end)

insert(categories, lang:getFullName() .. " " .. spec)

~~table.~~insert(categories, lang:~~getCanonicalName~~() .. " " .. spec)

return true

elseif type(spec) == "table" and spec.labels then

-- A label spec.

for _, label in ipairs(spec.labels) do

insert_if_not(labels, label)

end

return true

elseif type(spec) ~= "table" then

error("Wrong type of specification " .. spec .. ": " .. type(spec))

end

local predicate = spec[1]

if predicate == "multi" then

for _, sp in iterate_from(2, ipairs(spec)) do -- Iterate from 2.

process_spec(sp, tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels)

end

return true

elseif predicate == "cond" then

for _, sp in iterate_from(2, ipairs(spec)) do -- Iterate from 2.

if process_spec(sp, tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels) then

return true

~~elseif type(spec) ~= "table" then~~

~~error("Wrong type of specification " .. spec .. ": " .. type(spec))~~

~~end~~

~~local predicate = spec[1]~~

~~if predicate == "multi" then~~

~~-- WARNING! #spec doesn't work for objects loaded from loadData()~~

~~for i, sp in ipairs(spec) do~~

~~if i > 1 then~~

~~process_spec(sp)~~

~~end~~

~~return true~~

~~elseif predicate == "cond" then~~

~~-- WARNING! #spec doesn't work for objects loaded from loadData()~~

~~for i, sp in ipairs(spec) do~~

~~if i > 1 and process_spec(sp) then~~

~~return true~~

~~end~~

~~return false~~

~~elseif predicate == "call" then~~

~~local fn = m_functions.cat_functions[spec[2]]~~

~~if not fn then~~

~~error("No spec function named '" .. spec[2] .. "'")~~

~~end~~

~~return process_spec(fn(make_function_table()))~~

~~else~~

~~local condval, ifspec = check_condition(spec)~~

~~if condval then~~

~~process_spec(spec[ifspec])~~

~~return true~~

~~else~~

~~process_spec(spec[ifspec + 1])~~

~~-- FIXME: Are we sure this is correct?~~

~~return false~~

~~end~~

end

return false

elseif predicate == "call" then

return process_spec(

call_named_function(spec[2], "spec", normalized_tag_set, lang, POS, pagename, lemmas),

tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels

)

else

local condval, ifspec = check_condition(spec, tag_set, normalized_tag_set, lang, POS, pagename, lemmas)

if condval then

process_spec(spec[ifspec], tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels)

return true

else

process_spec(spec[ifspec + 1], tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels)

-- FIXME: Are we sure this is correct?

return false

end

local ~~langspecs~~ = ~~m_cats[~~lang:getCode()]

--[==[

Given a normalized tag set (i.e. as output by {normalize_tag_set()}; all tags are in canonical form, multipart tags are

represented as lists, and two-level multipart tags as lists of lists), fetch the associated categories and labels.

Return two values, a list of categories and a list of labels. `lang` is the language of the term represented by the tag set,

and `POS` is the user-provided part of speech (which may be {nil}).]==]

function export.fetch_categories_and_labels(normalized_tag_set, lang, POS, pagename, lemmas)

local categories, labels = {}, {}

POS = normalize_pos(POS)

-- First split any two-level multipart tags into multiple sets, to make our life easier.

for _, tag_set in ipairs(split_two_level_multipart_tag_set(normalized_tag_set)) do

local langcode = lang:getCode()

local langspecs = (m_cats_data or get_m_cats_data())[langcode]

if langspecs then

for _, spec in ipairs(langspecs) do

process_spec(spec)

process_spec(spec, tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels)

end

if lang:~~getCode~~() ~= ~~"und"~~ then

local full_code = lang:getFullCode()

local langspecs = ~~m_cats~~[~~"und"~~]

if full_code ~= langcode then

local langspecs = (m_cats_data or get_m_cats_data())[full_code]

if langspecs then

for _, spec in ipairs(langspecs) do

process_spec(spec)

process_spec(spec, tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels)

end

if full_code ~= "und" then

local langspecs = (m_cats_data or get_m_cats_data())["und"]

if langspecs then

for _, spec in ipairs(langspecs) do

process_spec(spec, tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels)

end

return categories, labels

end

fetch_categories_and_labels = export.fetch_categories_and_labels

return ~~categories~~

-- Format a list of labels for display. Returns the empty string if `labels` is nil/false or empty. Otherwise
-- returns the result of show_labels() (called with the labels plus `data.lang`, `data.sort` and `data.nocat`),
-- followed by a separating space. The space is omitted only when `notext` is truthy and `data.pretext` is
-- missing or empty, i.e. when nothing will follow the labels.
-- NOTE(review): show_labels() is defined elsewhere in this file; presumably it wraps [[Module:labels]] -- confirm.
local function format_labels(labels, data, notext)
	if labels and #labels > 0 then
		local formatted = show_labels{
			labels = labels,
			lang = data.lang,
			sort = data.sort,
			nocat = data.nocat
		}
		-- "" is truthy in Lua, so the and/or chain below is a safe ternary.
		local separator = notext and (data.pretext or "") == "" and "" or " "
		return formatted .. separator
	else
		return ""
	end
end

--[==[

Implementation of templates that display inflection tags, such as the general {{tl|inflection of}}, semi-specific

variants such as {{tl|participle of}}, and specific variants such as {{tl|past participle of}}. `data` contains all the

information controlling the display, with the following fields:

~~function export~~.tagged_inflections(data, ~~terminfo~~, ~~notext~~, ~~capfirst~~, ~~posttext~~, ~~joiner~~)

* `.lang`: ('''''required''''') Language to use when looking up language-specific inflection tags, categories and

if not data.tags then

labels, and for displaying categories and labels.

error("First argument must ~~now~~ be a table of arguments")

* `.tags`: ('''''required''' unless `.tag_sets` is given'') List of non-canonicalized inflection tags. Multiple tag sets

can be indicated by a {";"} as one of the tags, and tag-set properties may be attached to the last tag of a tag set.

The tags themselves may come directly from the user (as in {{tl|inflection of}}); come partly from the user (as in

{{tl|participle of}}, which adds the tag `part` to user-specified inflection tags); or be entirely specified by the

template (as in {{tl|past participle of}}).

* `.tag_sets`: ('''''required''' unless `.tags` is given'') List of non-canonicalized tag sets and associated

per-tag-set properties. Each element of the list is an object of the form

{ {tags = {"TAG", "TAG", ...}, labels = {"LABEL", "LABEL", ...}} }. If `.tag_sets` is specified, `.tags` should not be

given and vice-versa. Specifying `.tag_sets` in place of `.tags` allows per-tag-set labels to be specified; otherwise,

there is no advantage. [[Module:pt-gl-inflections]] uses this functionality to supply labels like {"Brazil"} and

{"Portugal"} associated with specific tag sets.

* `.lemmas`: ('''''recommended''''') List of objects describing the lemma(s) of which the term in question is a

non-lemma form. These are passed directly to {full_link()} in [[Module:links]]. Each object should have at minimum a

`.lang` field containing the language of the lemma and a `.term` field containing the lemma itself. Each object is

formatted using {full_link()} and then if there are more than one, they are joined using {serialCommaJoin()} in

[[Module:table]]. Alternatively, `.lemmas` can be a string, which is displayed directly. If omitted entirely, no lemma

links are shown and the connecting "of" is also omitted.

* `.lemma_face`: ('''''recommended''''') "Face" to use when displaying the lemma objects. Usually should be set to

{"term"}.

* `.POS`: ('''''recommended''''') Categorizing part-of-speech tag. Comes from the {{para|p}} or {{para|POS}} argument of

{{tl|inflection of}}.

* `.pagename`: Page name of "current" page or nil to use the actual page title; for testing purposes.

* `.conj`: Conjunction or separator to use when joining multiple lemma objects. Defaults to {"and"}.

* `.enclitics`: List of enclitics to display after the lemmas, in parens.

* `.enclitic_conj`: Conjunction or separator to use when joining multiple enclitics. Defaults to {"and"}.

* `.no_format_categories`: If true, don't format the categories derived from the inflection tags; just return them.

* `.sort`: Sort key for formatted categories. Ignored when `.no_format_categories` = {true}.

* `.nocat`: Suppress computation of categories (even if `.no_format_categories` is not given).

* `.notext`: Disable display of all tag text and `inflection of` text. (FIXME: Maybe not implemented correctly.)

* `.nolink`: Suppress linking of terms in inflection tags.

* `.capfirst`: Capitalize the first word displayed.

* `.pretext`: Additional text to display before the inflection tags, but after any top-level labels.

* `.posttext`: Additional text to display after the lemma links.

* `.text_classes`: CSS classes used to wrap the tag text and lemma links. Default is

{"form-of-definition use-with-mention"}.

* `.lemma_classes`: Additional CSS classes used to wrap the lemma links. Default is {"form-of-definition-link"}.

* `.joiner`: Override the joiner (normally a slash) used to join multipart tags. You should normally not specify this.

* `.nowrap`: Do not wrap the form-of definitions in a span, unless `.text_classes` is specified.

* `.ok_to_destructively_modify`: If set, data structures (including the nested lemma structures) can be modified

in-place to save memory; otherwise they will be copied before modifying.

A typical call might look like this (for {{m+|es|amo}}): {

local lang = require("Module:languages").getByCode("es")

local lemma_obj = {

lang = lang,

term = "amar",

}

return m_form_of.tagged_inflections({

lang = lang, tags = {"1", "s", "pres", "ind"}, lemmas = {lemma_obj}, lemma_face = "term", POS = "verb"

})

}

Normally, one value is returned, the formatted text, which has appended to it the formatted categories derived from the

tag-set-related categories generated by the specs in [[Module:form of/cats]]. To suppress this, set

`data.no_format_categories` = {true}, in which case two values are returned, the formatted text without any formatted

categories appended and a list of the categories to be formatted.

NOTE: There are two sets of categories that may be generated: (1) categories derived directly from the tag sets, as

specified in [[Module:form of/cats]]; (2) categories derived from tag-set labels, either (a) set explicitly by the

caller in `data.tag_sets`, (b) specified by the user using `<lb:...>` attached to the last tag in a tag set, or

(c) specified in [[Module:form of/cats]]. The second type (label-related categories) are currently not returned in

the second return value of {tagged_inflections()}, and are currently inserted into the output text even if

`data.no_format_categories` is set to {true}; but they can be suppressed by setting `data.nocat` = {true} (which also

suppresses the first type of categories, those derived directly from tag sets, even if `data.no_format_categories` is

set to {true}).]==]

function export.tagged_inflections(data)

if not data.tags and not data.tag_sets then

error("First argument must be a table of arguments, and `.tags` or `.tag_sets` must be specified")

end

if data.tags and data.tag_sets then

error("Both `.tags` and `.tag_sets` cannot be specified")

end

local tag_sets = data.tag_sets

if not tag_sets then

tag_sets = split_tag_set(data.tags)

for i, tag_set in ipairs(tag_sets) do

tag_sets[i] = parse_tag_set_properties(tag_set)

end

~~local cur_infl = {}~~

local inflections = {}

local categories = {}

for _, tag_set in ipairs(tag_sets) do

local normalized_tag_sets = normalize_tag_set(tag_set.tags, data.lang, "do-track")

local ~~ntags~~ = ~~export.normalize_tags~~(data.~~tags~~, ~~nil~~, ~~"do-track")~~

for _, normalized_tag_set in ipairs(normalized_tag_sets) do

local this_categories, this_labels = fetch_categories_and_labels(normalized_tag_set, data.lang,

~~for i~~, ~~tagspec in ipairs~~(~~ntags~~) do

data.POS, data.pagename, type(data.lemmas) == "table" and data.lemmas or nil)

~~if tagspec~~ == ";" ~~then~~

if not data.nocat then

if ~~#cur_infl > 0~~ then

extend(categories, this_categories)

~~table.insert~~(~~inflections~~, ~~table.concat(cur_infl)~~)

end

local cur_infl = get_tag_set_display_form(normalized_tag_set, data.lang, data.joiner, data.nolink)

cur_infl = {}

~~else~~

~~local to_insert = export.get_tag_display_form~~(~~tagspec~~, data.~~joiner)~~

~~-- Maybe insert a space before inserting the display form~~

~~-- of the tag. We insert a space if~~

~~-- (a) we're not the first tag; and~~

~~-- (b) the tag we're about to insert doesn't have the~~

~~-- "no_space_on_left" property; and~~

~~-- (c) the preceding tag doesn't have the "no_space_on_right"~~

~~-- property.~~

~~-- NOTE: We depend here on the fact that~~

~~-- (1) all tags with either of the above properties set have the~~

~~-- same display form as canonical form~~, ~~and~~

~~-- (2) all tags with either of the above properties set are~~

~~-- single-character tags~~.

~~-- The second property is an optimization to avoid looking up~~

~~-- display forms resulting from multipart tags~~, ~~which won't be~~

~~-- found and which will trigger loading of [[Module:form of/data2]].~~

~~-- If multichar punctuation is added in the future, it's ok to~~

~~-- change the == 1 below to <= 2 or <= 3~~.

--

~~-- If the first property above fails to hold in the future, we~~

~~-- need to track the canonical form of each tag (including the~~

~~-- previous one~~) ~~as well as the display form. This would also~~

~~-- avoid the need for the == 1 check.~~

if #cur_infl > 0 then

~~local most_recent_tagobj = ulen(cur_infl[#cur_infl]) == 1 and~~

if tag_set.labels then

~~export.lookup_tag(cur_infl[#cur_infl])~~

this_labels = append(tag_set.labels, this_labels)

~~local to_insert_tagobj = ulen(to_insert) == 1 and~~

~~export.lookup_tag(to_insert)~~

if (

~~(not most_recent_tagobj or~~

~~not most_recent_tagobj~~.~~no_space_on_right) and~~

(~~not to_insert_tagobj or~~

~~not to_insert_tagobj.no_space_on_left)~~

~~) then~~

~~table~~.~~insert(cur_infl~~, ~~" "~~)

end

insert(inflections, {infl_text = cur_infl, labels = this_labels})

end

~~table.insert(cur_infl, to_insert)~~

end

if ~~#cur_infl > 0~~ then

local overall_labels, need_per_tag_set_labels

~~table.insert~~(~~inflections~~, ~~table~~.~~concat(cur_infl)~~)

for _, inflection in ipairs(inflections) do

if overall_labels == nil then

overall_labels = inflection.labels

elseif not deep_equals(overall_labels, inflection.labels) then

need_per_tag_set_labels = true

overall_labels = nil

break

end

~~local format_data = require~~(~~"Module:table"~~).~~shallowcopy(data)~~

if not need_per_tag_set_labels then

for _, inflection in ipairs(inflections) do

inflection.labels = nil

end

local format_data = shallow_copy(data)

local of_text = data.lemmas and " of" or ""

local formatted_text, this_categories

if #inflections == 1 then

format_data.text =

if need_per_tag_set_labels then

data.notext and "" or ((data.capfirst and ~~require("Module:string utilities").~~ucfirst(inflections[1]) or inflections[1]) ..

error("Internal error: need_per_tag_set_labels should not be set with one inflection")

~~(data.terminfo and " of" or ""~~))

end

~~return export.~~format_form_of(format_data)

format_data.text = format_labels(overall_labels, data, data.notext) .. (data.pretext or "") .. (data.notext and "" or

((data.capfirst and ucfirst(inflections[1].infl_text) or inflections[1].infl_text) .. of_text))

formatted_text, this_categories = format_form_of(format_data)

else

format_data.text = data.notext and "" or ((data.capfirst and "Inflection" or "inflection") ..

format_data.text = format_labels(overall_labels, data, data.notext) .. (data.pretext or "") .. (data.notext and "" or

~~(data.terminfo and " of" or ""~~))

((data.capfirst and "Inflection" or "inflection") .. of_text))

format_data.posttext = (data.posttext or "") .. ":"

local link = ~~export.~~format_form_of(format_data)

local link

local text_classes = data.text_classes ~~or "form-of-definition"~~

link, this_categories = format_form_of(format_data)

~~return link .."\n## " ..~~

local text_classes = data.text_classes

~~table.concat(inflections, "\n## ") .. ""~~

if text_classes == nil and not data.nowrap then

~~end~~

text_classes = "form-of-definition use-with-mention"

~~end~~

~~function export.to_Wikidata_IDs(tags, skip_tags_without_ids)~~

~~if type(tags) == "string" then~~

~~tags = mw.text.split(tags, "|", true)~~

~~end~~

~~local ret = {}~~

~~local function get_wikidata_id(tag)~~

~~if tag == ";"~~ and not ~~skip_tags_without_ids~~ then

~~error(~~"~~Semicolon is not supported for Wikidata IDs~~")

~~else~~

~~return nil~~

end

for i, inflection in ipairs(inflections) do

~~local data = export.lookup_tag~~(~~tag~~)

inflections[i] = "\n## " .. format_labels(inflection.labels, data, false) ..

wrap_in_span(inflection.infl_text, text_classes)

~~if not data or not data.wikidata then~~

~~if not skip_tags_without_ids then~~

~~error(~~"~~The tag~~ \"" .. ~~tag~~ .~~. "\" does not have a Wikidata ID defined in [[Module:form of/~~data~~]]"~~)

~~else~~

~~return nil~~

~~end~~

~~else~~

~~return data~~.~~wikidata~~

end

formatted_text = link .. concat(inflections)

end

if this_categories[1] then

extend(categories, this_categories)

end

~~for i, tag in ipairs(export~~.~~normalize_tags(tags)) do~~

if not data.no_format_categories then

if ~~type(tag) == "table"~~ then

if categories[1] then

~~local ids~~ = {}

formatted_text = formatted_text .. format_categories(categories, data.lang,

~~for _, onetag in ipairs(tag) do~~

data.sort, nil, export.force_cat)

~~table~~.~~insert~~(~~ids~~, ~~get_wikidata_id(onetag))~~

~~end~~

~~table~~.~~insert(ret~~, ~~ids)~~

~~else~~

~~table~~.~~insert(ret~~, ~~get_wikidata_id(tag)~~)

end

return formatted_text

end

return formatted_text, categories

return ~~ret~~

end

function export.dump_form_of_data(frame)

local data = {

~~data =~~ require(~~"Module:form of/data"~~),

require(form_of_data1_module),

~~data2 =~~ require(~~"Module:form of/data2"~~)

require(form_of_data2_module)

}

return require(~~"Module:JSON"~~).toJSON(data)

return require(json_module).toJSON(data)

end

export.form_of_cats_module = form_of_cats_module

export.form_of_data1_module = form_of_data1_module

export.form_of_data2_module = form_of_data2_module

export.form_of_functions_module = form_of_functions_module

export.form_of_lang_data_module_prefix = form_of_lang_data_module_prefix

export.headword_data_module = headword_data_module -- so all form-of modules stay in sync

return export

@@ Line 1: / Line 1: @@
-local m_links = require("Module:links")
+local export = {}
-local m_table = require("Module:table")
-local m_pos = mw.loadData("Module:form of/pos")
+export.force_cat = false -- for testing; set to true to display categories even on non-mainspace pages
-local m_functions = require("Module:form of/functions")
+local debug_track_module = "Module:debug/track"
+local etymology_module = "Module:etymology"
+local form_of_cats_module = "Module:form of/cats"
+local form_of_data_module = "Module:form of/data"
+local form_of_data1_module = "Module:form of/data/1"
+local form_of_data2_module = "Module:form of/data/2"
+local form_of_functions_module = "Module:form of/functions"
+local form_of_lang_data_module_prefix = "Module:form of/lang-data/"
+local function_module = "Module:fun"
+local headword_data_module = "Module:headword/data"
+local json_module = "Module:JSON"
+local labels_module = "Module:labels"
+local links_module = "Module:links"
+local load_module = "Module:load"
+local parse_utilities_module = "Module:parse utilities"
+local string_utilities_module = "Module:string utilities"
+local table_module = "Module:table"
+local table_deep_equals_module = "Module:table/deepEquals"
+local utilities_module = "Module:utilities"
+local anchor_encode = mw.uri.anchorEncode
+local concat = table.concat
+local dump = mw.dumpObject
+local fetch_categories_and_labels -- Defined below.
+local format_form_of -- Defined below.
+local get_tag_display_form -- Defined below.
+local get_tag_set_display_form -- Defined below.
+local insert = table.insert
+local ipairs = ipairs
+local is_link_or_html -- Defined below.
+local list_to_text = mw.text.listToText
+local lookup_shortcut -- Defined below.
+local lookup_tag -- Defined below.
+local normalize_tag_set -- Defined below.
+local parse_tag_set_properties -- Defined below.
+local require = require
+local sort = table.sort
+local split_tag_set -- Defined below.
+local type = type
+--[==[
+Loaders for functions in other modules, which overwrite themselves with the target function when called. This ensures modules are only loaded when needed, retains the speed/convenience of locally-declared pre-loaded functions, and has no overhead after the first call, since the target functions are called directly in any subsequent calls.]==]
+local function append(...)
+	append = require(table_module).append
+	return append(...)
+end
+local function contains(...)
+	contains = require(table_module).contains
+	return contains(...)
+end
+local function debug_track(...)
+	debug_track = require(debug_track_module)
+	return debug_track(...)
+end
+local function deep_copy(...)
+	deep_copy = require(table_module).deepCopy
+	return deep_copy(...)
+end
+local function deep_equals(...)
+	deep_equals = require(table_deep_equals_module)
+	return deep_equals(...)
+end
+local function extend(...)
+	extend = require(table_module).extend
+	return extend(...)
+end
+local function format_categories(...)
+	format_categories = require(utilities_module).format_categories
+	return format_categories(...)
+end
+local function full_link(...)
+	full_link = require(links_module).full_link
+	return full_link(...)
+end
+local function insert_if_not(...)
+	insert_if_not = require(table_module).insertIfNot
+	return insert_if_not(...)
+end
+local function is_subset_list(...)
+	is_subset_list = require(table_module).isSubsetList
+	return is_subset_list(...)
+end
+local function iterate_from(...)
+	iterate_from = require(function_module).iterateFrom
+	return iterate_from(...)
+end
+local function join_multiparts(...)
+	join_multiparts = require(form_of_functions_module).join_multiparts
+	return join_multiparts(...)
+end
+local function load_data(...)
+	load_data = require(load_module).load_data
+	return load_data(...)
+end
+local function parse_inline_modifiers(...)
+	parse_inline_modifiers = require(parse_utilities_module).parse_inline_modifiers
+	return parse_inline_modifiers(...)
+end
+local function remove_links(...)
+	remove_links = require(links_module).remove_links
+	return remove_links(...)
+end
+local function safe_load_data(...)
+	safe_load_data = require(load_module).safe_load_data
+	return safe_load_data(...)
+end
+local function safe_require(...)
+	safe_require = require(load_module).safe_require
+	return safe_require(...)
+end
+local function serial_comma_join(...)
+	serial_comma_join = require(table_module).serialCommaJoin
+	return serial_comma_join(...)
+end
+local function shallow_copy(...)
+	shallow_copy = require(table_module).shallowCopy
+	return shallow_copy(...)
+end
+local function show_labels(...)
+	show_labels = require(labels_module).show_labels
+	return show_labels(...)
+end
+local function slice(...)
+	slice = require(table_module).slice
+	return slice(...)
+end
+local function split(...)
+	split = require(string_utilities_module).split
+	return split(...)
+end
+local function ucfirst(...)
+	ucfirst = require(string_utilities_module).ucfirst
+	return ucfirst(...)
+end
+--[==[
+Loaders for objects, which load data (or some other object) into some variable, which can then be accessed as "foo or get_foo()", where the function get_foo sets the object to "foo" and then returns it. This ensures they are only loaded when needed, and avoids the need to check for the existence of the object each time, since once "foo" has been set, "get_foo" will not be called again.]==]
+local cat_functions
+local function get_cat_functions()
+	cat_functions, get_cat_functions = require(form_of_functions_module).cat_functions, nil
+	return cat_functions
+end
+local default_pagename
+local function get_default_pagename()
+	default_pagename, get_default_pagename = load_data(headword_data_module).pagename, nil
+	return default_pagename
+end
+local display_handlers
+local function get_display_handlers()
+	display_handlers, get_display_handlers = require(form_of_functions_module).display_handlers, nil
+	return display_handlers
+end
+local m_cats_data
+local function get_m_cats_data()
+	m_cats_data, get_m_cats_data = load_data(form_of_cats_module), nil
+	return m_cats_data
+end
-local ulen = mw.ustring.len
+local m_data
-local rsubn = mw.ustring.gsub
+local function get_m_data()
-local rmatch = mw.ustring.match
+	-- Needs require.
-local rsplit = mw.text.split
+	m_data, get_m_data = require(form_of_data_module), nil
+	return m_data
+end
-local export = {}
+local m_data1
+local function get_m_data1()
+	m_data1, get_m_data1 = load_data(form_of_data1_module), nil
+	return m_data1
+end
+local m_data2
+local function get_m_data2()
+	m_data2, get_m_data2 = load_data(form_of_data2_module), nil
+	return m_data2
+end
+local m_pos_data
+local function get_m_pos_data()
+	m_pos_data, get_m_pos_data = load_data(headword_data_module).pos_aliases, nil
+	return m_pos_data
+end
---[=[
+--[==[ intro:
-This module implements the underlying processing of {{form of}},
+This module implements the underlying processing of {{tl|form of}}, {{tl|inflection of}} and specific variants such as
-{{inflection of}} and specific variants such as {{past participle of}}
+{{tl|past participle of}} and {{tl|alternative spelling of}}. Most of the logic in this file is to handle tags in
-and {{alternative spelling of}}. Most of the logic in this file is to
+{{tl|inflection of}}. Other related files:
-handle tags in {{inflection of}}. Other related files:
-* [[Module:form of/templates]] contains the majority of the logic that
+* [[Module:form of/templates]] contains the majority of the logic that implements the templates themselves.
-  implements the templates themselves.
+* [[Module:form of/data/1]] is a data-only file containing information on the more common inflection tags, listing the
-* [[Module:form of/data]] is a data-only file containing information on
+  tags, their shortcuts, the category they belong to (tense-aspect, case, gender, voice-valence, etc.), the appropriate
-  the more common inflection tags, listing the tags, their shortcuts,
+  glossary link and the wikidata ID.
-  the category they belong to (tense-aspect, case, gender, voice-valence,
+* [[Module:form of/data/2]] is a data-only file containing information on the less common inflection tags, in the same
-  etc.), the appropriate glossary link and the wikidata ID.
+  format as [[Module:form of/data/1]].
-* [[Module:form of/data2]] is a data-only file containing information on
+* [[Module:form of/lang-data/LANGCODE]] is a data-only file containing information on the language-specific inflection
-  the less common inflection tags, in the same format as
+   tags for the language with code LANGCODE, in the same format as [[Module:form of/data/1]]. Language-specific tags
-  [[Module:form of/data]].
+   override general tags.
-* [[Module:form of/cats]] is a data-only file listing the
+* [[Module:form of/cats]] is a data-only file listing the language-specific categories that are added when the
-   language-specific categories that are added when the appropriate
+   appropriate combinations of tags are seen for a given language.
-   combinations of tags are seen for a given language.
+* [[Module:form of/functions]] contains functions for use with [[Module:form of/data/1]] and [[Module:form of/cats]].
-* [[Module:form of/pos]] is a data-only file listing the recognized
+  They are contained in this module because data-only modules can't contain code. The functions in this file are of two
-   parts of speech and their abbreviations, used for categorization.
+  types:
-  FIXME: This should be unified with the parts of speech listed in
+*# Display handlers allow for customization of the display of multipart tags (see below). Currently there is only
-  [[Module:links]].
+   one such handler, for handling multipart person tags such as `1//2//3`.
-* [[Module:form of/functions]] contains functions for use with
+*# Cat functions allow for more complex categorization logic, and are referred to by name in [[Module:form of/cats]].
-  [[Module:form of/data]] and [[Module:form of/cats]]. They are
+   Currently no such functions exist.
-  contained in this module because data-only modules can't contain
-  code. The functions in this file are of two types:
-  (1) Display handlers allow for customization of the display of
+The following terminology is used in conjunction with {{tl|inflection of}}:
-      multipart tags (see below). Currently there is only one
-	  such handler, for handling multipart person tags such as
-	  '1//2//3'.
-  (2) Cat functions allow for more complex categorization logic,
-      and are referred to by name in [[Module:form of/cats]].
-	  Currently no such functions exist.
-The following terminology is used in conjunction with {{inflection of}}:
+* A ''tag'' is a single grammatical item, as specified in a single numbered parameter of {{tl|inflection of}}. Examples
+  are `masculine`, `nominative`, or `first-person`. Tags may be abbreviated, e.g. `m` for `masculine`, `nom` for
+  `nominative`, or `1` for `first-person`. Such abbreviations are called ''aliases'', and some tags have multiple
+  equivalent aliases (e.g. `p` or `pl` for `plural`). The full, non-abbreviated form of a tag is called its
+  ''canonical form''.
+* The ''display form'' of a tag is the way it's displayed to the user. Usually the displayed text of the tag is the same
+  as its canonical form, and it normally functions as a link to a glossary entry explaining the tag. Usually the link is
+  to an entry in [[Appendix:Glossary]], but sometimes the tag is linked to an individual dictionary entry or to a
+  Wikipedia entry. Occasionally, the display text differs from the canonical form of the tag. An example is the tag
+  `comparative case`, which has the display text read as simply `comparative`. Normally, tags referring to cases don't
+  have the word "case" in them, but in this case the tag `comparative` was already used as an alias for the tag
+  `comparative degree`, so the tag was named `comparative case` to avoid clashing. A similar situation occurs with
+  `adverbial case` vs. the grammar tag `adverbial` (as in `adverbial participle`).
+* A ''tag set'' is an ordered list of tags, which together express a single inflection, for example, `1|s|pres|ind`,
+  which can be expanded to canonical-form tags as `first-person|singular|present|indicative`.
+* A ''conjoined tag set'' is a tag set that consists of multiple individual tag sets separated by a semicolon, e.g.
+  `1|s|pres|ind|;|2|s|imp`, which specifies two tag sets, `1|s|pres|ind` as above and `2|s|imp` (in canonical form,
+  `second-person|singular|imperative`). Multiple tag sets specified in a single call to {{tl|inflection of}} are
+  specified in this fashion. Conjoined tag sets can also occur in list-tag shortcuts.
+* A ''multipart tag'' is a tag that embeds multiple tags within it, such as `f//n` or `nom//acc//voc`. These are used in
+  the case of [[syncretism]], when the same form applies to multiple inflections. Examples are the Spanish present
+  subjunctive, where the first-person and third-person singular have the same form (e.g. {{m|es|siga}} from
+  {{m|es|seguir|t=to follow}}), or Latin third-declension adjectives, where the dative and ablative plural of all
+  genders have the same form (e.g. {{m|la|omnibus}} from {{m|la|omnis|t=all}}). These would be expressed respectively as
+  `1//3|s|pres|sub` and `dat//abl|m//f//n|p`, where the use of the multipart tag compactly encodes the syncretism and
+  avoids the need to individually list out all of the inflections. Multipart tags currently display as a list separated
+  by a slash, e.g. ''dative/ablative'' or ''masculine/feminine/neuter'', where each individual word is linked
+  appropriately. As a special case, multipart tags involving persons display specially; for example, the multipart tag
+  `1//2//3` displays as ''first-, second- and third-person'', with the word "person" occurring only once.
+* A ''two-level multipart tag'' is a special type of multipart tag that joins two or more tag sets instead of joining
+  individual tags. The tags within the tag set are joined by a colon, e.g. `1:s//3:p`, which is displayed as
+  ''first-person singular and third-person plural'', e.g. for use with the form {{m|grc|μέλλον}} of the verb
+  {{m|grc|μέλλω|t=to intend}}, which uses the tag set `1:s//3:p|impf|actv|indc|unaugmented` to express the syncretism
+  between the first singular and third plural forms of the imperfect active indicative unaugmented conjugation.
+  Two-level multipart tags should be used sparingly; if in doubt, list out the inflections separately. [FIXME: Make
+  two-level multipart tags obsolete.]
+* A ''shortcut'' is a tag that expands to any type of tag described above, or to any type of tag set described above.
+  Aliases are a particular type of shortcut whose expansion is a single non-multipart tag.
+* A ''multipart shortcut'' is a shortcut that expands into a multipart tag, for example `123`, which expands to the
+  multipart tag `1//2//3`. Only the most common such combinations exist as shortcuts.
+* A ''list shortcut'' is a special type of shortcut that expands to a list of tags instead of a single tag. For example,
+  the shortcut `1s` expands to `1|s` (first-person singular). Only the most common such combinations exist as shortcuts.
+* A ''conjoined shortcut'' is a special type of list shortcut that consists of a conjoined tag set (multiple logical tag
+  sets). For example, the English language-specific shortcut `ed-form` expands to `spast|;|past|part`, expressing the
+  common syncretism between simple past and past participle in English (and in this case, `spast` is itself a list
+  shortcut that expands to `simple|past`).]==]
-* A TAG is a single grammatical item, as specified in a single numbered
+-- Add tracking category for PAGE when called from {{inflection of}} or
-  parameter of {{inflection of}}. Examples are 'masculine', 'nominative',
+-- a similar template. The tracking category linked to is
-  or 'first-person'. Tags may be abbreviated, e.g. 'm' for 'masculine',
+-- [[Wiktionary:Tracking/inflection of/PAGE]].
-  'nom' for 'nominative', or '1' for 'first-person'. Such abbreviations
+local function track(page)
-  are called SHORTCUTS, and some tags have multiple equivalent shortcuts
+	debug_track("inflection of/" ..
-  (e.g. 'p' or 'pl' for 'plural'). The full, non-abbreviated form of
+		-- avoid including links in pages (may cause error)
-  a tag is called its CANONICAL FORM.
+		page:gsub("%[", "("):gsub("%]", ")"):gsub("|", "!")
-* The DISPLAY FORM of a tag is the way it's displayed to the user. Usually
+	)
-  the displayed text of the tag is the same as its canonical form, and it
+end
-  normally functions as a link to a glossary entry explaining the tag.
-  Usually the link is to an entry in [[Appendix:Glossary]], but sometimes
-  the tag is linked to an individual dictionary entry or to a Wikipedia
-  entry. Occasionally, the display text differs from the canonical form of
-  the tag. An example is the tag 'comparative case', which has the display
-  text read as simply 'comparative'. Normally, tags referring to cases don't
-  have the word "case" in them, but in this case the tag 'comparative' was
-  already used as a shortcut for the tag 'comparative degree', so the tag was
-  named 'comparative case' to avoid clashing. A similar situation occurs
-  with 'adverbial case' vs. the grammar tag 'adverbial' (as in 'adverbial
-  participle').
-* A TAG SET is an ordered list of tags, which together express a single
-  inflection, for example, '1|s|pres|ind', which can be expanded to
-  canonical-form tags as 'first-person|singular|present|indicative'.
-  Multiple tag sets can be specified in a single call to {{inflection of}}
-  by separating the individual tag sets with a semicolon, e.g.
-  '1|s|pres|ind|;|2|s|imp', which specifies two tag sets, '1|s|pres|ind'
-  as above and '2|s|imp' (in canonical form,
-  'second-person|singular|imperative').
-* A MULTIPART TAG is a tag that embeds multiple tags within it, such as
-  'f//n' or 'nom//acc//voc'. These are used in the case of [[syncretism]],
-  when the same form applies to multiple inflections. Examples are the
-  Spanish present subjunctive, where the first-person and third-person
-  singular have the same form (e.g. [[siga]] from [[seguir]] "to follow"),
-  or Latin third-declension adjectives, where the dative and ablative
-  plural of all genders have the same form (e.g. [[omnibus]] from [[omnis]]
-  "all"). These would be expressed respectively as '1//3|s|pres|sub'
-  and 'dat//abl|m//f//n|p', where the use of the multipart tag compactly
-  encodes the syncretism and avoids the need to individually list out
-  all of the inflections. Multipart tags currently display as a list
-  separated by "and", ''dative and ablative'' or
-  ''masculine, feminine and neuter'' where each individual word is linked
-  appropriately. As a special case, multipart tags involving persons display
-  specially; for example, the multipart tag ''1//2//3'' displays as
-  ''first-, second- and third-person'', with the word "person" occurring
-  only once.
-* A TWO-LEVEL MULTIPART TAG is a special type of multipart tag that
-  joins two or more tag sets instead of joining individual tags. The tags
-  within the tag set are joined by a colon, e.g. '1:s//3:p', which is
-  displayed as ''first-person singular and third-person plural'', e.g.
-  for use with the form [[μέλλον]] of the verb [[μέλλω]] "to intend",
-  which uses the tag set '1:s//3:p|impf|actv|indc|unaugmented' to express
-  the syncretism between the first singular and third plural forms of the
-  imperfect active indicative unaugmented conjugation. Two-level multipart
-  tags should be used sparingly; if in doubt, list out the inflections
-  separately.
-* A MULTIPART TAG SHORTCUT is a shortcut that expands into a multipart
-  tag, for example '123', which expands to the multipart tag '1//2//3'.
-  Only the most common such combinations exist as shortcuts.
-* A LIST TAG SHORTCUT is a special type of shortcut that expands to a list
-  of tags instead of a single tag. For example, the shortcut '1s' expands to
-  '1|s' (first-person singular). Only the most common such combinations
-  exist as shortcuts.
-]=]
+local function wrap_in_span(text, classes)
+	if classes then
+		return ("<span class='%s'>%s</span>"):format(classes, text)
+	else
+		return text
+	end
+end
--- version of rsubn() that discards all but the first return value
+local function show_linked_term(data)
-local function rsub(term, foo, bar)
+	local termobj, face, span_classes, ok_to_destructively_modify, overall_lang, text_classes =
-	local retval = rsubn(term, foo, bar)
+		data.termobj, data.face, data.span_classes, data.ok_to_destructively_modify, data.overall_lang,
-	return retval
+		data.text_classes
+	local need_to_copy, pretext_lang
+	local categories = {}
+	if overall_lang and overall_lang:getCode() ~= termobj.lang:getCode() then
+		local lang_display
+		lang_display, categories = require(etymology_module).insert_source_cat_get_display {
+			lang = data.overall_lang,
+			source = termobj.lang,
+		}
+		pretext_lang = wrap_in_span(lang_display .. " ", text_classes)
+	end
+	local need_to_show_qualifiers = termobj.q or termobj.qq or termobj.a or termobj.aa or termobj.l or termobj.ll or
+		termobj.refs
+	need_to_copy = not ok_to_destructively_modify and (pretext_lang or need_to_show_qualifiers)
+	if need_to_copy then
+		termobj = shallow_copy(termobj)
+	end
+	if pretext_lang then
+		termobj.pretext = pretext_lang
+	end
+	if need_to_show_qualifiers then
+		termobj.show_qualifiers = true
+	end
+	return wrap_in_span(full_link(termobj, face), span_classes), categories
 end
+--[==[
+Lowest-level implementation of form-of templates, including the general {{tl|form of}} as well as those that deal with
+inflection tags, such as the general {{tl|inflection of}}, semi-specific variants such as {{tl|participle of}}, and
+specific variants such as {{tl|past participle of}}. `data` contains all the information controlling the display, with
+the following fields:
+* `.lang`: Overall language of the form-of template. If specified, any lemmas, enclitics or base lemmas that are of a
+   different language will have that language displayed before the term in question.
+* `.text`: Text to insert before the lemmas. Wrapped in the value of `.text_classes`, or its default; see below.
+* `.lemmas`: List of objects describing the lemma(s) of which the term in question is a non-lemma form. These are passed
+   directly to {full_link()} in [[Module:links]]. Each object should have at minimum a `.lang` field containing the
+   language of the lemma and a `.term` field containing the lemma itself. Each object is formatted using {full_link()}
+   and then if there are more than one, they are joined using {serialCommaJoin()} in [[Module:table]]. Alternatively,
+   `.lemmas` can be a string, which is displayed directly, or omitted, to show no lemma links and omit the connecting
+   text.
+* `.lemma_face`: "Face" to use when displaying the lemma objects. Usually should be set to {"term"}.
+* `.conj`: Conjunction or separator to use when joining multiple lemma objects. If {nil}, defaults to {"and"}. If this
+   has the value {false}, the lemmas are preceded with the `.separator` field in each lemma.
+* `.enclitics`: List of enclitics to display after the lemmas, in parens.
+* `.enclitic_conj`: Conjunction or separator to use when joining multiple enclitics. Defaults to {"and"}.
+* `.base_lemmas`: List of base lemmas to display after the lemmas, in the case where the lemmas in `.lemmas` are
+   themselves forms of another lemma (the base lemma), e.g. a comparative, superlative or participle. Each object is of
+   the form { { paramobj = PARAM_OBJ, lemmas = {LEMMA_OBJ, LEMMA_OBJ, ...}, conj = "CONJ" }} where PARAM_OBJ describes
+   the properties of the base lemma parameter (i.e. the relationship between the intermediate and base lemmas);
+   LEMMA_OBJ is an object suitable to be passed to {full_link()} in [[Module:links]]; and CONJ is the conjunction to
+   join multiple lemmas with, defaulting to {"and"}. PARAM_OBJ is of the format
+   { { param = "PARAM", tags = {"TAG", "TAG", ...} }} where PARAM is the name of the parameter to {{tl|inflection of}}
+   etc. that holds the base lemma(s) of the specified relationship and the tags describe the relationship, such as
+   { {"comd"}} or { {"past", "part"}}.
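+* `.text_classes`: CSS classes used to wrap the tag text and lemma links. Default is
+   {"form-of-definition use-with-mention"}. Use `false` for no wrapping; the default is likewise not applied (so
+   there is no wrapping) if `.text_classes` is omitted and `.nowrap` is set.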
+* `.lemma_classes`: Additional CSS classes used to wrap the lemma links. Default is {"form-of-definition-link"}.
+   Use `false` for no wrapping.
+* `.posttext`: Additional text to display after the lemma links.
+* `.ok_to_destructively_modify`: If set, data structures (including the nested lemma structures) can be modified
+  in-place to save memory; otherwise they will be copied before modifying.
+Returns two values, the formatted string and any categories to add the page to (which will arise if `.lang` is
+specified and a language other than `.lang` is given in one of the lemmas in `.lemmas` or enclitics in `.enclitics`).
+]==]
 function export.format_form_of(data)
 	if type(data) ~= "table" then
-		error("First argument must now be a table of arguments")
+		error("Internal error: First argument must now be a table of arguments")
+	end
+	local text_classes = data.text_classes
+	if text_classes == nil and not data.nowrap then
+		text_classes = "form-of-definition use-with-mention"
+	end
+	local lemma_classes = data.lemma_classes
+	if lemma_classes == nil then
+		lemma_classes = "form-of-definition-link"
 	end
-	local text_classes = data.text_classes or "form-of-definition"
-	local terminfo_classes = data.text_classes or "form-of-definition-link"
 	local parts = {}
-	table.insert(parts, "<span class='" .. text_classes .. "'  style='font-weight: bold;'>")
+	if text_classes then
-	table.insert(parts, data.text)
+		insert(parts, "<span class='" .. text_classes .. "'>")
-	if data.text ~= "" and data.terminfo then
+	end
-		table.insert(parts, " ")
+	insert(parts, data.text)
+	if data.text ~= "" and data.lemmas then
+		insert(parts, " ")
 	end
-	if data.terminfo then
+	local categories = {}
-		table.insert(parts, "<span class='" .. terminfo_classes .. "'>")
+	if data.lemmas then
-		if type(data.terminfo) == "string" then
+		if type(data.lemmas) == "string" then
-			table.insert(parts, data.terminfo)
+			insert(parts, wrap_in_span(data.lemmas, lemma_classes))
 		else
-			table.insert(parts, m_links.full_link(data.terminfo, data.terminfo_face, false))
+			local formatted_terms = {}
+			for _, lemma in ipairs(data.lemmas) do
+				local linked_term, this_categories = show_linked_term {
+					termobj = lemma,
+					face = data.lemma_face,
+					span_classes = lemma_classes,
+					ok_to_destructively_modify = data.ok_to_destructively_modify,
+					overall_lang = data.lang,
+					text_classes = text_classes
+				}
+				if this_categories[1] then
+					extend(categories, this_categories)
+				end
+				if data.conj == false and lemma.separator then
+					insert(formatted_terms, lemma.separator)
+				end
+				insert(formatted_terms, linked_term)
+			end
+			if data.conj == false then
+				insert(parts, concat(formatted_terms))
+			else
+				insert(parts, serial_comma_join(formatted_terms, {conj = data.conj or "and"}))
+			end
+		end
+		if data.lit then
+			insert(parts, ", literally " .. require(links_module).mark(data.lit, "gloss"))
 		end
-		table.insert(parts, "</span>")
 	end
+	if data.enclitics and #data.enclitics > 0 then
+		-- The outer parens need to be outside of the text_classes span so they show in upright instead of italic, or
+		-- they will clash with upright parens generated by link annotations such as transliterations and pos=.
+		if text_classes then
+			insert(parts, "</span>")
+		end
+		local formatted_terms = {}
+		for _, enclitic in ipairs(data.enclitics) do
+			-- FIXME, should we have separate clitic face and/or classes?
+			local linked_term, this_categories = show_linked_term {
+				termobj = enclitic,
+				face = data.lemma_face,
+				span_classes = lemma_classes,
+				ok_to_destructively_modify = data.ok_to_destructively_modify,
+				overall_lang = data.lang,
+				text_classes = text_classes
+			}
+			if this_categories[1] then
+				extend(categories, this_categories)
+			end
+			insert(formatted_terms, linked_term)
+		end
+		insert(parts, " (")
+		insert(parts, wrap_in_span("with enclitic" .. (#data.enclitics > 1 and "s" or "") .. " ", text_classes))
+		insert(parts, serial_comma_join(formatted_terms, {conj = data.enclitic_conj or "and"}))
+		insert(parts, ")")
+		if text_classes then
+			insert(parts, "<span class='" .. text_classes .. "'>")
+		end
+	end
+	if data.base_lemmas and #data.base_lemmas > 0 then
+		for _, base_lemma in ipairs(data.base_lemmas) do
+			insert(parts, ", the ")
+			if text_classes then
+				insert(parts, "</span>")
+			end
+			insert(parts, (export.tagged_inflections {
+				lang = data.lang or base_lemma.lemmas[1].lang,
+				tags = base_lemma.paramobj.tags,
+				lemmas = base_lemma.lemmas,
+				conj = base_lemma.conj or "and",
+				lemma_face = data.lemma_face,
+				no_format_categories = true,
+				nocat = true,
+				text_classes = data.text_classes,
+				ok_to_destructively_modify = ok_to_destructively_modify,
+			}))
+			if text_classes then
+				insert(parts, "<span class='" .. text_classes .. "'>")
+			end
+		end
+	end
+	-- FIXME, should posttext go before enclitics? If so we need to have separate handling for the
+	-- final colon when there are multiple tag sets in tagged_inflections().
 	if data.posttext then
-		table.insert(parts, data.posttext)
+		insert(parts, data.posttext)
+	end
+	if text_classes then
+		insert(parts, "</span>")
 	end
-	table.insert(parts, "</span>")
+	return concat(parts), categories
-	return table.concat(parts)
+end
+format_form_of = export.format_form_of
+--[==[
+Return a truthy value if `tag` contains an internal link (detected by `[[` or `|`) or HTML (detected by `<`).]==]
+function export.is_link_or_html(tag)
+	return tag:find("[[", nil, true) or tag:find("|", nil, true) or tag:find("<", nil, true)
 end
+is_link_or_html = export.is_link_or_html
+--[==[
+Look up a tag (either a shortcut of any sort or a canonical long-form tag) and return its expansion. The expansion
+will be a string unless the shortcut is a list-tag shortcut such as `1s`; in that case, the expansion will be a
+list. The caller must handle both cases. Only one level of expansion happens; hence, `acc` expands to {"accusative"},
+`1s` expands to { {"1", "s"}} (not to { {"first", "singular"}}) and `123` expands to {"1//2//3"}. The expansion will be
+the same as the passed-in tag in the following circumstances:
-local function is_link_or_html(tag)
+# The tag is `;` (this is special-cased, and no lookup is done).
-	return tag:find("[[", nil, true) or tag:find("|", nil, true) or
+# The tag is a multipart tag such as `nom//acc` (this is special-cased, and no lookup is done).
-		tag:find("<", nil, true)
+# The tag contains a raw link (this is special-cased, and no lookup is done).
-end
+# The tag contains HTML (this is special-cased, and no lookup is done).
+# The tag is already a canonical long-form tag.
+# The tag is unrecognized.
+This function first looks up in the lang-specific data module [[Module:form of/lang-data/LANGCODE]], then in
+[[Module:form of/data/1]] (which includes more common non-lang-specific tags) and finally (only if the tag is not
+recognized as a shortcut or canonical tag, and is not of types 1-4 above) in [[Module:form of/data/2]].
--- Look up a tag (either a shortcut of any sort of a canonical long-form tag)
+If the expansion is a string and is different from the tag, track it if `do_track` is true.]==]
--- and return its expansion. The expansion will be a string unless the
+function export.lookup_shortcut(tag, lang, do_track)
--- shortcut is a list-tag shortcut such as "1s"; in that case, the expansion
--- will be a list. The caller must handle both cases. Only one level of
--- expansion happens; hence, "acc" expands to "accusative", "1s" expands to
--- {"1", "s"} (not to {"first", "singular"}) and "123" expands to "1//2//3".
--- The expansion will be the same as the passed-in tag in the following
--- circumstances:
---
--- 1. The tag is ";" (this is special-cased, and no lookup is done).
--- 2. The tag is a multipart tag such as "nom//acc" (this is special-cased,
---    and no lookup is done).
--- 3. The tag contains a raw link (this is special-cased, and no lookup is
---    done).
--- 4. The tag contains HTML (this is special-cased, and no lookup is done).
--- 5. The tag is already a canonical long-form tag.
--- 6. The tag is unrecognized.
---
--- This function first looks up in [[Module:form of/data]] (which includes
--- more common tags) and then (only if the tag is not recognized as a
--- shortcut or canonical tag, and is not of types 1-4 above) in
--- [[Module:form of/data2]].
---
--- If the expansion is a string and is different from the tag, track it if
--- DO_TRACK is true.
-function export.lookup_shortcut(tag)
 	-- If there is HTML or a link in the tag, return it directly; don't try
 	-- to look it up, which will fail.
@@ Line 188: / Line 521: @@
 		return tag
 	end
-	local m_data = mw.loadData("Module:form of/data")
+	local expansion
-	-- If this is a canonical long-form tag, just return it, and don't
+	while lang do
-	-- check for shortcuts (which will cause [[Module:form of/data2]] to be
+		local langdata = safe_load_data(form_of_lang_data_module_prefix .. lang:getCode())
-	-- loaded).
+		-- If this is a canonical long-form tag, just return it, and don't check for shortcuts. This is an
-	if m_data.tags[tag] then
+		-- optimization; see below.
-		return tag
+		if langdata then
+			if langdata.tags[tag] then
+				return tag
+			end
+			expansion = langdata.shortcuts[tag]
+			if expansion then
+				break
+			end
+		end
+		-- If the language has a parent (i.e. a superordinate variety), try again with that.
+		lang = lang:getParent()
+	end
+	if not expansion then
+		-- If this is a canonical long-form tag, just return it, and don't check for shortcuts (which will cause
+		-- [[Module:form of/data/2]] to be loaded, because there won't be a shortcut entry in [[Module:form of/data/1]] --
+		-- or, for that matter, in [[Module:form of/data/2]]). This is an optimization; the code will still work without
+		-- it, but will use up more memory.
+		if (m_data1 or get_m_data1()).tags[tag] then
+			return tag
+		end
+		expansion = m_data1.shortcuts[tag]
 	end
-	local expansion = m_data.shortcuts[tag]
 	if not expansion then
-		local m_data2 = mw.loadData("Module:form of/data2")
+		expansion = (m_data2 or get_m_data2()).shortcuts[tag]
-		expansion = m_data2.shortcuts[tag]
 	end
 	if not expansion then
 		return tag
+	end
+	-- Maybe track the expansion if it's not the same as the raw tag.
+	if do_track and expansion ~= tag and type(expansion) == "string" then
+		track("tag/" .. tag)
 	end
 	return expansion
 end
+lookup_shortcut = export.lookup_shortcut
+--[==[
--- Look up a normalized/canonicalized tag and return the data object
+Look up a normalized/canonicalized tag and return the data object associated with it. If the tag isn't found, return
--- associated with it. If the tag isn't found, return nil. This first looks up
+nil. This first looks up in the lang-specific data module [[Module:form of/lang-data/LANGCODE]], then in
--- in [[Module:form of/data]] (which includes more common tags) and then in
+[[Module:form of/data/1]] (which includes more common non-lang-specific tags) and then finally in
--- [[Module:form of/data2]].
+[[Module:form of/data/2]].]==]
-function export.lookup_tag(tag)
+function export.lookup_tag(tag, lang)
-	local m_data = mw.loadData("Module:form of/data")
+	while lang do
-	local tagobj = m_data.tags[tag]
+		local langdata = safe_load_data(form_of_lang_data_module_prefix .. lang:getCode())
+		local tag = langdata and langdata.tags[tag]
+		if tag then
+			return tag
+		end
+		-- If the language has a parent (i.e. a superordinate variety), try again with that.
+		lang = lang:getParent()
+	end
+	local tagobj = (m_data1 or get_m_data1()).tags[tag]
 	if tagobj then
 		return tagobj
 	end
-	local m_data2 = mw.loadData("Module:form of/data2")
+	local tagobj2 = (m_data2 or get_m_data2()).tags[tag]
-	local tagobj2 = m_data2.tags[tag]
 	if tagobj2 then
 		return tagobj2
@@ Line 224: / Line 587: @@
 	return nil
 end
+lookup_tag = export.lookup_tag
+-- Normalize a single tag, which may be a shortcut but should not be a multipart tag, a multipart shortcut or a list
--- Normalize a single tag, which may be a shortcut but should not be a
+-- shortcut.
--- multipart tag, a multipart-tag shortcut or a list-tag shortcut.
+local function normalize_single_tag(tag, lang, do_track)
-local function normalize_single_tag(tag)
+	local expansion = lookup_shortcut(tag, lang, do_track)
-	local expansion = export.lookup_shortcut(tag)
 	if type(expansion) ~= "string" then
-		error("Tag '" .. tag .. "' is a list-tag shortcut, which is not allowed here")
+		error("Tag '" .. tag .. "' is a list shortcut, which is not allowed here")
 	end
 	tag = expansion
+	if not lookup_tag(tag, lang) and do_track then
+		-- If after all expansions and normalizations we don't recognize the canonical tag, track it.
+		track("unknown")
+		track("unknown/" .. tag)
+	end
 	return tag
 end
+--[=[
--- Normalize a component of a multipart tag. This should not have any // in it,
+Normalize a component of a multipart tag. This should not have any // in it, but may join multiple individual tags with
--- but may join multiple individual tags with a colon, and may be a single
+a colon, and may be a single list-tag shortcut, which is treated as if colon-separated. The return value may be a list
--- list-tag shortcut, which is treates as if colon-separated. If
+of tags.
--- RECOMBINE_TAGS isn't given, the return value may be a list of tags;
+]=]
--- otherwise, it will always be a string, and multiple tags will be
+local function normalize_multipart_component(tag, lang, do_track)
--- represented as canonical-form tags joined by ":".
+	-- If there is HTML or a link in the tag, don't try to split on colon. A colon may legitimately occur in either one,
-local function normalize_multipart_component(tag, recombine_tags)
+	-- and we don't want these things parsed. Note that we don't do this check before splitting on //, which we don't
-	-- If there is HTML or a link in the tag, don't try to split on colon.
+	-- expect to occur in links or HTML; see comment in normalize_tag().
-	-- A colon may legitimately occur in either one, and we don't want
-	-- these things parsed. Note that we don't do this check before splitting
-	-- on //, which we don't expect to occur in links or HTML; see comment
-	-- in normalize_tag().
 	if is_link_or_html(tag) then
 		return tag
 	end
-	local components = rsplit(tag, ":", true)
+	local components = split(tag, ":", true)
 	if #components == 1 then
 		-- We allow list-tag shortcuts inside of multipart tags, e.g.
 		-- '1s//3p'. Check for this now.
-		tag = export.lookup_shortcut(tag)
+		tag = lookup_shortcut(tag, lang, do_track)
 		if type(tag) == "table" then
+			-- Temporary tracking as we will disallow this.
+			track("list-tag-inside-of-multipart")
 			-- We found a list-tag shortcut; treat as if colon-separated.
 			components = tag
 		else
-			return normalize_single_tag(tag)
+			return normalize_single_tag(tag, lang, do_track)
 		end
 	end
 	local normtags = {}
+	-- Temporary tracking as we will disallow this.
+	track("two-level-multipart")
 	for _, component in ipairs(components) do
-		table.insert(normtags, normalize_single_tag(component))
+		if do_track then
+			-- There are multiple components; track each of the individual
+			-- raw tags.
+			track("tag/" .. component)
+		end
+		insert(normtags, normalize_single_tag(component, lang, do_track))
 	end
-	if recombine_tags then
+	return normtags
-		return table.concat(normtags, ":")
-	else
-		return normtags
-	end
 end
+--[=[
--- Normalize a single tag. If RECOMBINE_TAGS isn't given, the return value
+Normalize a single tag. The return value may be a list (in the case of multipart tags), which will contain nested lists
--- may be a list (in the case of multipart tags), which will contain nested
+in the case of two-level multipart tags.
--- lists in the case of two-level multipart tags; otherwise, it will always
+]=]
--- be a string, and multipart tags will be represented as canonical-form tags
+local function normalize_tag(tag, lang, do_track)
--- joined by "//" and/or ":".
+	-- We don't check for links or HTML before splitting on //, which we don't expect to occur in links or HTML. Doing
-local function normalize_tag(tag, recombine_multitags)
+	-- it this way allows for a tag like '{{lb|grc|Epic}}//{{lb|grc|Ionic}}' to function correctly (the template calls
-	-- We don't check for links or HTML before splitting on //, which we
+	-- will be expanded before we process the tag, and will contain links and HTML). The only check we do is for a URL,
-	-- don't expect to occur in links or HTML. Doing it this way allows for
+	-- which shouldn't normally occur, but might if the user tries to put an external link into the tag. URLs with //
-	-- a tag like '{{lb|grc|Epic}}//{{lb|grc|Ionic}}' to function correctly
+	-- normally have the sequence ://, which should never normally occur when // and : are used in their normal ways.
-	-- (the template calls will be expanded before we process the tag, and
-	-- will contain links and HTML). The only check we do is for a URL,
-	-- which shouldn't normally occur, but might if the user tries to put
-	-- an external link into the tag. URL's with // normally have the
-	-- sequence ://, which should never normally occur when // and : are
-	-- used in their normal ways.
 	if tag:find("://", nil, true) then
 		return tag
 	end
-	local split_tags = rsplit(tag, "//", true)
+	local split_tags = split(tag, "//", true)
 	if #split_tags == 1 then
-		local retval = normalize_multipart_component(tag, recombine_multitags)
+		local retval = normalize_multipart_component(tag, lang, do_track)
 		if type(retval) == "table" then
-			-- The user gave a tag like '1:s', i.e. with colon but without
+			-- The user gave a tag like '1:s', i.e. with colon but without //. Allow this, but we need to return a
-			-- //. Allow this, but we need to return a nested list. Note,
+			-- nested list.
-			-- this will never happen when RECOMBINE_TAGS is given.
 			return {retval}
 		end
@@ Line 310: / Line 671: @@
 	local normtags = {}
 	for _, single_tag in ipairs(split_tags) do
-		table.insert(normtags, normalize_multipart_component(single_tag,
+		if do_track then
-			recombine_multitags))
+			-- If the tag was a multipart tag, track each of the individual raw tags.
-	end
+			track("tag/" .. single_tag)
-	if recombine_multitags then
+		end
-		return table.concat(normtags, "//")
+		insert(normtags, normalize_multipart_component(single_tag, lang, do_track))
-	else
-		return normtags
 	end
+	return normtags
 end
+--[==[
+Normalize a tag set (a list of tags) into its canonical-form tags. The return value is a list of normalized tag sets
+(a list because there may be conjoined shortcuts among the input tags). A normalized tag set is a list of tag
+elements, where each element is either a string (the canonical form of a tag), a list of such strings (in the case of
+multipart tags) or a list of lists of such strings (in the case of two-level multipart tags). For example, the multipart
+tag `nom//acc//voc` will be represented in canonical form as { {"nominative", "accusative", "vocative"}}, and the
+two-level multipart tag `1:s//3:p` will be represented as { {{"first-person", "singular"}, {"third-person", "plural"}}}.
--- Normalize a tag set (a list of tags) into a list of canonical-form tags
+Example 1:
--- (which -- may be larger due to the possibility of list-tag shortcuts).
--- If RECOMBINE_TAGS isn't given, the return list may itself contains lists;
+{normalize_tag_set({"nom//acc//voc", "n", "p"})} = { {{{"nominative", "accusative", "vocative"}, "neuter", "plural"}}}
--- in particular, multipart tags will be represented as lists. Specifically,
--- the list will consist of the elements of the multipart tag, which will
+Example 2:
--- either be canonical-form strings or (in the case of two-level multipart
--- tags) nested lists of canonical-form strings. For example, the multipart
+{normalize_tag_set({"ed-form"}, ENGLISH)} = { {{"simple", "past"}, {"past", "participle"}}}
--- tag ''nom//acc//voc'' will expand to
---   {"nominative", "accusative", "vocative"}
+Example 3:
--- and the two-level multipart tag ''1:s//3:p'' will expand to
---   {{"first-person", "singular"}, {"third-person", "plural"}}.
+{normalize_tag_set({"archaic", "ed-form"}, ENGLISH)} = { {{"archaic", "simple", "past"}, {"archaic", "past", "participle"}}}]==]
--- If RECOMBINE_TAGS is given, multipart tags will be represented in string
+function export.normalize_tag_set(tag_set, lang, do_track)
--- form, i.e. as canonical-form tags joined by "//" and/or ":".
+	-- We track usage of shortcuts, normalized forms and (in the case of multipart tags or list tags) intermediate
-function export.normalize_tags(tags, recombine_multitags)
+	-- forms. For example, if the tags 1s|mn|gen|indefinite are passed in, we track the following:
-	local ntags = {}
+	-- [[Wiktionary:Tracking/inflection of/tag/1s]]
-	for _, tag in ipairs(tags) do
+	-- [[Wiktionary:Tracking/inflection of/tag/1]]
-		-- Expand the tag, which may generate a new tag (either a
+	-- [[Wiktionary:Tracking/inflection of/tag/s]]
-		-- fully canonicalized tag, a multipart tag, or a list of tags).
+	-- [[Wiktionary:Tracking/inflection of/tag/first-person]]
-		tag = export.lookup_shortcut(tag)
+	-- [[Wiktionary:Tracking/inflection of/tag/singular]]
+	-- [[Wiktionary:Tracking/inflection of/tag/mn]]
+	-- [[Wiktionary:Tracking/inflection of/tag/m//n]]
+	-- [[Wiktionary:Tracking/inflection of/tag/m]]
+	-- [[Wiktionary:Tracking/inflection of/tag/n]]
+	-- [[Wiktionary:Tracking/inflection of/tag/masculine]]
+	-- [[Wiktionary:Tracking/inflection of/tag/neuter]]
+	-- [[Wiktionary:Tracking/inflection of/tag/gen]]
+	-- [[Wiktionary:Tracking/inflection of/tag/genitive]]
+	-- [[Wiktionary:Tracking/inflection of/tag/indefinite]]
+	local output_tag_set = {}
+	local saw_semicolon = false
+	for _, tag in ipairs(tag_set) do
+		if do_track then
+			-- Track the raw tag.
+			track("tag/" .. tag)
+		end
+		-- Expand the tag, which may generate a new tag (either a fully canonicalized tag, a multipart tag, or a list
+		-- of tags).
+		tag = lookup_shortcut(tag, lang, do_track)
 		if type(tag) == "table" then
+			if contains(tag, ";") then
+				-- If we saw a conjoined shortcut, we need to use a more general algorithm that can expand a single
+				-- tag set into multiple.
+				saw_semicolon = true
+				break
+			end
 			for _, t in ipairs(tag) do
-				table.insert(ntags, normalize_tag(t, recombine_multitags))
+				if do_track then
+					-- If the tag expands to a list of raw tags, track each of those.
+					track("tag/" .. t)
+				end
+				insert(output_tag_set, normalize_tag(t, lang, do_track))
 			end
 		else
-			table.insert(ntags, normalize_tag(tag, recombine_multitags))
+			insert(output_tag_set, normalize_tag(tag, lang, do_track))
 		end
 	end
-	return ntags
-end
+	if not saw_semicolon then
+		return {output_tag_set}
+	end
--- Split a tag set containing two-level multipart tags into one or more tag sets not containing such tags.
+	-- Use a more general algorithm that handles conjoined shortcuts.
--- Single-level multipart tags are left alone. (If we need to, a slight modification of the following code
+	output_tag_set = {}
--- will also split single-level multipart tags.) This assumes that multipart tags are represented as lists
--- and two-level multipart tags are represented as lists of lists, as is output by normalize_tags().
--- NOTE: We have to be careful to properly handle imbalanced two-level multipart tags such as
--- <code>def:s//p</code> (or the reverse, <code>s//def:p</code>).
-function export.split_two_level_multipart_tag_set(tag_set)
-	-- This would be a whole lot easier in Python, with built-in support for
-	-- slicing and array concatenation.
 	for i, tag in ipairs(tag_set) do
+		if do_track then
+			-- Track the raw tag.
+			track("tag/" .. tag)
+		end
+		-- Expand the tag, which may generate a new tag (either a fully canonicalized tag, a multipart tag, or a list
+		-- of tags).
+		tag = lookup_shortcut(tag, lang, do_track)
 		if type(tag) == "table" then
-			-- We saw a multipart tag. Check if any of the parts are two-level.
+			local output_tag_sets = {}
-			local saw_two_level_tag = false
+			local shortcut_tag_sets = split_tag_set(tag)
-			for _, first_level_tag in ipairs(tag) do
+			local normalized_shortcut_tag_sets = {}
-				if type(first_level_tag) == "table" then
+			for _, shortcut_tag_set in ipairs(shortcut_tag_sets) do
-					saw_two_level_tag = true
+				extend(normalized_shortcut_tag_sets,
-					break
+					normalize_tag_set(shortcut_tag_set, lang, do_track))
-				end
 			end
-			if saw_two_level_tag then
+			local after_tags = slice(tag_set, i + 1)
-				-- We found a two-level multipart tag.
+			local normalized_after_tags_sets = normalize_tag_set(after_tags, lang, do_track)
-				-- (1) Extract the preceding tags.
+			for _, normalized_shortcut_tag_set in ipairs(normalized_shortcut_tag_sets) do
-				local pre_tags = {}
+				for _, normalized_after_tags_set in ipairs(normalized_after_tags_sets) do
-				for j=1,i-1 do
+					insert(output_tag_sets, append(output_tag_set, normalized_shortcut_tag_set,
-					table.insert(pre_tags, tag_set[j])
+						normalized_after_tags_set))
 				end
-				-- (2) Extract the following tags.
-				local post_tags = {}
-				for j=i+1,#tag_set do
-					table.insert(post_tags, tag_set[j])
-				end
-				-- (3) Loop over each tag set alternant in the two-level multipart tag.
-				-- For each alternant, form the tag set consisting of pre_tags + alternant + post_tags,
-				-- and recursively split that tag set.
-				local resulting_tag_sets = {}
-				for _, first_level_tag_set in ipairs(tag) do
-					local expanded_tag_set = {}
-					for _, pre_tag in ipairs(pre_tags) do
-						table.insert(expanded_tag_set, pre_tag)
-					end
-					-- The second level may have a string or a list.
-					if type(first_level_tag_set) == "table" then
-						for _, second_level_tag in ipairs(first_level_tag_set) do
-							table.insert(expanded_tag_set, second_level_tag)
-						end
-					else
-						table.insert(expanded_tag_set, first_level_tag_set)
-					end
-					for _, post_tag in ipairs(post_tags) do
-						table.insert(expanded_tag_set, post_tag)
-					end
-					for _, split_tag_set in ipairs(export.split_two_level_multipart_tag_set(expanded_tag_set)) do
-						table.insert(resulting_tag_sets, split_tag_set)
-					end
-				end
-				return resulting_tag_sets
 			end
+			return output_tag_sets
+		else
+			insert(output_tag_set, normalize_tag(tag, lang, do_track))
 		end
 	end
-	return {tag_set}
+	error("Internal error: Should not get here")
 end
+normalize_tag_set = export.normalize_tag_set
+--[==[
--- Given a list of tags, split into tag sets (separated by semicolons in the initial list of tags).
+Split a tag set that may consist of multiple semicolon-separated tag sets into the component tag sets.]==]
-function export.split_tags_into_tag_sets(tags)
+function export.split_tag_set(tag_set)
-	local tag_set_group = {}
+	local split_tag_sets = {}
 	local cur_tag_set = {}
-	for _, tag in ipairs(tags) do
+	for _, tag in ipairs(tag_set) do
 		if tag == ";" then
 			if #cur_tag_set > 0 then
-				table.insert(tag_set_group, cur_tag_set)
+				insert(split_tag_sets, cur_tag_set)
 			end
 			cur_tag_set = {}
 		else
-			table.insert(cur_tag_set, tag)
+			insert(cur_tag_set, tag)
 		end
 	end
 	if #cur_tag_set > 0 then
-		table.insert(tag_set_group, cur_tag_set)
+		insert(split_tag_sets, cur_tag_set)
 	end
-	return tag_set_group
+	return split_tag_sets
 end
+split_tag_set = export.split_tag_set
+local tag_set_param_mods = {
+	lb = {
+		item_dest = "labels",
+		convert = function(arg, parse_err)
+			return split(arg, "//", true)
+		end,
+	}
+}
--- Given a list of tags, split into tag sets (separated by semicolons in the initial list of tags).
+--[==[
--- Then, potentially split each tag set into multiple tag sets if there are any two-level multipart
+Parse tag set properties from a tag set (list of tags). Currently no per-tag properties are recognized, and the only
--- tags in those tag sets.
+per-tag-set property recognized is `<lb:...>` for specifying label(s) for the tag set. Per-tag-set properties must be
-function export.split_tags_into_tag_sets_and_expand_two_level_multipart_tags(tags)
+attached to the last tag.]==]
-	-- First, split into tag sets.
+function export.parse_tag_set_properties(tag_set)
-	local tag_sets = export.split_tags_into_tag_sets(tags)
+	local function generate_tag_set_obj(last_tag)
-	-- Now split any two-level multipart tags.
+		tag_set[#tag_set] = last_tag
-	local resulting_tag_sets = {}
+		return {tags = tag_set}
-	for _, tag_set in ipairs(tag_sets) do
+	end
-		for _, resulting_tag_set in ipairs(export.split_two_level_multipart_tag_set(tag_set)) do
+	local last_tag = tag_set[#tag_set]
-			table.insert(resulting_tag_sets, resulting_tag_set)
+	-- Check for inline modifier, e.g. מרים<tr:Miryem>. But exclude HTML entry with <span ...>, <i ...>, <br/> or
-		end
+	-- similar in it, caused by wrapping an argument in {{l|...}}, {{af|...}} or similar. Basically, all tags of
+	-- the sort we parse here should consist of a less-than sign, plus letters, plus a colon, e.g. <lb:...>, so if
+	-- we see a tag on the outer level that isn't in this format, we don't try to parse it. The restriction to the
+	-- outer level is to allow generated HTML inside of e.g. qualifier tags, such as foo<q:similar to {{m|fr|bar}}>.
+	if last_tag:find("<", nil, true) and not last_tag:find("^[^<]*<%l*[^%l:]") then
+		return parse_inline_modifiers(last_tag, {
+			param_mods = tag_set_param_mods,
+			generate_obj = generate_tag_set_obj,
+		})
+	else
+		return generate_tag_set_obj(last_tag)
 	end
-	return resulting_tag_sets
 end
+parse_tag_set_properties = export.parse_tag_set_properties
+local function normalize_pos(pos)
-function export.normalize_pos(pos)
+	if not pos then
-	return m_pos[pos] or pos
+		return nil
+	end
+	return (m_pos_data or get_m_pos_data())[pos] or pos
 end
 -- Return the display form of a single canonical-form tag. The value
 -- passed in must be a string (i.e. it cannot be a list describing a
 -- multipart tag). To handle multipart tags, use get_tag_display_form().
-local function get_single_tag_display_form(normtag)
+-- A truthy `nolink` suppresses linking.
-	local data = export.lookup_tag(normtag)
+local function get_single_tag_display_form(normtag, lang, nolink)
+	local data = lookup_tag(normtag, lang)
+	local display = normtag
 	-- If the tag has a special display form, use it
 	if data and data.display then
-		normtag = data.display
+		display = data.display
+		if nolink then
+			display = remove_links(display)
+		end
 	end
 	-- If there is a nonempty glossary index, then show a link to it
-	if data and data.glossary then
+	if not nolink then
-		if data.glossary_type == "wikt" then
+		local glossary = data and data[(m_data or get_m_data()).GLOSSARY]
-			normtag = "[[" .. data.glossary .. "|" .. normtag .. "]]"
+		if glossary ~= nil then
-		elseif data.glossary_type == "wp" then
+			if glossary == m_data.WIKT then
-			normtag = "[[w:" .. data.glossary .. "|" .. normtag .. "]]"
+				display = "[[wikt:" .. normtag .. "|" .. display .. "]]"
-		else
+			elseif glossary == m_data.WP then
-			normtag = "[[wikt:Appendix:Glossary#" .. mw.uri.anchorEncode(data.glossary) .. "|" .. normtag .. "]]"
+				display = "[[w:" .. normtag .. "|" .. display .. "]]"
+			elseif glossary == m_data.APPENDIX then
+				display = "[[wikt:Appendix:Glossary#" .. anchor_encode(normtag) .. "|" .. display .. "]]"
+			elseif type(glossary) ~= "string" then
+				error(("Internal error: Wrong type %s for glossary value %s for tag %s"):format(
+					type(glossary), dump(glossary), normtag))
+			else
+				local link = glossary:match("^(wikt:.*)")
+				if link then
+					display = "[[" .. link .. "|" .. display .. "]]"
+				end
+				if not link then
+					link = glossary:match("^w:(.*)")
+					if link then
+						display = "[[w:" .. link .. "|" .. display .. "]]"
+					end
+				end
+				if not link then
+					display = "[[wikt:Appendix:Glossary#" .. anchor_encode(glossary) .. "|" .. display .. "]]"
+				end
+			end
 		end
 	end
-	return normtag
+	return display
 end
+--[==[
--- Turn a canonicalized tag spec (which describes a single, possibly
+Turn a canonicalized tag spec (which describes a single, possibly multipart tag) into the displayed form. The tag spec
--- multipart tag) into the displayed form. The tag spec may be a string
+may be a string (a canonical-form tag); a list of canonical-form tags (in the case of a simple multipart tag); or a
--- (a canonical-form tag), or a list of canonical-form tags (in the
+list of mixed canonical-form tags and lists of such tags (in the case of a two-level multipart tag). `joiner` indicates
--- case of a simple multipart tag), or a list of mixed canonical-form
+how to join the parts of a multipart tag, and can be either {"and"} ("foo and bar", or "foo, bar and baz" for 3 or
--- tags and lists of such tags (in the case of a two-level multipart tag).
+more), {"slash"} ("foo/bar"), {"en-dash"} ("foo–bar") or {nil}, which uses the global default found in
--- JOINER indicates how to join the parts of a multipart tag, and can
+{multipart_join_strategy()} in [[Module:form of/functions]]. (NOTE: The global default is {"slash"} and this seems
--- be either "and" ("foo and bar", or "foo, bar and baz" for 3 or more),
+unlikely to change.) A truthy `nolink` suppresses linking.]==]
--- "slash" ("foo/bar"), "en-dash" ("foo–bar") or nil, which uses the
+function export.get_tag_display_form(tagspec, lang, joiner, nolink)
--- global default found in multipart_join_strategy() in
--- [[Module:form of/functions]].
-function export.get_tag_display_form(tagspec, joiner)
 	if type(tagspec) == "string" then
-		return get_single_tag_display_form(tagspec)
+		return get_single_tag_display_form(tagspec, lang, nolink)
 	end
-	-- We have a multipart tag. See if there's a display handler to
+	-- We have a multipart tag. See if there's a display handler to display them specially.
-	-- display them specially.
+	for _, handler in ipairs(display_handlers or get_display_handlers()) do
-	for _, handler in ipairs(m_functions.display_handlers) do
 		local displayval = handler(tagspec, joiner)
 		if displayval then
+			if nolink then
+				displayval = remove_links(displayval)
+			end
 			return displayval
 		end
@@ Line 510: / Line 926: @@
 	for _, first_level_tag in ipairs(tagspec) do
 		if type(first_level_tag) == "string" then
-			table.insert(displayed_tags, get_single_tag_display_form(first_level_tag))
+			insert(displayed_tags, get_single_tag_display_form(first_level_tag, lang, nolink))
 		else
-			-- A first-level element of a two-level multipart tag.
+			-- A first-level element of a two-level multipart tag. Currently we just separate the individual components
-			-- Currently we just separate the individual components
+			-- with spaces, but other ways are possible, e.g. using an underscore, colon, parens or braces.
-			-- with spaces, but other ways are possible, e.g. using
-			-- an underscore, colon, parens or braces.
 			local components = {}
 			for _, component in ipairs(first_level_tag) do
-				table.insert(components, get_single_tag_display_form(component))
+				insert(components, get_single_tag_display_form(component, lang, nolink))
 			end
-			table.insert(displayed_tags, table.concat(components, " "))
+			insert(displayed_tags, concat(components, " "))
 		end
 	end
-	return m_functions.join_multiparts(displayed_tags, joiner)
+	return join_multiparts(displayed_tags, joiner)
 end
+get_tag_display_form = export.get_tag_display_form
+--[==[
+Given a normalized tag set (i.e. as output by {normalize_tag_set()}; all tags are in canonical form, multipart tags are
+represented as lists, and two-level multipart tags as lists of lists), convert to displayed form (a string). See
+{get_tag_display_form()} for the meaning of `joiner`. A truthy `nolink` suppresses linking.]==]
+function export.get_tag_set_display_form(normalized_tag_set, lang, joiner, nolink)
+	local parts = {}
--- Return true if the list `tags1`, treated as a set, is a subset of the list `tags2`, also
+	for _, tagspec in ipairs(normalized_tag_set) do
--- treated as a set.
+		local to_insert = get_tag_display_form(tagspec, lang, joiner, nolink)
-local function is_subset(tags1, tags2)
+		-- Maybe insert a space before inserting the display form of the tag. We insert a space if
-	tags1 = m_table.listToSet(tags1)
+		-- (a) we're not the first tag; and
-	tags2 = m_table.listToSet(tags2)
+		-- (b) the tag we're about to insert doesn't have the "no_space_on_left" property; and
-	for tag, _ in pairs(tags1) do
+		-- (c) the preceding tag doesn't have the "no_space_on_right" property.
-		if not tags2[tag] then
+		-- NOTE: We depend here on the fact that
-			return false
+		-- (1) all tags with either of the above properties set have the same display form as canonical form, and
+		-- (2) all tags with either of the above properties set are single-character tags.
+		-- The second property is an optimization to avoid looking up display forms resulting from multipart tags,
+		-- which won't be found and which will trigger loading of [[Module:form of/data/2]]. If multichar punctuation is
+		-- added in the future, it's ok to relax the single-character pattern check below to allow two or three characters.
+		--
+		-- If the first property above fails to hold in the future, we need to track the canonical form of each tag
+		-- (including the previous one) as well as the display form. This would also avoid the need for the
+		-- single-character check.
+		if #parts > 0 then
+			local most_recent_tagobj = parts[#parts]:match("^.[\128-\191]*$") and lookup_tag(parts[#parts], lang)
+			local to_insert_tagobj = to_insert:match("^.[\128-\191]*$") and lookup_tag(to_insert, lang)
+			if (
+				(not most_recent_tagobj or not most_recent_tagobj.no_space_on_right) and
+				(not to_insert_tagobj or not to_insert_tagobj.no_space_on_left)
+			) then
+				insert(parts, " ")
+			end
 		end
+		insert(parts, to_insert)
 	end
-	return true
+	return concat(parts)
 end
+get_tag_set_display_form = export.get_tag_set_display_form
+--[==[
+Expand every two-level multipart tag in `tag_set`, returning a list of one or more tag sets, none of which contains a
+two-level multipart tag. Single-level multipart tags are kept as they are. Multipart tags are expected to be lists and
+two-level multipart tags lists of lists, as output by {normalize_tag_set()}. Each alternant of a two-level multipart
+tag may be either a single tag (a string) or a list of tags, so imbalanced tags such as `def:s//p` (or the reverse,
+`s//def:p`) are handled. If `tag_set` contains no two-level multipart tag, the result is a one-element list holding
+`tag_set` itself.]==]
+local function split_two_level_multipart_tag_set(tag_set)
+	-- True if any part of the multipart tag `multipart` is itself a list, i.e. the tag is two-level.
+	local function is_two_level(multipart)
+		for _, part in ipairs(multipart) do
+			if type(part) == "table" then
+				return true
+			end
+		end
+		return false
+	end
+	for pos, tag in ipairs(tag_set) do
+		if type(tag) == "table" and is_two_level(tag) then
+			-- Only the first two-level tag is expanded here; the recursive call below takes care of later ones.
+			local before = slice(tag_set, 1, pos - 1)
+			local after = slice(tag_set, pos + 1)
+			local expansions = {}
+			for _, alternant in ipairs(tag) do
+				-- Build `before` + alternant + `after`; an alternant may be a string or a list of tags.
+				local candidate = {}
+				extend(candidate, before)
+				if type(alternant) == "table" then
+					extend(candidate, alternant)
+				else
+					insert(candidate, alternant)
+				end
+				extend(candidate, after)
+				extend(expansions, split_two_level_multipart_tag_set(candidate))
+			end
+			return expansions
+		end
+	end
+	return {tag_set}
+end
--- Compute and return the appropriate categories for the tags in `tags` (user-specified tags,
+local function try_lang_specific_module(langcode, modules_tried, name, data)
--- which may consist of multiple tag sets separated by semicolons) and the language in `lang`.
+	local lang_specific_module = form_of_lang_data_module_prefix .. langcode .. "/functions"
--- This checks both language-specific and language-agnostic category specs in [[Module:form of/cats]].
+	local langdata = safe_require(lang_specific_module)
--- `POS` is the user-specified part of speech, if any, and `terminfo` is currently unused.
+	if langdata then
-function export.fetch_lang_categories(lang, tags, terminfo, POS)
+		insert(modules_tried, lang_specific_module)
-	local m_cats = mw.loadData("Module:form of/cats")
+		if langdata.cat_functions then
+			local fn = langdata.cat_functions[name]
+			if fn then
+				return fn(data), true
+			end
+		end
+	end
+	return nil, false
+end
-	local categories = {}
+-- Look up and call the function `name`, preferring a language-specific definition in
+-- [[Module:form of/lang-data/LANGCODE/functions]] (for `lang` and then each of its parents in turn) and falling back
+-- to the language-independent functions in [[Module:form of/functions]]. The function receives a single table with
+-- the fields `pagename`, `lemmas`, `tag_set`, `lang` and `POS`. `funtype` is only used in the error message thrown
+-- when no function of that name exists in any of the modules tried.
+local function call_named_function(name, funtype, normalized_tag_set, lang, POS, pagename, lemmas)
+	local fn_args = {
+		lang = lang,
+		POS = POS,
+		tag_set = normalized_tag_set,
+		lemmas = lemmas,
+		pagename = pagename or default_pagename or get_default_pagename(),
+	}
+	local searched = {}
+	-- Language-specific modules first, walking up from the language to its superordinate varieties.
+	local variety = lang
+	while variety do
+		local result, found = try_lang_specific_module(variety:getCode(), searched, name, fn_args)
+		if found then
+			return result
+		end
+		variety = variety:getParent()
+	end
+	-- Then the language-independent functions.
+	insert(searched, form_of_functions_module)
+	local generic_fn = (cat_functions or get_cat_functions())[name]
+	if generic_fn then
+		return generic_fn(fn_args)
+	end
+	-- Not found anywhere: report every module that was consulted, as wikilinks.
+	for idx, modname in ipairs(searched) do
+		searched[idx] = "[[" .. modname .. "]]"
+	end
+	local where = list_to_text(searched, nil, " or ")
+	error(("No %s function named '%s' in %s"):format(funtype, name, where))
+end
-	local normalized_tags = export.normalize_tags(tags)
+-- Given a tag from the current tag set (which may be a list in case of a multipart tag),
-	local split_tag_sets = export.split_tags_into_tag_sets_and_expand_two_level_multipart_tags(normalized_tags)
+-- and a tag from a categorization spec, check that the two match.
-	POS = export.normalize_pos(POS)
+-- (1) If both are strings, we just check for equality.
+-- (2) If the spec tag is a string and the tag set tag is a list (i.e. it originates from a
+-- multipart tag), we check that the spec tag is in the list. This is because we want to treat
+-- multipart tags in user-specified tag sets as if the user had specified multiple tag sets.
+-- For example, if the user said "1//3|s|pres|ind" and the categorization spec says {"has", "1"},
+-- we want this to match, because "1//3|s|pres|ind" should be treated equivalently to two tag
+-- sets "1|s|pres|ind" and "3|s|pres|ind", and the former matches the categorization spec.
+-- (3) If the spec tag is a list (i.e. it originates from a multipart tag), we check that the
+-- tag set tag is also a list and is a superset of the spec tag. For example, if the categorization
+-- spec says {"has", "1//3"}, then the tag set tag must be a multipart tag that has both "1" and "3"
+-- in it. "1//3" works, as does "1//2//3".
+local function tag_set_tag_matches_spec_tag(tag_set_tag, spec_tag)
+	local tag_is_multipart = type(tag_set_tag) == "table"
+	if type(spec_tag) == "table" then
+		-- Multipart spec tag: the tag set tag must also be multipart and contain every part of the spec tag.
+		if tag_is_multipart and is_subset_list(spec_tag, tag_set_tag) then
+			return true
+		end
+		return false
+	end
+	if tag_is_multipart then
+		-- Plain spec tag against a multipart tag set tag: any one part matching is enough.
+		if contains(tag_set_tag, spec_tag) then
+			return true
+		end
+		return false
+	end
+	-- Both are plain tags.
+	return tag_set_tag == spec_tag
+end
-	-- Loop over each tag set and compute categories for each one.
+-- Check that the current tag set matches the given spec tag. This means that any of the tags
-	for _, tag_set in ipairs(split_tag_sets) do
+-- in the current tag set match, according to tag_set_tag_matches_spec_tag(); see above. If the
-		local function make_function_table()
+-- current tag set contains only string tags (i.e. no multipart tags), and the spec tag is a
-			return {
+-- string (i.e. not a multipart tag), this boils down to list containment, but it gets more
-				lang=lang,
+-- complex when multipart tags are present.
-				tags=normalized_tags,
+local function tag_set_matches_spec_tag(spec_tag, tag_set, lang)
-				term=term,
+	spec_tag = normalize_tag(spec_tag, lang)
-				p=POS
+	for _, tag_set_tag in ipairs(tag_set) do
-			}
+		if tag_set_tag_matches_spec_tag(tag_set_tag, spec_tag) then
+			return true
 		end
+	end
+	return false
+end
-		-- Given a tag from the current tag set (which may be a list in case of a multipart tag),
+-- Check whether the given spec matches the current tag set. Two values are returned:
-		-- and a tag from a categorization spec, check that the two match.
+-- (1) whether the spec matches the tag set; (2) the index of the category to add if
-		-- (1) If both are strings, we just check for equality.
+-- the spec matches.
-		-- (2) If the spec tag is a string and the tag set tag is a list (i.e. it originates from a
+local function check_condition(spec, tag_set, normalized_tag_set, lang, POS, pagename, lemmas)
+	-- `spec` is either a boolean (returned as-is, with no second value) or a list whose first element names the
+	-- predicate. The second return value is the index within `spec` of the sub-spec to process when the condition
+	-- holds.
-		-- multipart tag), we check that the spec tag is in the list. This is because we want to treat
+	if type(spec) == "boolean" then
-		-- multipart tags in user-specified tag sets as if the user had specified multiple tag sets.
+		return spec
-		-- For example, if the user said "1//3|s|pres|ind" and the categorization spec says {"has", "1"},
+	elseif type(spec) ~= "table" then
-		-- we want this to match, because "1//3|s|pres|ind" should be treated equivalently to two tag
+		-- tostring() so that a non-concatenable `spec` (nil, function, ...) still produces this message.
+		error("Wrong type of condition " .. tostring(spec) .. ": " .. type(spec))
-		-- sets "1|s|pres|ind" and "3|s|pres|ind", and the former matches the categorization spec.
+	end
-		-- (3) If the spec tag is a list (i.e. it originates from a multipart tag), we check that the
+	local predicate = spec[1]
-		-- tag set tag is also a list and is a superset of the spec tag. For example, if the categorization
+	if predicate == "has" then
-		-- spec says {"has", "1//3"}, then the tag set tag must be a multipart tag that has both "1" and "3"
+		return tag_set_matches_spec_tag(spec[2], tag_set, lang), 3
-		-- in it. "1//3" works, as does "1//2//3".
+	elseif predicate == "hasall" then
-		local function tag_set_tag_matches_spec_tag(tag_set_tag, spec_tag)
+		for _, tag in ipairs(spec[2]) do
-			if type(spec_tag) == "table" then
+			if not tag_set_matches_spec_tag(tag, tag_set, lang) then
-				if type(tag_set_tag) == "table" and is_subset(spec_tag, tag_set_tag) then
+				return false, 3
-					return true
+			end
-				end
+		end
-			elseif type(tag_set_tag) == "table" then
+		return true, 3
-				if m_table.contains(tag_set_tag, spec_tag) then
+	elseif predicate == "hasany" then
-					return true
+		for _, tag in ipairs(spec[2]) do
-				end
+			if tag_set_matches_spec_tag(tag, tag_set, lang) then
-			elseif tag_set_tag == spec_tag then
+				return true, 3
-				return true
 			end
-			return false
 		end
+		return false, 3
-		-- Check that the current tag set matches the given spec tag. This means that any of the tags
+	elseif predicate == "tags=" then
-		-- in the current tag set match, according to tag_set_tag_matches_spec_tag(); see above. If the
+		local normalized_spec_tag_sets = normalize_tag_set(spec[2], lang)
-		-- current tag set contains only string tags (i.e. no multipart tags), and the spec tag is a
+		if #normalized_spec_tag_sets > 1 then
-		-- string (i.e. not a multipart tag), this boils down to list containment, but it gets more
+			error("Internal error: No support for conjoined shortcuts in category/label specs in "
-		-- complex when multipart tags are present.
+				.. "[[Module:form of/cats]] when processing spec tag set " .. concat(spec[2], "|"))
-		local function tag_set_matches_spec_tag(spec_tag)
+		end
-			spec_tag = normalize_tag(spec_tag)
+		local normalized_spec_tag_set = normalized_spec_tag_sets[1]
-			for _, tag_set_tag in ipairs(tag_set) do
+		-- Check for and disallow two-level multipart tags in the specs. FIXME: Remove this when we remove
-				if tag_set_tag_matches_spec_tag(tag_set_tag, spec_tag) then
+		-- support for two-level multipart tags.
-					return true
+		for _, tag in ipairs(normalized_spec_tag_set) do
+			if type(tag) == "table" then
+				for _, subtag in ipairs(tag) do
+					if type(subtag) == "table" then
+						error("Internal error: No support for two-level multipart tags in category/label specs in "
+							.. "[[Module:form of/cats]] when processing spec tag set "
+							.. concat(spec[2], "|"))
+					end
 				end
 			end
-			return false
 		end
+		-- Allow tags to be in different orders, and multipart tags to be in different orders. To handle this,
-		-- Check whether the given spec matches the current tag set. Two values are returned:
+		-- we first check that both tag set tags and spec tags have the same length. If so, we sort the
-		-- (1) whether the spec matches the tag set; (2) the index of the category to add if
+		-- multipart tags in the tag set tags and spec tags, and then check that all tags in the spec tags are
-		-- the spec matches.
+		-- in the tag set tags.
-		local function check_condition(spec)
+		if #tag_set ~= #normalized_spec_tag_set then
-			if type(spec) == "boolean" then
+			return false, 3
-				return spec
+		end
-			elseif type(spec) ~= "table" then
+		local tag_set_tags = deep_copy(tag_set)
-				error("Wrong type of condition " .. spec .. ": " .. type(spec))
+		for i=1,#tag_set_tags do
+			if type(tag_set_tags[i]) == "table" then
+				sort(tag_set_tags[i])
+			end
+			if type(normalized_spec_tag_set[i]) == "table" then
+				sort(normalized_spec_tag_set[i])
 			end
-			local predicate = spec[1]
+		end
-			if predicate == "has" then
+		for i=1,#tag_set_tags do
-				return tag_set_matches_spec_tag(spec[2]), 3
+			if not contains(tag_set_tags, normalized_spec_tag_set[i]) then
-			elseif predicate == "hasall" then
-				for _, tag in ipairs(spec[2]) do
-					if not tag_set_matches_spec_tag(tag) then
-						return false, 3
-					end
-				end
-				return true, 3
-			elseif predicate == "hasany" then
-				for _, tag in ipairs(spec[2]) do
-					if tag_set_matches_spec_tag(tag) then
-						return true, 3
-					end
-				end
 				return false, 3
-			elseif predicate == "tags=" then
+			end
-				local normalized_spec_tags = export.normalize_tags(spec[2])
+		end
-				-- Allow tags to be in different orders, and multipart tags to
+		return true, 3
-				-- be in different orders. To handle this, we first check that
+	elseif predicate == "p=" then
-				-- both tag set tags and spec tags have the same length. If so,
+		return POS == normalize_pos(spec[2]), 3
-				-- we sort the multipart tags in the tag set tags and spec tags,
+	elseif predicate == "pany" then
-				-- and then check that all tags in the spec tags are in the
+		for _, specpos in ipairs(spec[2]) do
-				-- tag set tags.
+			if POS == normalize_pos(specpos) then
-				if #tag_set ~= #normalized_spec_tags then
-					return false, 3
-				end
-				local tag_set_tags = m_table.deepcopy(tag_set)
-				for i=1,#tag_set_tags do
-					if type(tag_set_tags[i]) == "table" then
-						table.sort(tag_set_tags[i])
-					end
-					if type(normalized_spec_tags[i]) == "table" then
-						table.sort(normalized_spec_tags[i])
-					end
-				end
-				for i=1,#tag_set_tags do
-					if not m_table.contains(tag_set_tags, normalized_spec_tags[i], "deepCompare") then
-						return false, 3
-					end
-				end
 				return true, 3
-			elseif predicate == "p=" then
-				return POS == export.normalize_pos(spec[2]), 3
-			elseif predicate == "pany" then
-				for _, specpos in ipairs(spec[2]) do
-					if POS == export.normalize_pos(specpos) then
-						return true, 3
-					end
-				end
-				return false, 3
-			elseif predicate == "pexists" then
-				return POS ~= nil, 2
-			elseif predicate == "not" then
-				local condval = check_condition(spec[2])
-				return not condval, 3
-			elseif predicate == "and" then
-				local condval = check_condition(spec[2])
-				if condval then
-					condval = check_condition(spec[3])
-				end
-				return condval, 4
-			elseif predicate == "or" then
-				local condval = check_condition(spec[2])
-				if not condval then
-					condval = check_condition(spec[3])
-				end
-				return condval, 4
-			elseif predication == "call" then
-				local fn = m_functions.cat_functions[spec[2]]
-				if not fn then
-					error("No condition function named '" .. spec[2] .. "'")
-				end
-				return fn(make_function_table()), 3
-			else
-				error("Unrecognized predicate: " .. predicate)
 			end
 		end
+		return false, 3
+	elseif predicate == "pexists" then
+		return POS ~= nil, 2
+	elseif predicate == "not" then
+		local condval = check_condition(spec[2], tag_set, normalized_tag_set, lang, POS, pagename, lemmas)
+		return not condval, 3
+	elseif predicate == "and" then
+		local condval = check_condition(spec[2], tag_set, normalized_tag_set, lang, POS, pagename, lemmas)
+		if condval then
+			condval = check_condition(spec[3], tag_set, normalized_tag_set, lang, POS, pagename, lemmas)
+		end
+		return condval, 4
+	elseif predicate == "or" then
+		local condval = check_condition(spec[2], tag_set, normalized_tag_set, lang, POS, pagename, lemmas)
+		if not condval then
+			condval = check_condition(spec[3], tag_set, normalized_tag_set, lang, POS, pagename, lemmas)
+		end
+		return condval, 4
+	elseif predicate == "call" then
+		return call_named_function(spec[2], "condition", normalized_tag_set, lang, POS, pagename, lemmas), 3
+	else
+		error("Unrecognized predicate: " .. predicate)
+	end
+end
-		-- Process a given spec. This checks any conditions in the spec against the
+-- Process a given spec. This checks any conditions in the spec against the
-		-- tag set, and insert any resulting categories into `categories`. Return value
+-- tag set, and insert any resulting categories into `categories`. Return value
-		-- is true if the outermost condition evaluated to true and a category was inserted
+-- is true if the outermost condition evaluated to true and a category was inserted
-		-- (this is used in {"cond" ...} conditions, which stop when a subcondition evaluates
+-- (this is used in {"cond" ...} conditions, which stop when a subcondition evaluates
-		-- to true).
+-- to true).
-		local function process_spec(spec)
+local function process_spec(spec, tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels)
+	-- Categories are appended to `categories` and labels are added to `labels` via insert_if_not(); both lists are
+	-- modified in place.
-			if not spec then
+	if not spec then
-				return false
+		return false
-			elseif type(spec) == "string" then
+	elseif type(spec) == "string" then
-				-- Substitute POS request with user-specified part of speech
+		-- A category. Substitute POS request with user-specified part of speech or default.
-				-- or default
+		spec = spec:gsub("<<p=(.-)>>", function(default)
-				spec = rsub(spec, "<<p=(.-)>>", function(default)
+			return POS or normalize_pos(default)
-					return POS or export.normalize_pos(default)
+		end)
-				end)
+		insert(categories, lang:getFullName() .. " " .. spec)
-				table.insert(categories, lang:getCanonicalName() .. " " .. spec)
+		return true
+	elseif type(spec) == "table" and spec.labels then
+		-- A label spec.
+		for _, label in ipairs(spec.labels) do
+			insert_if_not(labels, label)
+		end
+		return true
+	elseif type(spec) ~= "table" then
+		-- tostring() so that a non-concatenable `spec` (e.g. `true` or a function) still produces this message.
+		error("Wrong type of specification " .. tostring(spec) .. ": " .. type(spec))
+	end
+	local predicate = spec[1]
+	if predicate == "multi" then
+		for _, sp in iterate_from(2, ipairs(spec)) do -- Iterate from 2.
+			process_spec(sp, tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels)
+		end
+		return true
+	elseif predicate == "cond" then
+		for _, sp in iterate_from(2, ipairs(spec)) do -- Iterate from 2.
+			if process_spec(sp, tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels) then
 				return true
-			elseif type(spec) ~= "table" then
-				error("Wrong type of specification " .. spec .. ": " .. type(spec))
-			end
-			local predicate = spec[1]
-			if predicate == "multi" then
-				-- WARNING! #spec doesn't work for objects loaded from loadData()
-				for i, sp in ipairs(spec) do
-					if i > 1 then
-						process_spec(sp)
-					end
-				end
-				return true
-			elseif predicate == "cond" then
-				-- WARNING! #spec doesn't work for objects loaded from loadData()
-				for i, sp in ipairs(spec) do
-					if i > 1 and process_spec(sp) then
-						return true
-					end
-				end
-				return false
-			elseif predicate == "call" then
-				local fn = m_functions.cat_functions[spec[2]]
-				if not fn then
-					error("No spec function named '" .. spec[2] .. "'")
-				end
-				return process_spec(fn(make_function_table()))
-			else
-				local condval, ifspec = check_condition(spec)
-				if condval then
-					process_spec(spec[ifspec])
-					return true
-				else
-					process_spec(spec[ifspec + 1])
-					-- FIXME: Are we sure this is correct?
-					return false
-				end
 			end
 		end
+		return false
+	elseif predicate == "call" then
+		return process_spec(
+			call_named_function(spec[2], "spec", normalized_tag_set, lang, POS, pagename, lemmas),
+			tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels
+		)
+	else
+		local condval, ifspec = check_condition(spec, tag_set, normalized_tag_set, lang, POS, pagename, lemmas)
+		if condval then
+			process_spec(spec[ifspec], tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels)
+			return true
+		else
+			process_spec(spec[ifspec + 1], tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels)
+			-- FIXME: Are we sure this is correct?
+			return false
+		end
+	end
+end
-		local langspecs = m_cats[lang:getCode()]
+--[==[
+Given a normalized tag set (i.e. as output by {normalize_tag_set()}; all tags are in canonical form, multipart tags are
+represented as lists, and two-level multipart tags as lists of lists), fetch the associated categories and labels.
+Return two values, a list of categories and a list of labels. `lang` is the language of the term represented by the tag
+set, and `POS` is the user-provided part of speech (which may be {nil}). `pagename` and `lemmas` are passed through to
+any functions named in {"call"} specs.]==]
+function export.fetch_categories_and_labels(normalized_tag_set, lang, POS, pagename, lemmas)
+	local categories, labels = {}, {}
+	POS = normalize_pos(POS)
+	-- First split any two-level multipart tags into multiple sets, to make our life easier.
+	for _, tag_set in ipairs(split_two_level_multipart_tag_set(normalized_tag_set)) do
+		local langcode = lang:getCode()
+		local langspecs = (m_cats_data or get_m_cats_data())[langcode]
 		if langspecs then
 			for _, spec in ipairs(langspecs) do
-				process_spec(spec)
+				process_spec(spec, tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels)
 			end
 		end
-		if lang:getCode() ~= "und" then
+		local full_code = lang:getFullCode()
-			local langspecs = m_cats["und"]
+		if full_code ~= langcode then
+			local langspecs = (m_cats_data or get_m_cats_data())[full_code]
 			if langspecs then
 				for _, spec in ipairs(langspecs) do
-					process_spec(spec)
+					process_spec(spec, tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels)
+				end
+			end
+		end
+		if full_code ~= "und" then
+			local langspecs = (m_cats_data or get_m_cats_data())["und"]
+			if langspecs then
+				for _, spec in ipairs(langspecs) do
+					process_spec(spec, tag_set, normalized_tag_set, lang, POS, pagename, lemmas, categories, labels)
 				end
 			end
 		end
 	end
+	return categories, labels
+end
+fetch_categories_and_labels = export.fetch_categories_and_labels
-	return categories
+-- Format `labels` using show_labels(), taking `lang`, `sort` and `nocat` from `data`. Returns the empty string when
+-- there are no labels. Otherwise the formatted labels are followed by a single space, unless `notext` is set and
+-- `data.pretext` is missing or empty, in which case nothing is appended.
+local function format_labels(labels, data, notext)
+	if not labels or #labels == 0 then
+		return ""
+	end
+	local formatted = show_labels{
+		labels = labels,
+		lang = data.lang,
+		sort = data.sort,
+		nocat = data.nocat
+	}
+	-- No separator is needed when nothing (neither tag text nor pretext) follows the labels.
+	local nothing_follows = notext and (data.pretext or "") == ""
+	return formatted .. (nothing_follows and "" or " ")
 end
+--[==[
+Implementation of templates that display inflection tags, such as the general {{tl|inflection of}}, semi-specific
+variants such as {{tl|participle of}}, and specific variants such as {{tl|past participle of}}. `data` contains all the
+information controlling the display, with the following fields:
-function export.tagged_inflections(data, terminfo, notext, capfirst, posttext, joiner)
+* `.lang`: ('''''required''''') Language to use when looking up language-specific inflection tags, categories and
-	if not data.tags then
+  labels, and for displaying categories and labels.
-		error("First argument must now be a table of arguments")
+* `.tags`: ('''''required''' unless `.tag_sets` is given'') List of non-canonicalized inflection tags. Multiple tag sets
+  can be indicated by a {";"} as one of the tags, and tag-set properties may be attached to the last tag of a tag set.
+  The tags themselves may come directly from the user (as in {{tl|inflection of}}); come partly from the user (as in
+  {{tl|participle of}}, which adds the tag `part` to user-specified inflection tags); or be entirely specified by the
+  template (as in {{tl|past participle of}}).
+* `.tag_sets`: ('''''required''' unless `.tags` is given'') List of non-canonicalized tag sets and associated
+  per-tag-set properties. Each element of the list is an object of the form
+  { {tags = {"TAG", "TAG", ...}, labels = {"LABEL", "LABEL", ...}} }. If `.tag_sets` is specified, `.tags` must not be
+  given and vice versa. Specifying `.tag_sets` in place of `.tags` allows per-tag-set labels to be specified; otherwise,
+  there is no advantage. [[Module:pt-gl-inflections]] uses this functionality to supply labels like {"Brazil"} and
+  {"Portugal"} associated with specific tag sets.
+* `.lemmas`: ('''''recommended''''') List of objects describing the lemma(s) of which the term in question is a
+  non-lemma form. These are passed directly to {full_link()} in [[Module:links]]. Each object should have at minimum a
+  `.lang` field containing the language of the lemma and a `.term` field containing the lemma itself. Each object is
+  formatted using {full_link()} and then if there are more than one, they are joined using {serialCommaJoin()} in
+  [[Module:table]]. Alternatively, `.lemmas` can be a string, which is displayed directly. If omitted entirely, no lemma
+  links are shown and the connecting "of" is also omitted.
+* `.lemma_face`: ('''''recommended''''') "Face" to use when displaying the lemma objects. Usually should be set to
+  {"term"}.
+* `.POS`: ('''''recommended''''') Categorizing part-of-speech tag. Comes from the {{para|p}} or {{para|POS}} argument of
+  {{tl|inflection of}}.
+* `.pagename`: Page name of "current" page or nil to use the actual page title; for testing purposes.
+* `.conj`: Conjunction or separator to use when joining multiple lemma objects. Defaults to {"and"}.
+* `.enclitics`: List of enclitics to display after the lemmas, in parens.
+* `.enclitic_conj`: Conjunction or separator to use when joining multiple enclitics. Defaults to {"and"}.
+* `.no_format_categories`: If true, don't format the categories derived from the inflection tags; just return them.
+* `.sort`: Sort key for formatted categories. Ignored when `.no_format_categories` = {true}.
+* `.nocat`: Suppress computation of categories (even if `.no_format_categories` is not given).
+* `.notext`: Disable display of all tag text and `inflection of` text. (FIXME: Maybe not implemented correctly.)
+* `.nolink`: Suppress linking of terms in inflection tags.
+* `.capfirst`: Capitalize the first word displayed.
+* `.pretext`: Additional text to display before the inflection tags, but after any top-level labels.
+* `.posttext`: Additional text to display after the lemma links.
+* `.text_classes`: CSS classes used to wrap the tag text and lemma links. Default is
+   {"form-of-definition use-with-mention"}.
+* `.lemma_classes`: Additional CSS classes used to wrap the lemma links. Default is {"form-of-definition-link"}.
+* `.joiner`: Override the joiner (normally a slash) used to join multipart tags. You should normally not specify this.
+* `.nowrap`: Do not wrap the form-of definitions in a span, unless `.text_classes` is specified.
+* `.ok_to_destructively_modify`: If set, data structures (including the nested lemma structures) can be modified
+  in-place to save memory; otherwise they will be copied before modifying.
+A typical call might look like this (for {{m+|es|amo}}): {
+	local lang = require("Module:languages").getByCode("es")
+	local lemma_obj = {
+		lang = lang,
+		term = "amar",
+	}
+	return m_form_of.tagged_inflections({
+		lang = lang, tags = {"1", "s", "pres", "ind"}, lemmas = {lemma_obj}, lemma_face = "term", POS = "verb"
+	})
+}
+Normally, one value is returned, the formatted text, which has appended to it the formatted categories derived from the
+tag-set-related categories generated by the specs in [[Module:form of/cats]]. To suppress this, set
+`data.no_format_categories` = {true}, in which case two values are returned, the formatted text without any formatted
+categories appended and a list of the categories to be formatted.
+NOTE: There are two sets of categories that may be generated: (1) categories derived directly from the tag sets, as
+specified in [[Module:form of/cats]]; (2) categories derived from tag-set labels, either (a) set explicitly by the
+caller in `data.tag_sets`, (b) specified by the user using `<lb:...>` attached to the last tag in a tag set, or
+(c) specified in [[Module:form of/cats]]. The second type (label-related categories) are currently not returned in
+the second return value of {tagged_inflections()}, and are currently inserted into the output text even if
+`data.no_format_categories` is set to {true}; but they can be suppressed by setting `data.nocat` = {true} (which also
+suppresses the first type of categories, those derived directly from tag sets, even if `data.no_format_categories` is
+set to {true}).]==]
+function export.tagged_inflections(data) -- Builds the form-of definition text for one or more tag sets; `data` fields are described in the comment above.
+	if not data.tags and not data.tag_sets then
+		error("First argument must be a table of arguments, and `.tags` or `.tag_sets` must be specified")
+	end
+	if data.tags and data.tag_sets then -- `.tags` and `.tag_sets` are mutually exclusive.
+		error("Both `.tags` and `.tag_sets` cannot be specified")
+	end
+	local tag_sets = data.tag_sets
+	if not tag_sets then
+		tag_sets = split_tag_set(data.tags) -- Only `.tags` was given: split it into tag sets, then parse each one below.
+		for i, tag_set in ipairs(tag_sets) do
+			tag_sets[i] = parse_tag_set_properties(tag_set)
+		end
 	end
-	local cur_infl = {}
 	local inflections = {} -- One record per displayable tag set: {infl_text = ..., labels = ...}.
+	local categories = {} -- Collects tag-set categories (unless `data.nocat`) plus those returned by format_form_of().
+	for _, tag_set in ipairs(tag_sets) do
+		local normalized_tag_sets = normalize_tag_set(tag_set.tags, data.lang, "do-track") -- Returns a list; each element is processed separately below.
-	local ntags = export.normalize_tags(data.tags, nil, "do-track")
+		for _, normalized_tag_set in ipairs(normalized_tag_sets) do
+			local this_categories, this_labels = fetch_categories_and_labels(normalized_tag_set, data.lang,
-	for i, tagspec in ipairs(ntags) do
+				data.POS, data.pagename, type(data.lemmas) == "table" and data.lemmas or nil) -- Lemmas are passed on only when given as a table.
-		if tagspec == ";" then
+			if not data.nocat then
-			if #cur_infl > 0 then
+				extend(categories, this_categories)
-				table.insert(inflections, table.concat(cur_infl))
 			end
+			local cur_infl = get_tag_set_display_form(normalized_tag_set, data.lang, data.joiner, data.nolink)
-			cur_infl = {}
-		else
-			local to_insert = export.get_tag_display_form(tagspec, data.joiner)
-			-- Maybe insert a space before inserting the display form
-			-- of the tag. We insert a space if
-			-- (a) we're not the first tag; and
-			-- (b) the tag we're about to insert doesn't have the
-			--     "no_space_on_left" property; and
-			-- (c) the preceding tag doesn't have the "no_space_on_right"
-			--     property.
-			-- NOTE: We depend here on the fact that
-			-- (1) all tags with either of the above properties set have the
-			--     same display form as canonical form, and
-			-- (2) all tags with either of the above properties set are
-			--     single-character tags.
-			-- The second property is an optimization to avoid looking up
-			-- display forms resulting from multipart tags, which won't be
-			-- found and which will trigger loading of [[Module:form of/data2]].
-			-- If multichar punctuation is added in the future, it's ok to
-			-- change the == 1 below to <= 2 or <= 3.
-			--
-			-- If the first property above fails to hold in the future, we
-			-- need to track the canonical form of each tag (including the
-			-- previous one) as well as the display form. This would also
-			-- avoid the need for the == 1 check.
 			if #cur_infl > 0 then -- Tag sets whose display form is empty produce no inflection record.
-				local most_recent_tagobj = ulen(cur_infl[#cur_infl]) == 1 and
+				if tag_set.labels then
-					export.lookup_tag(cur_infl[#cur_infl])
+					this_labels = append(tag_set.labels, this_labels) -- Presumably tag-set labels first, then fetched labels; confirm against `append`.
-				local to_insert_tagobj = ulen(to_insert) == 1 and
-					export.lookup_tag(to_insert)
-				if (
-					(not most_recent_tagobj or
-					 not most_recent_tagobj.no_space_on_right) and
-					(not to_insert_tagobj or
-					 not to_insert_tagobj.no_space_on_left)
-				) then
-					table.insert(cur_infl, " ")
 				end
+				insert(inflections, {infl_text = cur_infl, labels = this_labels})
 			end
-			table.insert(cur_infl, to_insert)
 		end
 	end
-	if #cur_infl > 0 then
+	local overall_labels, need_per_tag_set_labels -- Labels shared by every inflection are shown once; otherwise each tag set shows its own.
-		table.insert(inflections, table.concat(cur_infl))
+	for _, inflection in ipairs(inflections) do
+		if overall_labels == nil then -- NOTE(review): a nil `labels` on an earlier inflection is indistinguishable from "not yet set" here; confirm labels is never nil.
+			overall_labels = inflection.labels
+		elseif not deep_equals(overall_labels, inflection.labels) then
+			need_per_tag_set_labels = true
+			overall_labels = nil
+			break
+		end
 	end
-	local format_data = require("Module:table").shallowcopy(data)
+	if not need_per_tag_set_labels then
+		for _, inflection in ipairs(inflections) do
+			inflection.labels = nil -- Shared labels are emitted via `overall_labels`, so clear the per-inflection copies.
+		end
+	end
+	local format_data = shallow_copy(data) -- Copy so that setting `.text`/`.posttext` below does not modify the caller's table.
+	local of_text = data.lemmas and " of" or "" -- " of" is appended only when lemmas were supplied.
+	local formatted_text, this_categories
 	if #inflections == 1 then -- Exactly one inflection: its text goes inline into `format_data.text`.
-		format_data.text =
+		if need_per_tag_set_labels then
-			data.notext and "" or ((data.capfirst and require("Module:string utilities").ucfirst(inflections[1]) or inflections[1]) ..
+			error("Internal error: need_per_tag_set_labels should not be set with one inflection")
-				(data.terminfo and " of" or ""))
+		end
-		return export.format_form_of(format_data)
+		format_data.text = format_labels(overall_labels, data, data.notext) .. (data.pretext or "") .. (data.notext and "" or
+			((data.capfirst and ucfirst(inflections[1].infl_text) or inflections[1].infl_text) .. of_text))
+		formatted_text, this_categories = format_form_of(format_data)
 	else -- Any other count (including zero): generic "inflection of" header followed by one "## " line per inflection.
-		format_data.text = data.notext and "" or ((data.capfirst and "Inflection" or "inflection") ..
+		format_data.text = format_labels(overall_labels, data, data.notext) .. (data.pretext or "") .. (data.notext and "" or
-			(data.terminfo and " of" or ""))
+			((data.capfirst and "Inflection" or "inflection") .. of_text))
 		format_data.posttext = (data.posttext or "") .. ":"
-		local link = export.format_form_of(format_data)
+		local link
-		local text_classes = data.text_classes or "form-of-definition"
+		link, this_categories = format_form_of(format_data)
-		return link .."\n## <span class='" .. text_classes .. "' style='font-weight: bold;'>" ..
+		local text_classes = data.text_classes
-			table.concat(inflections, "</span>\n## <span class='" .. text_classes .. "' style='font-weight: bold;'>") .. "</span>"
+		if text_classes == nil and not data.nowrap then -- Default classes apply only when none were given and wrapping is not disabled.
-	end
+			text_classes = "form-of-definition use-with-mention"
-end
-function export.to_Wikidata_IDs(tags, skip_tags_without_ids)
-	if type(tags) == "string" then
-		tags = mw.text.split(tags, "|", true)
-	end
-	local ret = {}
-	local function get_wikidata_id(tag)
-		if tag == ";" and not skip_tags_without_ids then
-			error("Semicolon is not supported for Wikidata IDs")
-		else
-			return nil
 		end
+		for i, inflection in ipairs(inflections) do -- Replace each record in place with its rendered "## " line.
-		local data = export.lookup_tag(tag)
+			inflections[i] = "\n## " .. format_labels(inflection.labels, data, false) ..
+				wrap_in_span(inflection.infl_text, text_classes)
-		if not data or not data.wikidata then
-			if not skip_tags_without_ids then
-				error("The tag \"" .. tag .. "\" does not have a Wikidata ID defined in [[Module:form of/data]]")
-			else
-				return nil
-			end
-		else
-			return data.wikidata
 		end
+		formatted_text = link .. concat(inflections)
+	end
+	if this_categories[1] then -- Non-empty check on the categories returned by format_form_of().
+		extend(categories, this_categories)
 	end
-	for i, tag in ipairs(export.normalize_tags(tags)) do
+	if not data.no_format_categories then -- Default mode: append formatted categories and return a single value.
-		if type(tag) == "table" then
+		if categories[1] then
-			local ids = {}
+			formatted_text = formatted_text .. format_categories(categories, data.lang,
-			for _, onetag in ipairs(tag) do
+				data.sort, nil, export.force_cat)
-				table.insert(ids, get_wikidata_id(onetag))
-			end
-			table.insert(ret, ids)
-		else
-			table.insert(ret, get_wikidata_id(tag))
 		end
+		return formatted_text
 	end
+	return formatted_text, categories -- `no_format_categories` mode: text plus the raw category list.
-	return ret
 end
 function export.dump_form_of_data(frame) -- Returns the form-of data modules serialized as JSON; `frame` is not used in the body.
 	local data = {
-		data = require("Module:form of/data"),
+		require(form_of_data1_module), -- New revision: positional entries replace the old `data`/`data2` keys.
-		data2 = require("Module:form of/data2")
+		require(form_of_data2_module)
 	}
-	return require("Module:JSON").toJSON(data)
+	return require(json_module).toJSON(data) -- Module name comes from the `json_module` local instead of a literal.
 end
+export.form_of_cats_module = form_of_cats_module -- Re-export the module-name locals on the export table (this line and the five below).
+export.form_of_data1_module = form_of_data1_module
+export.form_of_data2_module = form_of_data2_module
+export.form_of_functions_module = form_of_functions_module
+export.form_of_lang_data_module_prefix = form_of_lang_data_module_prefix
+export.headword_data_module = headword_data_module -- so all form-of modules stay in sync
 return export -- The module's value is the export table.