Module:form of: Difference between revisions

Line 1:

local export = {}

export.force_cat = false -- for testing; set to true to display categories even on non-mainspace pages

local m_links = require("Module:links")

local m_string_utils = require("Module:string utilities")

local m_table = require("Module:table")

local ~~m_pos~~ = mw.~~loadData(~~"Module:form of/pos")

local parse_utilities_module = "Module:parse utilities"

~~local m_functions~~ = ~~require(~~"Module:form of/functions")

local labels_module = "Module:labels"

local utilities_module = "Module:utilities"

export.form_of_pos_module = "Module:form of/pos"

export.form_of_functions_module = "Module:form of/functions"

export.form_of_cats_module = "Module:form of/cats"

export.form_of_lang_data_module_prefix = "Module:form of/lang-data/"

export.form_of_data_module = "Module:form of/data"

export.form_of_data2_module = "Module:form of/data2"

local ulen = ~~mw.ustring~~.len

local ulen = m_string_utils.len

local rsubn = ~~mw.ustring~~.gsub

local rsubn = m_string_utils.gsub

local rmatch = ~~mw.ustring~~.match

local rmatch = m_string_utils.match

local rsplit = ~~mw.text~~.split

local rsplit = m_string_utils.split

~~local~~ export = {}

export.TAG_TYPE = 1

export.GLOSSARY = 2

export.SHORTCUTS = 3

export.WIKIDATA = 4

~~--[~~=[

export.APPENDIX = true

export.WP = false

export.WIKT = 0

~~This module implements~~ the ~~underlying processing of~~ {~~{form of}}~~,

--[==[

~~{{inflection of}} and specific variants such as {{past participle of}}~~

Set listing the languages with lang-specific tags. If a language isn't listed here, the tags for that language won't be

~~and {{alternative spelling of}}. Most of the logic in this file is to~~

recognized.

~~handle tags in {{inflection of~~}~~}. Other related files:~~

]==]

export.langs_with_lang_specific_tags = {

["en"] = true,

["got"] = true,

["lt"] = true,

["lv"] = true,

["nl"] = true,

["pi"] = true,

["sw"] = true,

["ttj"] = true,

}

* [[Module:form of/templates]] contains the majority of the logic that

--[==[ intro:

~~implements the templates themselves.~~

* [[Module:form of/data]] is a data-only file containing information on

~~the more common inflection tags, listing the tags, their shortcuts,~~

~~the category they belong to (tense-aspect, case, gender, voice-valence,~~

~~etc.), the appropriate glossary link and the wikidata ID.~~

* [[Module:form of/data2]] is a data-only file containing information on

~~the less common inflection tags, in the same format as~~

~~[[Module:form of/data]].~~

* [[Module:form of/cats]] is a data-only file listing the

~~language~~-~~specific categories that are added when the appropriate~~

~~combinations of tags are seen for a given language.~~

* [[Module:form of/pos]] is a data-~~only file listing the recognized~~

~~parts of speech and their abbreviations, used for categorization.~~

~~FIXME: This should be unified with the parts of speech listed in~~

~~[[Module:links]].~~

* [[Module:form of/functions]] contains functions for use with

[[~~Module:form of/data]] and~~ [~~[Module:form of/cats]]. They are~~

~~contained in this module because data-only modules can't contain~~

~~code. The functions in this file are of two types~~:

~~(1) Display handlers allow for customization~~ of ~~the display~~ of

This module implements the underlying processing of {{tl|form of}}, {{tl|inflection of}} and specific variants such as

~~multipart tags (see below). Currently there is only one~~

{{tl|past participle of}} and {{tl|alternative spelling of}}. Most of the logic in this file is to handle tags in

~~such handler~~, ~~for handling multipart person tags~~ such as

{{tl|inflection of}}. Other related files:

~~'1//2//3'~~.

~~(2) Cat functions allow for more complex categorization~~ logic,

~~and are referred~~ to ~~by name~~ in ~~[[Module:form~~ of~~/cats]].~~

~~Currently no such functions exist~~.

~~The following terminology~~ is used in ~~conjunction~~ with ~~{{inflection~~ of}}:

* [[Module:form of/templates]] contains the majority of the logic that implements the templates themselves.

* [[Module:form of/data]] is a data-only file containing information on the more common inflection tags, listing the

tags, their shortcuts, the category they belong to (tense-aspect, case, gender, voice-valence, etc.), the appropriate

glossary link and the wikidata ID.

* [[Module:form of/data2]] is a data-only file containing information on the less common inflection tags, in the same

format as [[Module:form of/data]].

* [[Module:form of/lang-data/LANGCODE]] is a data-only file containing information on the language-specific inflection

tags for the language with code LANGCODE, in the same format as [[Module:form of/data]]. Language-specific tags

override general tags.

* [[Module:form of/cats]] is a data-only file listing the language-specific categories that are added when the

appropriate combinations of tags are seen for a given language.

* [[Module:form of/pos]] is a data-only file listing the recognized parts of speech and their abbreviations, used for

categorization. FIXME: This should be unified with the parts of speech listed in [[Module:links]].

* [[Module:form of/functions]] contains functions for use with [[Module:form of/data]] and [[Module:form of/cats]].

They are contained in this module because data-only modules can't contain code. The functions in this file are of two

types:

*# Display handlers allow for customization of the display of multipart tags (see below). Currently there is only

one such handler, for handling multipart person tags such as `1//2//3`.

*# Cat functions allow for more complex categorization logic, and are referred to by name in [[Module:form of/cats]].

Currently no such functions exist.

* A TAG is a single grammatical item, as specified in a single numbered

The following terminology is used in conjunction with {{tl|inflection of}}:

~~parameter of {{inflection of}}. Examples are 'masculine', 'nominative',~~

~~or 'first-person'. Tags may be abbreviated, e.g. 'm' for 'masculine',~~

~~'nom' for 'nominative', or '1' for 'first-person'. Such abbreviations~~

~~are called SHORTCUTS, and some tags have multiple equivalent shortcuts~~

~~(e.g. 'p' or 'pl' for 'plural').~~ The ~~full, non-abbreviated form of~~

~~a tag~~ is ~~called its CANONICAL FORM.~~

* The DISPLAY FORM of a tag is the way it's displayed to the user. Usually

~~the displayed text of the tag is the same as its canonical form, and it~~

~~normally functions as a link to a glossary entry explaining the tag.~~

~~Usually the link is to an entry~~ in ~~[[Appendix:Glossary]], but sometimes~~

~~the tag is linked to an individual dictionary entry or to a Wikipedia~~

~~entry. Occasionally, the display text differs from the canonical form of~~

~~the tag. An example is the tag 'comparative case', which has the display~~

~~text read as simply 'comparative'. Normally, tags referring to cases don't~~

~~have the word "case" in them, but in this case the tag 'comparative' was~~

~~already used as a shortcut for the tag 'comparative degree', so the tag was~~

~~named 'comparative case' to avoid clashing. A similar situation occurs~~

with ~~'adverbial case' vs. the grammar tag 'adverbial' (as in 'adverbial~~

~~participle').~~

* A TAG SET is an ordered list of tags, which together express a single

~~inflection, for example, '1|s|pres|ind', which can be expanded to~~

~~canonical-form tags as 'first-person|singular|present|indicative'.~~

~~Multiple tag sets can be specified in a single call to~~ {{inflection of}}

~~by separating the individual tag sets with a semicolon, e.g.~~

~~'1|s|pres|ind|;|2|s|imp', which specifies two tag sets, '1|s|pres|ind'~~

~~as above and '2|s|imp' (in canonical form,~~

~~'second-person|singular|imperative').~~

* A MULTIPART TAG is a tag that embeds multiple tags within it, such as

~~'f//n' or 'nom//acc//voc'. These are used in the case of [[syncretism]],~~

~~when the same form applies to multiple inflections. Examples are the~~

~~Spanish present subjunctive, where the first-person and third-person~~

~~singular have the same form (e.g. [[siga]] from [[seguir]] "to follow"),~~

~~or Latin third-declension adjectives, where the dative and ablative~~

~~plural of all genders have the same form (e.g. [[omnibus]] from [[omnis]]~~

~~"all"). These would be expressed respectively as '1//3|s|pres|sub'~~

~~and 'dat//abl|m//f//n|p', where the use of the multipart tag compactly~~

~~encodes the syncretism and avoids the need to individually list out~~

~~all of the inflections. Multipart tags currently display as a list~~

~~separated by "and", ''dative and ablative'' or~~

~~''masculine, feminine and neuter'' where each individual word is linked~~

~~appropriately. As a special case, multipart tags involving persons display~~

~~specially; for example, the multipart tag ''1//2//3'' displays as~~

~~''first-, second- and third-person'', with the word "person" occurring~~

~~only once.~~

* A TWO-LEVEL MULTIPART TAG is a special type of multipart tag that

~~joins two or more tag sets instead of joining individual tags. The tags~~

~~within the tag set are joined by a colon, e.g. '1:s//3~~:~~p', which is~~

~~displayed as ''first-person singular and third-person plural'', e.g.~~

~~for use with the form [[μέλλον]] of the verb [[μέλλω]] "to intend",~~

~~which uses the tag set '1:s//3:p|impf|actv|indc|unaugmented' to express~~

~~the syncretism between the first singular and third plural forms of the~~

~~imperfect active indicative unaugmented conjugation. Two-level multipart~~

~~tags should be used sparingly; if in doubt, list out the inflections~~

~~separately.~~

* A MULTIPART TAG SHORTCUT is a shortcut that expands into a multipart

~~tag, for example '123', which expands to the multipart tag '1//2//3'.~~

~~Only the most common such combinations exist as shortcuts.~~

* A LIST TAG SHORTCUT is a special type of shortcut that expands to a list

~~of tags instead of a single tag. For example, the shortcut '1s' expands to~~

~~'1|s' (first-person singular). Only the most common such combinations~~

~~exist as shortcuts.~~

]=]

* A ''tag'' is a single grammatical item, as specified in a single numbered parameter of {{tl|inflection of}}. Examples

are `masculine`, `nominative`, or `first-person`. Tags may be abbreviated, e.g. `m` for `masculine`, `nom` for

`nominative`, or `1` for `first-person`. Such abbreviations are called ''aliases'', and some tags have multiple

equivalent aliases (e.g. `p` or `pl` for `plural`). The full, non-abbreviated form of a tag is called its

''canonical form''.

* The ''display form'' of a tag is the way it's displayed to the user. Usually the displayed text of the tag is the same

as its canonical form, and it normally functions as a link to a glossary entry explaining the tag. Usually the link is

to an entry in [[Appendix:Glossary]], but sometimes the tag is linked to an individual dictionary entry or to a

Wikipedia entry. Occasionally, the display text differs from the canonical form of the tag. An example is the tag

`comparative case`, which has the display text read as simply `comparative`. Normally, tags referring to cases don't

have the word "case" in them, but in this case the tag `comparative` was already used as an alias for the tag

`comparative degree`, so the tag was named `comparative case` to avoid clashing. A similar situation occurs with

`adverbial case` vs. the grammar tag `adverbial` (as in `adverbial participle`).

* A ''tag set'' is an ordered list of tags, which together express a single inflection, for example, `1|s|pres|ind`,

which can be expanded to canonical-form tags as `first-person|singular|present|indicative`.

* A ''conjoined tag set'' is a tag set that consists of multiple individual tag sets separated by a semicolon, e.g.

`1|s|pres|ind|;|2|s|imp`, which specifies two tag sets, `1|s|pres|ind` as above and `2|s|imp` (in canonical form,

`second-person|singular|imperative`). Multiple tag sets specified in a single call to {{tl|inflection of}} are

specified in this fashion. Conjoined tag sets can also occur in list-tag shortcuts.

* A ''multipart tag'' is a tag that embeds multiple tags within it, such as `f//n` or `nom//acc//voc`. These are used in

the case of [[syncretism]], when the same form applies to multiple inflections. Examples are the Spanish present

subjunctive, where the first-person and third-person singular have the same form (e.g. {{m|es|siga}} from

{{m|es|seguir|t=to follow}}), or Latin third-declension adjectives, where the dative and ablative plural of all

genders have the same form (e.g. {{m|la|omnibus}} from {{m|la|omnis|t=all}}). These would be expressed respectively as

`1//3|s|pres|sub` and `dat//abl|m//f//n|p`, where the use of the multipart tag compactly encodes the syncretism and

avoids the need to individually list out all of the inflections. Multipart tags currently display as a list separated

by a slash, e.g. ''dative/ablative'' or ''masculine/feminine/neuter'' where each individual word is linked

appropriately. As a special case, multipart tags involving persons display specially; for example, the multipart tag

`1//2//3` displays as ''first-, second- and third-person'', with the word "person" occurring only once.

* A ''two-level multipart tag'' is a special type of multipart tag that joins two or more tag sets instead of joining

individual tags. The tags within the tag set are joined by a colon, e.g. `1:s//3:p`, which is displayed as

''first-person singular and third-person plural'', e.g. for use with the form {{m|grc|μέλλον}} of the verb

{{m|grc|μέλλω|t=to intend}}, which uses the tag set `1:s//3:p|impf|actv|indc|unaugmented` to express the syncretism

between the first singular and third plural forms of the imperfect active indicative unaugmented conjugation.

Two-level multipart tags should be used sparingly; if in doubt, list out the inflections separately. [FIXME: Make

two-level multipart tags obsolete.]

* A ''shortcut'' is a tag that expands to any type of tag described above, or to any type of tag set described above.

Aliases are a particular type of shortcut whose expansion is a single non-multipart tag.

* A ''multipart shortcut'' is a shortcut that expands into a multipart tag, for example `123`, which expands to the

multipart tag `1//2//3`. Only the most common such combinations exist as shortcuts.

* A ''list shortcut'' is a special type of shortcut that expands to a list of tags instead of a single tag. For example,

the shortcut `1s` expands to `1|s` (first-person singular). Only the most common such combinations exist as shortcuts.

* A ''conjoined shortcut'' is a special type of list shortcut that consists of a conjoined tag set (multiple logical tag

sets). For example, the English language-specific shortcut `ed-form` expands to `spast|;|past|part`, expressing the

common syncretism between simple past and past participle in English (and in this case, `spast` is itself a list

shortcut that expands to `simple|past`).

]==]

-- version of rsubn() that discards all but the first return value

Line 121:

Line 129:

local function normalize_index(list, index)
	-- Map a possibly negative index onto the equivalent positive index of `list`. Negative indices count back from
	-- the end of the list (-1 is the last element); any other index is returned unchanged.
	return index < 0 and (#list + index + 1) or index
end

-- FIXME, consider moving to [[Module:table]]

-- Return true if the list `tags1`, treated as a set, is a subset of the list `tags2`, also treated as a set.
-- Both arguments are converted with listToSet() from [[Module:table]] before comparison, so duplicates and ordering
-- in either list are irrelevant. An empty `tags1` is a subset of anything.
local function is_subset(tags1, tags2)
	tags1 = m_table.listToSet(tags1)
	tags2 = m_table.listToSet(tags2)
	for tag in pairs(tags1) do
		if not tags2[tag] then
			-- Found a member of `tags1` that is missing from `tags2`.
			return false
		end
	end
	-- Only reached after every member of `tags1` has been checked; this must sit outside the loop, otherwise the
	-- function would answer after inspecting a single element.
	return true
end

-- FIXME, move to [[Module:table]]

local function slice(list, i, j)
	-- Return a new list containing list[i] through list[j] inclusive. `i` defaults to 1 and `j` defaults to -1;
	-- negative indices are resolved through normalize_index(). Argument type checks are currently disabled:
	--checkType("slice", 1, list, "table")
	--checkType("slice", 2, i, "number", true)
	--checkType("slice", 3, j, "number", true)
	local first = 1
	if i ~= nil then
		first = normalize_index(list, i)
	end
	local last = normalize_index(list, j or -1)

	local result = {}
	for source_index = first, last do
		-- Position in the result is the offset from `first`, so the output is numbered from 1 regardless of where
		-- the slice starts.
		result[source_index - first + 1] = list[source_index]
	end
	return result
end

-- Wrap `text` in an HTML <span> element whose class attribute is set to `classes` (a string of one or more
-- space-separated CSS class names) and return the resulting markup. The format string must contain two `%s`
-- placeholders, one for the classes and one for the text; with a single placeholder the text would be dropped and
-- only the class names returned.
local function wrap_in_span(text, classes)
	return ("<span class='%s'>%s</span>"):format(classes, text)
end

--[==[

Lowest-level implementation of form-of templates, including the general {{tl|form of}} as well as those that deal with

inflection tags, such as the general {{tl|inflection of}}, semi-specific variants such as {{tl|participle of}}, and

specific variants such as {{tl|past participle of}}. `data` contains all the information controlling the display, with

the following fields:

* `.text`: Text to insert before the lemmas. Wrapped in the value of `.text_classes`, or its default; see below.

* `.lemmas`: List of objects describing the lemma(s) of which the term in question is a non-lemma form. These are passed

directly to {full_link()} in [[Module:links]]. Each object should have at minimum a `.lang` field containing the

language of the lemma and a `.term` field containing the lemma itself. Each object is formatted using {full_link()}

and then, if there is more than one, they are joined using {serialCommaJoin()} in [[Module:table]]. Alternatively,

`.lemmas` can be a string, which is displayed directly, or omitted, to show no lemma links and omit the connecting

text.

* `.lemma_face`: "Face" to use when displaying the lemma objects. Usually should be set to {"term"}.

* `.enclitics`: List of enclitics to display after the lemmas, in parens.

* `.base_lemmas`: List of base lemmas to display after the lemmas, in the case where the lemmas in `.lemmas` are

themselves forms of another lemma (the base lemma), e.g. a comparative, superlative or participle. Each object is of

the form { { paramobj = PARAM_OBJ, lemmas = {LEMMA_OBJ, LEMMA_OBJ, ...} }} where PARAM_OBJ describes the properties

of the base lemma parameter (i.e. the relationship between the intermediate and base lemmas) and LEMMA_OBJ is an

object suitable to be passed to {full_link()} in [[Module:links]]. PARAM_OBJ is of the format

{ { param = "PARAM", tags = {"TAG", "TAG", ...} }} where PARAM is the name of the parameter to {{tl|inflection of}}

etc. that holds the base lemma(s) of the specified relationship and the tags describe the relationship, such as

{ {"comd"}} or { {"past", "part"}}.

* `.text_classes`: CSS classes used to wrap the tag text and lemma links. Default is {"form-of-definition use-with-mention"}

for the tag text and lemma links, and additionally {"form-of-definition-link"} specifically for the lemma links.

(FIXME: Should separate out the lemma links into their own field.)

* `.posttext`: Additional text to display after the lemma links.

]==]

function export.format_form_of(data)

if type(data) ~= "table" then

error("First argument must now be a table of arguments")

error("Internal error: First argument must now be a table of arguments")

end

local text_classes = data.text_classes or "form-of-definition use-with-mention"

local ~~terminfo_classes~~ = data.text_classes or "form-of-definition-link"

local lemma_classes = data.text_classes or "form-of-definition-link"

local parts = {}

table.insert(parts, "")

local function ins(text)

~~table.insert~~(~~parts,~~ data.text)

table.insert(parts, text)

if data.text ~= "" and data.~~terminfo~~ then

end

~~table.insert~~(~~parts,~~ " ")

ins("")

ins(data.text)

if data.text ~= "" and data.lemmas then

ins(" ")

end

if data.~~terminfo~~ then

if data.lemmas then

~~table.insert(parts, "")~~

if type(data.lemmas) == "string" then

if type(data.~~terminfo~~) == "string" then

ins(wrap_in_span(data.lemmas, lemma_classes))

~~table.insert~~(~~parts,~~ data.~~terminfo~~)

else

table.insert(~~parts~~, m_links.full_link(data.~~terminfo~~, data.~~terminfo_face~~, ~~false~~))

local formatted_terms = {}

for _, lemma in ipairs(data.lemmas) do

table.insert(formatted_terms, wrap_in_span(

m_links.full_link(lemma, data.lemma_face), lemma_classes

))

end

ins(m_table.serialCommaJoin(formatted_terms))

end

if data.enclitics and #data.enclitics > 0 then

-- The outer parens need to be outside of the text_classes span so they show in upright instead of italic, or

-- they will clash with upright parens generated by link annotations such as transliterations and pos=.

ins("")

local formatted_terms = {}

for _, enclitic in ipairs(data.enclitics) do

-- FIXME, should we have separate clitic face and/or classes?

table.insert(formatted_terms, wrap_in_span(

m_links.full_link(enclitic, data.lemma_face, nil, "show qualifiers"), lemma_classes

))

end

~~table~~.~~insert~~(~~parts~~, "")

ins(" (")

ins(wrap_in_span("with enclitic" .. (#data.enclitics > 1 and "s" or "") .. " ", text_classes))

ins(m_table.serialCommaJoin(formatted_terms))

ins(")")

ins("")

end

if data.base_lemmas and #data.base_lemmas > 0 then

for _, base_lemma in ipairs(data.base_lemmas) do

ins(", the ")

ins(export.tagged_inflections {

lang = base_lemma.lemmas[1].lang,

tags = base_lemma.paramobj.tags,

lemmas = base_lemma.lemmas,

lemma_face = data.lemma_face,

no_format_categories = true,

nocat = true,

text_classes = data.text_classes,

})

ins("")

end

-- FIXME, should posttext go before enclitics? If so we need to have separate handling for the

-- final colon when there are multiple tag sets in tagged_inflections().

if data.posttext then

~~table.insert~~(~~parts,~~ data.posttext)

ins(data.posttext)

end

~~table.insert~~(~~parts,~~ "")

ins("")

return table.concat(parts)

end

~~local~~ function is_link_or_html(tag)

--[==[

return tag:find("[[", nil, true) or tag:find("|", nil, true) or

Return true if `tag` contains an internal link or HTML.

tag:find("<", nil, true)

]==]

function export.is_link_or_html(tag)
	-- Plain-text (non-pattern) search for each marker in turn: "[[" (internal link), "|" (pipe) and "<" (HTML).
	-- The start position of the first marker that is found is returned, which callers use as a truthy value; nil
	-- is returned when none of the markers occurs in `tag`.
	for _, marker in ipairs({"[[", "|", "<"}) do
		local position = tag:find(marker, nil, true)
		if position then
			return position
		end
	end
	return nil
end

-- Look up a tag (either a shortcut of any sort of a canonical long-form tag)

--[==[

-- and return its expansion. The expansion will be a string unless the

Look up a tag (either a shortcut of any sort or a canonical long-form tag) and return its expansion. The expansion

-- shortcut is a list-tag shortcut such as "1s"; in that case, the expansion

will be a string unless the shortcut is a list-tag shortcut such as `1s`; in that case, the expansion will be a

-- will be a list. The caller must handle both cases. Only one level of

list. The caller must handle both cases. Only one level of expansion happens; hence, `acc` expands to {"accusative"},

-- expansion happens; hence, "acc" expands to "accusative", "1s" expands to

`1s` expands to { {"1", "s"}} (not to { {"first", "singular"}}) and `123` expands to {"1//2//3"}. The expansion will be

-- {"1", "s"} (not to {"first", "singular"}) and "123" expands to "1//2//3".

the same as the passed-in tag in the following circumstances:

-- The expansion will be the same as the passed-in tag in the following

-- circumstances:

# The tag is `;` (this is special-cased, and no lookup is done).

--

# The tag is a multipart tag such as `nom//acc` (this is special-cased, and no lookup is done).

~~-- 1.~~ The tag is ";" (this is special-cased, and no lookup is done).

# The tag contains a raw link (this is special-cased, and no lookup is done).

~~-- 2.~~ The tag is a multipart tag such as "nom//acc" (this is special-cased,

# The tag contains HTML (this is special-cased, and no lookup is done).

-- and no lookup is done).

# The tag is already a canonical long-form tag.

~~-- 3.~~ The tag contains a raw link (this is special-cased, and no lookup is

# The tag is unrecognized.

-- done).

~~-- 4.~~ The tag contains HTML (this is special-cased, and no lookup is done).

This function first looks up in the lang-specific data module [[Module:form of/lang-data/LANGCODE]], then in

~~-- 5.~~ The tag is already a canonical long-form tag.

[[Module:form of/data]] (which includes more common non-lang-specific tags) and finally (only if the tag is not

~~-- 6.~~ The tag is unrecognized.

recognized as a shortcut or canonical tag, and is not of types 1-4 above) in [[Module:form of/data2]].

--

-- This function first looks up in [[Module:form of/data]] (which includes

If the expansion is a string and is different from the tag, track it if `do_track` is true.

-- ~~more common~~ tags) and ~~then~~ (only if the tag is not recognized as a

]==]

-- shortcut or canonical tag, and is not of types 1-4 above) in

function export.lookup_shortcut(tag, lang, do_track)

-- [[Module:form of/data2]].

--

-- If the expansion is a string and is different from the tag, track it if

~~-- DO_TRACK~~ is true.

function export.lookup_shortcut(tag)

-- If there is HTML or a link in the tag, return it directly; don't try

-- to look it up, which will fail.

if tag == ";" or tag:find("//", nil, true) or is_link_or_html(tag) then

if tag == ";" or tag:find("//", nil, true) or export.is_link_or_html(tag) then

return tag

end

local ~~m_data~~ = mw.loadData(~~"Module~~:~~form of/data"~~)

local expansion

-- If this is a canonical long-form tag, just return it, and don't

local langcode = lang and lang:getCode()

-- check for shortcuts (which will cause [[Module:form of/data2]] to be

if langcode and export.langs_with_lang_specific_tags[langcode] then

-- ~~loaded~~).

local langdata = mw.loadData(export.form_of_lang_data_module_prefix .. langcode)

if m_data.tags[tag] then

-- If this is a canonical long-form tag, just return it, and don't check for shortcuts. This is an

~~return~~ tag

-- optimization; see below.

if langdata.tags[tag] then

return tag

end

expansion = langdata.shortcuts[tag]

end

if not expansion and lang then

-- If the lang we're dealing with is an etym-only lang, try again with the corresponding full language.

local full_langcode = lang:getFullCode()

if full_langcode ~= langcode and export.langs_with_lang_specific_tags[full_langcode] then

local langdata = mw.loadData(export.form_of_lang_data_module_prefix .. full_langcode)

-- If this is a canonical long-form tag, just return it, and don't check for shortcuts. This is an

-- optimization; see below.

if langdata.tags[tag] then

return tag

end

expansion = langdata.shortcuts[tag]

end

if not expansion then

local m_data = mw.loadData(export.form_of_data_module)

-- If this is a canonical long-form tag, just return it, and don't check for shortcuts (which will cause

-- [[Module:form of/data2]] to be loaded, because there won't be a shortcut entry in [[Module:form of/data]] --

-- or, for that matter, in [[Module:form of/data2]]). This is an optimization; the code will still work without

-- it, but will use up more memory.

if m_data.tags[tag] then

return tag

end

expansion = m_data.shortcuts[tag]

end

~~local expansion = m_data.shortcuts[tag]~~

if not expansion then

local m_data2 = mw.loadData(~~"Module:form of/data2"~~)

local m_data2 = mw.loadData(export.form_of_data2_module)

expansion = m_data2.shortcuts[tag]

end

Line 203:

Line 351:

return tag

end

return expansion

end

-- Look up a normalized/canonicalized tag and return the data object

--[==[

-- associated with it. If the tag isn't found, return nil. This first looks up

Look up a normalized/canonicalized tag and return the data object associated with it. If the tag isn't found, return

-- in [[Module:form of/data]] (which includes more common tags) and then in

nil. This first looks up in the lang-specific data module [[Module:form of/lang-data/LANGCODE]], then in

-- [[Module:form of/data2]].

[[Module:form of/data]] (which includes more common non-lang-specific tags) and then finally in

function export.lookup_tag(tag)

[[Module:form of/data2]].

local m_data = mw.loadData(~~"Module:form of/data"~~)

]==]

function export.lookup_tag(tag, lang)

local langcode = lang and lang:getCode()

if langcode and export.langs_with_lang_specific_tags[langcode] then

local langdata = mw.loadData(export.form_of_lang_data_module_prefix .. langcode)

if langdata.tags[tag] then

return langdata.tags[tag]

end

local full_langcode = lang and lang:getFullCode()

if full_langcode and full_langcode ~= langcode and export.langs_with_lang_specific_tags[full_langcode] then

-- If the lang we're dealing with is an etym-only lang, try again with the corresponding full language.

local langdata = mw.loadData(export.form_of_lang_data_module_prefix .. full_langcode)

if langdata.tags[tag] then

return langdata.tags[tag]

end

local m_data = mw.loadData(export.form_of_data_module)

local tagobj = m_data.tags[tag]

if tagobj then

return tagobj

end

local m_data2 = mw.loadData(~~"Module:form of/data2"~~)

local m_data2 = mw.loadData(export.form_of_data2_module)

local tagobj2 = m_data2.tags[tag]

if tagobj2 then

Line 226:

Line 392:

-- Normalize a single tag, which may be a shortcut but should not be a

-- Normalize a single tag, which may be a shortcut but should not be a multipart tag, a multipart shortcut or a list

-- multipart tag, a multipart~~-tag~~ shortcut or a list-~~tag~~ shortcut.

-- shortcut.

local function normalize_single_tag(tag, lang, do_track)
	-- Expand `tag` one level via export.lookup_shortcut(), forwarding `lang` and `do_track` unchanged. The result
	-- must be a single string; a non-string expansion means `tag` was a list shortcut, which callers of this
	-- function do not accept.
	local expansion = export.lookup_shortcut(tag, lang, do_track)
	if type(expansion) ~= "string" then
		error("Tag '" .. tag .. "' is a list shortcut, which is not allowed here")
	end
	tag = expansion
	return tag
end

-- Normalize a component of a multipart tag. This should not have any // in it,

--[=[

-- but may join multiple individual tags with a colon, and may be a single

Normalize a component of a multipart tag. This should not have any // in it, but may join multiple individual tags with

-- list-tag shortcut, which is ~~treates~~ as if colon-separated. If

a colon, and may be a single list-tag shortcut, which is treated as if colon-separated. The return value may be a list

~~-- RECOMBINE_TAGS isn't given, the~~ return value may be a list of tags;

of tags.

~~-- otherwise, it will always be a string, and multiple tags will be~~

]=]

~~-- represented as canonical-form tags joined by ":".~~

local function normalize_multipart_component(tag, lang, do_track)

local function normalize_multipart_component(tag, ~~recombine_tags~~)

-- If there is HTML or a link in the tag, don't try to split on colon. A colon may legitimately occur in either one,

-- If there is HTML or a link in the tag, don't try to split on colon.

-- and we don't want these things parsed. Note that we don't do this check before splitting on //, which we don't

-- A colon may legitimately occur in either one, and we don't want

-- expect to occur in links or HTML; see comment in normalize_tag().

-- these things parsed. Note that we don't do this check before splitting

if export.is_link_or_html(tag) then

-- on //, which we don't expect to occur in links or HTML; see comment

-- in normalize_tag().

if is_link_or_html(tag) then

return tag

end

Line 258:

Line 420:

-- We allow list-tag shortcuts inside of multipart tags, e.g.

-- '1s//3p'. Check for this now.

tag = export.lookup_shortcut(tag)

tag = export.lookup_shortcut(tag, lang, do_track)

if type(tag) == "table" then

-- We found a list-tag shortcut; treat as if colon-separated.

components = tag

else

return normalize_single_tag(tag)

return normalize_single_tag(tag, lang, do_track)

end

local normtags = {}

for _, component in ipairs(components) do

table.insert(normtags, normalize_single_tag(component))

table.insert(normtags, normalize_single_tag(component, lang, do_track))

end

~~if recombine_tags then~~

return normtags

return ~~table.concat(~~normtags~~, ":")~~

~~else~~

~~return normtags~~

~~end~~

end

-- Normalize a single tag. ~~If RECOMBINE_TAGS isn't given, the~~ return value

--[=[

-- may be a list (in the case of multipart tags), which will contain nested

Normalize a single tag. The return value may be a list (in the case of multipart tags), which will contain nested lists

~~-- lists~~ in the case of two-level multipart tags~~; otherwise, it will always~~

in the case of two-level multipart tags.

~~-- be a string, and multipart tags will be represented as canonical-form tags~~

]=]

~~-- joined by "//" and/or ":".~~

local function normalize_tag(tag, lang, do_track)

local function normalize_tag(tag, ~~recombine_multitags~~)

-- We don't check for links or HTML before splitting on //, which we don't expect to occur in links or HTML. Doing

-- We don't check for links or HTML before splitting on //, which we

-- it this way allows for a tag like '{{lb|grc|Epic}}//{{lb|grc|Ionic}}' to function correctly (the template calls

-- don't expect to occur in links or HTML. Doing it this way allows for

-- will be expanded before we process the tag, and will contain links and HTML). The only check we do is for a URL,

-- a tag like '{{lb|grc|Epic}}//{{lb|grc|Ionic}}' to function correctly

-- which shouldn't normally occur, but might if the user tries to put an external link into the tag. URL's with //

-- ~~(the template calls~~ will be expanded before we process the tag, and

-- normally have the sequence ://, which should never normally occur when // and : are used in their normal ways.

-- will contain links and HTML). The only check we do is for a URL,

-- which shouldn't normally occur, but might if the user tries to put

-- an external link into the tag. URL's with // ~~normally have the~~

-- sequence ://, which should never normally occur when // and : are

-- used in their normal ways.

if tag:find("://", nil, true) then

return tag

Line 299:

Line 452:

local split_tags = rsplit(tag, "//", true)

if #split_tags == 1 then

local retval = normalize_multipart_component(tag, ~~recombine_multitags~~)

local retval = normalize_multipart_component(tag, lang, do_track)

if type(retval) == "table" then

-- The user gave a tag like '1:s', i.e. with colon but without

-- The user gave a tag like '1:s', i.e. with colon but without //. Allow this, but we need to return a

-- //. Allow this, but we need to return a ~~nested list. Note,~~

-- nested list.

-- ~~this will never happen when RECOMBINE_TAGS is given~~.

return {retval}

end

Line 310:

Line 462:

local normtags = {}

for _, single_tag in ipairs(split_tags) do

table.insert(normtags, normalize_multipart_component(single_tag,

table.insert(normtags, normalize_multipart_component(single_tag, lang, do_track))

~~recombine_multitags~~))

~~end~~

~~if recombine_multitags then~~

~~return table.concat(normtags, "//")~~

~~else~~

~~return normtags~~

end

return normtags

end

-- Normalize a tag set (a list of tags) into ~~a list of~~ canonical-form tags

--[==[

-- (~~which --~~ may be ~~larger due to~~ the ~~possibility~~ of ~~list-~~tag ~~shortcuts).~~

Normalize a tag set (a list of tags) into its canonical-form tags. The return value is a list of normalized tag sets

~~-- If RECOMBINE_TAGS isn't given~~, the ~~return~~ list ~~may itself contains lists;~~

(a list because there may be conjoined shortcuts among the input tags). A normalized tag set is a list of tag

~~-- in particular,~~ multipart tags ~~will be represented as lists. Specifically,~~

elements, where each element is either a string (the canonical form of a tag), a list of such strings (in the case of

~~-- the~~ list ~~will consist~~ of ~~the elements~~ of ~~the multipart tag, which will~~

multipart tags) or a list of lists of such strings (in the case of two-level multipart tags). For example, the multipart

~~-- either be canonical-form~~ strings or (in the case of two-level multipart

tag `nom//acc//voc` will be represented in canonical form as { {"nominative", "accusative", "vocative"}}, and the

-- tags) ~~nested lists of canonical-form strings~~. For example, the multipart

two-level multipart tag `1:s//3:p` will be represented as { {{"first-person", "singular"}, {"third-person", "plural"}}}.

-- tag ''nom//acc//voc'' will ~~expand to~~

-- {"nominative", "accusative", "vocative"}

Example 1:

-- and the two-level multipart tag ''1:s//3:p'' will ~~expand to~~

-- {{"first-person", "singular"}, {"third-person", "plural"}}.

{normalize_tag_set({"nom//acc//voc", "n", "p"})} = { {{{"nominative", "accusative", "vocative"}, "neuter", "plural"}}}

~~-- If RECOMBINE_TAGS is given~~, ~~multipart tags will be represented in string~~

-- form, ~~i.e. as canonical~~-form ~~tags joined by~~ "//" ~~and/or~~ ":".

Example 2:

function export.~~normalize_tags~~(~~tags~~, ~~recombine_multitags~~)

local ~~ntags~~ = {}

{normalize_tag_set({"ed-form"}, ENGLISH)} = { {{"simple", "past"}, {"past", "participle"}}}

for _, tag in ipairs(~~tags~~) do

-- Expand the tag, which may generate a new tag (either a

Example 3:

-- fully canonicalized tag, a multipart tag, or a list of tags).

tag = export.lookup_shortcut(tag)

{normalize_tag_set({"archaic", "ed-form"}, ENGLISH)} = { {{"archaic", "simple", "past"}, {"archaic", "past", "participle"}}}

]==]

function export.normalize_tag_set(tag_set, lang, do_track)

local output_tag_set = {}

local saw_semicolon = false

for _, tag in ipairs(tag_set) do

-- Expand the tag, which may generate a new tag (either a fully canonicalized tag, a multipart tag, or a list

-- of tags).

tag = export.lookup_shortcut(tag, lang, do_track)

if type(tag) == "table" then

saw_semicolon = m_table.contains(tag, ";")

if saw_semicolon then

-- If we saw a conjoined shortcut, we need to use a more general algorithm that can expand a single

-- tag set into multiple.

break

end

for _, t in ipairs(tag) do

table.insert(~~ntags~~, normalize_tag(t, ~~recombine_multitags~~))

table.insert(output_tag_set, normalize_tag(t, lang, do_track))

end

else

table.insert(output_tag_set, normalize_tag(tag, lang, do_track))

end

if not saw_semicolon then

return {output_tag_set}

end

-- Use a more general algorithm that handles conjoined shortcuts.

local output_tag_set = {}

for i, tag in ipairs(tag_set) do

-- Expand the tag, which may generate a new tag (either a fully canonicalized tag, a multipart tag, or a list

-- of tags).

tag = export.lookup_shortcut(tag, lang, do_track)

if type(tag) == "table" then

local output_tag_sets = {}

local shortcut_tag_sets = export.split_tag_set(tag)

local normalized_shortcut_tag_sets = {}

for _, shortcut_tag_set in ipairs(shortcut_tag_sets) do

m_table.extendList(normalized_shortcut_tag_sets,

export.normalize_tag_set(shortcut_tag_set, lang, do_track))

end

local after_tags = slice(tag_set, i + 1)

local normalized_after_tags_sets = export.normalize_tag_set(after_tags, lang, do_track)

for _, normalized_shortcut_tag_set in ipairs(normalized_shortcut_tag_sets) do

for _, normalized_after_tags_set in ipairs(normalized_after_tags_sets) do

table.insert(output_tag_sets, m_table.append(output_tag_set, normalized_shortcut_tag_set,

normalized_after_tags_set))

end

return output_tag_sets

else

table.insert(~~ntags~~, normalize_tag(tag, ~~recombine_multitags~~))

table.insert(output_tag_set, normalize_tag(tag, lang, do_track))

end

~~return ntags~~

error("Internal error: Should not get here")

end

-- Split a tag set containing two-level multipart tags into one or more tag sets not containing such tags.

function export.combine_multipart_tags(tag_set)

-- Single-level multipart tags are left alone. (If we need to, a slight modification of the following code

for i, tag in ipairs(tag_set) do

-- will also split single-level multipart tags.) This assumes that multipart tags are represented as lists

if type(tag) == "table" then

-- and two-level multipart tags are represented as lists of lists, as is output by ~~normalize_tags~~().

for j, subtag in ipairs(tag) do

-- NOTE: We have to be careful to properly handle imbalanced two-level multipart tags such as

if type(subtag) == "table" then

~~-- <code>~~def:s//p~~</code>~~ (or the reverse, ~~<code>~~s//def:p~~</code>~~).

tag[j] = table.concat(subtag, ":")

end

tag_set[i] = table.concat(tag, "//")

end

return tag_set

end

--[==[
Normalize `tags` by calling {normalize_tag_set()} with `lang` and `do_track`. When `recombine_multitags` is falsy, the
list of tag sets returned by {normalize_tag_set()} is returned unchanged. Otherwise each tag set in that list is
replaced (in place) by the result of {combine_multipart_tags()}, and the value of {combine_tag_sets()} applied to the
whole list is returned.
]==]
function export.normalize_tags(tags, lang, recombine_multitags, do_track)
	local normalized_sets = export.normalize_tag_set(tags, lang, do_track)
	if not recombine_multitags then
		return normalized_sets
	end
	for index, one_set in ipairs(normalized_sets) do
		normalized_sets[index] = export.combine_multipart_tags(one_set)
	end
	return export.combine_tag_sets(normalized_sets)
end

--[==[

Split a tag set containing two-level multipart tags into one or more tag sets not containing such tags.

Single-level multipart tags are left alone. (If we need to, a slight modification of the following code

will also split single-level multipart tags.) This assumes that multipart tags are represented as lists

and two-level multipart tags are represented as lists of lists, as is output by {normalize_tag_set()}.

NOTE: We have to be careful to properly handle imbalanced two-level multipart tags such as

`def:s//p` (or the reverse, `s//def:p`).

]==]

function export.split_two_level_multipart_tag_set(tag_set)

~~-- This would be a whole lot easier in Python, with built-in support for~~

~~-- slicing and array concatenation.~~

for i, tag in ipairs(tag_set) do

if type(tag) == "table" then

Line 374:

Line 599:

-- We found a two-level multipart tag.

-- (1) Extract the preceding tags.

local pre_tags = {}

local pre_tags = slice(tag_set, 1, i - 1)

~~for j=~~1,i-1 do

~~table.insert(pre_tags, tag_set[j]~~)

~~end~~

-- (2) Extract the following tags.

local post_tags = {}

local post_tags = slice(tag_set, i + 1)

~~for j=~~i+1~~,#tag_set do~~

~~table.insert(post_tags, tag_set[j]~~)

~~end~~

-- (3) Loop over each tag set alternant in the two-level multipart tag.

-- For each alternant, form the tag set consisting of pre_tags + alternant + post_tags,

Line 389:

Line 608:

for _, first_level_tag_set in ipairs(tag) do

local expanded_tag_set = {}

~~for _, pre_tag in ipairs(pre_tags) do~~

m_table.extendList(expanded_tag_set, pre_tags)

~~table~~.~~insert~~(expanded_tag_set, ~~pre_tag~~)

~~end~~

-- The second level may have a string or a list.

if type(first_level_tag_set) == "table" then

~~for _, second_level_tag in ipairs(first_level_tag_set) do~~

m_table.extendList(expanded_tag_set, first_level_tag_set)

~~table~~.~~insert~~(expanded_tag_set, ~~second_level_tag~~)

~~end~~

else

table.insert(expanded_tag_set, first_level_tag_set)

end

~~for _, post_tag in ipairs(post_tags) do~~

m_table.extendList(expanded_tag_set, post_tags)

~~table~~.~~insert~~(expanded_tag_set, ~~post_tag~~)

m_table.extendList(resulting_tag_sets, export.split_two_level_multipart_tag_set(expanded_tag_set))

~~end~~

~~for _~~, ~~split_tag_set in ipairs(~~export.split_two_level_multipart_tag_set(expanded_tag_set)) do

~~table.insert(resulting_tag_sets, split_tag_set)~~

~~end~~

end

return resulting_tag_sets

Line 416:

Line 627:

-- ~~Given~~ a ~~list~~ of ~~tags, split~~ into tag sets ~~(separated by semicolons in the initial list of tags)~~.

--[==[

function export.~~split_tags_into_tag_sets~~(~~tags~~)

Split a tag set that may consist of multiple semicolon-separated tag sets into the component tag sets.

local ~~tag_set_group~~ = {}

]==]

function export.split_tag_set(tag_set)

local split_tag_sets = {}

local cur_tag_set = {}

for _, tag in ipairs(~~tags~~) do

for _, tag in ipairs(tag_set) do

if tag == ";" then

if #cur_tag_set > 0 then

table.insert(~~tag_set_group~~, cur_tag_set)

table.insert(split_tag_sets, cur_tag_set)

end

cur_tag_set = {}

Line 431:

Line 644:

end

if #cur_tag_set > 0 then

table.insert(~~tag_set_group~~, cur_tag_set)

table.insert(split_tag_sets, cur_tag_set)

end

return ~~tag_set_group~~

return split_tag_sets

end

export.split_tags_into_tag_sets = export.split_tag_set

-- ~~Given~~ a ~~list of tags~~, ~~split into~~ tag sets (separated by semicolons in the ~~initial list~~ of ~~tags~~).

--[==[

~~-- Then, potentially split each tag set into multiple tag sets if there are any two-level multipart~~

Combine multiple tag sets in a tag set group into a simple tag set, with logical tag sets separated by semicolons.

~~-- tags in those tag sets.~~

This is the opposite of {split_tag_set()}.

function export.~~split_tags_into_tag_sets_and_expand_two_level_multipart_tags~~(~~tags~~)

]==]

~~-- First, split into tag sets.~~

function export.combine_tag_sets(tag_sets)
	-- With exactly one tag set there is nothing to join; hand it back as-is (the same table, not a copy).
	if #tag_sets == 1 then
		return tag_sets[1]
	end
	-- Otherwise flatten all tag sets into a single list, inserting a ";" tag before each tag set once the
	-- combined list is non-empty.
	local combined_tag_set = {}
	for _, tag_set in ipairs(tag_sets) do
		if #combined_tag_set > 0 then
			table.insert(combined_tag_set, ";")
		end
		m_table.extendList(combined_tag_set, tag_set)
	end
	-- Return the list built above. (Returning `tags` here would be wrong: no such name is defined in this
	-- function, so the combined result would be discarded.)
	return combined_tag_set
end

local tag_set_param_mods = {

lb = {

item_dest = "labels",

convert = function(arg, parse_err)

return rsplit(arg, "//", true)

end,

}

--[==[

Parse tag set properties from a tag set (list of tags). Currently no per-tag properties are recognized, and the only

per-tag-set property recognized is `<lb:...>` for specifying label(s) for the tag set. Per-tag-set properties must be

attached to the last tag.

]==]

function export.parse_tag_set_properties(tag_set)
	-- Write `final_tag` back into the last slot of `tag_set` and wrap the tag set in an object. This is also passed
	-- as the `generate_obj` callback below; presumably it is then called with the tag text stripped of its inline
	-- modifiers -- confirm against [[Module:parse utilities]].
	local function make_tag_set_object(final_tag)
		tag_set[#tag_set] = final_tag
		return {tags = tag_set}
	end

	local final_tag = tag_set[#tag_set]

	-- Check for inline modifier, e.g. מרים<tr:Miryem>. But exclude HTML entry with <span ...>, <i ...>, <br/> or
	-- similar in it, caused by wrapping an argument in {{l|...}}, {{af|...}} or similar. Basically, all tags of
	-- the sort we parse here should consist of a less-than sign, plus letters, plus a colon, e.g. <lb:...>, so if
	-- we see a tag on the outer level that isn't in this format, we don't try to parse it. The restriction to the
	-- outer level is to allow generated HTML inside of e.g. qualifier tags, such as foo<q:similar to {{m|fr|bar}}>.
	local has_angle_bracket = final_tag:find("<")
	-- Matches when the first `<` is followed by lowercase letters and then something other than a lowercase
	-- letter or colon, i.e. when it is not of the form `<letters:`.
	local looks_like_html = has_angle_bracket and final_tag:find("^[^<]*<[a-z]*[^a-z:]")
	if not has_angle_bracket or looks_like_html then
		-- Nothing to parse: use the last tag unchanged.
		return make_tag_set_object(final_tag)
	end

	return require(parse_utilities_module).parse_inline_modifiers(final_tag, {
		param_mods = tag_set_param_mods,
		generate_obj = make_tag_set_object,
	})
end

--[==[
Look up `pos` in the data loaded from the module named by {export.form_of_pos_module} and return the associated value,
or `pos` itself when that lookup yields {nil} or {false}. A {nil} or {false} `pos` returns {nil} without loading the
data module.
]==]
function export.normalize_pos(pos)
	if pos then
		local pos_data = mw.loadData(export.form_of_pos_module)
		return pos_data[pos] or pos
	end
	return nil
end

Line 462:

Line 719:

-- passed in must be a string (i.e. it cannot be a list describing a

-- multipart tag). To handle multipart tags, use get_tag_display_form().

local function get_single_tag_display_form(normtag)

local function get_single_tag_display_form(normtag, lang)

local data = export.lookup_tag(normtag)

local data = export.lookup_tag(normtag, lang)

local display = normtag

-- If the tag has a special display form, use it

if data and data.display then

~~normtag~~ = data.display

display = data.display

end

-- If there is a nonempty glossary index, then show a link to it

if data and data.glossary then

local glossary = data and data[export.GLOSSARY]

if ~~data~~.~~glossary_type =~~= "~~wikt~~" then

if glossary ~= nil then

~~normtag~~ = "[[" .. ~~data.glossary~~ .. "|" .. ~~normtag~~ .. "]]"

if glossary == export.WIKT then

elseif ~~data.glossary_type~~ == ~~"wp"~~ then

display = "[[" .. normtag .. "|" .. display .. "]]"

~~normtag~~ = "[[w:" .. ~~data~~.~~glossary~~ .. "|" .. ~~normtag~~ .. "]]"

elseif glossary == export.WP then

display = "[[w:" .. normtag .. "|" .. display .. "]]"

elseif glossary == export.APPENDIX then

display = "[[Appendix:Glossary#" .. mw.uri.anchorEncode(normtag) .. "|" .. display .. "]]"

elseif type(glossary) ~= "string" then

error(("Internal error: Wrong type %s for glossary value %s for tag %s"):format(

type(glossary), mw.dumpObject(glossary), normtag))

else

~~normtag~~ = "[[~~wikt~~:Appendix:Glossary#" .. mw.uri.anchorEncode(~~data.~~glossary) .. "|" .. ~~normtag~~ .. "]]"

local link = rmatch(glossary, "^wikt:(.*)")

if link then

display = "[[" .. link .. "|" .. display .. "]]"

end

if not link then

link = rmatch(glossary, "^w:(.*)")

if link then

display = "[[w:" .. link .. "|" .. display .. "]]"

end

if not link then

display = "[[Appendix:Glossary#" .. mw.uri.anchorEncode(glossary) .. "|" .. display .. "]]"

end

return ~~normtag~~

return display

end

-- Turn a canonicalized tag spec (which describes a single, possibly

--[==[

-- multipart tag) into the displayed form. The tag spec may be a string

Turn a canonicalized tag spec (which describes a single, possibly multipart tag) into the displayed form. The tag spec

-- (a canonical-form tag)~~, or~~ a list of canonical-form tags (in the

may be a string (a canonical-form tag); a list of canonical-form tags (in the case of a simple multipart tag); or a

-- case of a simple multipart tag), or a list of mixed canonical-form

list of mixed canonical-form tags and lists of such tags (in the case of a two-level multipart tag). `joiner` indicates

-- tags and lists of such tags (in the case of a two-level multipart tag).

how to join the parts of a multipart tag, and can be either {"and"} ("foo and bar", or "foo, bar and baz" for 3 or

~~-- JOINER indicates~~ how to join the parts of a multipart tag, and can

more), {"slash"} ("foo/bar"), {"en-dash"} ("foo–bar") or {nil}, which uses the global default found in

-- be either "and" ("foo and bar", or "foo, bar and baz" for 3 or more),

{multipart_join_strategy()} in [[Module:form of/functions]]. (NOTE: The global default is {"slash"} and this seems

-- "slash" ("foo/bar"), "en-dash" ("foo–bar") or nil, which uses the

unlikely to change.)

-- global default found in multipart_join_strategy() in

]==]

-- [[Module:form of/functions]].

function export.get_tag_display_form(tagspec, lang, joiner)

function export.get_tag_display_form(tagspec, joiner)

if type(tagspec) == "string" then

return get_single_tag_display_form(tagspec)

return get_single_tag_display_form(tagspec, lang)

end

-- We have a multipart tag. See if there's a display handler to

-- We have a multipart tag. See if there's a display handler to display them specially.

-- display them specially.

for _, handler in ipairs(require(export.form_of_functions_module).display_handlers) do

for _, handler in ipairs(~~m_functions~~.display_handlers) do

local displayval = handler(tagspec, joiner)

if displayval then

Line 510:

Line 784:

for _, first_level_tag in ipairs(tagspec) do

if type(first_level_tag) == "string" then

table.insert(displayed_tags, get_single_tag_display_form(first_level_tag))

table.insert(displayed_tags, get_single_tag_display_form(first_level_tag, lang))

else

-- A first-level element of a two-level multipart tag.

-- A first-level element of a two-level multipart tag. Currently we just separate the individual components

-- Currently we just separate the individual components

-- with spaces, but other ways are possible, e.g. using an underscore, colon, parens or braces.

-- with spaces, but other ways are possible, e.g. using

-- an underscore, colon, parens or braces.

local components = {}

for _, component in ipairs(first_level_tag) do

table.insert(components, get_single_tag_display_form(component))

table.insert(components, get_single_tag_display_form(component, lang))

end

table.insert(displayed_tags, table.concat(components, " "))

end

return ~~m_functions~~.join_multiparts(displayed_tags, joiner)

return require(export.form_of_functions_module).join_multiparts(displayed_tags, joiner)

end

-- ~~Return true if~~ the ~~list~~ `~~tags1~~`, ~~treated as~~ a set, is ~~a subset~~ of the ~~list `tags2`~~, ~~also~~

--[==[

-- ~~treated~~ as ~~a set~~.

Given a normalized tag set (i.e. as output by {normalize_tag_set()}; all tags are in canonical form, multipart tags are

local ~~function is_subset~~(~~tags1~~, ~~tags2~~)

represented as lists, and two-level multipart tags as lists of lists), convert to displayed form (a string). See

~~tags1~~ = ~~m_table.listToSet~~(~~tags1~~)

{get_tag_display_form()} for the meaning of `joiner`.

~~tags2~~ = ~~m_table~~.~~listToSet~~(~~tags2~~)

]==]

~~for tag, _ in pairs~~(~~tags1~~) do

function export.get_tag_set_display_form(normalized_tag_set, lang, joiner)

if not ~~tags2[tag]~~ then

local parts = {}

~~return false~~

for _, tagspec in ipairs(normalized_tag_set) do

local to_insert = export.get_tag_display_form(tagspec, lang, joiner)

-- Maybe insert a space before inserting the display form of the tag. We insert a space if

-- (a) we're not the first tag; and

-- (b) the tag we're about to insert doesn't have the "no_space_on_left" property; and

-- (c) the preceding tag doesn't have the "no_space_on_right" property.

-- NOTE: We depend here on the fact that

-- (1) all tags with either of the above properties set have the same display form as canonical form, and

-- (2) all tags with either of the above properties set are single-character tags.

-- The second property is an optimization to avoid looking up display forms resulting from multipart tags,

-- which won't be found and which will trigger loading of [[Module:form of/data2]]. If multichar punctuation is

-- added in the future, it's ok to change the == 1 below to <= 2 or <= 3.

--

-- If the first property above fails to hold in the future, we need to track the canonical form of each tag

-- (including the previous one) as well as the display form. This would also avoid the need for the == 1 check.

if #parts > 0 then

local most_recent_tagobj = ulen(parts[#parts]) == 1 and export.lookup_tag(parts[#parts], lang)

local to_insert_tagobj = ulen(to_insert) == 1 and export.lookup_tag(to_insert, lang)

if (

(not most_recent_tagobj or not most_recent_tagobj.no_space_on_right) and

(not to_insert_tagobj or not to_insert_tagobj.no_space_on_left)

) then

table.insert(parts, " ")

end

table.insert(parts, to_insert)

end

return ~~true~~

return table.concat(parts)

end

-- ~~Compute and return the appropriate categories for the~~ tags in `tags~~` (user~~-~~specified~~ tags,

--[==[

~~-- which may consist~~ of ~~multiple tag sets separated by semicolons)~~ and ~~the language in~~ `lang`.

Given a normalized tag set (i.e. as output by {normalize_tag_set()}; all tags are in canonical form, multipart tags are

~~-- This checks both language-specific and~~ language~~-agnostic category specs in [[Module:form~~ of~~/cats]].~~

represented as lists, and two-level multipart tags as lists of lists), fetch the associated categories and labels.

-- `POS` is the user-~~specified~~ part of speech~~, if any, and `terminfo` is currently unused~~.

Return two values, a list of categories and a list of labels. `lang` is the language of the term represented by the tag set,

function export.~~fetch_lang_categories~~(lang, ~~tags~~, ~~terminfo~~, ~~POS~~)

and `POS` is the user-provided part of speech (which may be {nil}).

local m_cats = mw.loadData(~~"Module:form of/cats"~~)

]==]

function export.fetch_categories_and_labels(normalized_tag_set, lang, POS, pagename, lemmas)

local m_cats = mw.loadData(export.form_of_cats_module)

local categories = {}

local labels = {}

~~local normalized_tags = export.normalize_tags(tags)~~

~~local split_tag_sets = export.split_tags_into_tag_sets_and_expand_two_level_multipart_tags(normalized_tags)~~

POS = export.normalize_pos(POS)

-- First split any two-level multipart tags into multiple sets, to make our life easier.

-- ~~Loop over each tag set and compute categories for each one~~.

for _, tag_set in ipairs(export.split_two_level_multipart_tag_set(normalized_tag_set)) do

for _, tag_set in ipairs(~~split_tag_sets~~) do

-- Call a named function, either from the lang-specific data in

local function ~~make_function_table~~()

-- [[Module:form of/lang-data/LANGCODE/functions]] or in [[Module:form of/functions]].

~~return~~ {

local function call_named_function(name, funtype)

~~lang~~=~~lang~~,

local data = {

~~tags~~=~~normalized_tags~~,

pagename = pagename or mw.title.getCurrentTitle().subpageText,

~~term~~=~~term~~,

lemmas = lemmas,

p=POS

tag_set = normalized_tag_set,

lang = lang,

POS = POS

}

local modules_tried = {}

local function try_lang_specific_module(langcode)

if export.langs_with_lang_specific_tags[langcode] then

local lang_specific_module = export.form_of_lang_data_module_prefix .. langcode .. "/functions"

local langdata = require(utilities_module).safe_require(lang_specific_module)

if langdata then

table.insert(modules_tried, lang_specific_module)

if langdata.cat_functions then

local fn = langdata.cat_functions[name]

if fn then

return fn(data), true

end

return nil, false

end

-- First try lang-specific.

local langcode = lang and lang:getCode()

if langcode then

local retval, found_it = try_lang_specific_module(langcode)

if found_it then

return retval

end

-- If the lang we're dealing with is an etym-only lang, try again with the corresponding full language.

local full_langcode = lang and lang:getFullCode()

if full_langcode and full_langcode ~= langcode then

local retval, found_it = try_lang_specific_module(full_langcode)

if found_it then

return retval

end

-- Try lang-independent.

table.insert(modules_tried, export.form_of_functions_module)

local fn = require(export.form_of_functions_module).cat_functions[name]

if fn then

return fn(data)

end

for i, modname in ipairs(modules_tried) do

modules_tried[i] = "[[" .. modname .. "]]"

end

error(("No %s function named '%s' in %s"):format(funtype, name, lang_specific_part,

m_table.serialCommaJoin(modules_tried, {conj = "or", dontTag = true})))

end

Line 599:

Line 943:

-- complex when multipart tags are present.

local function tag_set_matches_spec_tag(spec_tag)

spec_tag = normalize_tag(spec_tag)

spec_tag = normalize_tag(spec_tag, lang)

for _, tag_set_tag in ipairs(tag_set) do

if tag_set_tag_matches_spec_tag(tag_set_tag, spec_tag) then

Line 635:

Line 979:

return false, 3

elseif predicate == "tags=" then

local ~~normalized_spec_tags~~ = export.~~normalize_tags~~(spec[2])

local normalized_spec_tag_sets = export.normalize_tag_set(spec[2], lang)

-- Allow tags to be in different orders, and multipart tags to

if #normalized_spec_tag_sets > 1 then

-- be in different orders. To handle this, we first check that

error("Internal error: No support for conjoined shortcuts in category/label specs in "

-- both tag set tags and spec tags have the same length. If so,

.. "[[Module:form of/cats]] when processing spec tag set " .. table.concat(spec[2], "|"))

-- ~~we sort the~~ multipart tags in the tag set tags and spec tags,

end

-- and then check that all tags in the spec tags are ~~in the~~

local normalized_spec_tag_set = normalized_spec_tag_sets[1]

-- tag set tags.

-- Check for and disallow two-level multipart tags in the specs. FIXME: Remove this when we remove

if #tag_set ~= #~~normalized_spec_tags~~ then

-- support for two-level multipart tags.

for _, tag in ipairs(normalized_spec_tag_set) do

if type(tag) == "table" then

for _, subtag in ipairs(tag) do

if type(subtag) == "table" then

error("Internal error: No support for two-level multipart tags in category/label specs"

.. "[[Module:form of/cats]] when processing spec tag set "

.. table.concat(spec[2], "|"))

end

-- Allow tags to be in different orders, and multipart tags to be in different orders. To handle this,

-- we first check that both tag set tags and spec tags have the same length. If so, we sort the

-- multipart tags in the tag set tags and spec tags, and then check that all tags in the spec tags are

-- in the tag set tags.

if #tag_set ~= #normalized_spec_tag_set then

return false, 3

end

Line 650:

Line 1,010:

table.sort(tag_set_tags[i])

end

if type(~~normalized_spec_tags~~[i]) == "table" then

if type(normalized_spec_tag_set[i]) == "table" then

table.sort(~~normalized_spec_tags~~[i])

table.sort(normalized_spec_tag_set[i])

end

for i=1,#tag_set_tags do

if not m_table.contains(tag_set_tags, ~~normalized_spec_tags~~[i]~~, "deepCompare"~~) then

if not m_table.contains(tag_set_tags, normalized_spec_tag_set[i]) then

return false, 3

end

Line 686:

Line 1,046:

end

return condval, 4

elseif ~~predication~~ == "call" then

elseif predicate == "call" then

~~local~~ fn ~~= m_functions.cat_functions[~~spec[2]]

return fn(call_named_function(spec[2], "condition")), 3

~~if not fn then~~

~~error(~~"No condition ~~function named '" .. spec[2] ..~~ "~~'")~~

~~end~~

~~return fn(make_function_table(~~)), 3

else

error("Unrecognized predicate: " .. predicate)

Line 706:

Line 1,062:

return false

elseif type(spec) == "string" then

-- Substitute POS request with user-specified part of speech

-- A category. Substitute POS request with user-specified part of speech or default.

-- or default

spec = rsub(spec, "<<p=(.-)>>", function(default)

return POS or export.normalize_pos(default)

end)

table.insert(categories, lang:~~getCanonicalName~~() .. " " .. spec)

table.insert(categories, lang:getFullName() .. " " .. spec)

return true

elseif type(spec) == "table" and spec.labels then

-- A label spec.

for _, label in ipairs(spec.labels) do

m_table.insertIfNot(labels, label)

end

return true

elseif type(spec) ~= "table" then

Line 734:

Line 1,095:

return false

elseif predicate == "call" then

~~local fn = m_functions.cat_functions[~~spec[2]]

return process_spec(call_named_function(spec[2], "spec"))

~~if not fn then~~

~~error("No spec function named '~~" .. spec~~[2] ..~~ "~~'")~~

~~end~~

~~return process_spec(fn(make_function_table()~~))

else

local condval, ifspec = check_condition(spec)

Line 752:

Line 1,109:

end

local ~~langspecs~~ = ~~m_cats[~~lang:getCode()]

local langcode = lang:getCode()

local langspecs = m_cats[langcode]

if langspecs then

for _, spec in ipairs(langspecs) do

Line 758:

Line 1,116:

end

if lang:~~getCode~~() ~= "und" then

local full_code = lang:getFullCode()

if full_code ~= langcode then

local langspecs = m_cats[full_code]

if langspecs then

for _, spec in ipairs(langspecs) do

process_spec(spec)

end

if full_code ~= "und" then

local langspecs = m_cats["und"]

if langspecs then

Line 768:

Line 1,135:

end

return categories

return categories, labels

end

~~function export~~.~~tagged_inflections~~(~~data~~, ~~terminfo~~, notext, capfirst, posttext, joiner)

--[==[

if not data.tags then

Implementation of templates that display inflection tags, such as the general {{tl|inflection of}}, semi-specific

error("First argument must ~~now~~ be a table of arguments")

variants such as {{tl|participle of}}, and specific variants such as {{tl|past participle of}}. `data` contains all the

information controlling the display, with the following fields:

* `.lang`: ('''''required''''') Language to use when looking up language-specific inflection tags, categories and

labels, and for displaying categories and labels.

* `.tags`: ('''''required''' unless `.tag_sets` is given'') List of non-canonicalized inflection tags. Multiple tag sets

can be indicated by a {";"} as one of the tags, and tag-set properties may be attached to the last tag of a tag set.

The tags themselves may come directly from the user (as in {{tl|inflection of}}); come partly from the user (as in

{{tl|participle of}}, which adds the tag `part` to user-specified inflection tags); or be entirely specified by the

template (as in {{tl|past participle of}}).

* `.tag_sets`: ('''''required''' unless `.tags` is given'') List of non-canonicalized tag sets and associated

per-tag-set properties. Each element of the list is an object of the form

{ {tags = {"TAG", "TAG", ...}, labels = {"LABEL", "LABEL", ...}} }. If `.tag_sets` is specified, `.tags` should not be

given and vice-versa. Specifying `.tag_sets` in place of `.tags` allows per-tag-set labels to be specified; otherwise,

there is no advantage. [[Module:pt-gl-inflections]] uses this functionality to supply labels like {"Brazil"} and

{"Portugal"} associated with specific tag sets.

* `.lemmas`: ('''''recommended''''') List of objects describing the lemma(s) of which the term in question is a

non-lemma form. These are passed directly to {full_link()} in [[Module:links]]. Each object should have at minimum a

`.lang` field containing the language of the lemma and a `.term` field containing the lemma itself. Each object is

formatted using {full_link()} and then if there is more than one, they are joined using {serialCommaJoin()} in

[[Module:table]]. Alternatively, `.lemmas` can be a string, which is displayed directly. If omitted entirely, no lemma

links are shown and the connecting "of" is also omitted.

* `.lemma_face`: ('''''recommended''''') "Face" to use when displaying the lemma objects. Usually should be set to

{"term"}.

* `.POS`: ('''''recommended''''') Categorizing part-of-speech tag. Comes from the {{para|p}} or {{para|POS}} argument of

{{tl|inflection of}}.

* `.pagename`: Page name of "current" page or nil to use the actual page title; for testing purposes.

* `.enclitics`: List of enclitics to display after the lemmas, in parens.

* `.no_format_categories`: If true, don't format the categories derived from the inflection tags; just return them.

* `.sort`: Sort key for formatted categories. Ignored when `.no_format_categories` = {true}.

* `.nocat`: Suppress computation of categories (even if `.no_format_categories` is not given).

* `.notext`: Disable display of all tag text and `inflection of` text. (FIXME: Maybe not implemented correctly.)

* `.capfirst`: Capitalize the first word displayed.

* `.pretext`: Additional text to display before the inflection tags, but after any top-level labels.

* `.posttext`: Additional text to display after the lemma links.

* `.text_classes`: CSS classes used to wrap the tag text and lemma links. Default is

{"form-of-definition use-with-mention"} for the tag text, {"form-of-definition-link"} for the lemma links. (FIXME:

Should separate out the lemma links into their own field.)

* `.joiner`: Override the joiner (normally a slash) used to join multipart tags. You should normally not specify this.

A typical call might look like this (for {{m+|es|amo}}): {

local lang = require("Module:languages").getByCode("es")

local lemma_obj = {

lang = lang,

term = "amar",

}

return m_form_of.tagged_inflections({

lang = lang, tags = {"1", "s", "pres", "ind"}, lemmas = {lemma_obj}, lemma_face = "term", POS = "verb"

})

}

Normally, one value is returned, the formatted text, which has appended to it the formatted categories derived from the

tag-set-related categories generated by the specs in [[Module:form of/cats]]. To suppress this, set

`data.no_format_categories` = {true}, in which case two values are returned, the formatted text without any formatted

categories appended and a list of the categories to be formatted.

NOTE: There are two sets of categories that may be generated: (1) categories derived directly from the tag sets, as

specified in [[Module:form of/cats]]; (2) categories derived from tag-set labels, either (a) set explicitly by the

caller in `data.tag_sets`, (b) specified by the user using `<lb:...>` attached to the last tag in a tag set, or

(c) specified in [[Module:form of/cats]]. The second type (label-related categories) are currently not returned in

the second return value of {tagged_inflections()}, and are currently inserted into the output text even if

`data.no_format_categories` is set to {true}; but they can be suppressed by setting `data.nocat` = {true} (which also

suppresses the first type of categories, those derived directly from tag sets, even if `data.no_format_categories` is

set to {true}).

]==]

function export.tagged_inflections(data)

if not data.tags and not data.tag_sets then

error("First argument must be a table of arguments, and `.tags` or `.tag_sets` must be specified")

end

if data.tags and data.tag_sets then

error("Both `.tags` and `.tag_sets` cannot be specified")

end

local ~~cur_infl~~ = {}

local tag_sets = data.tag_sets

if not tag_sets then

tag_sets = export.split_tag_set(data.tags)

for i, tag_set in ipairs(tag_sets) do

tag_sets[i] = export.parse_tag_set_properties(tag_set)

end

local inflections = {}

local categories = {}

for _, tag_set in ipairs(tag_sets) do

local normalized_tag_sets = export.normalize_tag_set(tag_set.tags, data.lang, "do-track")

local ~~ntags~~ = export.~~normalize_tags~~(data.~~tags~~, ~~nil~~, ~~"do-track")~~

for _, normalized_tag_set in ipairs(normalized_tag_sets) do

local cur_infl = {}

~~for i~~, ~~tagspec in ipairs~~(~~ntags~~) do

local this_categories, this_labels = export.fetch_categories_and_labels(normalized_tag_set, data.lang,

~~if tagspec~~ == ";" ~~then~~

data.POS, data.pagename, type(data.lemmas) == "table" and data.lemmas or nil)

if ~~#cur_infl > 0~~ then

if not data.nocat then

~~table~~.~~insert~~(~~inflections~~, ~~table.concat(cur_infl)~~)

m_table.extendList(categories, this_categories)

end

local cur_infl = export.get_tag_set_display_form(normalized_tag_set, data.lang, data.joiner)

cur_infl ~~= {}~~

~~else~~

~~local to_insert~~ = export.~~get_tag_display_form~~(~~tagspec~~, data.~~joiner)~~

~~-- Maybe insert a space before inserting the display form~~

~~-- of the tag. We insert a space if~~

~~-- (a) we're not the first tag; and~~

~~-- (b) the tag we're about to insert doesn't have the~~

~~-- "no_space_on_left" property; and~~

~~-- (c) the preceding tag doesn't have the "no_space_on_right"~~

~~-- property.~~

~~-- NOTE: We depend here on the fact that~~

~~-- (1) all tags with either of the above properties set have the~~

~~-- same display form as canonical form~~, ~~and~~

~~-- (2) all tags with either of the above properties set are~~

~~-- single-character tags~~.

~~-- The second property is an optimization to avoid looking up~~

~~-- display forms resulting from multipart tags, which won't be~~

~~-- found and which will trigger loading of [[Module:form of/data2]].~~

~~-- If multichar punctuation is added in the future, it's ok to~~

~~-- change the == 1 below to <= 2 or <= 3.~~

--

~~-- If the first property above fails to hold in the future, we~~

~~-- need to track the canonical form of each tag (including the~~

~~-- previous one~~) ~~as well as the display form. This would also~~

~~-- avoid the need for the == 1 check.~~

if #cur_infl > 0 then

~~local most_recent_tagobj = ulen(cur_infl[#cur_infl]) == 1 and~~

if tag_set.labels then

~~export.lookup_tag(cur_infl[#cur_infl])~~

this_labels = m_table.append(tag_set.labels, this_labels)

~~local to_insert_tagobj = ulen(to_insert) == 1 and~~

~~export.lookup_tag(to_insert)~~

if (

~~(not most_recent_tagobj or~~

~~not most_recent_tagobj~~.~~no_space_on_right) and~~

~~(not to_insert_tagobj or~~

~~not to_insert_tagobj.no_space_on_left)~~

) then

~~table~~.~~insert~~(~~cur_infl~~, ~~" "~~)

end

table.insert(inflections, {infl_text = cur_infl, labels = this_labels})

end

~~table.insert(cur_infl, to_insert)~~

end

if ~~#cur_infl > 0~~ then

local overall_labels, need_per_tag_set_labels

~~table~~.~~insert~~(~~inflections~~, ~~table~~.~~concat(cur_infl)~~)

for _, inflection in ipairs(inflections) do

if overall_labels == nil then

overall_labels = inflection.labels

elseif not m_table.deepEquals(overall_labels, inflection.labels) then

need_per_tag_set_labels = true

overall_labels = nil

break

end

local format_data = require(~~"Module:table"~~).~~shallowcopy~~(data)

if not need_per_tag_set_labels then

for _, inflection in ipairs(inflections) do

inflection.labels = nil

end

local format_data = m_table.shallowcopy(data)

local function format_labels(labels, notext)

if labels and #labels > 0 then

return require(labels_module).show_labels { labels = labels, lang = data.lang, sort = data.sort, nocat = data.nocat } ..

(notext and (data.pretext or "") == "" and "" or " ")

else

return ""

end

local of_text = data.lemmas and " of" or ""

local formatted_text

if #inflections == 1 then

format_data.text =

if need_per_tag_set_labels then

data.notext and "" or ((data.capfirst and require("Module:string utilities").ucfirst(inflections[1]) or inflections[1]) ..

error("Internal error: need_per_tag_set_labels should not be set with one inflection")

~~(data.terminfo and " of" or ""~~))

end

~~return~~ export.format_form_of(format_data)

format_data.text = format_labels(overall_labels, data.notext) .. (data.pretext or "") .. (data.notext and "" or

((data.capfirst and require("Module:string utilities").ucfirst(inflections[1].infl_text) or inflections[1].infl_text) .. of_text))

formatted_text = export.format_form_of(format_data)

else

format_data.text = data.notext and "" or ((data.capfirst and "Inflection" or "inflection") ..

format_data.text = format_labels(overall_labels, data.notext) .. (data.pretext or "") .. (data.notext and "" or

~~(data.terminfo and " of" or ""~~))

((data.capfirst and "Inflection" or "inflection") .. of_text))

format_data.posttext = (data.posttext or "") .. ":"

local link = export.format_form_of(format_data)

local text_classes = data.text_classes or "form-of-definition use-with-mention"

~~return link ..~~"\n## ~~"~~ ..

for i, inflection in ipairs(inflections) do

~~table.concat(inflections,~~ "~~\n##~~ ") .. ""

inflections[i] = "\n## " .. format_labels(inflection.labels, false) ..

"" .. inflection.infl_text .. ""

end

formatted_text = link .. table.concat(inflections)

end

if not data.no_format_categories then

if #categories > 0 then

formatted_text = formatted_text .. require("Module:utilities").format_categories(categories, data.lang,

data.sort, nil, export.force_cat)

end

return formatted_text

end

return formatted_text, categories

end

~~function export.to_Wikidata_IDs(tags, skip_tags_without_ids)~~

~~if type(tags) == "string" then~~

~~tags = mw.text.split(tags, "|", true)~~

~~end~~

--[==[

Given a tag set, return a flattened list of the Wikidata IDs of all tags in the tag set. FIXME: Only used in a debugging

function in [[Module:se-verbs]]; move there.

]==]

function export.to_Wikidata_IDs(tag_set, lang, skip_tags_without_ids)

local ret = {}

local function get_wikidata_id(tag)

~~if tag == ";" and not skip_tags_without_ids then~~

local data = export.lookup_tag(tag, lang)

~~error("Semicolon is not supported for Wikidata IDs")~~

~~else~~

~~return nil~~

~~end~~

local data = export.lookup_tag(tag)

if not data or not data.~~wikidata~~ then

if not data or not data[export.WIKIDATA] then

if not skip_tags_without_ids then

error("The tag \"" .. tag .. "\" does not have a Wikidata ID defined in ~~[[Module:~~form of/data~~]]"~~)

error('The tag "' .. tag .. '" does not have a Wikidata ID defined in the form-of data modules')

else

return nil

end

else

return data.~~wikidata~~

return ("Q%s"):format(data[export.WIKIDATA])

end

for i, tag in ipairs(~~export.normalize_tags~~(~~tags~~)) do

local normalized_tag_sets = export.normalize_tag_set(tag_set, lang)

if type(~~tag~~) == "table" then

for _, tag_set in ipairs(normalized_tag_sets) do

~~local ids = {}~~

for _, tag in ipairs(tag_set) do

for _, ~~onetag~~ in ipairs(~~tag~~) do

if type(tag) == "table" then

table.insert(~~ids~~, get_wikidata_id(~~onetag~~))

for _, subtag in ipairs(tag) do

if type(subtag) == "table" then

-- two-level multipart tag; FIXME: delete support for this

for _, subsubtag in ipairs(subtag) do

table.insert(ret, get_wikidata_id(subsubtag))

end

else

table.insert(ret, get_wikidata_id(subtag))

end

else

table.insert(ret, get_wikidata_id(tag))

end

~~table.insert(ret, ids)~~

~~else~~

~~table.insert(ret, get_wikidata_id(tag))~~

end

Line 897:

Line 1,353:

function export.dump_form_of_data(frame)

local data = {

data = require(~~"Module:form of/data"~~),

data = require(export.form_of_data_module),

data2 = require(~~"Module:form of/data2"~~)

data2 = require(export.form_of_data2_module)

}

return require("Module:JSON").toJSON(data)

end

function export.finalize_tag_data(tags, shortcuts)

local function process_shortcut(name, shortcut)

-- If the shortcut is already in the list, then there is a duplicate.

if shortcuts[shortcut] then

error("The shortcut \"" .. shortcut .. "\" (for the inflection tag \"" .. name .. "\") conflicts with an existing shortcut for the tag \"" .. shortcuts[shortcut] .. "\".")

elseif tags[shortcut] then

error("The shortcut \"" .. shortcut .. "\" (for the inflection tag \"" .. name .. "\") conflicts with an existing tag with that name.")

end

shortcuts[shortcut] = name

end

for name, data in pairs(tags) do

local data_shortcuts = data[export.SHORTCUTS]

if data_shortcuts then

if type(data_shortcuts) == "string" then

process_shortcut(name, data_shortcuts)

else

for _, shortcut in ipairs(data_shortcuts) do

process_shortcut(name, shortcut)

end

return export

@@ Line 1: / Line 1: @@
+local export = {}
+export.force_cat = false -- for testing; set to true to display categories even on non-mainspace pages
 local m_links = require("Module:links")
+local m_string_utils = require("Module:string utilities")
 local m_table = require("Module:table")
-local m_pos = mw.loadData("Module:form of/pos")
+local parse_utilities_module = "Module:parse utilities"
-local m_functions = require("Module:form of/functions")
+local labels_module = "Module:labels"
+local utilities_module = "Module:utilities"
+export.form_of_pos_module = "Module:form of/pos"
+export.form_of_functions_module = "Module:form of/functions"
+export.form_of_cats_module = "Module:form of/cats"
+export.form_of_lang_data_module_prefix = "Module:form of/lang-data/"
+export.form_of_data_module = "Module:form of/data"
+export.form_of_data2_module = "Module:form of/data2"
-local ulen = mw.ustring.len
+local ulen = m_string_utils.len
-local rsubn = mw.ustring.gsub
+local rsubn = m_string_utils.gsub
-local rmatch = mw.ustring.match
+local rmatch = m_string_utils.match
-local rsplit = mw.text.split
+local rsplit = m_string_utils.split
-local export = {}
+export.TAG_TYPE = 1
+export.GLOSSARY = 2
+export.SHORTCUTS = 3
+export.WIKIDATA = 4
---[=[
+export.APPENDIX = true
+export.WP = false
+export.WIKT = 0
-This module implements the underlying processing of {{form of}},
+--[==[
-{{inflection of}} and specific variants such as {{past participle of}}
+Set listing the languages with lang-specific tags. If a language isn't listed here, the tags for that language won't be
-and {{alternative spelling of}}. Most of the logic in this file is to
+recognized.
-handle tags in {{inflection of}}. Other related files:
+]==]
+export.langs_with_lang_specific_tags = {
+	["en"] = true,
+	["got"] = true,
+	["lt"] = true,
+	["lv"] = true,
+	["nl"] = true,
+	["pi"] = true,
+	["sw"] = true,
+	["ttj"] = true,
+}
-* [[Module:form of/templates]] contains the majority of the logic that
+--[==[ intro:
-  implements the templates themselves.
-* [[Module:form of/data]] is a data-only file containing information on
-  the more common inflection tags, listing the tags, their shortcuts,
-  the category they belong to (tense-aspect, case, gender, voice-valence,
-  etc.), the appropriate glossary link and the wikidata ID.
-* [[Module:form of/data2]] is a data-only file containing information on
-  the less common inflection tags, in the same format as
-  [[Module:form of/data]].
-* [[Module:form of/cats]] is a data-only file listing the
-  language-specific categories that are added when the appropriate
-  combinations of tags are seen for a given language.
-* [[Module:form of/pos]] is a data-only file listing the recognized
-  parts of speech and their abbreviations, used for categorization.
-  FIXME: This should be unified with the parts of speech listed in
-  [[Module:links]].
-* [[Module:form of/functions]] contains functions for use with
-  [[Module:form of/data]] and [[Module:form of/cats]]. They are
-  contained in this module because data-only modules can't contain
-  code. The functions in this file are of two types:
-  (1) Display handlers allow for customization of the display of
+This module implements the underlying processing of {{tl|form of}}, {{tl|inflection of}} and specific variants such as
-      multipart tags (see below). Currently there is only one
+{{tl|past participle of}} and {{tl|alternative spelling of}}. Most of the logic in this file is to handle tags in
-	  such handler, for handling multipart person tags such as
+{{tl|inflection of}}. Other related files:
-	  '1//2//3'.
-  (2) Cat functions allow for more complex categorization logic,
-      and are referred to by name in [[Module:form of/cats]].
-	  Currently no such functions exist.
-The following terminology is used in conjunction with {{inflection of}}:
+* [[Module:form of/templates]] contains the majority of the logic that implements the templates themselves.
+* [[Module:form of/data]] is a data-only file containing information on the more common inflection tags, listing the
+  tags, their shortcuts, the category they belong to (tense-aspect, case, gender, voice-valence, etc.), the appropriate
+  glossary link and the wikidata ID.
+* [[Module:form of/data2]] is a data-only file containing information on the less common inflection tags, in the same
+  format as [[Module:form of/data]].
+* [[Module:form of/lang-data/LANGCODE]] is a data-only file containing information on the language-specific inflection
+  tags for the language with code LANGCODE, in the same format as [[Module:form of/data]]. Language-specific tags
+  override general tags.
+* [[Module:form of/cats]] is a data-only file listing the language-specific categories that are added when the
+  appropriate combinations of tags are seen for a given language.
+* [[Module:form of/pos]] is a data-only file listing the recognized parts of speech and their abbreviations, used for
+  categorization. FIXME: This should be unified with the parts of speech listed in [[Module:links]].
+* [[Module:form of/functions]] contains functions for use with [[Module:form of/data]] and [[Module:form of/cats]].
+  They are contained in this module because data-only modules can't contain code. The functions in this file are of two
+  types:
+*# Display handlers allow for customization of the display of multipart tags (see below). Currently there is only
+   one such handler, for handling multipart person tags such as `1//2//3`.
+*# Cat functions allow for more complex categorization logic, and are referred to by name in [[Module:form of/cats]].
+   Currently no such functions exist.
-* A TAG is a single grammatical item, as specified in a single numbered
+The following terminology is used in conjunction with {{tl|inflection of}}:
-  parameter of {{inflection of}}. Examples are 'masculine', 'nominative',
-  or 'first-person'. Tags may be abbreviated, e.g. 'm' for 'masculine',
-  'nom' for 'nominative', or '1' for 'first-person'. Such abbreviations
-  are called SHORTCUTS, and some tags have multiple equivalent shortcuts
-  (e.g. 'p' or 'pl' for 'plural'). The full, non-abbreviated form of
-  a tag is called its CANONICAL FORM.
-* The DISPLAY FORM of a tag is the way it's displayed to the user. Usually
-  the displayed text of the tag is the same as its canonical form, and it
-  normally functions as a link to a glossary entry explaining the tag.
-  Usually the link is to an entry in [[Appendix:Glossary]], but sometimes
-  the tag is linked to an individual dictionary entry or to a Wikipedia
-  entry. Occasionally, the display text differs from the canonical form of
-  the tag. An example is the tag 'comparative case', which has the display
-  text read as simply 'comparative'. Normally, tags referring to cases don't
-  have the word "case" in them, but in this case the tag 'comparative' was
-  already used as a shortcut for the tag 'comparative degree', so the tag was
-  named 'comparative case' to avoid clashing. A similar situation occurs
-  with 'adverbial case' vs. the grammar tag 'adverbial' (as in 'adverbial
-  participle').
-* A TAG SET is an ordered list of tags, which together express a single
-  inflection, for example, '1|s|pres|ind', which can be expanded to
-  canonical-form tags as 'first-person|singular|present|indicative'.
-  Multiple tag sets can be specified in a single call to {{inflection of}}
-  by separating the individual tag sets with a semicolon, e.g.
-  '1|s|pres|ind|;|2|s|imp', which specifies two tag sets, '1|s|pres|ind'
-  as above and '2|s|imp' (in canonical form,
-  'second-person|singular|imperative').
-* A MULTIPART TAG is a tag that embeds multiple tags within it, such as
-  'f//n' or 'nom//acc//voc'. These are used in the case of [[syncretism]],
-  when the same form applies to multiple inflections. Examples are the
-  Spanish present subjunctive, where the first-person and third-person
-  singular have the same form (e.g. [[siga]] from [[seguir]] "to follow"),
-  or Latin third-declension adjectives, where the dative and ablative
-  plural of all genders have the same form (e.g. [[omnibus]] from [[omnis]]
-  "all"). These would be expressed respectively as '1//3|s|pres|sub'
-  and 'dat//abl|m//f//n|p', where the use of the multipart tag compactly
-  encodes the syncretism and avoids the need to individually list out
-  all of the inflections. Multipart tags currently display as a list
-  separated by "and", ''dative and ablative'' or
-  ''masculine, feminine and neuter'' where each individual word is linked
-  appropriately. As a special case, multipart tags involving persons display
-  specially; for example, the multipart tag ''1//2//3'' displays as
-  ''first-, second- and third-person'', with the word "person" occurring
-  only once.
-* A TWO-LEVEL MULTIPART TAG is a special type of multipart tag that
-  joins two or more tag sets instead of joining individual tags. The tags
-  within the tag set are joined by a colon, e.g. '1:s//3:p', which is
-  displayed as ''first-person singular and third-person plural'', e.g.
-  for use with the form [[μέλλον]] of the verb [[μέλλω]] "to intend",
-  which uses the tag set '1:s//3:p|impf|actv|indc|unaugmented' to express
-  the syncretism between the first singular and third plural forms of the
-  imperfect active indicative unaugmented conjugation. Two-level multipart
-  tags should be used sparingly; if in doubt, list out the inflections
-  separately.
-* A MULTIPART TAG SHORTCUT is a shortcut that expands into a multipart
-  tag, for example '123', which expands to the multipart tag '1//2//3'.
-  Only the most common such combinations exist as shortcuts.
-* A LIST TAG SHORTCUT is a special type of shortcut that expands to a list
-  of tags instead of a single tag. For example, the shortcut '1s' expands to
-  '1|s' (first-person singular). Only the most common such combinations
-  exist as shortcuts.
-]=]
+* A ''tag'' is a single grammatical item, as specified in a single numbered parameter of {{tl|inflection of}}. Examples
+  are `masculine`, `nominative`, or `first-person`. Tags may be abbreviated, e.g. `m` for `masculine`, `nom` for
+  `nominative`, or `1` for `first-person`. Such abbreviations are called ''aliases'', and some tags have multiple
+  equivalent aliases (e.g. `p` or `pl` for `plural`). The full, non-abbreviated form of a tag is called its
+  ''canonical form''.
+* The ''display form'' of a tag is the way it's displayed to the user. Usually the displayed text of the tag is the same
+  as its canonical form, and it normally functions as a link to a glossary entry explaining the tag. Usually the link is
+  to an entry in [[Appendix:Glossary]], but sometimes the tag is linked to an individual dictionary entry or to a
+  Wikipedia entry. Occasionally, the display text differs from the canonical form of the tag. An example is the tag
+  `comparative case`, which has the display text read as simply `comparative`. Normally, tags referring to cases don't
+  have the word "case" in them, but in this case the tag `comparative` was already used as an alias for the tag
+  `comparative degree`, so the tag was named `comparative case` to avoid clashing. A similar situation occurs with
+  `adverbial case` vs. the grammar tag `adverbial` (as in `adverbial participle`).
+* A ''tag set'' is an ordered list of tags, which together express a single inflection, for example, `1|s|pres|ind`,
+  which can be expanded to canonical-form tags as `first-person|singular|present|indicative`.
+* A ''conjoined tag set'' is a tag set that consists of multiple individual tag sets separated by a semicolon, e.g.
+  `1|s|pres|ind|;|2|s|imp`, which specifies two tag sets, `1|s|pres|ind` as above and `2|s|imp` (in canonical form,
+  `second-person|singular|imperative`). Multiple tag sets specified in a single call to {{tl|inflection of}} are
+  specified in this fashion. Conjoined tag sets can also occur in list-tag shortcuts.
+* A ''multipart tag'' is a tag that embeds multiple tags within it, such as `f//n` or `nom//acc//voc`. These are used in
+  the case of [[syncretism]], when the same form applies to multiple inflections. Examples are the Spanish present
+  subjunctive, where the first-person and third-person singular have the same form (e.g. {{m|es|siga}} from
+  {{m|es|seguir|t=to follow}}), or Latin third-declension adjectives, where the dative and ablative plural of all
+  genders have the same form (e.g. {{m|la|omnibus}} from {{m|la|omnis|t=all}}). These would be expressed respectively as
+  `1//3|s|pres|sub` and `dat//abl|m//f//n|p`, where the use of the multipart tag compactly encodes the syncretism and
+  avoids the need to individually list out all of the inflections. Multipart tags currently display as a list separated
+  by a slash, e.g.  ''dative/ablative'' or ''masculine/feminine/neuter'' where each individual word is linked
+  appropriately. As a special case, multipart tags involving persons display specially; for example, the multipart tag
+  `1//2//3` displays as ''first-, second- and third-person'', with the word "person" occurring only once.
+* A ''two-level multipart tag'' is a special type of multipart tag that joins two or more tag sets instead of joining
+  individual tags. The tags within the tag set are joined by a colon, e.g. `1:s//3:p`, which is displayed as
+  ''first-person singular and third-person plural'', e.g. for use with the form {{m|grc|μέλλον}} of the verb
+  {{m|grc|μέλλω|t=to intend}}, which uses the tag set `1:s//3:p|impf|actv|indc|unaugmented` to express the syncretism
+  between the first singular and third plural forms of the imperfect active indicative unaugmented conjugation.
+  Two-level multipart tags should be used sparingly; if in doubt, list out the inflections separately. [FIXME: Make
+  two-level multipart tags obsolete.]
+* A ''shortcut'' is a tag that expands to any type of tag described above, or to any type of tag set described above.
+  Aliases are a particular type of shortcut whose expansion is a single non-multipart tag.
+* A ''multipart shortcut'' is a shortcut that expands into a multipart tag, for example `123`, which expands to the
+  multipart tag `1//2//3`. Only the most common such combinations exist as shortcuts.
+* A ''list shortcut'' is a special type of shortcut that expands to a list of tags instead of a single tag. For example,
+  the shortcut `1s` expands to `1|s` (first-person singular). Only the most common such combinations exist as shortcuts.
+* A ''conjoined shortcut'' is a special type of list shortcut that consists of a conjoined tag set (multiple logical tag
+  sets). For example, the English language-specific shortcut `ed-form` expands to `spast|;|past|part`, expressing the
+  common syncretism between simple past and past participle in English (and in this case, `spast` is itself a list
+  shortcut that expands to `simple|past`).
+]==]
 -- version of rsubn() that discards all but the first return value
@@ Line 121: / Line 129: @@
+local function normalize_index(list, index)
+	-- Non-negative indices pass through untouched; negative ones count back from the end of `list`, so that -1
+	-- resolves to #list.
+	if not (index < 0) then
+		return index
+	end
+	return #list + index + 1
+end
+-- FIXME, consider moving to [[Module:table]]
+-- Return true if every element of the list `tags1` also occurs in the list `tags2`. Both lists are converted to sets
+-- first, so only membership matters.
+local function is_subset(tags1, tags2)
+	local wanted = m_table.listToSet(tags1)
+	local available = m_table.listToSet(tags2)
+	for tag in pairs(wanted) do
+		if not available[tag] then
+			return false
+		end
+	end
+	return true
+end
+-- FIXME, move to [[Module:table]]
+-- Return a new list holding elements `i` through `j` (inclusive) of `list`. `i` defaults to 1 and `j` to -1 (the last
+-- element); negative values are resolved relative to the end of the list by normalize_index().
+local function slice(list, i, j)
+	--checkType("slice", 1, list, "table")
+	--checkType("slice", 2, i, "number", true)
+	--checkType("slice", 3, j, "number", true)
+	local first = i == nil and 1 or normalize_index(list, i)
+	local last = normalize_index(list, j or -1)
+	local result = {}
+	for index = first, last do
+		-- Re-base so that the copied elements start at position 1 of the result.
+		result[index - first + 1] = list[index]
+	end
+	return result
+end
+local function wrap_in_span(text, classes)
+	-- Enclose `text` in an HTML span whose class attribute is the string `classes`.
+	return string.format("<span class='%s'>%s</span>", classes, text)
+end
+--[==[
+Lowest-level implementation of form-of templates, including the general {{tl|form of}} as well as those that deal with
+inflection tags, such as the general {{tl|inflection of}}, semi-specific variants such as {{tl|participle of}}, and
+specific variants such as {{tl|past participle of}}. `data` contains all the information controlling the display, with
+the following fields:
+* `.text`: Text to insert before the lemmas. Wrapped in the value of `.text_classes`, or its default; see below.
+* `.lemmas`: List of objects describing the lemma(s) of which the term in question is a non-lemma form. These are passed
+   directly to {full_link()} in [[Module:links]]. Each object should have at minimum a `.lang` field containing the
+   language of the lemma and a `.term` field containing the lemma itself. Each object is formatted using {full_link()}
+   and then if there are more than one, they are joined using {serialCommaJoin()} in [[Module:table]]. Alternatively,
+   `.lemmas` can be a string, which is displayed directly, or omitted, to show no lemma links and omit the connecting
+   text.
+* `.lemma_face`: "Face" to use when displaying the lemma objects. Usually should be set to {"term"}.
+* `.enclitics`: List of enclitics to display after the lemmas, in parens.
+* `.base_lemmas`: List of base lemmas to display after the lemmas, in the case where the lemmas in `.lemmas` are
+   themselves forms of another lemma (the base lemma), e.g. a comparative, superlative or participle. Each object is of
+   the form { { paramobj = PARAM_OBJ, lemmas = {LEMMA_OBJ, LEMMA_OBJ, ...} }} where PARAM_OBJ describes the properties
+   of the base lemma parameter (i.e. the relationship between the intermediate and base lemmas) and LEMMA_OBJ is an
+   object suitable to be passed to {full_link()} in [[Module:links]]. PARAM_OBJ is of the format
+   { { param = "PARAM", tags = {"TAG", "TAG", ...} }} where PARAM is the name of the parameter to {{tl|inflection of}}
+   etc. that holds the base lemma(s) of the specified relationship and the tags describe the relationship, such as
+   { {"comd"}} or { {"past", "part"}}.
+* `.text_classes`: CSS classes used to wrap the tag text and lemma links. Default is {"form-of-definition use-with-mention"}
+   for the tag text and lemma links, and additionally {"form-of-definition-link"} specifically for the lemma links.
+   (FIXME: Should separate out the lemma links into their own field.)
+* `.posttext`: Additional text to display after the lemma links.
+]==]
 function export.format_form_of(data)
 	if type(data) ~= "table" then
-		error("First argument must now be a table of arguments")
+		error("Internal error: First argument must now be a table of arguments")
 	end
 	local text_classes = data.text_classes or "form-of-definition use-with-mention"
+	-- NOTE: a caller-supplied `.text_classes` replaces the lemma-link default below as well as the default above.
-	local terminfo_classes = data.text_classes or "form-of-definition-link"
+	local lemma_classes = data.text_classes or "form-of-definition-link"
 	local parts = {}
+	-- Append one piece of output to `parts`; all pieces are concatenated at the very end.
-	table.insert(parts, "<span class='" .. text_classes .. "'>")
+	local function ins(text)
-	table.insert(parts, data.text)
+		table.insert(parts, text)
-	if data.text ~= "" and data.terminfo then
+	end
-		table.insert(parts, " ")
+	ins("<span class='" .. text_classes .. "'>")
+	ins(data.text)
+	if data.text ~= "" and data.lemmas then
+		ins(" ")
 	end
-	if data.terminfo then
+	if data.lemmas then
-		table.insert(parts, "<span class='" .. terminfo_classes .. "' style='font-style:normal;'>")
+		if type(data.lemmas) == "string" then
-		if type(data.terminfo) == "string" then
+			ins(wrap_in_span(data.lemmas, lemma_classes))
-			table.insert(parts, data.terminfo)
 		else
-			table.insert(parts, m_links.full_link(data.terminfo, data.terminfo_face, false))
+			-- Each lemma object gets its own link wrapped in its own span; the results are then joined into one list.
+			local formatted_terms = {}
+			for _, lemma in ipairs(data.lemmas) do
+				table.insert(formatted_terms, wrap_in_span(
+					m_links.full_link(lemma, data.lemma_face), lemma_classes
+				))
+			end
+			ins(m_table.serialCommaJoin(formatted_terms))
+		end
+	end
+	if data.enclitics and #data.enclitics > 0 then
+		-- The outer parens need to be outside of the text_classes span so they show in upright instead of italic, or
+		-- they will clash with upright parens generated by link annotations such as transliterations and pos=.
+		ins("</span>")
+		local formatted_terms = {}
+		for _, enclitic in ipairs(data.enclitics) do
+			-- FIXME, should we have separate clitic face and/or classes?
+			table.insert(formatted_terms, wrap_in_span(
+				m_links.full_link(enclitic, data.lemma_face, nil, "show qualifiers"), lemma_classes
+			))
 		end
-		table.insert(parts, "</span>")
+		ins(" (")
+		ins(wrap_in_span("with enclitic" .. (#data.enclitics > 1 and "s" or "") .. " ", text_classes))
+		ins(m_table.serialCommaJoin(formatted_terms))
+		ins(")")
+		-- Reopen the outer span that was closed above so the trailing "</span>" still has something to close.
+		ins("<span class='" .. text_classes .. "'>")
 	end
+	if data.base_lemmas and #data.base_lemmas > 0 then
+		-- Each base lemma closes the current outer span, emits a nested tagged_inflections() rendering (its language
+		-- is taken from the first lemma object of that base lemma), and then reopens the outer span.
+		for _, base_lemma in ipairs(data.base_lemmas) do
+			ins(", the </span>")
+			ins(export.tagged_inflections {
+				lang = base_lemma.lemmas[1].lang,
+				tags = base_lemma.paramobj.tags,
+				lemmas = base_lemma.lemmas,
+				lemma_face = data.lemma_face,
+				no_format_categories = true,
+				nocat = true,
+				text_classes = data.text_classes,
+			})
+			ins("<span class='" .. text_classes .. "'>")
+		end
+	end
+	-- FIXME, should posttext go before enclitics? If so we need to have separate handling for the
+	-- final colon when there are multiple tag sets in tagged_inflections().
 	if data.posttext then
-		table.insert(parts, data.posttext)
+		ins(data.posttext)
 	end
-	table.insert(parts, "</span>")
+	ins("</span>")
 	return table.concat(parts)
 end
-local function is_link_or_html(tag)
+--[==[
-	return tag:find("[[", nil, true) or tag:find("|", nil, true) or
+Return true if `tag` contains an internal link or HTML.
-		tag:find("<", nil, true)
+]==]
+function export.is_link_or_html(tag)
+	-- All three checks are plain-text (non-pattern) searches. The result is the position of the first marker that
+	-- is found, tried in the order "[[", "|", "<", or nil if none of them occurs in `tag`.
+	local function position_of(marker)
+		return (tag:find(marker, nil, true))
+	end
+	return position_of("[[") or position_of("|") or position_of("<")
 end
--- Look up a tag (either a shortcut of any sort of a canonical long-form tag)
+--[==[
--- and return its expansion. The expansion will be a string unless the
+Look up a tag (either a shortcut of any sort or a canonical long-form tag) and return its expansion. The expansion
--- shortcut is a list-tag shortcut such as "1s"; in that case, the expansion
+will be a string unless the shortcut is a list-tag shortcut such as `1s`; in that case, the expansion will be a
--- will be a list. The caller must handle both cases. Only one level of
+list. The caller must handle both cases. Only one level of expansion happens; hence, `acc` expands to {"accusative"},
--- expansion happens; hence, "acc" expands to "accusative", "1s" expands to
+`1s` expands to { {"1", "s"}} (not to { {"first", "singular"}}) and `123` expands to {"1//2//3"}. The expansion will be
--- {"1", "s"} (not to {"first", "singular"}) and "123" expands to "1//2//3".
+the same as the passed-in tag in the following circumstances:
--- The expansion will be the same as the passed-in tag in the following
--- circumstances:
+# The tag is `;` (this is special-cased, and no lookup is done).
---
+# The tag is a multipart tag such as `nom//acc` (this is special-cased, and no lookup is done).
--- 1. The tag is ";" (this is special-cased, and no lookup is done).
+# The tag contains a raw link (this is special-cased, and no lookup is done).
--- 2. The tag is a multipart tag such as "nom//acc" (this is special-cased,
+# The tag contains HTML (this is special-cased, and no lookup is done).
---    and no lookup is done).
+# The tag is already a canonical long-form tag.
--- 3. The tag contains a raw link (this is special-cased, and no lookup is
+# The tag is unrecognized.
---    done).
--- 4. The tag contains HTML (this is special-cased, and no lookup is done).
+This function first looks up in the lang-specific data module [[Module:form of/lang-data/LANGCODE]], then in
--- 5. The tag is already a canonical long-form tag.
+[[Module:form of/data]] (which includes more common non-lang-specific tags) and finally (only if the tag is not
--- 6. The tag is unrecognized.
+recognized as a shortcut or canonical tag, and is not of types 1-4 above) in [[Module:form of/data2]].
---
--- This function first looks up in [[Module:form of/data]] (which includes
+If the expansion is a string and is different from the tag, track it if `do_track` is true.
--- more common tags) and then (only if the tag is not recognized as a
+]==]
--- shortcut or canonical tag, and is not of types 1-4 above) in
+function export.lookup_shortcut(tag, lang, do_track)
--- [[Module:form of/data2]].
---
--- If the expansion is a string and is different from the tag, track it if
--- DO_TRACK is true.
-function export.lookup_shortcut(tag)
 	-- If there is HTML or a link in the tag, return it directly; don't try
 	-- to look it up, which will fail.
-	if tag == ";" or tag:find("//", nil, true) or is_link_or_html(tag) then
+	if tag == ";" or tag:find("//", nil, true) or export.is_link_or_html(tag) then
 		return tag
 	end
-	local m_data = mw.loadData("Module:form of/data")
+	local expansion
-	-- If this is a canonical long-form tag, just return it, and don't
+	local langcode = lang and lang:getCode()
-	-- check for shortcuts (which will cause [[Module:form of/data2]] to be
+	if langcode and export.langs_with_lang_specific_tags[langcode] then
-	-- loaded).
+		local langdata = mw.loadData(export.form_of_lang_data_module_prefix .. langcode)
-	if m_data.tags[tag] then
+		-- If this is a canonical long-form tag, just return it, and don't check for shortcuts. This is an
-		return tag
+		-- optimization; see below.
+		if langdata.tags[tag] then
+			return tag
+		end
+		expansion = langdata.shortcuts[tag]
+	end
+	if not expansion and lang then
+		-- If the lang we're dealing with is an etym-only lang, try again with the corresponding full language.
+		local full_langcode = lang:getFullCode()
+		if full_langcode ~= langcode and export.langs_with_lang_specific_tags[full_langcode] then
+			local langdata = mw.loadData(export.form_of_lang_data_module_prefix .. full_langcode)
+			-- If this is a canonical long-form tag, just return it, and don't check for shortcuts. This is an
+			-- optimization; see below.
+			if langdata.tags[tag] then
+				return tag
+			end
+			expansion = langdata.shortcuts[tag]
+		end
+	end
+	if not expansion then
+		local m_data = mw.loadData(export.form_of_data_module)
+		-- If this is a canonical long-form tag, just return it, and don't check for shortcuts (which will cause
+		-- [[Module:form of/data2]] to be loaded, because there won't be a shortcut entry in [[Module:form of/data]] --
+		-- or, for that matter, in [[Module:form of/data2]]). This is an optimization; the code will still work without
+		-- it, but will use up more memory.
+		if m_data.tags[tag] then
+			return tag
+		end
+		expansion = m_data.shortcuts[tag]
 	end
-	local expansion = m_data.shortcuts[tag]
 	if not expansion then
-		local m_data2 = mw.loadData("Module:form of/data2")
+		local m_data2 = mw.loadData(export.form_of_data2_module)
 		expansion = m_data2.shortcuts[tag]
 	end
@@ Line 203: / Line 351: @@
 		return tag
 	end
 	return expansion
 end
--- Look up a normalized/canonicalized tag and return the data object
+--[==[
--- associated with it. If the tag isn't found, return nil. This first looks up
+Look up a normalized/canonicalized tag and return the data object associated with it. If the tag isn't found, return
--- in [[Module:form of/data]] (which includes more common tags) and then in
+nil. This first looks up in the lang-specific data module [[Module:form of/lang-data/LANGCODE]], then in
--- [[Module:form of/data2]].
+[[Module:form of/data]] (which includes more common non-lang-specific tags) and then finally in
-function export.lookup_tag(tag)
+[[Module:form of/data2]].
-	local m_data = mw.loadData("Module:form of/data")
+]==]
+function export.lookup_tag(tag, lang)
+	local langcode = lang and lang:getCode()
+	if langcode and export.langs_with_lang_specific_tags[langcode] then
+		local langdata = mw.loadData(export.form_of_lang_data_module_prefix .. langcode)
+		if langdata.tags[tag] then
+			return langdata.tags[tag]
+		end
+	end
+	local full_langcode = lang and lang:getFullCode()
+	if full_langcode and full_langcode ~= langcode and export.langs_with_lang_specific_tags[full_langcode] then
+		-- If the lang we're dealing with is an etym-only lang, try again with the corresponding full language.
+		local langdata = mw.loadData(export.form_of_lang_data_module_prefix .. full_langcode)
+		if langdata.tags[tag] then
+			return langdata.tags[tag]
+		end
+	end
+	local m_data = mw.loadData(export.form_of_data_module)
 	local tagobj = m_data.tags[tag]
 	if tagobj then
 		return tagobj
 	end
-	local m_data2 = mw.loadData("Module:form of/data2")
+	local m_data2 = mw.loadData(export.form_of_data2_module)
 	local tagobj2 = m_data2.tags[tag]
 	if tagobj2 then
@@ Line 226: / Line 392: @@
--- Normalize a single tag, which may be a shortcut but should not be a
+-- Normalize a single tag, which may be a shortcut but should not be a multipart tag, a multipart shortcut or a list
--- multipart tag, a multipart-tag shortcut or a list-tag shortcut.
+-- shortcut.
-local function normalize_single_tag(tag)
+local function normalize_single_tag(tag, lang, do_track)
-	local expansion = export.lookup_shortcut(tag)
+	local expansion = export.lookup_shortcut(tag, lang, do_track)
+	-- A non-string expansion means `tag` was a list shortcut, which callers of this function must not pass in.
 	if type(expansion) ~= "string" then
-		error("Tag '" .. tag .. "' is a list-tag shortcut, which is not allowed here")
+		error("Tag '" .. tag .. "' is a list shortcut, which is not allowed here")
 	end
 	tag = expansion
 	return tag
 end
--- Normalize a component of a multipart tag. This should not have any // in it,
+--[=[
--- but may join multiple individual tags with a colon, and may be a single
+Normalize a component of a multipart tag. This should not have any // in it, but may join multiple individual tags with
--- list-tag shortcut, which is treates as if colon-separated. If
+a colon, and may be a single list-tag shortcut, which is treated as if colon-separated. The return value may be a list
--- RECOMBINE_TAGS isn't given, the return value may be a list of tags;
+of tags.
--- otherwise, it will always be a string, and multiple tags will be
+]=]
--- represented as canonical-form tags joined by ":".
+local function normalize_multipart_component(tag, lang, do_track)
-local function normalize_multipart_component(tag, recombine_tags)
+	-- If there is HTML or a link in the tag, don't try to split on colon. A colon may legitimately occur in either one,
-	-- If there is HTML or a link in the tag, don't try to split on colon.
+	-- and we don't want these things parsed. Note that we don't do this check before splitting on //, which we don't
-	-- A colon may legitimately occur in either one, and we don't want
+	-- expect to occur in links or HTML; see comment in normalize_tag().
-	-- these things parsed. Note that we don't do this check before splitting
+	if export.is_link_or_html(tag) then
-	-- on //, which we don't expect to occur in links or HTML; see comment
-	-- in normalize_tag().
-	if is_link_or_html(tag) then
 		return tag
 	end
@@ Line 258: / Line 420: @@
 		-- We allow list-tag shortcuts inside of multipart tags, e.g.
 		-- '1s//3p'. Check for this now.
-		tag = export.lookup_shortcut(tag)
+		tag = export.lookup_shortcut(tag, lang, do_track)
 		if type(tag) == "table" then
 			-- We found a list-tag shortcut; treat as if colon-separated.
 			components = tag
 		else
-			return normalize_single_tag(tag)
+			return normalize_single_tag(tag, lang, do_track)
 		end
 	end
 	local normtags = {}
 	for _, component in ipairs(components) do
-		table.insert(normtags, normalize_single_tag(component))
+		table.insert(normtags, normalize_single_tag(component, lang, do_track))
 	end
-	if recombine_tags then
+	return normtags
-		return table.concat(normtags, ":")
-	else
-		return normtags
-	end
 end
--- Normalize a single tag. If RECOMBINE_TAGS isn't given, the return value
+--[=[
--- may be a list (in the case of multipart tags), which will contain nested
+Normalize a single tag. The return value may be a list (in the case of multipart tags), which will contain nested lists
--- lists in the case of two-level multipart tags; otherwise, it will always
+in the case of two-level multipart tags.
--- be a string, and multipart tags will be represented as canonical-form tags
+]=]
--- joined by "//" and/or ":".
+local function normalize_tag(tag, lang, do_track)
-local function normalize_tag(tag, recombine_multitags)
+	-- We don't check for links or HTML before splitting on //, which we don't expect to occur in links or HTML. Doing
-	-- We don't check for links or HTML before splitting on //, which we
+	-- it this way allows for a tag like '{{lb|grc|Epic}}//{{lb|grc|Ionic}}' to function correctly (the template calls
-	-- don't expect to occur in links or HTML. Doing it this way allows for
+	-- will be expanded before we process the tag, and will contain links and HTML). The only check we do is for a URL,
-	-- a tag like '{{lb|grc|Epic}}//{{lb|grc|Ionic}}' to function correctly
+	-- which shouldn't normally occur, but might if the user tries to put an external link into the tag. URL's with //
-	-- (the template calls will be expanded before we process the tag, and
+	-- normally have the sequence ://, which should never normally occur when // and : are used in their normal ways.
-	-- will contain links and HTML). The only check we do is for a URL,
-	-- which shouldn't normally occur, but might if the user tries to put
-	-- an external link into the tag. URL's with // normally have the
-	-- sequence ://, which should never normally occur when // and : are
-	-- used in their normal ways.
 	if tag:find("://", nil, true) then
 		return tag
@@ Line 299: / Line 452: @@
 	local split_tags = rsplit(tag, "//", true)
 	if #split_tags == 1 then
-		local retval = normalize_multipart_component(tag, recombine_multitags)
+		local retval = normalize_multipart_component(tag, lang, do_track)
 		if type(retval) == "table" then
-			-- The user gave a tag like '1:s', i.e. with colon but without
+			-- The user gave a tag like '1:s', i.e. with colon but without //. Allow this, but we need to return a
-			-- //. Allow this, but we need to return a nested list. Note,
+			-- nested list.
-			-- this will never happen when RECOMBINE_TAGS is given.
 			return {retval}
 		end
@@ Line 310: / Line 462: @@
 	local normtags = {}
 	for _, single_tag in ipairs(split_tags) do
-		table.insert(normtags, normalize_multipart_component(single_tag,
+		table.insert(normtags, normalize_multipart_component(single_tag, lang, do_track))
-			recombine_multitags))
-	end
-	if recombine_multitags then
-		return table.concat(normtags, "//")
-	else
-		return normtags
 	end
+	return normtags
 end
--- Normalize a tag set (a list of tags) into a list of canonical-form tags
+--[==[
--- (which -- may be larger due to the possibility of list-tag shortcuts).
+Normalize a tag set (a list of tags) into its canonical-form tags. The return value is a list of normalized tag sets
--- If RECOMBINE_TAGS isn't given, the return list may itself contains lists;
+(a list because there may be conjoined shortcuts among the input tags). A normalized tag set is a list of tag
--- in particular, multipart tags will be represented as lists. Specifically,
+elements, where each element is either a string (the canonical form of a tag), a list of such strings (in the case of
--- the list will consist of the elements of the multipart tag, which will
+multipart tags) or a list of lists of such strings (in the case of two-level multipart tags). For example, the multipart
--- either be canonical-form strings or (in the case of two-level multipart
+tag `nom//acc//voc` will be represented in canonical form as { {"nominative", "accusative", "vocative"}}, and the
--- tags) nested lists of canonical-form strings. For example, the multipart
+two-level multipart tag `1:s//3:p` will be represented as { {{"first-person", "singular"}, {"third-person", "plural"}}}.
--- tag ''nom//acc//voc'' will expand to
---   {"nominative", "accusative", "vocative"}
+Example 1:
--- and the two-level multipart tag ''1:s//3:p'' will expand to
---   {{"first-person", "singular"}, {"third-person", "plural"}}.
+{normalize_tag_set({"nom//acc//voc", "n", "p"})} = { {{{"nominative", "accusative", "vocative"}, "neuter", "plural"}}}
--- If RECOMBINE_TAGS is given, multipart tags will be represented in string
--- form, i.e. as canonical-form tags joined by "//" and/or ":".
+Example 2:
-function export.normalize_tags(tags, recombine_multitags)
-	local ntags = {}
+{normalize_tag_set({"ed-form"}, ENGLISH)} = { {{"simple", "past"}, {"past", "participle"}}}
-	for _, tag in ipairs(tags) do
-		-- Expand the tag, which may generate a new tag (either a
+Example 3:
-		-- fully canonicalized tag, a multipart tag, or a list of tags).
-		tag = export.lookup_shortcut(tag)
+{normalize_tag_set({"archaic", "ed-form"}, ENGLISH)} = { {{"archaic", "simple", "past"}, {"archaic", "past", "participle"}}}
+]==]
+function export.normalize_tag_set(tag_set, lang, do_track)
+	-- Fast path: assume there is no conjoined shortcut, so the result is a single tag set. This pass is abandoned
+	-- (and its partial output discarded) as soon as a list shortcut whose expansion contains ";" is seen.
+	local output_tag_set = {}
+	local saw_semicolon = false
+	for _, tag in ipairs(tag_set) do
+		-- Expand the tag, which may generate a new tag (either a fully canonicalized tag, a multipart tag, or a list
+		-- of tags).
+		tag = export.lookup_shortcut(tag, lang, do_track)
 		if type(tag) == "table" then
+			saw_semicolon = m_table.contains(tag, ";")
+			if saw_semicolon then
+				-- If we saw a conjoined shortcut, we need to use a more general algorithm that can expand a single
+				-- tag set into multiple.
+				break
+			end
 			for _, t in ipairs(tag) do
-				table.insert(ntags, normalize_tag(t, recombine_multitags))
+				table.insert(output_tag_set, normalize_tag(t, lang, do_track))
+			end
+		else
+			table.insert(output_tag_set, normalize_tag(tag, lang, do_track))
+		end
+	end
+	if not saw_semicolon then
+		return {output_tag_set}
+	end
+	-- Use a more general algorithm that handles conjoined shortcuts.
+	-- This pass restarts from the first tag with a fresh accumulator (shadowing the one above).
+	local output_tag_set = {}
+	for i, tag in ipairs(tag_set) do
+		-- Expand the tag, which may generate a new tag (either a fully canonicalized tag, a multipart tag, or a list
+		-- of tags).
+		tag = export.lookup_shortcut(tag, lang, do_track)
+		if type(tag) == "table" then
+			-- First list shortcut encountered: split its expansion on ";", normalize each piece recursively,
+			-- normalize everything after position `i` recursively, and return every combination of
+			-- (tags accumulated so far) + (one normalized shortcut tag set) + (one normalized tail tag set).
+			local output_tag_sets = {}
+			local shortcut_tag_sets = export.split_tag_set(tag)
+			local normalized_shortcut_tag_sets = {}
+			for _, shortcut_tag_set in ipairs(shortcut_tag_sets) do
+				m_table.extendList(normalized_shortcut_tag_sets,
+					export.normalize_tag_set(shortcut_tag_set, lang, do_track))
 			end
+			local after_tags = slice(tag_set, i + 1)
+			local normalized_after_tags_sets = export.normalize_tag_set(after_tags, lang, do_track)
+			for _, normalized_shortcut_tag_set in ipairs(normalized_shortcut_tag_sets) do
+				for _, normalized_after_tags_set in ipairs(normalized_after_tags_sets) do
+					table.insert(output_tag_sets, m_table.append(output_tag_set, normalized_shortcut_tag_set,
+						normalized_after_tags_set))
+				end
+			end
+			return output_tag_sets
 		else
-			table.insert(ntags, normalize_tag(tag, recombine_multitags))
+			table.insert(output_tag_set, normalize_tag(tag, lang, do_track))
 		end
 	end
-	return ntags
+	-- The second pass only runs when the first pass found a list shortcut, so the loop above must have returned.
+	error("Internal error: Should not get here")
 end
--- Split a tag set containing two-level multipart tags into one or more tag sets not containing such tags.
+function export.combine_multipart_tags(tag_set)
+	-- Convert list-form multipart tags in `tag_set` back to strings: inner lists (two-level components) are joined
+	-- with ":" and the resulting parts with "//". String tags are left alone. Note that `tag_set` and its nested
+	-- lists are modified in place; the same `tag_set` table is returned.
-- Single-level multipart tags are left alone. (If we need to, a slight modification of the following code
+	for i, tag in ipairs(tag_set) do
-- will also split single-level multipart tags.) This assumes that multipart tags are represented as lists
+		if type(tag) == "table" then
-- and two-level multipart tags are represented as lists of lists, as is output by normalize_tags().
+			for j, subtag in ipairs(tag) do
-- NOTE: We have to be careful to properly handle imbalanced two-level multipart tags such as
+				if type(subtag) == "table" then
-- <code>def:s//p</code> (or the reverse, <code>s//def:p</code>).
+					tag[j] = table.concat(subtag, ":")
+				end
+			end
+			tag_set[i] = table.concat(tag, "//")
+		end
+	end
+	return tag_set
+end
+--[==[
+Normalize the tags in `tags` using {normalize_tag_set()}. If `recombine_multitags` is not set, the list of normalized
+tag sets is returned as-is. Otherwise, the multipart tags in each tag set are joined back into strings by
+{combine_multipart_tags()} and the tag sets are then merged into a single list by {combine_tag_sets()}.
+]==]
+function export.normalize_tags(tags, lang, recombine_multitags, do_track)
+	local normalized = export.normalize_tag_set(tags, lang, do_track)
+	if not recombine_multitags then
+		return normalized
+	end
+	for index = 1, #normalized do
+		normalized[index] = export.combine_multipart_tags(normalized[index])
+	end
+	return export.combine_tag_sets(normalized)
+end
+--[==[
+Split a tag set containing two-level multipart tags into one or more tag sets not containing such tags.
+Single-level multipart tags are left alone. (If we need to, a slight modification of the following code
+will also split single-level multipart tags.) This assumes that multipart tags are represented as lists
+and two-level multipart tags are represented as lists of lists, as is output by {normalize_tag_set()}.
+NOTE: We have to be careful to properly handle imbalanced two-level multipart tags such as
+`def:s//p` (or the reverse, `s//def:p`).
+]==]
 function export.split_two_level_multipart_tag_set(tag_set)
-	-- This would be a whole lot easier in Python, with built-in support for
-	-- slicing and array concatenation.
 	for i, tag in ipairs(tag_set) do
 		if type(tag) == "table" then
@@ Line 374: / Line 599: @@
 				-- We found a two-level multipart tag.
 				-- (1) Extract the preceding tags.
-				local pre_tags = {}
+				local pre_tags = slice(tag_set, 1, i - 1)
-				for j=1,i-1 do
-					table.insert(pre_tags, tag_set[j])
-				end
 				-- (2) Extract the following tags.
-				local post_tags = {}
+				local post_tags = slice(tag_set, i + 1)
-				for j=i+1,#tag_set do
-					table.insert(post_tags, tag_set[j])
-				end
 				-- (3) Loop over each tag set alternant in the two-level multipart tag.
 				-- For each alternant, form the tag set consisting of pre_tags + alternant + post_tags,
@@ Line 389: / Line 608: @@
 				for _, first_level_tag_set in ipairs(tag) do
 					local expanded_tag_set = {}
-					for _, pre_tag in ipairs(pre_tags) do
+					m_table.extendList(expanded_tag_set, pre_tags)
-						table.insert(expanded_tag_set, pre_tag)
-					end
 					-- The second level may have a string or a list.
 					if type(first_level_tag_set) == "table" then
-						for _, second_level_tag in ipairs(first_level_tag_set) do
+						m_table.extendList(expanded_tag_set, first_level_tag_set)
-							table.insert(expanded_tag_set, second_level_tag)
-						end
 					else
 						table.insert(expanded_tag_set, first_level_tag_set)
 					end
-					for _, post_tag in ipairs(post_tags) do
+					m_table.extendList(expanded_tag_set, post_tags)
-						table.insert(expanded_tag_set, post_tag)
+					m_table.extendList(resulting_tag_sets, export.split_two_level_multipart_tag_set(expanded_tag_set))
-					end
-					for _, split_tag_set in ipairs(export.split_two_level_multipart_tag_set(expanded_tag_set)) do
-						table.insert(resulting_tag_sets, split_tag_set)
-					end
 				end
 				return resulting_tag_sets
@@ Line 416: / Line 627: @@
--- Given a list of tags, split into tag sets (separated by semicolons in the initial list of tags).
+--[==[
-function export.split_tags_into_tag_sets(tags)
+Split a tag set that may consist of multiple semicolon-separated tag sets into the component tag sets.
-	local tag_set_group = {}
+]==]
+function export.split_tag_set(tag_set)
+	local split_tag_sets = {}
 	local cur_tag_set = {}
-	for _, tag in ipairs(tags) do
+	for _, tag in ipairs(tag_set) do
 		if tag == ";" then
 			if #cur_tag_set > 0 then
-				table.insert(tag_set_group, cur_tag_set)
+				table.insert(split_tag_sets, cur_tag_set)
 			end
 			cur_tag_set = {}
@@ Line 431: / Line 644: @@
 	end
 	if #cur_tag_set > 0 then
-		table.insert(tag_set_group, cur_tag_set)
+		table.insert(split_tag_sets, cur_tag_set)
 	end
-	return tag_set_group
+	return split_tag_sets
 end
+export.split_tags_into_tag_sets = export.split_tag_set
--- Given a list of tags, split into tag sets (separated by semicolons in the initial list of tags).
+--[==[
--- Then, potentially split each tag set into multiple tag sets if there are any two-level multipart
+Combine multiple tag sets in a tag set group into a simple tag set, with logical tag sets separated by semicolons.
--- tags in those tag sets.
+This is the opposite of {split_tag_set()}.
-function export.split_tags_into_tag_sets_and_expand_two_level_multipart_tags(tags)
+]==]
-	-- First, split into tag sets.
+function export.combine_tag_sets(tag_sets)
-	local tag_sets = export.split_tags_into_tag_sets(tags)
+	-- A lone tag set needs no separators; it is returned directly (the same table, not a copy).
+	if #tag_sets == 1 then
-	-- Now split any two-level multipart tags.
+		return tag_sets[1]
-	local resulting_tag_sets = {}
+	end
+	local combined_tag_set = {}
 	for _, tag_set in ipairs(tag_sets) do
-		for _, resulting_tag_set in ipairs(export.split_two_level_multipart_tag_set(tag_set)) do
+		-- Insert a ";" separator before every tag set except the first one that contributes anything.
+		if #combined_tag_set > 0 then
-			table.insert(resulting_tag_sets, resulting_tag_set)
+			table.insert(combined_tag_set, ";")
 		end
+		m_table.extendList(combined_tag_set, tag_set)
+	end
+	-- Return the flattened list built above (not `tags`, which is not defined in this function).
+	return combined_tag_set
+end
+-- Inline-modifier specification passed as `param_mods` to parse_inline_modifiers() by parse_tag_set_properties().
+-- Only the `lb` modifier is defined; its value is split on a literal "//" (plain split, not a pattern).
+-- NOTE(review): presumably the resulting list ends up in the `labels` field of the generated object -- confirm
+-- against [[Module:parse utilities]].
+local tag_set_param_mods = {
+	lb = {
+		item_dest = "labels",
+		convert = function(arg, parse_err)
+			return rsplit(arg, "//", true)
+		end,
+	}
+}
+--[==[
+Parse tag set properties from a tag set (list of tags). Currently no per-tag properties are recognized, and the only
+per-tag-set property recognized is `<lb:...>` for specifying label(s) for the tag set. Per-tag-set properties must be
+attached to the last tag.
+]==]
+function export.parse_tag_set_properties(tag_set)
+	-- Build the returned object: overwrite the final element of `tag_set` (in place) with `last_tag` and wrap the
+	-- list in a table under `.tags`.
+	-- NOTE(review): presumably parse_inline_modifiers() calls this with the tag text stripped of its modifiers --
+	-- confirm against [[Module:parse utilities]].
+	local function generate_tag_set_obj(last_tag)
+		tag_set[#tag_set] = last_tag
+		return {tags = tag_set}
+	end
+	local last_tag = tag_set[#tag_set]
+	-- Check for inline modifier, e.g. מרים<tr:Miryem>. But exclude HTML entry with <span ...>, <i ...>, <br/> or
+	-- similar in it, caused by wrapping an argument in {{l|...}}, {{af|...}} or similar. Basically, all tags of
+	-- the sort we parse here should consist of a less-than sign, plus letters, plus a colon, e.g. <lb:...>, so if
+	-- we see a tag on the outer level that isn't in this format, we don't try to parse it. The restriction to the
+	-- outer level is to allow generated HTML inside of e.g. qualifier tags, such as foo<q:similar to {{m|fr|bar}}>.
+	if last_tag:find("<") and not last_tag:find("^[^<]*<[a-z]*[^a-z:]") then
+		return require(parse_utilities_module).parse_inline_modifiers(last_tag, {
+			param_mods = tag_set_param_mods,
+			generate_obj = generate_tag_set_obj,
+		})
+	else
+		-- No inline modifiers to parse: return the tag set unchanged, wrapped in an object.
+		return generate_tag_set_obj(last_tag)
 	end
-	return resulting_tag_sets
 end
 function export.normalize_pos(pos)
-	return m_pos[pos] or pos
+	-- A missing (nil or false) `pos` yields nil without loading the data module.
+	if not pos then
+		return nil
+	end
+	-- Look `pos` up in the part-of-speech data module; if it has no entry there, return it unchanged.
+	return mw.loadData(export.form_of_pos_module)[pos] or pos
 end
@@ Line 462: / Line 719: @@
 -- passed in must be a string (i.e. it cannot be a list describing a
 -- multipart tag). To handle multipart tags, use get_tag_display_form().
-local function get_single_tag_display_form(normtag)
+local function get_single_tag_display_form(normtag, lang)
-	local data = export.lookup_tag(normtag)
+	local data = export.lookup_tag(normtag, lang)
+	local display = normtag
 	-- If the tag has a special display form, use it
 	if data and data.display then
-		normtag = data.display
+		display = data.display
 	end
 	-- If there is a nonempty glossary index, then show a link to it
-	if data and data.glossary then
+	local glossary = data and data[export.GLOSSARY]
-		if data.glossary_type == "wikt" then
+	if glossary ~= nil then
-			normtag = "[[" .. data.glossary .. "|" .. normtag .. "]]"
+		if glossary == export.WIKT then
-		elseif data.glossary_type == "wp" then
+			display = "[[" .. normtag .. "|" .. display .. "]]"
-			normtag = "[[w:" .. data.glossary .. "|" .. normtag .. "]]"
+		elseif glossary == export.WP then
+			display = "[[w:" .. normtag .. "|" .. display .. "]]"
+		elseif glossary == export.APPENDIX then
+			display = "[[Appendix:Glossary#" .. mw.uri.anchorEncode(normtag) .. "|" .. display .. "]]"
+		elseif type(glossary) ~= "string" then
+			error(("Internal error: Wrong type %s for glossary value %s for tag %s"):format(
+				type(glossary), mw.dumpObject(glossary), normtag))
 		else
-			normtag = "[[wikt:Appendix:Glossary#" .. mw.uri.anchorEncode(data.glossary) .. "|" .. normtag .. "]]"
+			local link = rmatch(glossary, "^wikt:(.*)")
+			if link then
+				display = "[[" .. link .. "|" .. display .. "]]"
+			end
+			if not link then
+				link = rmatch(glossary, "^w:(.*)")
+				if link then
+					display = "[[w:" .. link .. "|" .. display .. "]]"
+				end
+			end
+			if not link then
+				display = "[[Appendix:Glossary#" .. mw.uri.anchorEncode(glossary) .. "|" .. display .. "]]"
+			end
 		end
 	end
-	return normtag
+	return display
 end
--- Turn a canonicalized tag spec (which describes a single, possibly
+--[==[
--- multipart tag) into the displayed form. The tag spec may be a string
+Turn a canonicalized tag spec (which describes a single, possibly multipart tag) into the displayed form. The tag spec
--- (a canonical-form tag), or a list of canonical-form tags (in the
+may be a string (a canonical-form tag); a list of canonical-form tags (in the case of a simple multipart tag); or a
--- case of a simple multipart tag), or a list of mixed canonical-form
+list of mixed canonical-form tags and lists of such tags (in the case of a two-level multipart tag). `joiner` indicates
--- tags and lists of such tags (in the case of a two-level multipart tag).
+how to join the parts of a multipart tag, and can be either {"and"} ("foo and bar", or "foo, bar and baz" for 3 or
--- JOINER indicates how to join the parts of a multipart tag, and can
+more), {"slash"} ("foo/bar"), {"en-dash"} ("foo–bar") or {nil}, which uses the global default found in
--- be either "and" ("foo and bar", or "foo, bar and baz" for 3 or more),
+{multipart_join_strategy()} in [[Module:form of/functions]]. (NOTE: The global default is {"slash"} and this seems
--- "slash" ("foo/bar"), "en-dash" ("foo–bar") or nil, which uses the
+unlikely to change.)
--- global default found in multipart_join_strategy() in
+]==]
--- [[Module:form of/functions]].
+function export.get_tag_display_form(tagspec, lang, joiner)
-function export.get_tag_display_form(tagspec, joiner)
 	if type(tagspec) == "string" then
-		return get_single_tag_display_form(tagspec)
+		return get_single_tag_display_form(tagspec, lang)
 	end
-	-- We have a multipart tag. See if there's a display handler to
+	-- We have a multipart tag. See if there's a display handler to display them specially.
-	-- display them specially.
+	for _, handler in ipairs(require(export.form_of_functions_module).display_handlers) do
-	for _, handler in ipairs(m_functions.display_handlers) do
 		local displayval = handler(tagspec, joiner)
 		if displayval then
@@ Line 510: / Line 784: @@
 	for _, first_level_tag in ipairs(tagspec) do
 		if type(first_level_tag) == "string" then
-			table.insert(displayed_tags, get_single_tag_display_form(first_level_tag))
+			table.insert(displayed_tags, get_single_tag_display_form(first_level_tag, lang))
 		else
-			-- A first-level element of a two-level multipart tag.
+			-- A first-level element of a two-level multipart tag. Currently we just separate the individual components
-			-- Currently we just separate the individual components
+			-- with spaces, but other ways are possible, e.g. using an underscore, colon, parens or braces.
-			-- with spaces, but other ways are possible, e.g. using
-			-- an underscore, colon, parens or braces.
 			local components = {}
 			for _, component in ipairs(first_level_tag) do
-				table.insert(components, get_single_tag_display_form(component))
+				table.insert(components, get_single_tag_display_form(component, lang))
 			end
 			table.insert(displayed_tags, table.concat(components, " "))
 		end
 	end
-	return m_functions.join_multiparts(displayed_tags, joiner)
+	return require(export.form_of_functions_module).join_multiparts(displayed_tags, joiner)
 end
--- Return true if the list `tags1`, treated as a set, is a subset of the list `tags2`, also
+--[==[
--- treated as a set.
+Given a normalized tag set (i.e. as output by {normalize_tag_set()}; all tags are in canonical form, multipart tags are
-local function is_subset(tags1, tags2)
+represented as lists, and two-level multipart tags as lists of lists), convert to displayed form (a string). See
-	tags1 = m_table.listToSet(tags1)
+{get_tag_display_form()} for the meaning of `joiner`.
-	tags2 = m_table.listToSet(tags2)
+]==]
-	for tag, _ in pairs(tags1) do
+function export.get_tag_set_display_form(normalized_tag_set, lang, joiner)
-		if not tags2[tag] then
+	local parts = {}
-			return false
+	for _, tagspec in ipairs(normalized_tag_set) do
+		local to_insert = export.get_tag_display_form(tagspec, lang, joiner)
+		-- Maybe insert a space before inserting the display form of the tag. We insert a space if
+		-- (a) we're not the first tag; and
+		-- (b) the tag we're about to insert doesn't have the "no_space_on_left" property; and
+		-- (c) the preceding tag doesn't have the "no_space_on_right" property.
+		-- NOTE: We depend here on the fact that
+		-- (1) all tags with either of the above properties set have the same display form as canonical form, and
+		-- (2) all tags with either of the above properties set are single-character tags.
+		-- The second property is an optimization to avoid looking up display forms resulting from multipart tags,
+		-- which won't be found and which will trigger loading of [[Module:form of/data2]]. If multichar punctuation is
+		-- added in the future, it's ok to change the == 1 below to <= 2 or <= 3.
+		--
+		-- If the first property above fails to hold in the future, we need to track the canonical form of each tag
+		-- (including the previous one) as well as the display form. This would also avoid the need for the == 1 check.
+		if #parts > 0 then
+			local most_recent_tagobj = ulen(parts[#parts]) == 1 and export.lookup_tag(parts[#parts], lang)
+			local to_insert_tagobj = ulen(to_insert) == 1 and export.lookup_tag(to_insert, lang)
+			if (
+				(not most_recent_tagobj or not most_recent_tagobj.no_space_on_right) and
+				(not to_insert_tagobj or not to_insert_tagobj.no_space_on_left)
+			) then
+				table.insert(parts, " ")
+			end
 		end
+		table.insert(parts, to_insert)
 	end
-	return true
+	return table.concat(parts)
 end
--- Compute and return the appropriate categories for the tags in `tags` (user-specified tags,
+--[==[
--- which may consist of multiple tag sets separated by semicolons) and the language in `lang`.
+Given a normalized tag set (i.e. as output by {normalize_tag_set()}; all tags are in canonical form, multipart tags are
--- This checks both language-specific and language-agnostic category specs in [[Module:form of/cats]].
+represented as lists, and two-level multipart tags as lists of lists), fetch the associated categories and labels.
--- `POS` is the user-specified part of speech, if any, and `terminfo` is currently unused.
+Return two values, a list of categories and a list of labels. `lang` is the language of the term represented by the tag set,
-function export.fetch_lang_categories(lang, tags, terminfo, POS)
+and `POS` is the user-provided part of speech (which may be {nil}).
-	local m_cats = mw.loadData("Module:form of/cats")
+]==]
+function export.fetch_categories_and_labels(normalized_tag_set, lang, POS, pagename, lemmas)
+	local m_cats = mw.loadData(export.form_of_cats_module)
 	local categories = {}
+	local labels = {}
-	local normalized_tags = export.normalize_tags(tags)
-	local split_tag_sets = export.split_tags_into_tag_sets_and_expand_two_level_multipart_tags(normalized_tags)
 	POS = export.normalize_pos(POS)
+	-- First split any two-level multipart tags into multiple sets, to make our life easier.
-	-- Loop over each tag set and compute categories for each one.
+	for _, tag_set in ipairs(export.split_two_level_multipart_tag_set(normalized_tag_set)) do
-	for _, tag_set in ipairs(split_tag_sets) do
+		-- Call a named function, either from the lang-specific data in
-		local function make_function_table()
+		-- [[Module:form of/lang-data/LANGCODE/functions]] or in [[Module:form of/functions]].
-			return {
+		-- `name` is the key looked up in each module's `cat_functions` table; `funtype` ("condition" or "spec") is
+		-- used only to word the error message. The function found is called with a single table holding `pagename`,
+		-- `lemmas`, `tag_set`, `lang` and `POS`, and its result is returned as-is (even if nil). Lookup order: the
+		-- module for lang:getCode(), then the module for lang:getFullCode() if different, then the
+		-- language-independent module. Throws an error listing the modules tried if no function is found.
+		local function call_named_function(name, funtype)
-				lang=lang,
+			local data = {
-				tags=normalized_tags,
+				pagename = pagename or mw.title.getCurrentTitle().subpageText,
-				term=term,
+				lemmas = lemmas,
-				p=POS
+				tag_set = normalized_tag_set,
+				lang = lang,
+				POS = POS
 			}
+			local modules_tried = {}
+			-- Returns RESULT, true if `name` exists in the lang-specific module for `langcode`; otherwise nil, false.
+			-- Only languages listed in `export.langs_with_lang_specific_tags` are tried, and a module is recorded in
+			-- `modules_tried` only if it could actually be loaded.
+			local function try_lang_specific_module(langcode)
+				if export.langs_with_lang_specific_tags[langcode] then
+					local lang_specific_module = export.form_of_lang_data_module_prefix .. langcode .. "/functions"
+					local langdata = require(utilities_module).safe_require(lang_specific_module)
+					if langdata then
+						table.insert(modules_tried, lang_specific_module)
+						if langdata.cat_functions then
+							local fn = langdata.cat_functions[name]
+							if fn then
+								return fn(data), true
+							end
+						end
+					end
+				end
+				return nil, false
+			end
+			-- First try lang-specific.
+			local langcode = lang and lang:getCode()
+			if langcode then
+				local retval, found_it = try_lang_specific_module(langcode)
+				if found_it then
+					return retval
+				end
+			end
+			-- If the lang we're dealing with is an etym-only lang, try again with the corresponding full language.
+			local full_langcode = lang and lang:getFullCode()
+			if full_langcode and full_langcode ~= langcode then
+				local retval, found_it = try_lang_specific_module(full_langcode)
+				if found_it then
+					return retval
+				end
+			end
+			-- Try lang-independent.
+			table.insert(modules_tried, export.form_of_functions_module)
+			local fn = require(export.form_of_functions_module).cat_functions[name]
+			if fn then
+				return fn(data)
+			end
+			for i, modname in ipairs(modules_tried) do
+				modules_tried[i] = "[[" .. modname .. "]]"
+			end
+			-- Exactly three format arguments for the three %s placeholders; a stray undefined variable here used to
+			-- make string.format() itself fail, hiding the real message.
+			error(("No %s function named '%s' in %s"):format(funtype, name,
+				m_table.serialCommaJoin(modules_tried, {conj = "or", dontTag = true})))
 		end
@@ Line 599: / Line 943: @@
 		-- complex when multipart tags are present.
 		local function tag_set_matches_spec_tag(spec_tag)
-			spec_tag = normalize_tag(spec_tag)
+			spec_tag = normalize_tag(spec_tag, lang)
 			for _, tag_set_tag in ipairs(tag_set) do
 				if tag_set_tag_matches_spec_tag(tag_set_tag, spec_tag) then
@@ Line 635: / Line 979: @@
 				return false, 3
 			elseif predicate == "tags=" then
-				local normalized_spec_tags = export.normalize_tags(spec[2])
+				local normalized_spec_tag_sets = export.normalize_tag_set(spec[2], lang)
-				-- Allow tags to be in different orders, and multipart tags to
+				if #normalized_spec_tag_sets > 1 then
-				-- be in different orders. To handle this, we first check that
+					error("Internal error: No support for conjoined shortcuts in category/label specs in "
-				-- both tag set tags and spec tags have the same length. If so,
+						.. "[[Module:form of/cats]] when processing spec tag set " .. table.concat(spec[2], "|"))
-				-- we sort the multipart tags in the tag set tags and spec tags,
+				end
-				-- and then check that all tags in the spec tags are in the
+				local normalized_spec_tag_set = normalized_spec_tag_sets[1]
-				-- tag set tags.
+				-- Check for and disallow two-level multipart tags in the specs. FIXME: Remove this when we remove
-				if #tag_set ~= #normalized_spec_tags then
+				-- support for two-level multipart tags.
+				for _, tag in ipairs(normalized_spec_tag_set) do
+					if type(tag) == "table" then
+						for _, subtag in ipairs(tag) do
+							if type(subtag) == "table" then
+								error("Internal error: No support for two-level multipart tags in category/label specs"
+									.. "[[Module:form of/cats]] when processing spec tag set "
+									.. table.concat(spec[2], "|"))
+							end
+						end
+					end
+				end
+				-- Allow tags to be in different orders, and multipart tags to be in different orders. To handle this,
+				-- we first check that both tag set tags and spec tags have the same length. If so, we sort the
+				-- multipart tags in the tag set tags and spec tags, and then check that all tags in the spec tags are
+				-- in the tag set tags.
+				if #tag_set ~= #normalized_spec_tag_set then
 					return false, 3
 				end
@@ Line 650: / Line 1,010: @@
 						table.sort(tag_set_tags[i])
 					end
-					if type(normalized_spec_tags[i]) == "table" then
+					if type(normalized_spec_tag_set[i]) == "table" then
-						table.sort(normalized_spec_tags[i])
+						table.sort(normalized_spec_tag_set[i])
 					end
 				end
 				for i=1,#tag_set_tags do
-					if not m_table.contains(tag_set_tags, normalized_spec_tags[i], "deepCompare") then
+					if not m_table.contains(tag_set_tags, normalized_spec_tag_set[i]) then
 						return false, 3
 					end
@@ Line 686: / Line 1,046: @@
 				end
 				return condval, 4
-			elseif predication == "call" then
+			elseif predicate == "call" then
-				local fn = m_functions.cat_functions[spec[2]]
+				return fn(call_named_function(spec[2], "condition")), 3
-				if not fn then
-					error("No condition function named '" .. spec[2] .. "'")
-				end
-				return fn(make_function_table()), 3
 			else
 				error("Unrecognized predicate: " .. predicate)
@@ Line 706: / Line 1,062: @@
 				return false
 			elseif type(spec) == "string" then
-				-- Substitute POS request with user-specified part of speech
+				-- A category. Substitute POS request with user-specified part of speech or default.
-				-- or default
 				spec = rsub(spec, "<<p=(.-)>>", function(default)
 					return POS or export.normalize_pos(default)
 				end)
-				table.insert(categories, lang:getCanonicalName() .. " " .. spec)
+				table.insert(categories, lang:getFullName() .. " " .. spec)
+				return true
+			elseif type(spec) == "table" and spec.labels then
+				-- A label spec.
+				for _, label in ipairs(spec.labels) do
+					m_table.insertIfNot(labels, label)
+				end
 				return true
 			elseif type(spec) ~= "table" then
@@ Line 734: / Line 1,095: @@
 				return false
 			elseif predicate == "call" then
-				local fn = m_functions.cat_functions[spec[2]]
+				return process_spec(call_named_function(spec[2], "spec"))
-				if not fn then
-					error("No spec function named '" .. spec[2] .. "'")
-				end
-				return process_spec(fn(make_function_table()))
 			else
 				local condval, ifspec = check_condition(spec)
@@ Line 752: / Line 1,109: @@
 		end
-		local langspecs = m_cats[lang:getCode()]
+		local langcode = lang:getCode()
+		local langspecs = m_cats[langcode]
 		if langspecs then
 			for _, spec in ipairs(langspecs) do
@@ Line 758: / Line 1,116: @@
 			end
 		end
-		if lang:getCode() ~= "und" then
+		local full_code = lang:getFullCode()
+		if full_code ~= langcode then
+			local langspecs = m_cats[full_code]
+			if langspecs then
+				for _, spec in ipairs(langspecs) do
+					process_spec(spec)
+				end
+			end
+		end
+		if full_code ~= "und" then
 			local langspecs = m_cats["und"]
 			if langspecs then
@@ Line 768: / Line 1,135: @@
 	end
-	return categories
+	return categories, labels
 end
-function export.tagged_inflections(data, terminfo, notext, capfirst, posttext, joiner)
+--[==[
-	if not data.tags then
+Implementation of templates that display inflection tags, such as the general {{tl|inflection of}}, semi-specific
-		error("First argument must now be a table of arguments")
+variants such as {{tl|participle of}}, and specific variants such as {{tl|past participle of}}. `data` contains all the
+information controlling the display, with the following fields:
+* `.lang`: ('''''required''''') Language to use when looking up language-specific inflection tags, categories and
+  labels, and for displaying categories and labels.
+* `.tags`: ('''''required''' unless `.tag_sets` is given'') List of non-canonicalized inflection tags. Multiple tag sets
+  can be indicated by a {";"} as one of the tags, and tag-set properties may be attached to the last tag of a tag set.
+  The tags themselves may come directly from the user (as in {{tl|inflection of}}); come partly from the user (as in
+  {{tl|participle of}}, which adds the tag `part` to user-specified inflection tags); or be entirely specified by the
+  template (as in {{tl|past participle of}}).
+* `.tag_sets`: ('''''required''' unless `.tags` is given'') List of non-canonicalized tag sets and associated
+  per-tag-set properties. Each element of the list is an object of the form
+  { {tags = {"TAG", "TAG", ...}, labels = {"LABEL", "LABEL", ...}} }. If `.tag_sets` is specified, `.tags` should not be
+  given and vice-versa. Specifying `.tag_sets` in place of `.tags` allows per-tag-set labels to be specified; otherwise,
+  there is no advantage. [[Module:pt-gl-inflections]] uses this functionality to supply labels like {"Brazil"} and
+  {"Portugal"} associated with specific tag sets.
+* `.lemmas`: ('''''recommended''''') List of objects describing the lemma(s) of which the term in question is a
+  non-lemma form. These are passed directly to {full_link()} in [[Module:links]]. Each object should have at minimum a
+  `.lang` field containing the language of the lemma and a `.term` field containing the lemma itself. Each object is
+  formatted using {full_link()} and then if there are more than one, they are joined using {serialCommaJoin()} in
+  [[Module:table]]. Alternatively, `.lemmas` can be a string, which is displayed directly. If omitted entirely, no lemma
+  links are shown and the connecting "of" is also omitted.
+* `.lemma_face`: ('''''recommended''''') "Face" to use when displaying the lemma objects. Usually should be set to
+  {"term"}.
+* `.POS`: ('''''recommended''''') Categorizing part-of-speech tag. Comes from the {{para|p}} or {{para|POS}} argument of
+  {{tl|inflection of}}.
+* `.pagename`: Page name of "current" page or nil to use the actual page title; for testing purposes.
+* `.enclitics`: List of enclitics to display after the lemmas, in parens.
+* `.no_format_categories`: If true, don't format the categories derived from the inflection tags; just return them.
+* `.sort`: Sort key for formatted categories. Ignored when `.no_format_categories` = {true}.
+* `.nocat`: Suppress computation of categories (even if `.no_format_categories` is not given).
+* `.notext`: Disable display of all tag text and `inflection of` text. (FIXME: Maybe not implemented correctly.)
+* `.capfirst`: Capitalize the first word displayed.
+* `.pretext`: Additional text to display before the inflection tags, but after any top-level labels.
+* `.posttext`: Additional text to display after the lemma links.
+* `.text_classes`: CSS classes used to wrap the tag text and lemma links. Default is
+  {"form-of-definition use-with-mention"} for the tag text, {"form-of-definition-link"} for the lemma links. (FIXME:
+  Should separate out the lemma links into their own field.)
+* `.joiner`: Override the joiner (normally a slash) used to join multipart tags. You should normally not specify this.
+A typical call might look like this (for {{m+|es|amo}}): {
+	local lang = require("Module:languages").getByCode("es")
+	local lemma_obj = {
+		lang = lang,
+		term = "amar",
+	}
+	return m_form_of.tagged_inflections({
+		lang = lang, tags = {"1", "s", "pres", "ind"}, lemmas = {lemma_obj}, lemma_face = "term", POS = "verb"
+	})
+}
+Normally, one value is returned, the formatted text, which has appended to it the formatted categories derived from the
+tag-set-related categories generated by the specs in [[Module:form of/cats]]. To suppress this, set
+`data.no_format_categories` = {true}, in which case two values are returned, the formatted text without any formatted
+categories appended and a list of the categories to be formatted.
+NOTE: There are two sets of categories that may be generated: (1) categories derived directly from the tag sets, as
+specified in [[Module:form of/cats]]; (2) categories derived from tag-set labels, either (a) set explicitly by the
+caller in `data.tag_sets`, (b) specified by the user using `<lb:...>` attached to the last tag in a tag set, or
+(c) specified in [[Module:form of/cats]]. The second type (label-related categories) are currently not returned in
+the second return value of {tagged_inflections()}, and are currently inserted into the output text even if
+`data.no_format_categories` is set to {true}; but they can be suppressed by setting `data.nocat` = {true} (which also
+suppresses the first type of categories, those derived directly from tag sets, even if `data.no_format_categories` is
+set to {true}).
+]==]
+function export.tagged_inflections(data)
+	if not data.tags and not data.tag_sets then
+		error("First argument must be a table of arguments, and `.tags` or `.tag_sets` must be specified")
+	end
+	if data.tags and data.tag_sets then
+		error("Both `.tags` and `.tag_sets` cannot be specified")
 	end
-	local cur_infl = {}
+	local tag_sets = data.tag_sets
+	if not tag_sets then
+		tag_sets = export.split_tag_set(data.tags)
+		for i, tag_set in ipairs(tag_sets) do
+			tag_sets[i] = export.parse_tag_set_properties(tag_set)
+		end
+	end
 	local inflections = {}
+	local categories = {}
+	for _, tag_set in ipairs(tag_sets) do
+		local normalized_tag_sets = export.normalize_tag_set(tag_set.tags, data.lang, "do-track")
-	local ntags = export.normalize_tags(data.tags, nil, "do-track")
+		for _, normalized_tag_set in ipairs(normalized_tag_sets) do
+			local cur_infl = {}
-	for i, tagspec in ipairs(ntags) do
+			local this_categories, this_labels = export.fetch_categories_and_labels(normalized_tag_set, data.lang,
-		if tagspec == ";" then
+				data.POS, data.pagename, type(data.lemmas) == "table" and data.lemmas or nil)
-			if #cur_infl > 0 then
+			if not data.nocat then
-				table.insert(inflections, table.concat(cur_infl))
+				m_table.extendList(categories, this_categories)
 			end
+			local cur_infl = export.get_tag_set_display_form(normalized_tag_set, data.lang, data.joiner)
-			cur_infl = {}
-		else
-			local to_insert = export.get_tag_display_form(tagspec, data.joiner)
-			-- Maybe insert a space before inserting the display form
-			-- of the tag. We insert a space if
-			-- (a) we're not the first tag; and
-			-- (b) the tag we're about to insert doesn't have the
-			--     "no_space_on_left" property; and
-			-- (c) the preceding tag doesn't have the "no_space_on_right"
-			--     property.
-			-- NOTE: We depend here on the fact that
-			-- (1) all tags with either of the above properties set have the
-			--     same display form as canonical form, and
-			-- (2) all tags with either of the above properties set are
-			--     single-character tags.
-			-- The second property is an optimization to avoid looking up
-			-- display forms resulting from multipart tags, which won't be
-			-- found and which will trigger loading of [[Module:form of/data2]].
-			-- If multichar punctuation is added in the future, it's ok to
-			-- change the == 1 below to <= 2 or <= 3.
-			--
-			-- If the first property above fails to hold in the future, we
-			-- need to track the canonical form of each tag (including the
-			-- previous one) as well as the display form. This would also
-			-- avoid the need for the == 1 check.
 			if #cur_infl > 0 then
-				local most_recent_tagobj = ulen(cur_infl[#cur_infl]) == 1 and
+				if tag_set.labels then
-					export.lookup_tag(cur_infl[#cur_infl])
+					this_labels = m_table.append(tag_set.labels, this_labels)
-				local to_insert_tagobj = ulen(to_insert) == 1 and
-					export.lookup_tag(to_insert)
-				if (
-					(not most_recent_tagobj or
-					 not most_recent_tagobj.no_space_on_right) and
-					(not to_insert_tagobj or
-					 not to_insert_tagobj.no_space_on_left)
-				) then
-					table.insert(cur_infl, " ")
 				end
+				table.insert(inflections, {infl_text = cur_infl, labels = this_labels})
 			end
-			table.insert(cur_infl, to_insert)
 		end
 	end
-	if #cur_infl > 0 then
+	local overall_labels, need_per_tag_set_labels
-		table.insert(inflections, table.concat(cur_infl))
+	for _, inflection in ipairs(inflections) do
+		if overall_labels == nil then
+			overall_labels = inflection.labels
+		elseif not m_table.deepEquals(overall_labels, inflection.labels) then
+			need_per_tag_set_labels = true
+			overall_labels = nil
+			break
+		end
 	end
-	local format_data = require("Module:table").shallowcopy(data)
+	if not need_per_tag_set_labels then
+		for _, inflection in ipairs(inflections) do
+			inflection.labels = nil
+		end
+	end
+	local format_data = m_table.shallowcopy(data)
+	local function format_labels(labels, notext)
+		if labels and #labels > 0 then
+			return require(labels_module).show_labels { labels = labels, lang = data.lang, sort = data.sort, nocat = data.nocat } ..
+				(notext and (data.pretext or "") == "" and "" or " ")
+		else
+			return ""
+		end
+	end
+	local of_text = data.lemmas and " of" or ""
+	local formatted_text
 	if #inflections == 1 then
-		format_data.text =
+		if need_per_tag_set_labels then
-			data.notext and "" or ((data.capfirst and require("Module:string utilities").ucfirst(inflections[1]) or inflections[1]) ..
+			error("Internal error: need_per_tag_set_labels should not be set with one inflection")
-				(data.terminfo and " of" or ""))
+		end
-		return export.format_form_of(format_data)
+		format_data.text = format_labels(overall_labels, data.notext) .. (data.pretext or "") .. (data.notext and "" or
+			((data.capfirst and require("Module:string utilities").ucfirst(inflections[1].infl_text) or inflections[1].infl_text) .. of_text))
+		formatted_text = export.format_form_of(format_data)
 	else
-		format_data.text = data.notext and "" or ((data.capfirst and "Inflection" or "inflection") ..
+		format_data.text = format_labels(overall_labels, data.notext) .. (data.pretext or "") .. (data.notext and "" or
-			(data.terminfo and " of" or ""))
+			((data.capfirst and "Inflection" or "inflection") .. of_text))
 		format_data.posttext = (data.posttext or "") .. ":"
 		local link = export.format_form_of(format_data)
 		local text_classes = data.text_classes or "form-of-definition use-with-mention"
-		return link .."\n## <span class='" .. text_classes .. "'>" ..
+		for i, inflection in ipairs(inflections) do
-			table.concat(inflections, "</span>\n## <span class='" .. text_classes .. "'>") .. "</span>"
+			inflections[i] = "\n## " .. format_labels(inflection.labels, false) ..
+				"<span class='" .. text_classes .. "'>" .. inflection.infl_text .. "</span>"
+		end
+		formatted_text = link .. table.concat(inflections)
 	end
+	if not data.no_format_categories then
+		if #categories > 0 then
+			formatted_text = formatted_text .. require("Module:utilities").format_categories(categories, data.lang,
+				data.sort, nil, export.force_cat)
+		end
+		return formatted_text
+	end
+	return formatted_text, categories
 end
-function export.to_Wikidata_IDs(tags, skip_tags_without_ids)
-	if type(tags) == "string" then
-		tags = mw.text.split(tags, "|", true)
-	end
+--[==[
+Given a tag set, return a flattened list of the Wikidata IDs of all tags in the tag set. FIXME: Only used in a debugging
+function in [[Module:se-verbs]]; move there.
+]==]
+function export.to_Wikidata_IDs(tag_set, lang, skip_tags_without_ids)
 	local ret = {}
 	local function get_wikidata_id(tag)
-		if tag == ";" and not skip_tags_without_ids then
+		local data = export.lookup_tag(tag, lang)
-			error("Semicolon is not supported for Wikidata IDs")
-		else
-			return nil
-		end
-		local data = export.lookup_tag(tag)
-		if not data or not data.wikidata then
+		if not data or not data[export.WIKIDATA] then
 			if not skip_tags_without_ids then
-				error("The tag \"" .. tag .. "\" does not have a Wikidata ID defined in [[Module:form of/data]]")
+				error('The tag "' .. tag .. '" does not have a Wikidata ID defined in the form-of data modules')
 			else
 				return nil
 			end
 		else
-			return data.wikidata
+			return ("Q%s"):format(data[export.WIKIDATA])
 		end
 	end
-	for i, tag in ipairs(export.normalize_tags(tags)) do
+	local normalized_tag_sets = export.normalize_tag_set(tag_set, lang)
-		if type(tag) == "table" then
+	for _, tag_set in ipairs(normalized_tag_sets) do
-			local ids = {}
+		for _, tag in ipairs(tag_set) do
-			for _, onetag in ipairs(tag) do
+			if type(tag) == "table" then
-				table.insert(ids, get_wikidata_id(onetag))
+				for _, subtag in ipairs(tag) do
+					if type(subtag) == "table" then
+						-- two-level multipart tag; FIXME: delete support for this
+						for _, subsubtag in ipairs(subtag) do
+							table.insert(ret, get_wikidata_id(subsubtag))
+						end
+					else
+						table.insert(ret, get_wikidata_id(subtag))
+					end
+				end
+			else
+				table.insert(ret, get_wikidata_id(tag))
 			end
-			table.insert(ret, ids)
-		else
-			table.insert(ret, get_wikidata_id(tag))
 		end
 	end
@@ Line 897: / Line 1,353: @@
 function export.dump_form_of_data(frame)
 	local data = {
-		data = require("Module:form of/data"),
+		data = require(export.form_of_data_module),
-		data2 = require("Module:form of/data2")
+		data2 = require(export.form_of_data2_module)
 	}
 	return require("Module:JSON").toJSON(data)
+end
+--[==[
+Fill in the `shortcuts` table (shortcut -> tag name) from the shortcut entries of the tag data in `tags` (tag name ->
+tag data). A tag's shortcuts are read from its `export.SHORTCUTS` field, which may be absent, a single string, or a list
+of strings. Throws an error if a shortcut is already registered in `shortcuts` or is itself a key of `tags`. `shortcuts`
+is modified in place; nothing is returned.
+]==]
+function export.finalize_tag_data(tags, shortcuts)
+	-- Record `alias` as a shortcut for `tag_name`, refusing anything that collides with a known shortcut or tag.
+	local function register(tag_name, alias)
+		local prior_owner = shortcuts[alias]
+		if prior_owner then
+			error("The shortcut \"" .. alias .. "\" (for the inflection tag \"" .. tag_name .. "\") conflicts with an existing shortcut for the tag \"" .. prior_owner .. "\".")
+		end
+		if tags[alias] then
+			error("The shortcut \"" .. alias .. "\" (for the inflection tag \"" .. tag_name .. "\") conflicts with an existing tag with that name.")
+		end
+		shortcuts[alias] = tag_name
+	end
+	for tag_name, tag_data in pairs(tags) do
+		local aliases = tag_data[export.SHORTCUTS]
+		if type(aliases) == "string" then
+			-- A lone shortcut given directly as a string.
+			register(tag_name, aliases)
+		elseif aliases then
+			for _, alias in ipairs(aliases) do
+				register(tag_name, alias)
+			end
+		end
+	end
 end
 return export