Module:affix: Difference between revisions

(16 intermediate revisions by the same user not shown)

Line 6:

local m_str_utils = require("Module:string utilities")

local m_table = require("Module:table")

local en_utilities_module = "Module:en-utilities"

local etymology_module = "Module:etymology"

local pron_qualifier_module = "Module:pron qualifier"

local scripts_module = "Module:scripts"

local utilities_module = "Module:utilities"

-- Export this so the category code in [[Module:category tree/~~poscatboiler/data/terms by~~ etymology]] can access it.

-- Export this so the category code in [[Module:category tree/etymology]] can access it.

export.affix_lang_data_module_prefix = "Module:affix/lang-data/"

Line 18:

Line 19:

local rfind = m_str_utils.find

local rmatch = m_str_utils.match

local pluralize = ~~m_str_utils~~.pluralize

local pluralize = require(en_utilities_module).pluralize

local u = m_str_utils.char

local ucfirst = m_str_utils.ucfirst

local unpack = unpack or table.unpack -- Lua 5.2 compatibility

-- Export this so the category code in [[Module:category tree/~~poscatboiler/data/terms by~~ etymology]] can access it.

--[==[
Build a lookup table that sends every entry of the list `variants` to the single value `canonical`. The result has
one key per variant; `canonical` itself is not added as a key.
]==]
function export.affix_variants(canonical, variants)
	local canonical_of = {}
	for _, alternate in ipairs(variants) do
		canonical_of[alternate] = canonical
	end
	return canonical_of
end

--[==[
Build an ID-keyed mapping table. The returned table always stores `default` under the key {"default"}. If `ids` is
given, each of its key/value pairs is copied in as well, so an `ids` entry keyed {"default"} overrides `default`.
`ids` itself is not modified.
]==]
function export.id_mapping(default, ids)
	local mapping = { default = default }
	if ids then
		for id, target in pairs(ids) do
			mapping[id] = target
		end
	end
	return mapping
end

--[==[
`id_variants` maps an ID to a list of variant affixes. Every variant becomes a key of the returned table, and its
value is an ID mapping table (as built by `export.id_mapping`) in which both the {"default"} entry and the entry for
that ID are `base`.

NOTE: if the same variant is listed under more than one ID, the later assignment overwrites the earlier one, and
`pairs` does not guarantee which ID is visited last.
]==]
function export.id_mapping_with_affix_variants(base, id_variants)
	local mappings = {}
	for id, variants in pairs(id_variants) do
		for _, variant in ipairs(variants) do
			mappings[variant] = export.id_mapping(base, {[id] = base})
		end
	end
	return mappings
end

--[==[
Shallow-merge any number of tables into a newly created table. Arguments are processed left to right, so when a key
occurs in several tables the rightmost one wins. Arguments that are `nil` or `false` are skipped. None of the input
tables is modified.
]==]
function export.merge_tables(...)
	local result = {}
	for i = 1, select('#', ...) do
		-- select(i, ...) returns every argument from position i onward; the assignment keeps only the first.
		local t = select(i, ...)
		if t then
			for k, v in pairs(t) do
				result[k] = v
			end
		end
	end
	return result
end

-- Export this so the category code in [[Module:category tree/etymology]] can access it.

-- Set of language codes (each code is a key mapped to `true`). Presumably these are the languages that have a data
-- module under `export.affix_lang_data_module_prefix` -- verify against the code that consults this table.
export.langs_with_lang_specific_data = {
	az = true,
	fi = true,
	fr = true,
	izh = true,
	la = true,
	sah = true,
	tr = true,
	["trk-pro"] = true, -- contains a hyphen, so it cannot use the bare-identifier key form
}

Line 57:

Line 102:

===About different types of affixes ("template", "display", "link", "lookup" and "category"):===

* A "template affix" is an affix in its source form as it appears in a template call. Generally, a template affix has

* A "template affix" is an affix in its source form as it appears in a template call. Generally, a template affix has an

an attached template hyphen (see above) to indicate that it is an affix and indicate what type of affix it is

attached template hyphen (see above) to indicate that it is an affix and indicate what type of affix it is (prefix,

(prefix, suffix, interfix~~/infix~~ or circumfix), but some of the older-style templates such as {{tl|suffix}},

suffix, interfix or circumfix), but some of the older-style templates such as {{tl|suffix}}, {{tl|prefix}},

{{tl|prefix}}, {{tl|confix}}, etc. have "positional" affixes where the presence of the affix in a certain position

{{tl|confix}}, etc. have "positional" affixes where the presence of the affix in a certain position (e.g. the second

(e.g. the second or third parameter) indicates that it is a certain type of affix, whether or not it has an attached

or third parameter) indicates that it is a certain type of affix, whether or not it has an attached template hyphen.

template hyphen.

* A "display affix" is the corresponding affix as it is actually displayed to the user. The display affix may differ

from the template affix for various reasons:

Line 72:

Line 116:

languages have differences between the "template hyphen" specified in the template (which always needs to be

specified somehow or other in templates like {{tl|affix}}, to indicate that the term is an affix and what type of

affix it is) and the display hyphen (see above), with corresponding differences between template and display affixes.

affix it is) and the display hyphen (see above), with corresponding differences between template and display

affixes.

* A (regular) "link affix" is the affix that is linked to when the affix is shown to the user. The link affix is usually

the same as the display affix, but will differ in one of three circumstances:

Line 78:

Line 123:

inline modifiers or piped links, as described above under "display affix".

*# For certain languages, certain affixes are mapped to canonical form using language-specific mappings. For example,

in Finnish, the adjective-forming suffix [[-kas]] appears as [[-käs]] after front vowels, but logically ~~both~~

in Finnish, the adjective-forming suffix {{m|fi|-kas}} appears as {{m|fi|-käs}} after front vowels, but logically

forms are the same suffix and should be linked and categorized the same. Similarly, in Latin, the negative and

both forms are the same suffix and should be linked and categorized the same. Similarly, in Latin, the negative and

intensive prefixes spelled [[in-]] (etymologically two distinct prefixes) appear variously as [[il-]], [[im-]] or

intensive prefixes spelled {{m|la|in-}} (etymologically two distinct prefixes) appear variously as {{m|la|il-}},

[[ir-]] before certain consonants. Mappings are supplied in [[Module:affix/lang-data/LANGCODE]] to convert

{{m|la|im-}} or {{m|la|ir-}} before certain consonants. Mappings are supplied in [[Module:affix/lang-data/LANGCODE]]

Finnish [[-käs]] to [[-kas]] for linking and categorization purposes. Note that the affixes in the mappings use

to convert Finnish {{m|fi|-käs}} to {{m|fi|-kas}} for linking and categorization purposes. Note that the affixes in

"lookup hyphens" to indicate the different types of affixes, which is usually the same as the template hyphen but

the mappings use "lookup hyphens" to indicate the different types of affixes, which is usually the same as the

differs for Arabic scripts, because there are multiple possible template hyphens recognized but only one lookup

template hyphen but differs for Arabic scripts, because there are multiple possible template hyphens recognized but

hyphen (tatweel). The form of the affix as used to look up in the mapping tables is called the "lookup affix";

only one lookup hyphen (tatweel). The form of the affix as used to look up in the mapping tables is called the

see below.

"lookup affix"; see below.

* A "stripped link affix" is a link affix that has been passed through the language's `~~makeEntryName~~()` function, which

* A "stripped link affix" is a link affix that has been passed through the language's `stripDiacritics()` function, which

may strip certain diacritics: e.g. macrons in Latin and Old English (indicating length); acute and grave accents in

Russian and various other Slavic languages (indicating stress); vowel diacritics in most Arabic-script languages; and

Line 99:

Line 144:

link.

*# If no entry is found, the affix is then looked up in a modified link form (specifically, the modified display

form passed through the language's `~~makeEntryName~~()` function, which strips out certain diacritics, but with the

form passed through the language's `stripDiacritics()` function, which strips out certain diacritics, but with the

lookup hyphen re-added if it was stripped out, as in the case of tatweel in many Arabic-script languages).

The reason for this double lookup procedure is to allow for mappings that are sensitive to the extra diacritics, but

also allow for mappings that are not sensitive in this fashion (e.g. Russian [[-ливый]] occurs both stressed and

also allow for mappings that are not sensitive in this fashion (e.g. Russian {{m|ru|-ливый}} occurs both stressed and

unstressed, but is the same suffix either way).

* A "category affix" is the affix as it appears in categories such as [[:Category:Finnish terms suffixed with -kas]].

* A "category affix" is the affix as it appears in categories such as [[:Category:Finnish terms suffixed with -kas|

The category affix is currently always the same as the stripped link affix. This means that for Arabic-script

Category:Finnish terms suffixed with ''-kas'']]. The category affix is currently always the same as the stripped link

languages, it may or may not have a tatweel, even if the corresponding display affix and regular link affix have a

affix. This means that for Arabic-script languages, it may or may not have a tatweel, even if the corresponding display

tatweel. As mentioned above, ~~makeEntryName~~() strips tatweel for Arabic, Persian and Urdu, but not for Ottoman Turkish.

affix and regular link affix have a tatweel. As mentioned above, stripDiacritics() strips tatweel for Arabic, Persian

Hence affix categories for Arabic, Persian and Urdu will be missing the tatweel, but affix categories for

and Urdu, but not for Ottoman Turkish. Hence affix categories for Arabic, Persian and Urdu will be missing the

Ottoman Turkish will have it. An additional complication is that if the template affix contains a ZWNJ, the display

tatweel, but affix categories for Ottoman Turkish will have it. An additional complication is that if the template

(and hence the link and category affixes) will have no hyphen attached in any case.

affix contains a ZWNJ, the display (and hence the link and category affixes) will have no hyphen attached in any case.

]==]

Line 137:

Line 182:

local ZWNJ = u(0x200C) -- zero-width non-joiner

local template_hyphens = {

-- This covers all Arabic scripts. See above.

["Arab"] = "ـ" .. ZWNJ .. "-", -- tatweel + zero-width non-joiner + regular hyphen

["Hebr"] = "־", -- Hebrew-specific hyphen termed "maqqef"

~~-- This covers all Arabic scripts. See above.~~

["Mong"] = "᠊",

~~["mnc-Mong"] = "᠊",~~

~~["sjo-Mong"] = "᠊",~~

~~["xwo-Mong"] = "᠊",~~

-- FIXME! What about the following right-to-left scripts?

-- Adlm (Adlam)

Line 224:

Line 266:

["Thaa"] = no_display_hyphen,

["Thai"] = no_display_hyphen,

["Tibt"] = no_display_hyphen,

}

Line 232:

Line 275:

-- Return a wikilink to the anchor `entry` on [[Appendix:Glossary]], displayed as `text` (which defaults to `entry`).
-- The target is the local "Appendix:Glossary" page, with no interwiki prefix.
local function glossary_link(entry, text)
	text = text or entry
	return "[[Appendix:Glossary#" .. entry .. "|" .. text .. "]]"
end

-- Record one or more tracking pages through [[Module:debug/track]], each prefixed with "affix/".
-- `page` is either a single page name (string) or a list of page names.
local function track(page)
	local prefixed
	if type(page) == "table" then
		-- Build a fresh list rather than rewriting the caller's table in place; otherwise a list that is passed
		-- in more than once would pick up a repeated "affix/affix/..." prefix.
		prefixed = {}
		for i, pg in ipairs(page) do
			prefixed[i] = "affix/" .. pg
		end
	else
		prefixed = "affix/" .. page
	end
	require("Module:debug/track")(prefixed)
end

Line 401:

Line 456:

for i, cat in ipairs(data.categories) do

if type(cat) == "table" then

data.categories[i] = require(utilities_module).format_categories({lang:getFullName() .. " " .. cat.cat},

data.categories[i] = require(utilities_module).format_categories(lang:getFullName() .. " " .. cat.cat,

lang, cat.sort_key, cat.sort_base, force_cat)

else

data.categories[i] = require(utilities_module).format_categories({lang:getFullName() .. " " .. cat}, lang,

data.categories[i] = require(utilities_module).format_categories(lang:getFullName() .. " " .. cat, lang,

data.data.sort_key, nil, force_cat)

end

Line 410:

Line 465:

cattext = table.concat(data.categories)

end

local result = table.concat(data.parts_formatted, " +&lrm; ") .. (data.data.lit and ", literally " ..

local result = table.concat(data.parts_formatted, not data.separator_already_added and " +&lrm; " or nil) ..

m_links.mark(data.data.lit, "gloss") or "")

(data.data.lit and ", literally " .. m_links.mark(data.data.lit, "gloss") or "")

local q = data.data.q

local qq = data.data.qq

local l = data.data.l

local ll = data.data.ll

if q and q[1] or qq and qq[1] or l and l[1] or ll and ll[1] then

local infl = data.data.infl

if q and q[1] or qq and qq[1] or l and l[1] or ll and ll[1] or infl and infl[1] then

result = require(pron_qualifier_module).format_qualifiers {

lang = lang,

Line 424:

Line 480:

l = l,

ll = ll,

infl = infl,

}

end

return result .. cattext

~~end~~

~~--[==[~~

~~Older entry point for calling `join_formatted_parts(). FIXME: Convert callers.~~

~~]==]~~

~~function export.concat_parts(lang, parts_formatted, categories, nocat, sort_key, lit, force_cat)~~

~~return export.join_formatted_parts {~~

~~data = {~~

~~lang = lang,~~

~~nocat = nocat,~~

~~sort_key = sort_key,~~

~~lit = lit,~~

~~force_cat = force_cat,~~

},

~~parts_formatted = parts_formatted,~~

~~categories = categories,~~

}

end

Line 461:

Line 500:

-- Remove links from `term` (via m_links.remove_links) and pass the result through lang:stripDiacritics().
-- `lang` is a language object and `term` is a string that may contain wikilinks.
local function strip_diacritics_no_links(lang, term)
	return lang:stripDiacritics(m_links.remove_links(term))
end

Line 503:

Line 541:

to access information for constructing the categories added by `format_derived()`.

]==]

function export.link_term(part, data)

function export.link_term(part, data, include_separator)

local result

Line 509:

Line 547:

result = require(etymology_module).format_derived {

lang = data.lang,

~~terminfo~~ = part,

terms = {part},

sources = {part.lang},

sort_key = data.sort_key,

nocat = data.nocat,

template_name = "affix",

qualifiers_labels_on_outside = true,

borrowing_type = data.borrowing_type,

force_cat = data.force_cat or debug_force_cat,

}

else

~~-- language (e.g. in a pseudo-loan).~~

result = m_links.full_link(part, "term", nil, "show qualifiers")

result = m_links.full_link(part, "term")

end

if ~~part.q~~ and part.~~q[1] or part.qq and part.qq[1] or part.l and part.l[1] or part.ll and part.ll[1] or~~

if include_separator and part.separator then

part.~~refs and part~~.~~refs[1] then~~

return part.separator .. result

~~result = require(pron_qualifier_module).format_qualifiers {~~

else

~~lang = part~~.~~lang,~~

return result

~~text =~~ result,

~~q = part.q,~~

~~qq = part.qq,~~

~~l = part.l,~~

~~ll = part.ll,~~

~~refs = part.refs,~~

}

end

~~return result~~

end

Line 612:

Line 643:

end

if ~~not~~ affix_type then

if affix_type == "non-affix" then

return term

elseif affix_type == "circumfix" then

Line 689:

Line 720:

if mapping then

if type(mapping) == "table" then

mapping = mapping[affix_id or false]

mapping = mapping[affix_id] or mapping.default or mapping[affix_id or false]

if mapping then

return mapping

Line 721:

Line 752:

end

~~-- Double parens because makeEntryName() returns multiple values. Yuck.~~

return do_lookup(affix) or do_lookup(lang:stripDiacritics(affix)) or nil

return do_lookup(affix) or do_lookup((lang:~~makeEntryName~~(affix))) or nil

end

Line 728:

Line 758:

--[==[

For a given template term in a given language (see the definition of "template affix" near the top of the file),

possibly in an explicitly specified script `sc` (but usually nil), return the term's affix type ({"prefix"}, {"~~infix~~"},

possibly in an explicitly specified script `sc` (but usually nil), return the term's affix type ({"prefix"},

{"suffix"}, {"circumfix"} or {~~nil} for~~ non-affix) along with the corresponding link and display affixes (see definitions

{"interfix"}, {"suffix"}, {"circumfix"} or {"non-affix"}) along with the corresponding link and display affixes

near the top of the file); also the corresponding lookup affix (if `return_lookup_affix` is specified). The term passed

(see definitions near the top of the file); also the corresponding lookup affix (if `return_lookup_affix` is specified).

in should already have any fragment (after the # sign) parsed off of it. Four values are returned: `affix_type`,

The term passed in should already have any fragment (after the # sign) parsed off of it. Four values are returned:

`link_term`, `display_term` and `lookup_term`. The affix type can be passed in instead of autodetected ~~(pass~~ in ~~{false}~~

`affix_type`, `link_term`, `display_term` and `lookup_term`. The affix type can be passed in instead of autodetected; in

~~if the term is not an affix); in~~ this case, the template term need not have any attached hyphens, and the appropriate

this case, the template term need not have any attached hyphens, and the appropriate hyphens will be added in the

hyphens will be added in the appropriate places. If `do_affix_mapping` is specified, look up the affix in the

appropriate places. If `do_affix_mapping` is specified, look up the affix in the lang-specific affix mappings, as

lang-specific affix mappings, as described in the comment at the top of the file; otherwise, the link and display terms

described in the comment at the top of the file; otherwise, the link and display terms will always be the same. (They

will always be the same. (They will be the same in any case if the template term has a bracketed link in it or is not

will be the same in any case if the template term has a bracketed link in it or is not an affix.) If

an affix.) If `return_lookup_affix` is given, the fourth return value contains the term with appropriate lookup hyphens

`return_lookup_affix` is given, the fourth return value contains the term with appropriate lookup hyphens in the

in the appropriate places; otherwise, it is the same as the display term. (This functionality is used in

appropriate places; otherwise, it is the same as the display term. (This functionality is used in

[[Module:category tree~~/poscatboiler/data~~/affixes and compounds]] to convert link affixes into lookup affixes so that

[[Module:category tree/affixes and compounds]] to convert link affixes into lookup affixes so that they can be looked up

they can be looked up in the affix mapping tables.)

in the affix mapping tables.)

]==]

local function parse_term_for_affixes(term, lang, sc, affix_type, do_affix_mapping, return_lookup_affix, affix_id)

if not term then

return ~~nil~~, nil, nil, nil

return "non-affix", nil, nil, nil

end

if term == "^" then

-- Indicates a null term to emulate the behavior of {{suffix|foo||bar}}.

term = ""

return "non-affix", term, term, term

end

if term:find("^%^") then

-- ~~If term begins with~~ ^, it~~'s not an affix no matter what. Strip off the ^ and return~~ "no affix".

-- HACK! ^ at the beginning of Korean languages has a special meaning, triggering capitalization of the

~~term~~ = ~~usub~~(~~term, 2~~)

-- transliteration. Don't interpret it as "force non-affix" for those languages.

~~return nil~~, ~~term~~, ~~term, term~~

local langcode = lang:getCode()

if langcode ~= "ko" and langcode ~= "okm" and langcode ~= "jje" then

-- Formerly we allowed ^ to force non-affix type; this is now handled using an inline modifier

-- <naf>, <root>, etc. Throw an error for the moment when the old way is encountered.

error("Use of ^ to force non-affix status is no longer supported; use an inline modifier <naf> or <root> " ..

"after the component")

end

Line 763:

Line 805:

thyph = "([" .. thyph .. "])"

if affix_type ~~== nil~~ then

if not affix_type then

if rfind(term, thyph .. " " .. thyph) then

affix_type = "circumfix"

Line 770:

Line 812:

local has_ending_hyphen = rfind(term, thyph .. "$")

if has_beginning_hyphen and has_ending_hyphen then

affix_type = "~~infix~~"

affix_type = "interfix"

elseif has_ending_hyphen then

affix_type = "prefix"

elseif has_beginning_hyphen then

affix_type = "suffix"

else

affix_type = "non-affix"

end

Line 780:

Line 824:

local link_term, display_term, lookup_term

if affix_type then

if affix_type == "non-affix" then

link_term = term

display_term = term

lookup_term = term

else

display_term = reconstruct_term_per_hyphens(term, affix_type, scode, thyph, dhyph)

if do_affix_mapping then

Line 800:

Line 848:

lookup_term = display_term

end

~~else~~

~~link_term = term~~

~~display_term = term~~

~~lookup_term = term~~

end

Line 819:

Line 863:

is of the wrong type). Three values are returned: the link term, display term and lookup term. This function is a thin

wrapper around `parse_term_for_affixes`; see the comments above that function for more information. Note that this

function is exposed externally because it is called by [[Module:category tree~~/poscatboiler/data~~/affixes and compounds]];

function is exposed externally because it is called by [[Module:category tree/affixes and compounds]]; see the comment

see the comment in `parse_term_for_affixes` for more information.

in `parse_term_for_affixes` for more information.

]==]

function export.make_affix(term, lang, sc, affix_type, do_affix_mapping, return_lookup_affix, affix_id)

if not (affix_type == "prefix" or affix_type == "suffix" or affix_type == "circumfix" or affix_type == "infix" or

affix_type == "interfix") then

affix_type == "interfix" or affix_type == "non-affix") then

error("Internal error: Invalid affix type " .. (affix_type or "(nil)"))

end

Line 839:

Line 883:

--[==[

~~Implementation of {{tl|affix}} and {{tl|surface analysis}}. `data` contains all the information describing the~~ affixes to

Core categorization logic for affixes. This is shared between show_affix(), show_compound_like() and

~~be displayed, and contains the following:~~

get_affix_categories_only(). Returns the categories array and other metadata needed for formatting.

* `.~~lang` ('''required'''): Overall language object. Different from term-specific language objects (see `.parts` below).~~

* `.sc`: Overall script object (usually omitted). Different from term-specific script objects.

* `.parts` ('''required'''): List of objects describing the affixes to show. The general format of each object is ~~as would~~

~~be passed to `full_link~~()`, ~~except that the `.lang` field should be missing unless the term is of a language~~

~~different from the overall `.lang` value (in such a case, the language name is shown along with the term and~~

~~an additional "derived from" category is added). '''WARNING''': The data in `.parts` will be destructively~~

~~modified.~~

* `.pos`: Overall part of speech (~~used in categories, defaults to {"terms"}~~)~~. Different from term-specific part of speech.~~

* `.sort_key`: Overall sort key. Normally omitted except e.g. in Japanese.

* `.type`: Type of compound, if the parts in `.parts` describe a compound. Strictly optional, and ~~if supplied, the~~

~~compound type is displayed before the parts (normally capitalized, unless `.nocap` is given).~~

* `.nocap`: Don't capitalize the first letter of text displayed before the parts (~~relevant only if `.type` or~~

~~`.surface_analysis` is given~~).

* `.notext`: Don't display any text before the ~~parts (relevant only if `.type` or `.surface_analysis` is given).~~

* `.nocat`: Disable all categorization.

* `.lit`: Overall literal definition. Different from term-specific literal definitions.

* `.force_cat`: Always display categories~~, even on userspace pages.~~

* `.surface_analysis`: Implement {{surface analysis}}; adds `By surface analysis, ` before the parts.

~~'''WARNING''': This destructively modifies both `data`~~ and ~~the individual structures within `.parts`~~.

]==]

function ~~export.show_affix~~(data)

local function generate_affix_categories(data)

data.pos = data.pos or default_pos

data.pos = pluralize(data.pos)

Line 872:

Line 895:

-- Process each part

~~local parts_formatted = {}~~

local whole_words = 0

local is_affix_or_compound = false

Line 882:

Line 904:

data.parts[i] = part

canonicalize_part(part, data.lang, data.sc)

-- Determine affix type and get link and display terms (see text at top of file). Store them in the part

-- (in fields that won't clash with fields used by full_link() in [[Module:links]] or link_term()), so they

-- can be used in the loop below when categorizing.

part.affix_type, part.affix_link_term, part.affix_display_term = parse_term_for_affixes(part.term,

part.lang, part.sc, ~~nil~~, not part.alt, nil, part.id)

part.lang, part.sc, part.type, not part.alt, nil, part.id)

-- If link_term is an empty string, either a bare ^ was specified or an empty term was used along with inline

Line 895:

Line 916:

-- redundant alt text.

part.alt = part.alt or (part.affix_display_term ~= part.affix_link_term and part.affix_display_term) or nil

~~-- Make a link for the part.~~

~~table.insert(parts_formatted, export.link_term(part, data))~~

end

-- Now do categorization.

if not data.noaffixcat then

for i, part in ipairs_with_gaps(data.parts) do

-- Now do categorization.

local affix_type = part.affix_type

for i, part in ipairs_with_gaps(data.parts) do

~~if affix_type then~~

local affix_type = part.affix_type

~~is_affix_or_compound = true~~

if affix_type ~= "non-affix" then

~~-- We cannot distinguish interfixes from infixes by appearance. Prefer interfixes; infixes will need to~~

is_affix_or_compound = true

~~-- use {{infix}}.~~

if affix_type == "~~infix~~" then ~~affix_type~~ = ~~"interfix" end~~

-- Make a sort key. For the first part, use the second part as the sort key; the intention is that if the

-- term has a prefix, sorting by the prefix won't be very useful so we sort by what follows, which is

-- Make a sort key. For the first part, use the second part as the sort key; the intention is that if the

-- presumably the root.

-- term has a prefix, sorting by the prefix won't be very useful so we sort by what follows, which is

local part_sort_base = nil

-- presumably the root.

local part_sort = part.sort or data.sort_key

local part_sort_base = nil

local part_sort = part.sort or data.sort_key

if i == 1 and data.parts[2] and data.parts[2].term then

local part2 = data.parts[2]

if i == 1 and data.parts[2] and data.parts[2].term then

-- If the second-part link term is empty, the user requested an unlinked term; avoid a wikitext error

local part2 = data.parts[2]

-- by using the alt value if available.

-- If the second-part link term is empty, the user requested an unlinked term; avoid a wikitext error

part_sort_base = ine(part2.affix_link_term) or ine(part2.alt)

-- by using the alt value if available.

if part_sort_base then

part_sort_base = ine(part2.affix_link_term) or ine(part2.alt)

part_sort_base = strip_diacritics_no_links(part2.lang, part_sort_base)

if part_sort_base then

end

part_sort_base = ~~make_entry_name_no_links~~(~~part2~~.lang, part_sort_base)

end

if part.pos and rfind(part.pos, "patronym") then

table.insert(categories, {cat = "patronymics", sort_key = part_sort, sort_base = part_sort_base})

end

if data.pos ~= "terms" and part.pos and rfind(part.pos, "diminutive") then

table.insert(categories, {cat = "diminutive " .. data.pos, sort_key = part_sort,

sort_base = part_sort_base})

end

-- Don't add a '*fixed with' category if the link term is empty or is in a different language.

if ine(part.affix_link_term) and not part.part_lang then

table.insert(categories, {cat = data.pos .. " " .. affix_type .. "ed with " ..

strip_diacritics_no_links(part.lang, part.affix_link_term) ..

(part.id and " (" .. part.id .. ")" or ""),

sort_key = part_sort, sort_base = part_sort_base})

end

else

whole_words = whole_words + 1

if whole_words == 2 then

is_affix_or_compound = true

table.insert(categories, "compound " .. data.pos)

end

-- Make sure there was either an affix or a compound (two or more non-affix terms).

if not is_affix_or_compound and not data.allow_no_affixes_or_compounds then

error("The parameters did not include any affixes, and the term is not a compound. Please provide at least one affix.")

end

~~if part.pos and rfind(part.pos~~, ~~"patronym") then~~

return text_sections, categories, borrowing_type

~~table.insert(~~categories, ~~{cat = "patronymics", sort_key = part_sort, sort_base = part_sort_base})~~

end

~~if data.pos ~~~= ~~"terms"~~ and ~~part.pos and rfind(part.pos, "diminutive") then~~

--[==[

~~table.insert(categories,~~ {~~cat = "diminutive " .~~. data~~.pos~~, ~~sort_key = part_sort,~~

Implementation of {{tl|affix}} and {{tl|surface analysis}}. `data` contains all the information describing the affixes to

~~sort_base = part_sort_base})~~

be displayed, and contains the following:

~~end~~

-- ~~Don~~'~~t add a~~ '*fixed with' ~~category if~~ the ~~link~~ term is ~~empty or is~~ in a ~~different~~ language.

* `.lang` ('''required'''): Overall language object. Different from term-specific language objects (see `.parts` below).

~~if ine(part~~.~~affix_link_term) and not part.part_lang then~~

* `.sc`: Overall script object (usually omitted). Different from term-specific script objects.

~~table~~.~~insert~~(categories, {~~cat = data~~.~~pos~~ .. ~~" "~~ .. ~~affix_type~~ .. ~~"ed with "~~ ..

* `.parts` ('''required'''): List of objects describing the affixes to show. The general format of each object is as would

~~make_entry_name_no_links~~(~~part.lang~~, ~~part~~.~~affix_link_term~~) ..

be passed to `full_link()`, except that the `.lang` field should be missing unless the term is of a language

(~~part~~.~~id and " ("~~ .. ~~part~~.id .. ~~")" or ""~~),

different from the overall `.lang` value (in such a case, the language name is shown along with the term and

~~sort_key = part_sort, sort_base = part_sort_base})~~

an additional "derived from" category is added). '''WARNING''': The data in `.parts` will be destructively

~~end~~

modified.

~~else~~

* `.pos`: Overall part of speech (used in categories, defaults to {"terms"}). Different from term-specific part of speech.

~~whole_words = whole_words + 1~~

* `.sort_key`: Overall sort key. Normally omitted except e.g. in Japanese.

* `.type`: Type of compound, if the parts in `.parts` describe a compound. Strictly optional, and if supplied, the

compound type is displayed before the parts (normally capitalized, unless `.nocap` is given).

* `.nocap`: Don't capitalize the first letter of text displayed before the parts (relevant only if `.type` or

`.surface_analysis` is given).

* `.notext`: Don't display any text before the parts (relevant only if `.type` or `.surface_analysis` is given).

* `.nocat`: Disable all categorization.

* `.noaffixcat`: Disable affix (and compound) categorization. Relevant for e.g. blends, which may otherwise

be incorrectly categorized as compound terms.

* `.lit`: Overall literal definition. Different from term-specific literal definitions.

* `.force_cat`: Always display categories, even on userspace pages.

* `.surface_analysis`: Implement {{surface analysis}}; adds `By surface analysis, ` before the parts.

~~if whole_words =~~= ~~2 then~~

'''WARNING''': This destructively modifies both `data` and the individual structures within `.parts`.

~~is_affix_or_compound~~ = ~~true~~

]==]

~~table~~.~~insert~~(categories, ~~"compound " ..~~ data~~.pos~~)

function export.show_affix(data)

~~end~~

local text_sections, categories, borrowing_type = generate_affix_categories(data)

~~end~~

-- ~~Make sure there was either an affix or a compound~~ (~~two or more regular terms~~).

-- Process each part for display

~~if not is_affix_or_compound then~~

local parts_formatted = {}

~~error~~(~~"The parameters did not include any affixes~~, ~~and the term is not a compound. Please provide at least one affix~~.")

for i, part in ipairs_with_gaps(data.parts) do

-- Make a link for the part

table.insert(parts_formatted, export.link_term(part, data, "include_separator"))

end

Line 961:

Line 1,018:

text = ucfirst(text)

end

table.insert(text_sections, 1, text)

end

table.insert(text_sections, export.join_formatted_parts { data = data, parts_formatted = parts_formatted,

categories = categories })

categories = categories, separator_already_added = true })

return table.concat(text_sections)

end

--[==[

Get only the categories that would be generated by show_affix(), without any text output or formatting.

This is used by Module:etymon to get affix categorization.

Returns an array of category objects, where

each entry is either a string (simple category name) or a table with keys `cat`, `sort_key`,

and `sort_base` for more complex categorization.

`data` should have the same structure as passed to show_affix():

* `.lang` (required): Overall language object

* `.parts` (required): Array of affix part objects with `.term`, `.lang`, `.id`, etc.

* `.pos`: Part of speech (defaults to "terms")

* `.sort_key`: Overall sort key for categories

'''WARNING''': This destructively modifies both `data` and the individual structures within `.parts`.

]==]

function export.get_affix_categories_only(data)
	-- generate_affix_categories() returns (text_sections, categories, borrowing_type); only the
	-- second value is wanted here, so the leading value is discarded and the trailing one dropped.
	local _, categories = generate_affix_categories(data)
	return categories
end

Line 973:

Line 1,049:

function export.show_surface_analysis(data)
	-- NOTE(review): judging by its name, this flag lets show_affix() accept parts that contain
	-- neither an affix nor a compound -- confirm against generate_affix_categories().
	data.allow_no_affixes_or_compounds = true
	-- Ask show_affix() to behave as {{surface analysis}} (adds "By surface analysis, " before the parts).
	data.surface_analysis = true
	return export.show_affix(data)
end

Line 983:

Line 1,060:

]==]

function export.show_compound(data)

local data_for_cats = m_table.shallowCopy(data)

data_for_cats.parts = {}

for k, part in pairs(data.parts) do

data_for_cats.parts[k] = m_table.shallowCopy(part)

end

data_for_cats.allow_no_affixes_or_compounds = true

local categories = export.get_affix_categories_only(data_for_cats)

data.pos = data.pos or default_pos

data.pos = pluralize(data.pos)

local text_sections, ~~categories~~, borrowing_type =

local text_sections, _, borrowing_type =

process_etymology_type(data.type, data.nocap, data.notext, #data.parts > 0)

data.borrowing_type = borrowing_type

local parts_formatted = {}

table.insert(categories, "compound " .. data.pos)

Line 999:

Line 1,084:

-- Determine affix type and get link and display terms (see text at top of file).

local affix_type, link_term, display_term = parse_term_for_affixes(part.term, part.lang, part.sc,

~~nil~~, not part.alt, nil, part.id)

part.type, not part.alt, nil, part.id)

-- If the term is an ~~infix~~, recognize it as such (which means e.g. that we will display the term without

-- If the term is an interfix or the type was explicitly given, recognize it as such (which means e.g. that we

-- hyphens for East Asian languages). Otherwise, ignore the fact that it looks like an affix and display as

-- will display the term without hyphens for East Asian languages). Otherwise, ignore the fact that it looks

-- specified in the template (but pay attention to the detected affix type for certain tracking purposes).

-- like an affix and display as specified in the template (but pay attention to the detected affix type for

if affix_type == "~~infix~~" then

-- certain tracking purposes).

if affix_type == "interfix" or (part.type and part.type ~= "non-affix") then

-- If link_term is an empty string, either a bare ^ was specified or an empty term was used along with

-- inline modifiers. The intention in either case is not to link the term. Don't add a '*fixed with'

Line 1,011:

Line 1,097:

-- redundant alt text.

if link_term and link_term ~= "" and not part.part_lang then

table.insert(categories, {cat = data.pos .. " ~~interfixed~~ with " .. ~~make_entry_name_no_links~~(part.lang,

table.insert(categories, {cat = data.pos .. " " .. affix_type .. "ed with " ..

link_term), sort_key = part.sort or data.sort_key})

strip_diacritics_no_links(part.lang, link_term), sort_key = part.sort or data.sort_key})

end

part.term = link_term ~= "" and link_term or nil

part.alt = part.alt or (display_term ~= link_term and display_term) or nil

else

if affix_type then

if affix_type ~= "non-affix" then

local langcode = data.lang:getCode()

-- If `data.lang` is an etymology-only language, track both using its code and its full parent's code.

track { affix_type, affix_type .. "/lang/" .. langcode }

local full_langcode = data.lang:getFullCode()

if langcode ~= full_langcode then

track(affix_type .. "/lang/" .. full_langcode)

end

else

whole_words = whole_words + 1

end

table.insert(parts_formatted, export.link_term(part, data))

table.insert(parts_formatted, export.link_term(part, data, "include_separator"))

end

if whole_words == 1 then

track("one whole word")

elseif whole_words == 0 then

track("looks like confix")

end

table.insert(text_sections, export.join_formatted_parts { data = data, parts_formatted = parts_formatted,

categories = categories })

categories = categories, separator_already_added = true })

return table.concat(text_sections)

end

Line 1,039:

Line 1,136:

]==]

function export.show_compound_like(data)

~~local parts_formatted~~ = {}

data.allow_no_affixes_or_compounds = true

local categories = {}

local text_sections, categories, borrowing_type = generate_affix_categories(data)

if data.cat then

Line 1,046:

Line 1,143:

end

-- ~~Make links out of all the parts~~

-- Process each part for display

for i, part in ~~ipairs~~(data.parts) do

local parts_formatted = {}

~~canonicalize_part(~~part~~, data.lang, data.sc)~~

for i, part in ipairs_with_gaps(data.parts) do

table.insert(parts_formatted, export.link_term(part, data))

-- Make a link for the part

table.insert(parts_formatted, export.link_term(part, data, "include_separator"))

end

~~local~~ text_sections ~~= {}~~

if #data.parts > 0 and data.oftext then

table.insert(text_sections, 1, " " .. data.oftext .. " ")

end

if data.text then

table.insert(text_sections, data.text)

table.insert(text_sections, 1, data.text)

~~end~~

~~if #data.parts > 0 and data.oftext then~~

~~table.insert(text_sections, " ")~~

~~table.insert(text_sections, data.oftext)~~

~~table.insert(text_sections, " "~~)

end

table.insert(text_sections, export.join_formatted_parts { data = data, parts_formatted = parts_formatted,

categories = categories })

categories = categories, separator_already_added = true })

return table.concat(text_sections)

end

Line 1,098:

Line 1,194:

part.ts = export.make_affix(part.ts, part.lang, Latn, affix_type)

end

local function track_wrong_affix_type(template, part, expected_affix_type)

if part and not part.type then

local affix_type = parse_term_for_affixes(part.term, part.lang, part.sc)

if affix_type ~= expected_affix_type then

local part_name = expected_affix_type or "base"

local langcode = part.lang:getCode()

local full_langcode = part.lang:getFullCode()

require("Module:debug/track") {

template,

template .. "/" .. part_name,

template .. "/" .. part_name .. "/" .. (affix_type or "none"),

template .. "/" .. part_name .. "/" .. (affix_type or "none") .. "/lang/" .. langcode

}

-- If `part.lang` is an etymology-only language, track both using its code and its full parent's code.

if full_langcode ~= langcode then

require("Module:debug/track")(

template .. "/" .. part_name .. "/" .. (affix_type or "none") .. "/lang/" .. full_langcode

)

end

local function insert_affix_category(categories, pos, affix_type, part, sort_key, sort_base)

-- Don't add a '*fixed with' category if the link term is empty or is in a different language.

if part.term and not part.part_lang then

local cat = pos .. " " .. affix_type .. "ed with " .. ~~make_entry_name_no_links~~(part.lang, part.term) ..

local cat = pos .. " " .. affix_type .. "ed with " .. strip_diacritics_no_links(part.lang, part.term) ..

(part.id and " (" .. part.id .. ")" or "")

if sort_key or sort_base then

Line 1,126:

Line 1,247:

make_part_into_affix(data.prefix, data.lang, data.sc, "prefix")

make_part_into_affix(data.suffix, data.lang, data.sc, "suffix")

track_wrong_affix_type("circumfix", data.prefix, "prefix")

track_wrong_affix_type("circumfix", data.base, nil)

track_wrong_affix_type("circumfix", data.suffix, "suffix")

-- Create circumfix term.

Line 1,143:

Line 1,268:

local sort_base

if data.base.term then

sort_base = ~~make_entry_name_no_links~~(data.base.lang, data.base.term)

sort_base = strip_diacritics_no_links(data.base.lang, data.base.term)

end

Line 1,152:

Line 1,277:

-- Insert the categories, but don't add a '*fixed with' category if the link term is in a different language.

if not data.prefix.part_lang then

table.insert(categories, {cat=data.pos .. " circumfixed with " .. ~~make_entry_name_no_links~~(data.prefix.lang,

table.insert(categories, {cat=data.pos .. " circumfixed with " .. strip_diacritics_no_links(data.prefix.lang,

circumfix), sort_key=data.sort_key, sort_base=sort_base})

end

Line 1,173:

Line 1,298:

make_part_into_affix(data.prefix, data.lang, data.sc, "prefix")

make_part_into_affix(data.suffix, data.lang, data.sc, "suffix")

track_wrong_affix_type("confix", data.prefix, "prefix")

track_wrong_affix_type("confix", data.base, nil)

track_wrong_affix_type("confix", data.suffix, "suffix")

-- Make links out of all the parts.

Line 1,178:

Line 1,307:

local prefix_sort_base

if data.base and data.base.term then

prefix_sort_base = ~~make_entry_name_no_links~~(data.base.lang, data.base.term)

prefix_sort_base = strip_diacritics_no_links(data.base.lang, data.base.term)

elseif data.suffix.term then

prefix_sort_base = ~~make_entry_name_no_links~~(data.suffix.lang, data.suffix.term)

prefix_sort_base = strip_diacritics_no_links(data.suffix.lang, data.suffix.term)

end

Line 1,213:

Line 1,342:

-- Hyphenate the affixes and apply any affix mappings.

make_part_into_affix(data.infix, data.lang, data.sc, "infix")

track_wrong_affix_type("infix", data.base, nil)

track_wrong_affix_type("infix", data.infix, "infix")

-- Make links out of all the parts.

Line 1,243:

Line 1,375:

make_part_into_affix(prefix, data.lang, data.sc, "prefix")

end

for i, prefix in ipairs(data.prefixes) do

track_wrong_affix_type("prefix", prefix, "prefix")

end

track_wrong_affix_type("prefix", data.base, nil)

-- Make links out of all the parts.

Line 1,252:

Line 1,390:

first_sort_base = ine(data.prefixes[2].term) or ine(data.prefixes[2].alt)

if first_sort_base then

first_sort_base = ~~make_entry_name_no_links~~(data.prefixes[2].lang, first_sort_base)

first_sort_base = strip_diacritics_no_links(data.prefixes[2].lang, first_sort_base)

end

elseif data.base then

first_sort_base = ine(data.base.term) or ine(data.base.alt)

if first_sort_base then

first_sort_base = ~~make_entry_name_no_links~~(data.base.lang, first_sort_base)

first_sort_base = strip_diacritics_no_links(data.base.lang, first_sort_base)

end

Line 1,291:

Line 1,429:

for i, suffix in ipairs(data.suffixes) do

make_part_into_affix(suffix, data.lang, data.sc, "suffix")

end

track_wrong_affix_type("suffix", data.base, nil)

for i, suffix in ipairs(data.suffixes) do

track_wrong_affix_type("suffix", suffix, "suffix")

end

@@ Line 6: / Line 6: @@
 local m_str_utils = require("Module:string utilities")
 local m_table = require("Module:table")
+local en_utilities_module = "Module:en-utilities"
 local etymology_module = "Module:etymology"
 local pron_qualifier_module = "Module:pron qualifier"
 local scripts_module = "Module:scripts"
 local utilities_module = "Module:utilities"
--- Export this so the category code in [[Module:category tree/poscatboiler/data/terms by etymology]] can access it.
+-- Export this so the category code in [[Module:category tree/etymology]] can access it.
 export.affix_lang_data_module_prefix = "Module:affix/lang-data/"
@@ Line 18: / Line 19: @@
 local rfind = m_str_utils.find
 local rmatch = m_str_utils.match
-local pluralize = m_str_utils.pluralize
+local pluralize = require(en_utilities_module).pluralize
 local u = m_str_utils.char
 local ucfirst = m_str_utils.ucfirst
+local unpack = unpack or table.unpack -- Lua 5.2 compatibility
--- Export this so the category code in [[Module:category tree/poscatboiler/data/terms by etymology]] can access it.
+function export.affix_variants(canonical, variants)
+	local mappings = {}
+	for _, variant in ipairs(variants) do
+		mappings[variant] = canonical
+	end
+	return mappings
+end
+function export.id_mapping(default, ids)
+	local mapping = { default = default }
+	if ids then
+		for id, target in pairs(ids) do
+			mapping[id] = target
+		end
+	end
+	return mapping
+end
+function export.id_mapping_with_affix_variants(base, id_variants)
+	local mappings = {}
+	for id, variants in pairs(id_variants) do
+		for _, variant in ipairs(variants) do
+			mappings[variant] = export.id_mapping(base, {[id] = base})
+		end
+	end
+	return mappings
+end
+function export.merge_tables(...)
+	local result = {}
+	for i = 1, select('#', ...) do
+		local t = select(i, ...)
+		if t then
+			for k, v in pairs(t) do
+				result[k] = v
+			end
+		end
+	end
+	return result
+end
+-- Export this so the category code in [[Module:category tree/etymology]] can access it.
 export.langs_with_lang_specific_data = {
 	["az"] = true,
 	["fi"] = true,
+	["fr"] = true,
 	["izh"] = true,
 	["la"] = true,
 	["sah"] = true,
 	["tr"] = true,
+	["trk-pro"] = true,
 }
@@ Line 57: / Line 102: @@
 ===About different types of affixes ("template", "display", "link", "lookup" and "category"):===
-* A "template affix" is an affix in its source form as it appears in a template call. Generally, a template affix has
+* A "template affix" is an affix in its source form as it appears in a template call. Generally, a template affix has an
-   an attached template hyphen (see above) to indicate that it is an affix and indicate what type of affix it is
+   attached template hyphen (see above) to indicate that it is an affix and indicate what type of affix it is (prefix,
-  (prefix, suffix, interfix/infix or circumfix), but some of the older-style templates such as {{tl|suffix}},
+  suffix, interfix or circumfix), but some of the older-style templates such as {{tl|suffix}}, {{tl|prefix}},
-  {{tl|prefix}}, {{tl|confix}}, etc. have "positional" affixes where the presence of the affix in a certain position
+  {{tl|confix}}, etc. have "positional" affixes where the presence of the affix in a certain position (e.g. the second
-  (e.g. the second or third parameter) indicates that it is a certain type of affix, whether or not it has an attached
+  or third parameter) indicates that it is a certain type of affix, whether or not it has an attached template hyphen.
-  template hyphen.
 * A "display affix" is the corresponding affix as it is actually displayed to the user. The display affix may differ
    from the template affix for various reasons:
@@ Line 72: / Line 116: @@
       languages have differences between the "template hyphen" specified in the template (which always needs to be
 	 specified somehow or other in templates like {{tl|affix}}, to indicate that the term is an affix and what type of
-	 affix it is) and the display hyphen (see above), with corresponding differences between template and display affixes.
+	 affix it is) and the display hyphen (see above), with corresponding differences between template and display
+	 affixes.
 * A (regular) "link affix" is the affix that is linked to when the affix is shown to the user. The link affix is usually
    the same as the display affix, but will differ in one of three circumstances:
@@ Line 78: / Line 123: @@
       inline modifiers or piped links, as described above under "display affix".
    *# For certain languages, certain affixes are mapped to canonical form using language-specific mappings. For example,
-	 in Finnish, the adjective-forming suffix [[-kas]] appears as [[-käs]] after front vowels, but logically both
+	 in Finnish, the adjective-forming suffix {{m|fi|-kas}} appears as {{m|fi|-käs}} after front vowels, but logically
-	 forms are the same suffix and should be linked and categorized the same. Similarly, in Latin, the negative and
+	 both forms are the same suffix and should be linked and categorized the same. Similarly, in Latin, the negative and
-	 intensive prefixes spelled [[in-]] (etymologically two distinct prefixes) appear variously as [[il-]], [[im-]] or
+	 intensive prefixes spelled {{m|la|in-}} (etymologically two distinct prefixes) appear variously as {{m|la|il-}},
-	 [[ir-]] before certain consonants. Mappings are supplied in [[Module:affix/lang-data/LANGCODE]] to convert
+	 {{m|la|im-}} or {{m|la|ir-}} before certain consonants. Mappings are supplied in [[Module:affix/lang-data/LANGCODE]]
-	 Finnish [[-käs]] to [[-kas]] for linking and categorization purposes. Note that the affixes in the mappings use
+	 to convert Finnish {{m|fi|-käs}} to {{m|fi|-kas}} for linking and categorization purposes. Note that the affixes in
-	 "lookup hyphens" to indicate the different types of affixes, which is usually the same as the template hyphen but
+	 the mappings use "lookup hyphens" to indicate the different types of affixes, which is usually the same as the
-	 differs for Arabic scripts, because there are multiple possible template hyphens recognized but only one lookup
+	 template hyphen but differs for Arabic scripts, because there are multiple possible template hyphens recognized but
-	 hyphen (tatweel). The form of the affix as used to look up in the mapping tables is called the "lookup affix";
+	 only one lookup hyphen (tatweel). The form of the affix as used to look up in the mapping tables is called the
-	 see below.
+	 "lookup affix"; see below.
-* A "stripped link affix" is a link affix that has been passed through the language's `makeEntryName()` function, which
+* A "stripped link affix" is a link affix that has been passed through the language's `stripDiacritics()` function, which
    may strip certain diacritics: e.g. macrons in Latin and Old English (indicating length); acute and grave accents in
    Russian and various other Slavic languages (indicating stress); vowel diacritics in most Arabic-script languages; and
@@ Line 99: / Line 144: @@
 	 link.
    *# If no entry is found, the affix is then looked up in a modified link form (specifically, the modified display
-	 form passed through the language's `makeEntryName()` function, which strips out certain diacritics, but with the
+	 form passed through the language's `stripDiacritics()` function, which strips out certain diacritics, but with the
 	 lookup hyphen re-added if it was stripped out, as in the case of tatweel in many Arabic-script languages).
    The reason for this double lookup procedure is to allow for mappings that are sensitive to the extra diacritics, but
-   also allow for mappings that are not sensitive in this fashion (e.g. Russian [[-ливый]] occurs both stressed and
+   also allow for mappings that are not sensitive in this fashion (e.g. Russian {{m|ru|-ливый}} occurs both stressed and
    unstressed, but is the same suffix either way).
-* A "category affix" is the affix as it appears in categories such as [[:Category:Finnish terms suffixed with -kas]].
+* A "category affix" is the affix as it appears in categories such as [[:Category:Finnish terms suffixed with -kas|
-  The category affix is currently always the same as the stripped link affix. This means that for Arabic-script
+  Category:Finnish terms suffixed with ''-kas'']]. The category affix is currently always the same as the stripped link
-  languages, it may or may not have a tatweel, even if the correponding display affix and regular link affix have a
+  affix. This means that for Arabic-script languages, it may or may not have a tatweel, even if the corresponding display
-  tatweel. As mentioned above, makeEntryName() strips tatweel for Arabic, Persian and Urdu, but not for Ottoman Turkish.
+  affix and regular link affix have a tatweel. As mentioned above, stripDiacritics() strips tatweel for Arabic, Persian
-  Hence affix categories for Arabic, Persian and Urdu will be missing the tatweel, but affix categories for
+  and Urdu, but not for Ottoman Turkish. Hence affix categories for Arabic, Persian and Urdu will be missing the
-  Ottoman Turkish will have it. An additional complication is that if the template affix contains a ZWNJ, the display
+  tatweel, but affix categories for Ottoman Turkish will have it. An additional complication is that if the template
-  (and hence the link and category affixes) will have no hyphen attached in any case.
+  affix contains a ZWNJ, the display (and hence the link and category affixes) will have no hyphen attached in any case.
 ]==]
@@ Line 137: / Line 182: @@
 local ZWNJ = u(0x200C) -- zero-width non-joiner
 local template_hyphens = {
+	-- This covers all Arabic scripts. See above.
 	["Arab"] = "ـ" .. ZWNJ .. "-", -- tatweel + zero-width non-joiner + regular hyphen
 	["Hebr"] = "־", -- Hebrew-specific hyphen termed "maqqef"
-	-- This covers all Arabic scripts. See above.
 	["Mong"] = "᠊",
-	["mnc-Mong"] = "᠊",
-	["sjo-Mong"] = "᠊",
-	["xwo-Mong"] = "᠊",
 	-- FIXME! What about the following right-to-left scripts?
 	-- Adlm (Adlam)
@@ Line 224: / Line 266: @@
 	["Thaa"] = no_display_hyphen,
 	["Thai"] = no_display_hyphen,
+	["Tibt"] = no_display_hyphen,
 }
@@ Line 232: / Line 275: @@
 local function glossary_link(entry, text)
 	text = text or entry
-	return "[[wikt:Appendix:Glossary#" .. entry .. "|" .. text .. "]]"
+	return "[[Appendix:Glossary#" .. entry .. "|" .. text .. "]]"
+end
+local function track(page)
+	if type(page) == "table" then
+		for i, pg in ipairs(page) do
+			page[i] = "affix/" .. pg
+		end
+	else
+		page = "affix/" .. page
+	end
+	require("Module:debug/track")(page)
 end
@@ Line 401: / Line 456: @@
 		for i, cat in ipairs(data.categories) do
 			if type(cat) == "table" then
-				data.categories[i] = require(utilities_module).format_categories({lang:getFullName() .. " " .. cat.cat},
+				data.categories[i] = require(utilities_module).format_categories(lang:getFullName() .. " " .. cat.cat,
 					lang, cat.sort_key, cat.sort_base, force_cat)
 			else
-				data.categories[i] = require(utilities_module).format_categories({lang:getFullName() .. " " .. cat}, lang,
+				data.categories[i] = require(utilities_module).format_categories(lang:getFullName() .. " " .. cat, lang,
 					data.data.sort_key, nil, force_cat)
 			end
@@ Line 410: / Line 465: @@
 		cattext = table.concat(data.categories)
 	end
-	local result = table.concat(data.parts_formatted, " +&lrm; ") .. (data.data.lit and ", literally " ..
+	local result = table.concat(data.parts_formatted, not data.separator_already_added and " +&lrm; " or nil) ..
-		m_links.mark(data.data.lit, "gloss") or "")
+		(data.data.lit and ", literally " .. m_links.mark(data.data.lit, "gloss") or "")
 	local q = data.data.q
 	local qq = data.data.qq
 	local l = data.data.l
 	local ll = data.data.ll
-	if q and q[1] or qq and qq[1] or l and l[1] or ll and ll[1] then
+	local infl = data.data.infl
+	if q and q[1] or qq and qq[1] or l and l[1] or ll and ll[1] or infl and infl[1] then
 		result = require(pron_qualifier_module).format_qualifiers {
 			lang = lang,
@@ Line 424: / Line 480: @@
 			l = l,
 			ll = ll,
+			infl = infl,
 		}
 	end
 	return result .. cattext
-end
---[==[
-Older entry point for calling `join_formatted_parts(). FIXME: Convert callers.
-]==]
-function export.concat_parts(lang, parts_formatted, categories, nocat, sort_key, lit, force_cat)
-	return export.join_formatted_parts {
-		data = {
-			lang = lang,
-			nocat = nocat,
-			sort_key = sort_key,
-			lit = lit,
-			force_cat = force_cat,
-		},
-		parts_formatted = parts_formatted,
-		categories = categories,
-	}
 end
@@ Line 461: / Line 500: @@
--- Remove links and call lang:makeEntryName(term).
+-- Remove links and call lang:stripDiacritics(term).
-local function make_entry_name_no_links(lang, term)
+local function strip_diacritics_no_links(lang, term)
-	-- Double parens because makeEntryName() returns multiple values. Yuck.
+	return lang:stripDiacritics(m_links.remove_links(term))
-	return (lang:makeEntryName(m_links.remove_links(term)))
 end
@@ Line 503: / Line 541: @@
 to access information for constructing the categories added by `format_derived()`.
 ]==]
-function export.link_term(part, data)
+function export.link_term(part, data, include_separator)
 	local result
@@ Line 509: / Line 547: @@
 		result = require(etymology_module).format_derived {
 			lang = data.lang,
-			terminfo = part,
+			terms = {part},
+			sources = {part.lang},
 			sort_key = data.sort_key,
 			nocat = data.nocat,
+			template_name = "affix",
+			qualifiers_labels_on_outside = true,
 			borrowing_type = data.borrowing_type,
 			force_cat = data.force_cat or debug_force_cat,
 		}
 	else
-		-- language (e.g. in a pseudo-loan).
+		result = m_links.full_link(part, "term", nil, "show qualifiers")
-		result = m_links.full_link(part, "term")
 	end
-	if part.q and part.q[1] or part.qq and part.qq[1] or part.l and part.l[1] or part.ll and part.ll[1] or
+	if include_separator and part.separator then
-		part.refs and part.refs[1] then
+		return part.separator .. result
-		result = require(pron_qualifier_module).format_qualifiers {
+	else
-			lang = part.lang,
+		return result
-			text = result,
-			q = part.q,
-			qq = part.qq,
-			l = part.l,
-			ll = part.ll,
-			refs = part.refs,
-		}
 	end
-	return result
 end
@@ Line 612: / Line 643: @@
 	end
-	if not affix_type then
+	if affix_type == "non-affix" then
 		return term
 	elseif affix_type == "circumfix" then
@@ Line 689: / Line 720: @@
 					if mapping then
 						if type(mapping) == "table" then
-							mapping = mapping[affix_id or false]
+							mapping = mapping[affix_id] or mapping.default or mapping[affix_id or false]
 							if mapping then
 								return mapping
@@ Line 721: / Line 752: @@
 	end
-	-- Double parens because makeEntryName() returns multiple values. Yuck.
+	return do_lookup(affix) or do_lookup(lang:stripDiacritics(affix)) or nil
-	return do_lookup(affix) or do_lookup((lang:makeEntryName(affix))) or nil
 end
@@ Line 728: / Line 758: @@
 --[==[
 For a given template term in a given language (see the definition of "template affix" near the top of the file),
-possibly in an explicitly specified script `sc` (but usually nil), return the term's affix type ({"prefix"}, {"infix"},
+possibly in an explicitly specified script `sc` (but usually nil), return the term's affix type ({"prefix"},
-{"suffix"}, {"circumfix"} or {nil} for non-affix) along with the corresponding link and display affixes (see definitions
+{"interfix"}, {"suffix"}, {"circumfix"} or {"non-affix"}) along with the corresponding link and display affixes
-near the top of the file); also the corresponding lookup affix (if `return_lookup_affix` is specified). The term passed
+(see definitions near the top of the file); also the corresponding lookup affix (if `return_lookup_affix` is specified).
-in should already have any fragment (after the # sign) parsed off of it. Four values are returned: `affix_type`,
+The term passed in should already have any fragment (after the # sign) parsed off of it. Four values are returned:
-`link_term`, `display_term` and `lookup_term`. The affix type can be passed in instead of autodetected (pass in {false}
+`affix_type`, `link_term`, `display_term` and `lookup_term`. The affix type can be passed in instead of autodetected; in
-if the term is not an affix); in this case, the template term need not have any attached hyphens, and the appropriate
+this case, the template term need not have any attached hyphens, and the appropriate hyphens will be added in the
-hyphens will be added in the appropriate places. If `do_affix_mapping` is specified, look up the affix in the
+appropriate places. If `do_affix_mapping` is specified, look up the affix in the lang-specific affix mappings, as
-lang-specific affix mappings, as described in the comment at the top of the file; otherwise, the link and display terms
+described in the comment at the top of the file; otherwise, the link and display terms will always be the same. (They
-will always be the same. (They will be the same in any case if the template term has a bracketed link in it or is not
+will be the same in any case if the template term has a bracketed link in it or is not an affix.) If
-an affix.) If `return_lookup_affix` is given, the fourth return value contains the term with appropriate lookup hyphens
+`return_lookup_affix` is given, the fourth return value contains the term with appropriate lookup hyphens in the
-in the appropriate places; otherwise, it is the same as the display term. (This functionality is used in
+appropriate places; otherwise, it is the same as the display term. (This functionality is used in
-[[Module:category tree/poscatboiler/data/affixes and compounds]] to convert link affixes into lookup affixes so that
+[[Module:category tree/affixes and compounds]] to convert link affixes into lookup affixes so that they can be looked up
-they can be looked up in the affix mapping tables.)
+in the affix mapping tables.)
 ]==]
 local function parse_term_for_affixes(term, lang, sc, affix_type, do_affix_mapping, return_lookup_affix, affix_id)
 	if not term then
-		return nil, nil, nil, nil
+		return "non-affix", nil, nil, nil
 	end
+	if term == "^" then
+		-- Indicates a null term to emulate the behavior of {{suffix|foo||bar}}.
+		term = ""
+		return "non-affix", term, term, term
+	end
 	if term:find("^%^") then
-		-- If term begins with ^, it's not an affix no matter what. Strip off the ^ and return "no affix".
+		-- HACK! ^ at the beginning of Korean languages has a special meaning, triggering capitalization of the
-		term = usub(term, 2)
+		-- transliteration. Don't interpret it as "force non-affix" for those languages.
-		return nil, term, term, term
+		local langcode = lang:getCode()
+		if langcode ~= "ko" and langcode ~= "okm" and langcode ~= "jje" then
+			-- Formerly we allowed ^ to force non-affix type; this is now handled using an inline modifier
+			-- <naf>, <root>, etc. Throw an error for the moment when the old way is encountered.
+			error("Use of ^ to force non-affix status is no longer supported; use an inline modifier <naf> or <root> " ..
+				"after the component")
+		end
 	end
@@ Line 763: / Line 805: @@
 	thyph = "([" .. thyph .. "])"
-	if affix_type == nil then
+	if not affix_type then
 		if rfind(term, thyph .. " " .. thyph) then
 			affix_type = "circumfix"
@@ Line 770: / Line 812: @@
 			local has_ending_hyphen = rfind(term, thyph .. "$")
 			if has_beginning_hyphen and has_ending_hyphen then
-				affix_type = "infix"
+				affix_type = "interfix"
 			elseif has_ending_hyphen then
 				affix_type = "prefix"
 			elseif has_beginning_hyphen then
 				affix_type = "suffix"
+			else
+				affix_type = "non-affix"
 			end
 		end
@@ Line 780: / Line 824: @@
 	local link_term, display_term, lookup_term
-	if affix_type then
+	if affix_type == "non-affix" then
+		link_term = term
+		display_term = term
+		lookup_term = term
+	else
 		display_term = reconstruct_term_per_hyphens(term, affix_type, scode, thyph, dhyph)
 		if do_affix_mapping then
@@ Line 800: / Line 848: @@
 			lookup_term = display_term
 		end
-	else
-		link_term = term
-		display_term = term
-		lookup_term = term
 	end
@@ Line 819: / Line 863: @@
 is of the wrong type). Three values are returned: the link term, display term and lookup term. This function is a thin
 wrapper around `parse_term_for_affixes`; see the comments above that function for more information. Note that this
-function is exposed externally because it is called by [[Module:category tree/poscatboiler/data/affixes and compounds]];
+function is exposed externally because it is called by [[Module:category tree/affixes and compounds]]; see the comment
-see the comment in `parse_term_for_affixes` for more information.
+in `parse_term_for_affixes` for more information.
 ]==]
 function export.make_affix(term, lang, sc, affix_type, do_affix_mapping, return_lookup_affix, affix_id)
 	if not (affix_type == "prefix" or affix_type == "suffix" or affix_type == "circumfix" or affix_type == "infix" or
-		affix_type == "interfix") then
+		affix_type == "interfix" or affix_type == "non-affix") then
 		error("Internal error: Invalid affix type " .. (affix_type or "(nil)"))
 	end
@@ Line 839: / Line 883: @@
 --[==[
-Implementation of {{tl|affix}} and {{tl|surface analysis}}. `data` contains all the information describing the affixes to
+Core categorization logic for affixes. This is shared between show_affix(), show_compound_like() and
-be displayed, and contains the following:
+get_affix_categories_only(). Returns the categories array and other metadata needed for formatting.
-* `.lang` ('''required'''): Overall language object. Different from term-specific language objects (see `.parts` below).
-* `.sc`: Overall script object (usually omitted). Different from term-specific script objects.
-* `.parts` ('''required'''): List of objects describing the affixes to show. The general format of each object is as would
-           be passed to `full_link()`, except that the `.lang` field should be missing unless the term is of a language
-		   different from the overall `.lang` value (in such a case, the language name is shown along with the term and
-		   an additional "derived from" category is added). '''WARNING''': The data in `.parts` will be destructively
-		   modified.
-* `.pos`: Overall part of speech (used in categories, defaults to {"terms"}). Different from term-specific part of speech.
-* `.sort_key`: Overall sort key. Normally omitted except e.g. in Japanese.
-* `.type`: Type of compound, if the parts in `.parts` describe a compound. Strictly optional, and if supplied, the
-		   compound type is displayed before the parts (normally capitalized, unless `.nocap` is given).
-* `.nocap`: Don't capitalize the first letter of text displayed before the parts (relevant only if `.type` or
-		    `.surface_analysis` is given).
-* `.notext`: Don't display any text before the parts (relevant only if `.type` or `.surface_analysis` is given).
-* `.nocat`: Disable all categorization.
-* `.lit`: Overall literal definition. Different from term-specific literal definitions.
-* `.force_cat`: Always display categories, even on userspace pages.
-* `.surface_analysis`: Implement {{surface analysis}}; adds `By surface analysis, ` before the parts.
-'''WARNING''': This destructively modifies both `data` and the individual structures within `.parts`.
 ]==]
-function export.show_affix(data)
+local function generate_affix_categories(data)
 	data.pos = data.pos or default_pos
 	data.pos = pluralize(data.pos)
@@ Line 872: / Line 895: @@
 	-- Process each part
-	local parts_formatted = {}
 	local whole_words = 0
 	local is_affix_or_compound = false
@@ Line 882: / Line 904: @@
 		data.parts[i] = part
 		canonicalize_part(part, data.lang, data.sc)
 		-- Determine affix type and get link and display terms (see text at top of file). Store them in the part
 		-- (in fields that won't clash with fields used by full_link() in [[Module:links]] or link_term()), so they
 		-- can be used in the loop below when categorizing.
 		part.affix_type, part.affix_link_term, part.affix_display_term = parse_term_for_affixes(part.term,
-			part.lang, part.sc, nil, not part.alt, nil, part.id)
+			part.lang, part.sc, part.type, not part.alt, nil, part.id)
 		-- If link_term is an empty string, either a bare ^ was specified or an empty term was used along with inline
@@ Line 895: / Line 916: @@
 		-- redundant alt text.
 		part.alt = part.alt or (part.affix_display_term ~= part.affix_link_term and part.affix_display_term) or nil
-		-- Make a link for the part.
-		table.insert(parts_formatted, export.link_term(part, data))
 	end
-	-- Now do categorization.
+	if not data.noaffixcat then
-	for i, part in ipairs_with_gaps(data.parts) do
+		-- Now do categorization.
-		local affix_type = part.affix_type
+		for i, part in ipairs_with_gaps(data.parts) do
-		if affix_type then
+			local affix_type = part.affix_type
-			is_affix_or_compound = true
+			if affix_type ~= "non-affix" then
-			-- We cannot distinguish interfixes from infixes by appearance. Prefer interfixes; infixes will need to
+				is_affix_or_compound = true
-			-- use {{infix}}.
-			if affix_type == "infix" then affix_type = "interfix" end
+				-- Make a sort key. For the first part, use the second part as the sort key; the intention is that if the
+				-- term has a prefix, sorting by the prefix won't be very useful so we sort by what follows, which is
-			-- Make a sort key. For the first part, use the second part as the sort key; the intention is that if the
+				-- presumably the root.
-			-- term has a prefix, sorting by the prefix won't be very useful so we sort by what follows, which is
+				local part_sort_base = nil
-			-- presumably the root.
+				local part_sort = part.sort or data.sort_key
-			local part_sort_base = nil
-			local part_sort = part.sort or data.sort_key
+				if i == 1 and data.parts[2] and data.parts[2].term then
+					local part2 = data.parts[2]
-			if i == 1 and data.parts[2] and data.parts[2].term then
+					-- If the second-part link term is empty, the user requested an unlinked term; avoid a wikitext error
-				local part2 = data.parts[2]
+					-- by using the alt value if available.
-				-- If the second-part link term is empty, the user requested an unlinked term; avoid a wikitext error
+					part_sort_base = ine(part2.affix_link_term) or ine(part2.alt)
-				-- by using the alt value if available.
+					if part_sort_base then
-				part_sort_base = ine(part2.affix_link_term) or ine(part2.alt)
+						part_sort_base = strip_diacritics_no_links(part2.lang, part_sort_base)
-				if part_sort_base then
+					end
-					part_sort_base = make_entry_name_no_links(part2.lang, part_sort_base)
+				end
+				if part.pos and rfind(part.pos, "patronym") then
+					table.insert(categories, {cat = "patronymics", sort_key = part_sort, sort_base = part_sort_base})
+				end
+				if data.pos ~= "terms" and part.pos and rfind(part.pos, "diminutive") then
+					table.insert(categories, {cat = "diminutive " .. data.pos, sort_key = part_sort,
+						sort_base = part_sort_base})
+				end
+				-- Don't add a '*fixed with' category if the link term is empty or is in a different language.
+				if ine(part.affix_link_term) and not part.part_lang then
+					table.insert(categories, {cat = data.pos .. " " .. affix_type .. "ed with " ..
+						strip_diacritics_no_links(part.lang, part.affix_link_term) ..
+							(part.id and " (" .. part.id .. ")" or ""),
+						sort_key = part_sort, sort_base = part_sort_base})
+				end
+			else
+				whole_words = whole_words + 1
+				if whole_words == 2 then
+					is_affix_or_compound = true
+					table.insert(categories, "compound " .. data.pos)
 				end
 			end
+		end
+		-- Make sure there was either an affix or a compound (two or more non-affix terms).
+		if not is_affix_or_compound and not data.allow_no_affixes_or_compounds then
+			error("The parameters did not include any affixes, and the term is not a compound. Please provide at least one affix.")
+		end
+	end
-			if part.pos and rfind(part.pos, "patronym") then
+	return text_sections, categories, borrowing_type
-				table.insert(categories, {cat = "patronymics", sort_key = part_sort, sort_base = part_sort_base})
+end
-			end
-			if data.pos ~= "terms" and part.pos and rfind(part.pos, "diminutive") then
+--[==[
-				table.insert(categories, {cat = "diminutive " .. data.pos, sort_key = part_sort,
+Implementation of {{tl|affix}} and {{tl|surface analysis}}. `data` contains all the information describing the affixes to
-					sort_base = part_sort_base})
+be displayed, and contains the following:
-			end
-			-- Don't add a '*fixed with' category if the link term is empty or is in a different language.
+* `.lang` ('''required'''): Overall language object. Different from term-specific language objects (see `.parts` below).
-			if ine(part.affix_link_term) and not part.part_lang then
+* `.sc`: Overall script object (usually omitted). Different from term-specific script objects.
-				table.insert(categories, {cat = data.pos .. " " .. affix_type .. "ed with " ..
+* `.parts` ('''required'''): List of objects describing the affixes to show. The general format of each object is as would
-					make_entry_name_no_links(part.lang, part.affix_link_term) ..
+           be passed to `full_link()`, except that the `.lang` field should be missing unless the term is of a language
-						(part.id and " (" .. part.id .. ")" or ""),
+		   different from the overall `.lang` value (in such a case, the language name is shown along with the term and
-					sort_key = part_sort, sort_base = part_sort_base})
+		   an additional "derived from" category is added). '''WARNING''': The data in `.parts` will be destructively
-			end
+		   modified.
-		else
+* `.pos`: Overall part of speech (used in categories, defaults to {"terms"}). Different from term-specific part of speech.
-			whole_words = whole_words + 1
+* `.sort_key`: Overall sort key. Normally omitted except e.g. in Japanese.
+* `.type`: Type of compound, if the parts in `.parts` describe a compound. Strictly optional, and if supplied, the
+		   compound type is displayed before the parts (normally capitalized, unless `.nocap` is given).
+* `.nocap`: Don't capitalize the first letter of text displayed before the parts (relevant only if `.type` or
+		    `.surface_analysis` is given).
+* `.notext`: Don't display any text before the parts (relevant only if `.type` or `.surface_analysis` is given).
+* `.nocat`: Disable all categorization.
+* `.noaffixcat`: Disable affix (and compound) categorization. Relevant for e.g. blends, which may otherwise
+                 be incorrectly categorized as compound terms.
+* `.lit`: Overall literal definition. Different from term-specific literal definitions.
+* `.force_cat`: Always display categories, even on userspace pages.
+* `.surface_analysis`: Implement {{surface analysis}}; adds `By surface analysis, ` before the parts.
-			if whole_words == 2 then
+'''WARNING''': This destructively modifies both `data` and the individual structures within `.parts`.
-				is_affix_or_compound = true
+]==]
-				table.insert(categories, "compound " .. data.pos)
+function export.show_affix(data)
-			end
+	local text_sections, categories, borrowing_type = generate_affix_categories(data)
-		end
-	end
-	-- Make sure there was either an affix or a compound (two or more regular terms).
+	-- Process each part for display
-	if not is_affix_or_compound then
+	local parts_formatted = {}
-		error("The parameters did not include any affixes, and the term is not a compound. Please provide at least one affix.")
+	for i, part in ipairs_with_gaps(data.parts) do
+		-- Make a link for the part
+		table.insert(parts_formatted, export.link_term(part, data, "include_separator"))
 	end
@@ Line 961: / Line 1,018: @@
 			text = ucfirst(text)
 		end
 		table.insert(text_sections, 1, text)
 	end
 	table.insert(text_sections, export.join_formatted_parts { data = data, parts_formatted = parts_formatted,
-		categories = categories })
+		categories = categories, separator_already_added = true })
 	return table.concat(text_sections)
+end
+--[==[
+Return only the categories that show_affix() would generate for `data`, without producing any text output or
+formatting. This is used by Module:etymon to get affix categorization.
+The return value is an array of category objects, where each entry is either a string (simple category name) or a
+table with keys `cat`, `sort_key` and `sort_base` for more complex categorization.
+`data` should have the same structure as passed to show_affix():
+* `.lang` (required): Overall language object
+* `.parts` (required): Array of affix part objects with `.term`, `.lang`, `.id`, etc.
+* `.pos`: Part of speech (defaults to "terms")
+* `.sort_key`: Overall sort key for categories
+'''WARNING''': This destructively modifies both `data` and the individual structures within `.parts`.
+]==]
+function export.get_affix_categories_only(data)
+	-- generate_affix_categories() returns (text_sections, categories, borrowing_type); only the second is wanted here.
+	local _, affix_categories = generate_affix_categories(data)
+	return affix_categories
 end
@@ Line 973: / Line 1,049: @@
 function export.show_surface_analysis(data)
 	data.surface_analysis = true -- mutates caller's `data`; show_affix() then adds `By surface analysis, ` before the parts
+	data.allow_no_affixes_or_compounds = true -- suppress the "did not include any affixes" error during categorization
 	return export.show_affix(data)
 end
@@ Line 983: / Line 1,060: @@
 ]==]
 function export.show_compound(data)
+	local data_for_cats = m_table.shallowCopy(data)
+	data_for_cats.parts = {}
+	for k, part in pairs(data.parts) do
+		data_for_cats.parts[k] = m_table.shallowCopy(part)
+	end
+	data_for_cats.allow_no_affixes_or_compounds = true
+	local categories = export.get_affix_categories_only(data_for_cats)
 	data.pos = data.pos or default_pos
 	data.pos = pluralize(data.pos)
-	local text_sections, categories, borrowing_type =
+	local text_sections, _, borrowing_type =
 		process_etymology_type(data.type, data.nocap, data.notext, #data.parts > 0)
 	data.borrowing_type = borrowing_type
 	local parts_formatted = {}
 	table.insert(categories, "compound " .. data.pos)
@@ Line 999: / Line 1,084: @@
 		-- Determine affix type and get link and display terms (see text at top of file).
 		local affix_type, link_term, display_term = parse_term_for_affixes(part.term, part.lang, part.sc,
-			nil, not part.alt, nil, part.id)
+			part.type, not part.alt, nil, part.id)
-		-- If the term is an infix, recognize it as such (which means e.g. that we will display the term without
+		-- If the term is an interfix or the type was explicitly given, recognize it as such (which means e.g. that we
-		-- hyphens for East Asian languages). Otherwise, ignore the fact that it looks like an affix and display as
+		-- will display the term without hyphens for East Asian languages). Otherwise, ignore the fact that it looks
-		-- specified in the template (but pay attention to the detected affix type for certain tracking purposes).
+		-- like an affix and display as specified in the template (but pay attention to the detected affix type for
-		if affix_type == "infix" then
+		-- certain tracking purposes).
+		if affix_type == "interfix" or (part.type and part.type ~= "non-affix") then
 			-- If link_term is an empty string, either a bare ^ was specified or an empty term was used along with
 			-- inline modifiers. The intention in either case is not to link the term. Don't add a '*fixed with'
@@ Line 1,011: / Line 1,097: @@
 			-- redundant alt text.
 			if link_term and link_term ~= "" and not part.part_lang then
-				table.insert(categories, {cat = data.pos .. " interfixed with " .. make_entry_name_no_links(part.lang,
+				table.insert(categories, {cat = data.pos .. " " .. affix_type .. "ed with " ..
-					link_term), sort_key = part.sort or data.sort_key})
+					strip_diacritics_no_links(part.lang, link_term), sort_key = part.sort or data.sort_key})
 			end
 			part.term = link_term ~= "" and link_term or nil
 			part.alt = part.alt or (display_term ~= link_term and display_term) or nil
 		else
-			if affix_type then
+			if affix_type ~= "non-affix" then
 				local langcode = data.lang:getCode()
+				-- If `data.lang` is an etymology-only language, track both using its code and its full parent's code.
+				track { affix_type, affix_type .. "/lang/" .. langcode }
 				local full_langcode = data.lang:getFullCode()
+				if langcode ~= full_langcode then
+					track(affix_type .. "/lang/" .. full_langcode)
+				end
 			else
 				whole_words = whole_words + 1
 			end
 		end
-		table.insert(parts_formatted, export.link_term(part, data))
+		table.insert(parts_formatted, export.link_term(part, data, "include_separator"))
+	end
+	if whole_words == 1 then
+		track("one whole word")
+	elseif whole_words == 0 then
+		track("looks like confix")
 	end
 	table.insert(text_sections, export.join_formatted_parts { data = data, parts_formatted = parts_formatted,
-		categories = categories })
+		categories = categories, separator_already_added = true })
 	return table.concat(text_sections)
 end
@@ Line 1,039: / Line 1,136: @@
 ]==]
 function export.show_compound_like(data)
-	local parts_formatted = {}
+	data.allow_no_affixes_or_compounds = true
-	local categories = {}
+	local text_sections, categories, borrowing_type = generate_affix_categories(data)
 	if data.cat then
@@ Line 1,046: / Line 1,143: @@
 	end
-	-- Make links out of all the parts
+	-- Process each part for display
-	for i, part in ipairs(data.parts) do
+	local parts_formatted = {}
-		canonicalize_part(part, data.lang, data.sc)
+	for i, part in ipairs_with_gaps(data.parts) do
-		table.insert(parts_formatted, export.link_term(part, data))
+		-- Make a link for the part
+		table.insert(parts_formatted, export.link_term(part, data, "include_separator"))
 	end
-	local text_sections = {}
+	if #data.parts > 0 and data.oftext then
+		table.insert(text_sections, 1, " " .. data.oftext .. " ")
+	end
 	if data.text then
-		table.insert(text_sections, data.text)
+		table.insert(text_sections, 1, data.text)
-	end
-	if #data.parts > 0 and data.oftext then
-		table.insert(text_sections, " ")
-		table.insert(text_sections, data.oftext)
-		table.insert(text_sections, " ")
 	end
 	table.insert(text_sections, export.join_formatted_parts { data = data, parts_formatted = parts_formatted,
-		categories = categories })
+		categories = categories, separator_already_added = true })
 	return table.concat(text_sections)
 end
@@ Line 1,098: / Line 1,194: @@
 	part.ts = export.make_affix(part.ts, part.lang, Latn, affix_type)
 end
+--[==[
+Record tracking entries when the affix type detected from the appearance of `part` disagrees with the slot it occupies
+in a single-purpose template. `template` is the tracking-key prefix (e.g. "circumfix"); `part` is a part object whose
+`.term`, `.lang` and `.sc` are passed to `parse_term_for_affixes` (if `part` is nil, nothing happens);
+`expected_affix_type` is the affix type the slot calls for, or nil for a base slot, which is expected to hold a
+non-affix. Parts with an explicit `.type` are never tracked, since the user has overridden detection.
+]==]
+local function track_wrong_affix_type(template, part, expected_affix_type)
+	if not part or part.type then
+		return
+	end
+	local affix_type = parse_term_for_affixes(part.term, part.lang, part.sc)
+	local is_mismatch
+	if expected_affix_type == nil then
+		-- parse_term_for_affixes() reports a term without affix hyphens as "non-affix" rather than nil, so a plain
+		-- comparison against nil would flag every ordinary base as having the wrong type.
+		is_mismatch = affix_type ~= nil and affix_type ~= "non-affix"
+	else
+		is_mismatch = affix_type ~= expected_affix_type
+	end
+	if not is_mismatch then
+		return
+	end
+	local debug_track = require("Module:debug/track")
+	local part_name = expected_affix_type or "base"
+	local type_key = template .. "/" .. part_name .. "/" .. (affix_type or "none")
+	local langcode = part.lang:getCode()
+	local full_langcode = part.lang:getFullCode()
+	debug_track {
+		template,
+		template .. "/" .. part_name,
+		type_key,
+		type_key .. "/lang/" .. langcode
+	}
+	-- If `part.lang` is an etymology-only language, track both using its code and its full parent's code.
+	if full_langcode ~= langcode then
+		debug_track(type_key .. "/lang/" .. full_langcode)
+	end
+end
 local function insert_affix_category(categories, pos, affix_type, part, sort_key, sort_base)
 	-- Don't add a '*fixed with' category if the link term is empty or is in a different language.
 	if part.term and not part.part_lang then
-		local cat = pos .. " " .. affix_type .. "ed with " .. make_entry_name_no_links(part.lang, part.term) ..
+		local cat = pos .. " " .. affix_type .. "ed with " .. strip_diacritics_no_links(part.lang, part.term) ..
 			(part.id and " (" .. part.id .. ")" or "")
 		if sort_key or sort_base then
@@ Line 1,126: / Line 1,247: @@
 	make_part_into_affix(data.prefix, data.lang, data.sc, "prefix")
 	make_part_into_affix(data.suffix, data.lang, data.sc, "suffix")
+	track_wrong_affix_type("circumfix", data.prefix, "prefix")
+	track_wrong_affix_type("circumfix", data.base, nil)
+	track_wrong_affix_type("circumfix", data.suffix, "suffix")
 	-- Create circumfix term.
@@ Line 1,143: / Line 1,268: @@
 	local sort_base
 	if data.base.term then
-		sort_base = make_entry_name_no_links(data.base.lang, data.base.term)
+		sort_base = strip_diacritics_no_links(data.base.lang, data.base.term)
 	end
@@ Line 1,152: / Line 1,277: @@
 	-- Insert the categories, but don't add a '*fixed with' category if the link term is in a different language.
 	if not data.prefix.part_lang then
-		table.insert(categories, {cat=data.pos .. " circumfixed with " .. make_entry_name_no_links(data.prefix.lang,
+		table.insert(categories, {cat=data.pos .. " circumfixed with " .. strip_diacritics_no_links(data.prefix.lang,
 			circumfix), sort_key=data.sort_key, sort_base=sort_base})
 	end
@@ Line 1,173: / Line 1,298: @@
 	make_part_into_affix(data.prefix, data.lang, data.sc, "prefix")
 	make_part_into_affix(data.suffix, data.lang, data.sc, "suffix")
+	track_wrong_affix_type("confix", data.prefix, "prefix")
+	track_wrong_affix_type("confix", data.base, nil)
+	track_wrong_affix_type("confix", data.suffix, "suffix")
 	-- Make links out of all the parts.
@@ Line 1,178: / Line 1,307: @@
 	local prefix_sort_base
 	if data.base and data.base.term then
-		prefix_sort_base = make_entry_name_no_links(data.base.lang, data.base.term)
+		prefix_sort_base = strip_diacritics_no_links(data.base.lang, data.base.term)
 	elseif data.suffix.term then
-		prefix_sort_base = make_entry_name_no_links(data.suffix.lang, data.suffix.term)
+		prefix_sort_base = strip_diacritics_no_links(data.suffix.lang, data.suffix.term)
 	end
@@ Line 1,213: / Line 1,342: @@
 	-- Hyphenate the affixes and apply any affix mappings.
 	make_part_into_affix(data.infix, data.lang, data.sc, "infix")
+	track_wrong_affix_type("infix", data.base, nil)
+	track_wrong_affix_type("infix", data.infix, "infix")
 	-- Make links out of all the parts.
@@ Line 1,243: / Line 1,375: @@
 		make_part_into_affix(prefix, data.lang, data.sc, "prefix")
 	end
+	for i, prefix in ipairs(data.prefixes) do
+		track_wrong_affix_type("prefix", prefix, "prefix")
+	end
+	track_wrong_affix_type("prefix", data.base, nil)
 	-- Make links out of all the parts.
@@ Line 1,252: / Line 1,390: @@
 		first_sort_base = ine(data.prefixes[2].term) or ine(data.prefixes[2].alt)
 		if first_sort_base then
-			first_sort_base = make_entry_name_no_links(data.prefixes[2].lang, first_sort_base)
+			first_sort_base = strip_diacritics_no_links(data.prefixes[2].lang, first_sort_base)
 		end
 	elseif data.base then
 		first_sort_base = ine(data.base.term) or ine(data.base.alt)
 		if first_sort_base then
-			first_sort_base = make_entry_name_no_links(data.base.lang, first_sort_base)
+			first_sort_base = strip_diacritics_no_links(data.base.lang, first_sort_base)
 		end
 	end
@@ Line 1,291: / Line 1,429: @@
 	for i, suffix in ipairs(data.suffixes) do
 		make_part_into_affix(suffix, data.lang, data.sc, "suffix")
+	end
+	track_wrong_affix_type("suffix", data.base, nil)
+	for i, suffix in ipairs(data.suffixes) do
+		track_wrong_affix_type("suffix", suffix, "suffix")
 	end