Module:affix: Difference between revisions
Created page with "local export = {} local debug_force_cat = false -- if set to true, always display categories even on userspace pages local m_links = require("Module:links") local m_str_utils = require("Module:string utilities") local m_table = require("Module:table") local etymology_module = "Module:etymology" local pron_qualifier_module = "Module:pron qualifier" local scripts_module = "Module:scripts" local utilities_module = "Module:utilities" -- Export this so the category code in..." |
No edit summary |
||
| (16 intermediate revisions by the same user not shown) | |||
| Line 6: | Line 6: | ||
local m_str_utils = require("Module:string utilities") | local m_str_utils = require("Module:string utilities") | ||
local m_table = require("Module:table") | local m_table = require("Module:table") | ||
local en_utilities_module = "Module:en-utilities" | |||
local etymology_module = "Module:etymology" | local etymology_module = "Module:etymology" | ||
local pron_qualifier_module = "Module:pron qualifier" | local pron_qualifier_module = "Module:pron qualifier" | ||
local scripts_module = "Module:scripts" | local scripts_module = "Module:scripts" | ||
local utilities_module = "Module:utilities" | local utilities_module = "Module:utilities" | ||
-- Export this so the category code in [[Module:category tree/ | -- Export this so the category code in [[Module:category tree/etymology]] can access it. | ||
export.affix_lang_data_module_prefix = "Module:affix/lang-data/" | export.affix_lang_data_module_prefix = "Module:affix/lang-data/" | ||
| Line 18: | Line 19: | ||
local rfind = m_str_utils.find | local rfind = m_str_utils.find | ||
local rmatch = m_str_utils.match | local rmatch = m_str_utils.match | ||
local pluralize = | local pluralize = require(en_utilities_module).pluralize | ||
local u = m_str_utils.char | local u = m_str_utils.char | ||
local ucfirst = m_str_utils.ucfirst | local ucfirst = m_str_utils.ucfirst | ||
local unpack = unpack or table.unpack -- Lua 5.2 compatibility | |||
-- Export this so the category code in [[Module:category tree/ | function export.affix_variants(canonical, variants) | ||
local mappings = {} | |||
for _, variant in ipairs(variants) do | |||
mappings[variant] = canonical | |||
end | |||
return mappings | |||
end | |||
function export.id_mapping(default, ids) | |||
local mapping = { default = default } | |||
if ids then | |||
for id, target in pairs(ids) do | |||
mapping[id] = target | |||
end | |||
end | |||
return mapping | |||
end | |||
function export.id_mapping_with_affix_variants(base, id_variants) | |||
local mappings = {} | |||
for id, variants in pairs(id_variants) do | |||
for _, variant in ipairs(variants) do | |||
mappings[variant] = export.id_mapping(base, {[id] = base}) | |||
end | |||
end | |||
return mappings | |||
end | |||
function export.merge_tables(...) | |||
local result = {} | |||
for i = 1, select('#', ...) do | |||
local t = select(i, ...) | |||
if t then | |||
for k, v in pairs(t) do | |||
result[k] = v | |||
end | |||
end | |||
end | |||
return result | |||
end | |||
-- Export this so the category code in [[Module:category tree/etymology]] can access it. | |||
export.langs_with_lang_specific_data = { | export.langs_with_lang_specific_data = { | ||
["az"] = true, | ["az"] = true, | ||
["fi"] = true, | ["fi"] = true, | ||
["fr"] = true, | |||
["izh"] = true, | ["izh"] = true, | ||
["la"] = true, | ["la"] = true, | ||
["sah"] = true, | ["sah"] = true, | ||
["tr"] = true, | ["tr"] = true, | ||
["trk-pro"] = true, | |||
} | } | ||
| Line 57: | Line 102: | ||
===About different types of affixes ("template", "display", "link", "lookup" and "category"):=== | ===About different types of affixes ("template", "display", "link", "lookup" and "category"):=== | ||
* A "template affix" is an affix in its source form as it appears in a template call. Generally, a template affix has | * A "template affix" is an affix in its source form as it appears in a template call. Generally, a template affix has an | ||
attached template hyphen (see above) to indicate that it is an affix and indicate what type of affix it is (prefix, | |||
suffix, interfix or circumfix), but some of the older-style templates such as {{tl|suffix}}, {{tl|prefix}}, | |||
{{tl|confix}}, etc. have "positional" affixes where the presence of the affix in a certain position (e.g. the second | |||
or third parameter) indicates that it is a certain type of affix, whether or not it has an attached template hyphen. | |||
* A "display affix" is the corresponding affix as it is actually displayed to the user. The display affix may differ | * A "display affix" is the corresponding affix as it is actually displayed to the user. The display affix may differ | ||
from the template affix for various reasons: | from the template affix for various reasons: | ||
| Line 72: | Line 116: | ||
languages have differences between the "template hyphen" specified in the template (which always needs to be | languages have differences between the "template hyphen" specified in the template (which always needs to be | ||
specified somehow or other in templates like {{tl|affix}}, to indicate that the term is an affix and what type of | specified somehow or other in templates like {{tl|affix}}, to indicate that the term is an affix and what type of | ||
affix it is) and the display hyphen (see above), with corresponding differences between template and display affixes. | affix it is) and the display hyphen (see above), with corresponding differences between template and display | ||
affixes. | |||
* A (regular) "link affix" is the affix that is linked to when the affix is shown to the user. The link affix is usually | * A (regular) "link affix" is the affix that is linked to when the affix is shown to the user. The link affix is usually | ||
the same as the display affix, but will differ in one of three circumstances: | the same as the display affix, but will differ in one of three circumstances: | ||
| Line 78: | Line 123: | ||
inline modifiers or piped links, as described above under "display affix". | inline modifiers or piped links, as described above under "display affix". | ||
*# For certain languages, certain affixes are mapped to canonical form using language-specific mappings. For example, | *# For certain languages, certain affixes are mapped to canonical form using language-specific mappings. For example, | ||
in Finnish, the adjective-forming suffix | in Finnish, the adjective-forming suffix {{m|fi|-kas}} appears as {{m|fi|-käs}} after front vowels, but logically | ||
forms are the same suffix and should be linked and categorized the same. Similarly, in Latin, the negative and | both forms are the same suffix and should be linked and categorized the same. Similarly, in Latin, the negative and | ||
intensive prefixes spelled | intensive prefixes spelled {{m|la|in-}} (etymologically two distinct prefixes) appear variously as {{m|la|il-}}, | ||
{{m|la|im-}} or {{m|la|ir-}} before certain consonants. Mappings are supplied in [[Module:affix/lang-data/LANGCODE]] | |||
to convert Finnish {{m|fi|-käs}} to {{m|fi|-kas}} for linking and categorization purposes. Note that the affixes in | |||
the mappings use "lookup hyphens" to indicate the different types of affixes, which is usually the same as the | |||
template hyphen but differs for Arabic scripts, because there are multiple possible template hyphens recognized but | |||
only one lookup hyphen (tatweel). The form of the affix as used to look up in the mapping tables is called the | |||
"lookup affix"; see below. | |||
* A "stripped link affix" is a link affix that has been passed through the language's ` | * A "stripped link affix" is a link affix that has been passed through the language's `stripDiacritics()` function, which | ||
may strip certain diacritics: e.g. macrons in Latin and Old English (indicating length); acute and grave accents in | may strip certain diacritics: e.g. macrons in Latin and Old English (indicating length); acute and grave accents in | ||
Russian and various other Slavic languages (indicating stress); vowel diacritics in most Arabic-script languages; and | Russian and various other Slavic languages (indicating stress); vowel diacritics in most Arabic-script languages; and | ||
| Line 99: | Line 144: | ||
link. | link. | ||
*# If no entry is found, the affix is then looked up in a modified link form (specifically, the modified display | *# If no entry is found, the affix is then looked up in a modified link form (specifically, the modified display | ||
form passed through the language's ` | form passed through the language's `stripDiacritics()` function, which strips out certain diacritics, but with the | ||
lookup hyphen re-added if it was stripped out, as in the case of tatweel in many Arabic-script languages). | lookup hyphen re-added if it was stripped out, as in the case of tatweel in many Arabic-script languages). | ||
The reason for this double lookup procedure is to allow for mappings that are sensitive to the extra diacritics, but | The reason for this double lookup procedure is to allow for mappings that are sensitive to the extra diacritics, but | ||
also allow for mappings that are not sensitive in this fashion (e.g. Russian | also allow for mappings that are not sensitive in this fashion (e.g. Russian {{m|ru|-ливый}} occurs both stressed and | ||
unstressed, but is the same suffix either way). | unstressed, but is the same suffix either way). | ||
* A "category affix" is the affix as it appears in categories such as [[:Category:Finnish terms suffixed with -kas]]. | * A "category affix" is the affix as it appears in categories such as [[:Category:Finnish terms suffixed with -kas| | ||
Category:Finnish terms suffixed with ''-kas'']]. The category affix is currently always the same as the stripped link | |||
affix. This means that for Arabic-script languages, it may or may not have a tatweel, even if the corresponding display | |||
affix and regular link affix have a tatweel. As mentioned above, stripDiacritics() strips tatweel for Arabic, Persian | |||
and Urdu, but not for Ottoman Turkish. Hence affix categories for Arabic, Persian and Urdu will be missing the | |||
tatweel, but affix categories for Ottoman Turkish will have it. An additional complication is that if the template | |||
affix contains a ZWNJ, the display (and hence the link and category affixes) will have no hyphen attached in any case. | |||
]==] | ]==] | ||
| Line 137: | Line 182: | ||
local ZWNJ = u(0x200C) -- zero-width non-joiner | local ZWNJ = u(0x200C) -- zero-width non-joiner | ||
local template_hyphens = { | local template_hyphens = { | ||
-- This covers all Arabic scripts. See above. | |||
["Arab"] = "ـ" .. ZWNJ .. "-", -- tatweel + zero-width non-joiner + regular hyphen | ["Arab"] = "ـ" .. ZWNJ .. "-", -- tatweel + zero-width non-joiner + regular hyphen | ||
["Hebr"] = "־", -- Hebrew-specific hyphen termed "maqqef" | ["Hebr"] = "־", -- Hebrew-specific hyphen termed "maqqef" | ||
["Mong"] = "᠊", | ["Mong"] = "᠊", | ||
-- FIXME! What about the following right-to-left scripts? | -- FIXME! What about the following right-to-left scripts? | ||
-- Adlm (Adlam) | -- Adlm (Adlam) | ||
| Line 224: | Line 266: | ||
["Thaa"] = no_display_hyphen, | ["Thaa"] = no_display_hyphen, | ||
["Thai"] = no_display_hyphen, | ["Thai"] = no_display_hyphen, | ||
["Tibt"] = no_display_hyphen, | |||
} | } | ||
| Line 232: | Line 275: | ||
local function glossary_link(entry, text) | local function glossary_link(entry, text) | ||
text = text or entry | text = text or entry | ||
return "[[ | return "[[Appendix:Glossary#" .. entry .. "|" .. text .. "]]" | ||
end | |||
local function track(page) | |||
if type(page) == "table" then | |||
for i, pg in ipairs(page) do | |||
page[i] = "affix/" .. pg | |||
end | |||
else | |||
page = "affix/" .. page | |||
end | |||
require("Module:debug/track")(page) | |||
end | end | ||
| Line 401: | Line 456: | ||
for i, cat in ipairs(data.categories) do | for i, cat in ipairs(data.categories) do | ||
if type(cat) == "table" then | if type(cat) == "table" then | ||
data.categories[i] = require(utilities_module).format_categories( | data.categories[i] = require(utilities_module).format_categories(lang:getFullName() .. " " .. cat.cat, | ||
lang, cat.sort_key, cat.sort_base, force_cat) | lang, cat.sort_key, cat.sort_base, force_cat) | ||
else | else | ||
data.categories[i] = require(utilities_module).format_categories( | data.categories[i] = require(utilities_module).format_categories(lang:getFullName() .. " " .. cat, lang, | ||
data.data.sort_key, nil, force_cat) | data.data.sort_key, nil, force_cat) | ||
end | end | ||
| Line 410: | Line 465: | ||
cattext = table.concat(data.categories) | cattext = table.concat(data.categories) | ||
end | end | ||
local result = table.concat(data.parts_formatted, " +‎ ") .. (data.data.lit and ", literally " .. | local result = table.concat(data.parts_formatted, not data.separator_already_added and " +‎ " or nil) .. | ||
(data.data.lit and ", literally " .. m_links.mark(data.data.lit, "gloss") or "") | |||
local q = data.data.q | local q = data.data.q | ||
local qq = data.data.qq | local qq = data.data.qq | ||
local l = data.data.l | local l = data.data.l | ||
local ll = data.data.ll | local ll = data.data.ll | ||
if q and q[1] or qq and qq[1] or l and l[1] or ll and ll[1] then | local infl = data.data.infl | ||
if q and q[1] or qq and qq[1] or l and l[1] or ll and ll[1] or infl and infl[1] then | |||
result = require(pron_qualifier_module).format_qualifiers { | result = require(pron_qualifier_module).format_qualifiers { | ||
lang = lang, | lang = lang, | ||
| Line 424: | Line 480: | ||
l = l, | l = l, | ||
ll = ll, | ll = ll, | ||
infl = infl, | |||
} | } | ||
end | end | ||
return result .. cattext | return result .. cattext | ||
end | end | ||
| Line 461: | Line 500: | ||
-- Remove links and call lang: | -- Remove links and call lang:stripDiacritics(term). | ||
local function | local function strip_diacritics_no_links(lang, term) | ||
return lang:stripDiacritics(m_links.remove_links(term)) | |||
return | |||
end | end | ||
| Line 503: | Line 541: | ||
to access information for constructing the categories added by `format_derived()`. | to access information for constructing the categories added by `format_derived()`. | ||
]==] | ]==] | ||
function export.link_term(part, data) | function export.link_term(part, data, include_separator) | ||
local result | local result | ||
| Line 509: | Line 547: | ||
result = require(etymology_module).format_derived { | result = require(etymology_module).format_derived { | ||
lang = data.lang, | lang = data.lang, | ||
terms = {part}, | |||
sources = {part.lang}, | |||
sort_key = data.sort_key, | sort_key = data.sort_key, | ||
nocat = data.nocat, | nocat = data.nocat, | ||
template_name = "affix", | |||
qualifiers_labels_on_outside = true, | |||
borrowing_type = data.borrowing_type, | borrowing_type = data.borrowing_type, | ||
force_cat = data.force_cat or debug_force_cat, | force_cat = data.force_cat or debug_force_cat, | ||
} | } | ||
else | else | ||
result = m_links.full_link(part, "term", nil, "show qualifiers") | |||
result = m_links.full_link(part, "term") | |||
end | end | ||
if | if include_separator and part.separator then | ||
part. | return part.separator .. result | ||
else | |||
return result | |||
end | end | ||
end | end | ||
| Line 612: | Line 643: | ||
end | end | ||
if | if affix_type == "non-affix" then | ||
return term | return term | ||
elseif affix_type == "circumfix" then | elseif affix_type == "circumfix" then | ||
| Line 689: | Line 720: | ||
if mapping then | if mapping then | ||
if type(mapping) == "table" then | if type(mapping) == "table" then | ||
mapping = mapping[affix_id or false] | mapping = mapping[affix_id] or mapping.default or mapping[affix_id or false] | ||
if mapping then | if mapping then | ||
return mapping | return mapping | ||
| Line 721: | Line 752: | ||
end | end | ||
return do_lookup(affix) or do_lookup(lang:stripDiacritics(affix)) or nil | |||
return do_lookup(affix) or do_lookup | |||
end | end | ||
| Line 728: | Line 758: | ||
--[==[ | --[==[ | ||
For a given template term in a given language (see the definition of "template affix" near the top of the file), | For a given template term in a given language (see the definition of "template affix" near the top of the file), | ||
possibly in an explicitly specified script `sc` (but usually nil), return the term's affix type ({"prefix"}, {" | possibly in an explicitly specified script `sc` (but usually nil), return the term's affix type ({"prefix"}, | ||
{"suffix"}, {"circumfix"} or { | {"interfix"}, {"suffix"}, {"circumfix"} or {"non-affix"}) along with the corresponding link and display affixes | ||
near the top of the file); also the corresponding lookup affix (if `return_lookup_affix` is specified). The term passed | (see definitions near the top of the file); also the corresponding lookup affix (if `return_lookup_affix` is specified). | ||
in should already have any fragment (after the # sign) parsed off of it. Four values are returned: `affix_type`, | The term passed in should already have any fragment (after the # sign) parsed off of it. Four values are returned: | ||
`link_term`, `display_term` and `lookup_term`. The affix type can be passed in instead of autodetected | `affix_type`, `link_term`, `display_term` and `lookup_term`. The affix type can be passed in instead of autodetected; in | ||
this case, the template term need not have any attached hyphens, and the appropriate hyphens will be added in the | |||
hyphens will be added in the appropriate places. If `do_affix_mapping` is specified, look up the affix in the | appropriate places. If `do_affix_mapping` is specified, look up the affix in the lang-specific affix mappings, as | ||
lang-specific affix mappings, as described in the comment at the top of the file; otherwise, the link and display terms | described in the comment at the top of the file; otherwise, the link and display terms will always be the same. (They | ||
will always be the same. (They will be the same in any case if the template term has a bracketed link in it or is not | will be the same in any case if the template term has a bracketed link in it or is not an affix.) If | ||
an affix.) If `return_lookup_affix` is given, the fourth return value contains the term with appropriate lookup hyphens | `return_lookup_affix` is given, the fourth return value contains the term with appropriate lookup hyphens in the | ||
in the appropriate places; otherwise, it is the same as the display term. (This functionality is used in | appropriate places; otherwise, it is the same as the display term. (This functionality is used in | ||
[[Module:category tree | [[Module:category tree/affixes and compounds]] to convert link affixes into lookup affixes so that they can be looked up | ||
they can be looked up in the affix mapping tables.) | in the affix mapping tables.) | ||
]==] | ]==] | ||
local function parse_term_for_affixes(term, lang, sc, affix_type, do_affix_mapping, return_lookup_affix, affix_id) | local function parse_term_for_affixes(term, lang, sc, affix_type, do_affix_mapping, return_lookup_affix, affix_id) | ||
if not term then | if not term then | ||
return | return "non-affix", nil, nil, nil | ||
end | end | ||
if term == "^" then | |||
-- Indicates a null term to emulate the behavior of {{suffix|foo||bar}}. | |||
term = "" | |||
return "non-affix", term, term, term | |||
end | |||
if term:find("^%^") then | if term:find("^%^") then | ||
-- | -- HACK! ^ at the beginning of a term in Korean languages has a special meaning, triggering capitalization of the | ||
-- transliteration. Don't interpret it as "force non-affix" for those languages. | |||
local langcode = lang:getCode() | |||
if langcode ~= "ko" and langcode ~= "okm" and langcode ~= "jje" then | |||
-- Formerly we allowed ^ to force non-affix type; this is now handled using an inline modifier | |||
-- <naf>, <root>, etc. Throw an error for the moment when the old way is encountered. | |||
error("Use of ^ to force non-affix status is no longer supported; use an inline modifier <naf> or <root> " .. | |||
"after the component") | |||
end | |||
end | end | ||
| Line 763: | Line 805: | ||
thyph = "([" .. thyph .. "])" | thyph = "([" .. thyph .. "])" | ||
if affix_type | if not affix_type then | ||
if rfind(term, thyph .. " " .. thyph) then | if rfind(term, thyph .. " " .. thyph) then | ||
affix_type = "circumfix" | affix_type = "circumfix" | ||
| Line 770: | Line 812: | ||
local has_ending_hyphen = rfind(term, thyph .. "$") | local has_ending_hyphen = rfind(term, thyph .. "$") | ||
if has_beginning_hyphen and has_ending_hyphen then | if has_beginning_hyphen and has_ending_hyphen then | ||
affix_type = " | affix_type = "interfix" | ||
elseif has_ending_hyphen then | elseif has_ending_hyphen then | ||
affix_type = "prefix" | affix_type = "prefix" | ||
elseif has_beginning_hyphen then | elseif has_beginning_hyphen then | ||
affix_type = "suffix" | affix_type = "suffix" | ||
else | |||
affix_type = "non-affix" | |||
end | end | ||
end | end | ||
| Line 780: | Line 824: | ||
local link_term, display_term, lookup_term | local link_term, display_term, lookup_term | ||
if affix_type then | if affix_type == "non-affix" then | ||
link_term = term | |||
display_term = term | |||
lookup_term = term | |||
else | |||
display_term = reconstruct_term_per_hyphens(term, affix_type, scode, thyph, dhyph) | display_term = reconstruct_term_per_hyphens(term, affix_type, scode, thyph, dhyph) | ||
if do_affix_mapping then | if do_affix_mapping then | ||
| Line 800: | Line 848: | ||
lookup_term = display_term | lookup_term = display_term | ||
end | end | ||
end | end | ||
| Line 819: | Line 863: | ||
is of the wrong type). Three values are returned: the link term, display term and lookup term. This function is a thin | is of the wrong type). Three values are returned: the link term, display term and lookup term. This function is a thin | ||
wrapper around `parse_term_for_affixes`; see the comments above that function for more information. Note that this | wrapper around `parse_term_for_affixes`; see the comments above that function for more information. Note that this | ||
function is exposed externally because it is called by [[Module:category tree | function is exposed externally because it is called by [[Module:category tree/affixes and compounds]]; see the comment | ||
see the comment in `parse_term_for_affixes` for more information. | in `parse_term_for_affixes` for more information. | ||
]==] | ]==] | ||
function export.make_affix(term, lang, sc, affix_type, do_affix_mapping, return_lookup_affix, affix_id) | function export.make_affix(term, lang, sc, affix_type, do_affix_mapping, return_lookup_affix, affix_id) | ||
if not (affix_type == "prefix" or affix_type == "suffix" or affix_type == "circumfix" or affix_type == "infix" or | if not (affix_type == "prefix" or affix_type == "suffix" or affix_type == "circumfix" or affix_type == "infix" or | ||
affix_type == "interfix") then | affix_type == "interfix" or affix_type == "non-affix") then | ||
error("Internal error: Invalid affix type " .. (affix_type or "(nil)")) | error("Internal error: Invalid affix type " .. (affix_type or "(nil)")) | ||
end | end | ||
| Line 839: | Line 883: | ||
--[==[ | --[==[ | ||
Core categorization logic for affixes. This is shared between show_affix(), show_compound_like() and | |||
get_affix_categories_only(). Returns the categories array and other metadata needed for formatting. | |||
]==] | ]==] | ||
function | local function generate_affix_categories(data) | ||
data.pos = data.pos or default_pos | data.pos = data.pos or default_pos | ||
data.pos = pluralize(data.pos) | data.pos = pluralize(data.pos) | ||
| Line 872: | Line 895: | ||
-- Process each part | -- Process each part | ||
local whole_words = 0 | local whole_words = 0 | ||
local is_affix_or_compound = false | local is_affix_or_compound = false | ||
| Line 882: | Line 904: | ||
data.parts[i] = part | data.parts[i] = part | ||
canonicalize_part(part, data.lang, data.sc) | canonicalize_part(part, data.lang, data.sc) | ||
-- Determine affix type and get link and display terms (see text at top of file). Store them in the part | -- Determine affix type and get link and display terms (see text at top of file). Store them in the part | ||
-- (in fields that won't clash with fields used by full_link() in [[Module:links]] or link_term()), so they | -- (in fields that won't clash with fields used by full_link() in [[Module:links]] or link_term()), so they | ||
-- can be used in the loop below when categorizing. | -- can be used in the loop below when categorizing. | ||
part.affix_type, part.affix_link_term, part.affix_display_term = parse_term_for_affixes(part.term, | part.affix_type, part.affix_link_term, part.affix_display_term = parse_term_for_affixes(part.term, | ||
part.lang, part.sc, | part.lang, part.sc, part.type, not part.alt, nil, part.id) | ||
-- If link_term is an empty string, either a bare ^ was specified or an empty term was used along with inline | -- If link_term is an empty string, either a bare ^ was specified or an empty term was used along with inline | ||
| Line 895: | Line 916: | ||
-- redundant alt text. | -- redundant alt text. | ||
part.alt = part.alt or (part.affix_display_term ~= part.affix_link_term and part.affix_display_term) or nil | part.alt = part.alt or (part.affix_display_term ~= part.affix_link_term and part.affix_display_term) or nil | ||
end | end | ||
-- Now do categorization. | if not data.noaffixcat then | ||
-- Now do categorization. | |||
for i, part in ipairs_with_gaps(data.parts) do | |||
local affix_type = part.affix_type | |||
if affix_type ~= "non-affix" then | |||
is_affix_or_compound = true | |||
if affix_type | -- Make a sort key. For the first part, use the second part as the sort key; the intention is that if the | ||
-- term has a prefix, sorting by the prefix won't be very useful so we sort by what follows, which is | |||
-- presumably the root. | |||
local part_sort_base = nil | |||
local part_sort = part.sort or data.sort_key | |||
if i == 1 and data.parts[2] and data.parts[2].term then | |||
local part2 = data.parts[2] | |||
-- If the second-part link term is empty, the user requested an unlinked term; avoid a wikitext error | |||
-- by using the alt value if available. | |||
part_sort_base = ine(part2.affix_link_term) or ine(part2.alt) | |||
if part_sort_base then | |||
part_sort_base = strip_diacritics_no_links(part2.lang, part_sort_base) | |||
if part_sort_base then | end | ||
part_sort_base = | end | ||
if part.pos and rfind(part.pos, "patronym") then | |||
table.insert(categories, {cat = "patronymics", sort_key = part_sort, sort_base = part_sort_base}) | |||
end | |||
if data.pos ~= "terms" and part.pos and rfind(part.pos, "diminutive") then | |||
table.insert(categories, {cat = "diminutive " .. data.pos, sort_key = part_sort, | |||
sort_base = part_sort_base}) | |||
end | |||
-- Don't add a '*fixed with' category if the link term is empty or is in a different language. | |||
if ine(part.affix_link_term) and not part.part_lang then | |||
table.insert(categories, {cat = data.pos .. " " .. affix_type .. "ed with " .. | |||
strip_diacritics_no_links(part.lang, part.affix_link_term) .. | |||
(part.id and " (" .. part.id .. ")" or ""), | |||
sort_key = part_sort, sort_base = part_sort_base}) | |||
end | |||
else | |||
whole_words = whole_words + 1 | |||
if whole_words == 2 then | |||
is_affix_or_compound = true | |||
table.insert(categories, "compound " .. data.pos) | |||
end | end | ||
end | end | ||
end | |||
-- Make sure there was either an affix or a compound (two or more non-affix terms). | |||
if not is_affix_or_compound and not data.allow_no_affixes_or_compounds then | |||
error("The parameters did not include any affixes, and the term is not a compound. Please provide at least one affix.") | |||
end | |||
end | |||
return text_sections, categories, borrowing_type | |||
end | |||
--[==[ | |||
Implementation of {{tl|affix}} and {{tl|surface analysis}}. `data` contains all the information describing the affixes to | |||
be displayed, and contains the following: | |||
* `.lang` ('''required'''): Overall language object. Different from term-specific language objects (see `.parts` below). | |||
* `.sc`: Overall script object (usually omitted). Different from term-specific script objects. | |||
* `.parts` ('''required'''): List of objects describing the affixes to show. The general format of each object is as would | |||
be passed to `full_link()`, except that the `.lang` field should be missing unless the term is of a language | |||
different from the overall `.lang` value (in such a case, the language name is shown along with the term and | |||
an additional "derived from" category is added). '''WARNING''': The data in `.parts` will be destructively | |||
modified. | |||
* `.pos`: Overall part of speech (used in categories, defaults to {"terms"}). Different from term-specific part of speech. | |||
* `.sort_key`: Overall sort key. Normally omitted except e.g. in Japanese. | |||
* `.type`: Type of compound, if the parts in `.parts` describe a compound. Strictly optional, and if supplied, the | |||
compound type is displayed before the parts (normally capitalized, unless `.nocap` is given). | |||
* `.nocap`: Don't capitalize the first letter of text displayed before the parts (relevant only if `.type` or | |||
`.surface_analysis` is given). | |||
* `.notext`: Don't display any text before the parts (relevant only if `.type` or `.surface_analysis` is given). | |||
* `.nocat`: Disable all categorization. | |||
* `.noaffixcat`: Disable affix (and compound) categorization. Relevant for e.g. blends, which may otherwise | |||
be incorrectly categorized as compound terms. | |||
* `.lit`: Overall literal definition. Different from term-specific literal definitions. | |||
* `.force_cat`: Always display categories, even on userspace pages. | |||
* `.surface_analysis`: Implement {{tl|surface analysis}}; adds `By surface analysis, ` before the parts. | |||
'''WARNING''': This destructively modifies both `data` and the individual structures within `.parts`. | |||
]==] | |||
function export.show_affix(data) | |||
local text_sections, categories, borrowing_type = generate_affix_categories(data) | |||
-- | -- Process each part for display | ||
local parts_formatted = {} | |||
for i, part in ipairs_with_gaps(data.parts) do | |||
-- Make a link for the part | |||
table.insert(parts_formatted, export.link_term(part, data, "include_separator")) | |||
end | end | ||
| Line 961: | Line 1,018: | ||
text = ucfirst(text) | text = ucfirst(text) | ||
end | end | ||
table.insert(text_sections, 1, text) | table.insert(text_sections, 1, text) | ||
end | end | ||
table.insert(text_sections, export.join_formatted_parts { data = data, parts_formatted = parts_formatted, | table.insert(text_sections, export.join_formatted_parts { data = data, parts_formatted = parts_formatted, | ||
categories = categories }) | categories = categories, separator_already_added = true }) | ||
return table.concat(text_sections) | return table.concat(text_sections) | ||
end | |||
--[==[
Compute just the categories that show_affix() would produce for `data`, with no text output and no formatting.
Module:etymon uses this to obtain affix categorization.

The return value is an array of category objects. Each entry is either a plain string (a simple category name) or a
table with the keys `cat`, `sort_key` and `sort_base` (for more complex categorization).

`data` has the same structure as the argument of show_affix():
* `.lang` (required): Overall language object
* `.parts` (required): Array of affix part objects with `.term`, `.lang`, `.id`, etc.
* `.pos`: Part of speech (defaults to "terms")
* `.sort_key`: Overall sort key for categories

'''WARNING''': This destructively modifies both `data` and the individual structures within `.parts`.
]==]
function export.get_affix_categories_only(data)
	-- generate_affix_categories() returns the text sections first and the categories second (followed by the
	-- borrowing type); only the categories are of interest here.
	local _, categories = generate_affix_categories(data)
	return categories
end
| Line 973: | Line 1,049: | ||
function export.show_surface_analysis(data)
	-- Both flags are set directly on the caller's `data` table before delegating.
	-- NOTE(review): `allow_no_affixes_or_compounds` is consumed elsewhere in the module; judging by its name it
	-- lets the request through even when no part is an affix or compound -- confirm against its consumer.
	data.allow_no_affixes_or_compounds = true
	-- `surface_analysis` makes show_affix() add `By surface analysis, ` before the parts.
	data.surface_analysis = true
	return export.show_affix(data)
end
| Line 983: | Line 1,060: | ||
]==] | ]==] | ||
function export.show_compound(data) | function export.show_compound(data) | ||
local data_for_cats = m_table.shallowCopy(data) | |||
data_for_cats.parts = {} | |||
for k, part in pairs(data.parts) do | |||
data_for_cats.parts[k] = m_table.shallowCopy(part) | |||
end | |||
data_for_cats.allow_no_affixes_or_compounds = true | |||
local categories = export.get_affix_categories_only(data_for_cats) | |||
data.pos = data.pos or default_pos | data.pos = data.pos or default_pos | ||
data.pos = pluralize(data.pos) | data.pos = pluralize(data.pos) | ||
local text_sections, | local text_sections, _, borrowing_type = | ||
process_etymology_type(data.type, data.nocap, data.notext, #data.parts > 0) | process_etymology_type(data.type, data.nocap, data.notext, #data.parts > 0) | ||
data.borrowing_type = borrowing_type | data.borrowing_type = borrowing_type | ||
local parts_formatted = {} | local parts_formatted = {} | ||
table.insert(categories, "compound " .. data.pos) | table.insert(categories, "compound " .. data.pos) | ||
| Line 999: | Line 1,084: | ||
-- Determine affix type and get link and display terms (see text at top of file). | -- Determine affix type and get link and display terms (see text at top of file). | ||
local affix_type, link_term, display_term = parse_term_for_affixes(part.term, part.lang, part.sc, | local affix_type, link_term, display_term = parse_term_for_affixes(part.term, part.lang, part.sc, | ||
part.type, not part.alt, nil, part.id) | |||
-- If the term is an | -- If the term is an interfix or the type was explicitly given, recognize it as such (which means e.g. that we | ||
-- will display the term without hyphens for East Asian languages). Otherwise, ignore the fact that it looks | |||
-- like an affix and display as specified in the template (but pay attention to the detected affix type for | |||
if affix_type == " | -- certain tracking purposes). | ||
if affix_type == "interfix" or (part.type and part.type ~= "non-affix") then | |||
-- If link_term is an empty string, either a bare ^ was specified or an empty term was used along with | -- If link_term is an empty string, either a bare ^ was specified or an empty term was used along with | ||
-- inline modifiers. The intention in either case is not to link the term. Don't add a '*fixed with' | -- inline modifiers. The intention in either case is not to link the term. Don't add a '*fixed with' | ||
| Line 1,011: | Line 1,097: | ||
-- redundant alt text. | -- redundant alt text. | ||
if link_term and link_term ~= "" and not part.part_lang then | if link_term and link_term ~= "" and not part.part_lang then | ||
table.insert(categories, {cat = data.pos .. " | table.insert(categories, {cat = data.pos .. " " .. affix_type .. "ed with " .. | ||
strip_diacritics_no_links(part.lang, link_term), sort_key = part.sort or data.sort_key}) | |||
end | end | ||
part.term = link_term ~= "" and link_term or nil | part.term = link_term ~= "" and link_term or nil | ||
part.alt = part.alt or (display_term ~= link_term and display_term) or nil | part.alt = part.alt or (display_term ~= link_term and display_term) or nil | ||
else | else | ||
if affix_type then | if affix_type ~= "non-affix" then | ||
local langcode = data.lang:getCode() | local langcode = data.lang:getCode() | ||
-- If `data.lang` is an etymology-only language, track both using its code and its full parent's code. | |||
track { affix_type, affix_type .. "/lang/" .. langcode } | |||
local full_langcode = data.lang:getFullCode() | local full_langcode = data.lang:getFullCode() | ||
if langcode ~= full_langcode then | |||
track(affix_type .. "/lang/" .. full_langcode) | |||
end | |||
else | else | ||
whole_words = whole_words + 1 | whole_words = whole_words + 1 | ||
end | end | ||
end | end | ||
table.insert(parts_formatted, export.link_term(part, data)) | table.insert(parts_formatted, export.link_term(part, data, "include_separator")) | ||
end | |||
if whole_words == 1 then | |||
track("one whole word") | |||
elseif whole_words == 0 then | |||
track("looks like confix") | |||
end | end | ||
table.insert(text_sections, export.join_formatted_parts { data = data, parts_formatted = parts_formatted, | table.insert(text_sections, export.join_formatted_parts { data = data, parts_formatted = parts_formatted, | ||
categories = categories }) | categories = categories, separator_already_added = true }) | ||
return table.concat(text_sections) | return table.concat(text_sections) | ||
end | end | ||
| Line 1,039: | Line 1,136: | ||
]==] | ]==] | ||
function export.show_compound_like(data) | function export.show_compound_like(data) | ||
data.allow_no_affixes_or_compounds = true | |||
local categories = | local text_sections, categories, borrowing_type = generate_affix_categories(data) | ||
if data.cat then | if data.cat then | ||
| Line 1,046: | Line 1,143: | ||
end | end | ||
-- | -- Process each part for display | ||
for i, part in | local parts_formatted = {} | ||
for i, part in ipairs_with_gaps(data.parts) do | |||
table.insert(parts_formatted, export.link_term(part, data)) | -- Make a link for the part | ||
table.insert(parts_formatted, export.link_term(part, data, "include_separator")) | |||
end | end | ||
if #data.parts > 0 and data.oftext then | |||
table.insert(text_sections, 1, " " .. data.oftext .. " ") | |||
end | |||
if data.text then | if data.text then | ||
table.insert(text_sections, data.text | table.insert(text_sections, 1, data.text) | ||
end | end | ||
table.insert(text_sections, export.join_formatted_parts { data = data, parts_formatted = parts_formatted, | table.insert(text_sections, export.join_formatted_parts { data = data, parts_formatted = parts_formatted, | ||
categories = categories }) | categories = categories, separator_already_added = true }) | ||
return table.concat(text_sections) | return table.concat(text_sections) | ||
end | end | ||
| Line 1,098: | Line 1,194: | ||
part.ts = export.make_affix(part.ts, part.lang, Latn, affix_type) | part.ts = export.make_affix(part.ts, part.lang, Latn, affix_type) | ||
end | end | ||
-- Add tracking pages when `part`, used in the template named `template`, is detected as a different kind of affix
-- than `expected_affix_type` (pass nil when the part is expected to be a base rather than an affix). Nothing is
-- tracked when `part` is missing or when its `.type` was given explicitly.
-- NOTE(review): other code in this module compares the detected type against the string "non-affix". If
-- parse_term_for_affixes() reports plain words that way rather than as nil, every base (expected type nil) will be
-- tracked here -- confirm which convention applies.
local function track_wrong_affix_type(template, part, expected_affix_type)
	if not part or part.type then
		return
	end
	-- Only the first return value (the detected affix type) is needed.
	local detected_type = parse_term_for_affixes(part.term, part.lang, part.sc)
	if detected_type == expected_affix_type then
		return
	end

	local code = part.lang:getCode()
	local full_code = part.lang:getFullCode()
	-- Build the tracking keys from the most general to the most specific.
	local slot_key = template .. "/" .. (expected_affix_type or "base")
	local detected_key = slot_key .. "/" .. (detected_type or "none")
	local lang_key_prefix = detected_key .. "/lang/"
	require("Module:debug/track") {
		template,
		slot_key,
		detected_key,
		lang_key_prefix .. code,
	}
	-- If `part.lang` is an etymology-only language, track both using its code and its full parent's code.
	if full_code ~= code then
		require("Module:debug/track")(lang_key_prefix .. full_code)
	end
end
local function insert_affix_category(categories, pos, affix_type, part, sort_key, sort_base) | local function insert_affix_category(categories, pos, affix_type, part, sort_key, sort_base) | ||
-- Don't add a '*fixed with' category if the link term is empty or is in a different language. | -- Don't add a '*fixed with' category if the link term is empty or is in a different language. | ||
if part.term and not part.part_lang then | if part.term and not part.part_lang then | ||
local cat = pos .. " " .. affix_type .. "ed with " .. | local cat = pos .. " " .. affix_type .. "ed with " .. strip_diacritics_no_links(part.lang, part.term) .. | ||
(part.id and " (" .. part.id .. ")" or "") | (part.id and " (" .. part.id .. ")" or "") | ||
if sort_key or sort_base then | if sort_key or sort_base then | ||
| Line 1,126: | Line 1,247: | ||
make_part_into_affix(data.prefix, data.lang, data.sc, "prefix") | make_part_into_affix(data.prefix, data.lang, data.sc, "prefix") | ||
make_part_into_affix(data.suffix, data.lang, data.sc, "suffix") | make_part_into_affix(data.suffix, data.lang, data.sc, "suffix") | ||
track_wrong_affix_type("circumfix", data.prefix, "prefix") | |||
track_wrong_affix_type("circumfix", data.base, nil) | |||
track_wrong_affix_type("circumfix", data.suffix, "suffix") | |||
-- Create circumfix term. | -- Create circumfix term. | ||
| Line 1,143: | Line 1,268: | ||
local sort_base | local sort_base | ||
if data.base.term then | if data.base.term then | ||
sort_base = | sort_base = strip_diacritics_no_links(data.base.lang, data.base.term) | ||
end | end | ||
| Line 1,152: | Line 1,277: | ||
-- Insert the categories, but don't add a '*fixed with' category if the link term is in a different language. | -- Insert the categories, but don't add a '*fixed with' category if the link term is in a different language. | ||
if not data.prefix.part_lang then | if not data.prefix.part_lang then | ||
table.insert(categories, {cat=data.pos .. " circumfixed with " .. | table.insert(categories, {cat=data.pos .. " circumfixed with " .. strip_diacritics_no_links(data.prefix.lang, | ||
circumfix), sort_key=data.sort_key, sort_base=sort_base}) | circumfix), sort_key=data.sort_key, sort_base=sort_base}) | ||
end | end | ||
| Line 1,173: | Line 1,298: | ||
make_part_into_affix(data.prefix, data.lang, data.sc, "prefix") | make_part_into_affix(data.prefix, data.lang, data.sc, "prefix") | ||
make_part_into_affix(data.suffix, data.lang, data.sc, "suffix") | make_part_into_affix(data.suffix, data.lang, data.sc, "suffix") | ||
track_wrong_affix_type("confix", data.prefix, "prefix") | |||
track_wrong_affix_type("confix", data.base, nil) | |||
track_wrong_affix_type("confix", data.suffix, "suffix") | |||
-- Make links out of all the parts. | -- Make links out of all the parts. | ||
| Line 1,178: | Line 1,307: | ||
local prefix_sort_base | local prefix_sort_base | ||
if data.base and data.base.term then | if data.base and data.base.term then | ||
prefix_sort_base = | prefix_sort_base = strip_diacritics_no_links(data.base.lang, data.base.term) | ||
elseif data.suffix.term then | elseif data.suffix.term then | ||
prefix_sort_base = | prefix_sort_base = strip_diacritics_no_links(data.suffix.lang, data.suffix.term) | ||
end | end | ||
| Line 1,213: | Line 1,342: | ||
-- Hyphenate the affixes and apply any affix mappings. | -- Hyphenate the affixes and apply any affix mappings. | ||
make_part_into_affix(data.infix, data.lang, data.sc, "infix") | make_part_into_affix(data.infix, data.lang, data.sc, "infix") | ||
track_wrong_affix_type("infix", data.base, nil) | |||
track_wrong_affix_type("infix", data.infix, "infix") | |||
-- Make links out of all the parts. | -- Make links out of all the parts. | ||
| Line 1,243: | Line 1,375: | ||
make_part_into_affix(prefix, data.lang, data.sc, "prefix") | make_part_into_affix(prefix, data.lang, data.sc, "prefix") | ||
end | end | ||
for i, prefix in ipairs(data.prefixes) do | |||
track_wrong_affix_type("prefix", prefix, "prefix") | |||
end | |||
track_wrong_affix_type("prefix", data.base, nil) | |||
-- Make links out of all the parts. | -- Make links out of all the parts. | ||
| Line 1,252: | Line 1,390: | ||
first_sort_base = ine(data.prefixes[2].term) or ine(data.prefixes[2].alt) | first_sort_base = ine(data.prefixes[2].term) or ine(data.prefixes[2].alt) | ||
if first_sort_base then | if first_sort_base then | ||
first_sort_base = | first_sort_base = strip_diacritics_no_links(data.prefixes[2].lang, first_sort_base) | ||
end | end | ||
elseif data.base then | elseif data.base then | ||
first_sort_base = ine(data.base.term) or ine(data.base.alt) | first_sort_base = ine(data.base.term) or ine(data.base.alt) | ||
if first_sort_base then | if first_sort_base then | ||
first_sort_base = | first_sort_base = strip_diacritics_no_links(data.base.lang, first_sort_base) | ||
end | end | ||
end | end | ||
| Line 1,291: | Line 1,429: | ||
for i, suffix in ipairs(data.suffixes) do | for i, suffix in ipairs(data.suffixes) do | ||
make_part_into_affix(suffix, data.lang, data.sc, "suffix") | make_part_into_affix(suffix, data.lang, data.sc, "suffix") | ||
end | |||
track_wrong_affix_type("suffix", data.base, nil) | |||
for i, suffix in ipairs(data.suffixes) do | |||
track_wrong_affix_type("suffix", suffix, "suffix") | |||
end | end | ||