Module:form of: Difference between revisions

No edit summary
No edit summary
 
(One intermediate revision by the same user not shown)
Line 3: Line 3:
export.force_cat = false -- for testing; set to true to display categories even on non-mainspace pages
export.force_cat = false -- for testing; set to true to display categories even on non-mainspace pages


local debug_track_module = "Module:debug/track"
local etymology_module = "Module:etymology"
local form_of_cats_module = "Module:form of/cats"
local form_of_cats_module = "Module:form of/cats"
local form_of_data_module = "Module:form of/data"
local form_of_data_module = "Module:form of/data"
Line 39: Line 41:
local sort = table.sort
local sort = table.sort
local split_tag_set -- Defined below.
local split_tag_set -- Defined below.
local tagged_inflections -- Defined below.
local type = type
local type = type


Line 52: Line 53:
contains = require(table_module).contains
contains = require(table_module).contains
return contains(...)
return contains(...)
end
-- Lazy loader for the tracking function. The first call requires the module
-- named by `debug_track_module`, rebinds this local to the value returned by
-- `require` (so later calls skip the loader), and forwards all arguments to it.
local function debug_track(...)
	local loaded = require(debug_track_module)
	debug_track = loaded
	return loaded(...)
end
end


Line 107: Line 113:
parse_inline_modifiers = require(parse_utilities_module).parse_inline_modifiers
parse_inline_modifiers = require(parse_utilities_module).parse_inline_modifiers
return parse_inline_modifiers(...)
return parse_inline_modifiers(...)
end
-- Lazy loader for `remove_links`. The first call requires the module named by
-- `links_module`, rebinds this local to that module's `remove_links` field (so
-- later calls skip the loader), and forwards all arguments to it.
local function remove_links(...)
	local loaded = require(links_module).remove_links
	remove_links = loaded
	return loaded(...)
end
end


Line 273: Line 284:
   common syncretism between simple past and past participle in English (and in this case, `spast` is itself a list
   common syncretism between simple past and past participle in English (and in this case, `spast` is itself a list
   shortcut that expands to `simple|past`).]==]
   shortcut that expands to `simple|past`).]==]
-- Record a tracking entry for PAGE, for use when invoked from {{inflection of}}
-- or a similar template. The entry is filed under the "inflection of/" prefix,
-- i.e. the tracking page is [[Wiktionary:Tracking/inflection of/PAGE]].
local function track(page)
	-- Link syntax must not end up in the tracking page name (it may cause an
	-- error), so replace "[" with "(", "]" with ")" and "|" with "!".
	-- Each single-target assignment keeps only the string result of gsub and
	-- drops the substitution count.
	local sanitized = page:gsub("%[", "(")
	sanitized = sanitized:gsub("%]", ")")
	sanitized = sanitized:gsub("|", "!")
	debug_track("inflection of/" .. sanitized)
end


local function wrap_in_span(text, classes)
local function wrap_in_span(text, classes)
Line 280: Line 301:
return text
return text
end
end
end
-- Format one term object as a link wrapped in a span.
--
-- `data` is a table with the following fields:
-- * `.termobj`: The term object to link; its `.lang` field is read, and the object is passed on to `full_link`.
-- * `.face`: Second argument passed to `full_link`.
-- * `.span_classes`: Classes passed to `wrap_in_span` for the resulting link.
-- * `.overall_lang`: Optional overall language. If given and its code differs from the code of `.termobj.lang`,
--   a language display string (obtained from `insert_source_cat_get_display` in the etymology module) followed by
--   a space is wrapped using `.text_classes` and stored as the term's `.pretext`.
-- * `.text_classes`: Classes passed to `wrap_in_span` for that language text.
-- * `.ok_to_destructively_modify`: If truthy, `.termobj` is modified in place; otherwise it is shallow-copied
--   before any field is set on it.
--
-- Returns two values: the wrapped link, and a category list. The list is an empty table unless the language codes
-- differ, in which case it is whatever `insert_source_cat_get_display` returned as its second value.
local function show_linked_term(data)
	local term = data.termobj
	local link_face = data.face
	local link_classes = data.span_classes
	local may_modify = data.ok_to_destructively_modify
	local outer_lang = data.overall_lang
	local label_classes = data.text_classes

	local categories = {}
	local lang_prefix

	if outer_lang and outer_lang:getCode() ~= term.lang:getCode() then
		local lang_display, source_categories = require(etymology_module).insert_source_cat_get_display {
			lang = outer_lang,
			source = term.lang,
		}
		categories = source_categories
		lang_prefix = wrap_in_span(lang_display .. " ", label_classes)
	end

	-- Any of these qualifier-like fields being set means they must be displayed.
	local has_qualifiers = term.q or term.qq or term.a or term.aa or term.l or term.ll or term.refs

	-- Only copy when something is about to be written to the term and the caller has not allowed in-place
	-- modification.
	if (lang_prefix or has_qualifiers) and not may_modify then
		term = shallow_copy(term)
	end

	if lang_prefix then
		term.pretext = lang_prefix
	end
	if has_qualifiers then
		term.show_qualifiers = true
	end

	local linked = wrap_in_span(full_link(term, link_face), link_classes)
	return linked, categories
end
end


Line 288: Line 338:
the following fields:
the following fields:


* `.lang`: Overall language of the form-of template. If specified, any lemmas, enclitics or base lemmas that are of a
  different language will have that language displayed before the term in question.
* `.text`: Text to insert before the lemmas. Wrapped in the value of `.text_classes`, or its default; see below.
* `.text`: Text to insert before the lemmas. Wrapped in the value of `.text_classes`, or its default; see below.
* `.lemmas`: List of objects describing the lemma(s) of which the term in question is a non-lemma form. These are passed
* `.lemmas`: List of objects describing the lemma(s) of which the term in question is a non-lemma form. These are passed
Line 296: Line 348:
   text.
   text.
* `.lemma_face`: "Face" to use when displaying the lemma objects. Usually should be set to {"term"}.
* `.lemma_face`: "Face" to use when displaying the lemma objects. Usually should be set to {"term"}.
* `.conj`: Conjunction or separator to use when joining multiple lemma objects. Defaults to {"and"}.
* `.conj`: Conjunction or separator to use when joining multiple lemma objects. If {nil}, defaults to {"and"}. If this
  has the value {false}, the lemmas are preceded with the `.separator` field in each lemma.
* `.enclitics`: List of enclitics to display after the lemmas, in parens.
* `.enclitics`: List of enclitics to display after the lemmas, in parens.
* `.enclitic_conj`: Conjunction or separator to use when joining multiple enclitics. Defaults to {"and"}.
* `.enclitic_conj`: Conjunction or separator to use when joining multiple enclitics. Defaults to {"and"}.
Line 312: Line 365:
* `.lemma_classes`: Additional CSS classes used to wrap the lemma links. Default is {"form-of-definition-link"}.
* `.lemma_classes`: Additional CSS classes used to wrap the lemma links. Default is {"form-of-definition-link"}.
   Use `false` for no wrapping.
   Use `false` for no wrapping.
* `.posttext`: Additional text to display after the lemma links.]==]
* `.posttext`: Additional text to display after the lemma links.
* `.ok_to_destructively_modify`: If set, data structures (including the nested lemma structures) can be modified
  in-place to save memory; otherwise they will be copied before modifying.
 
Returns two values, the formatted string and any categories to add the page to (which will arise if `.lang` is
specified and a language other than `.lang` is given in one of the lemmas in `.lemmas` or enclitics in `.enclitics`).
]==]
function export.format_form_of(data)
function export.format_form_of(data)
if type(data) ~= "table" then
if type(data) ~= "table" then
Line 318: Line 377:
end
end
local text_classes = data.text_classes
local text_classes = data.text_classes
if text_classes == nil then
if text_classes == nil and not data.nowrap then
text_classes = "form-of-definition use-with-mention"
text_classes = "form-of-definition use-with-mention"
end
end
Line 333: Line 392:
insert(parts, " ")
insert(parts, " ")
end
end
local categories = {}
if data.lemmas then
if data.lemmas then
if type(data.lemmas) == "string" then
if type(data.lemmas) == "string" then
Line 339: Line 399:
local formatted_terms = {}
local formatted_terms = {}
for _, lemma in ipairs(data.lemmas) do
for _, lemma in ipairs(data.lemmas) do
insert(formatted_terms, wrap_in_span(
local linked_term, this_categories = show_linked_term {
full_link(lemma, data.lemma_face, nil, "show qualifiers"), lemma_classes
termobj = lemma,
))
face = data.lemma_face,
span_classes = lemma_classes,
ok_to_destructively_modify = data.ok_to_destructively_modify,
overall_lang = data.lang,
text_classes = text_classes
}
if this_categories[1] then
extend(categories, this_categories)
end
if data.conj == false and lemma.separator then
insert(formatted_terms, lemma.separator)
end
insert(formatted_terms, linked_term)
end
if data.conj == false then
insert(parts, concat(formatted_terms))
else
insert(parts, serial_comma_join(formatted_terms, {conj = data.conj or "and"}))
end
end
insert(parts, serial_comma_join(formatted_terms, {conj = data.conj or "and"}))
end
if data.lit then
insert(parts, ", literally " .. require(links_module).mark(data.lit, "gloss"))
end
end
end
end
Line 355: Line 434:
for _, enclitic in ipairs(data.enclitics) do
for _, enclitic in ipairs(data.enclitics) do
-- FIXME, should we have separate clitic face and/or classes?
-- FIXME, should we have separate clitic face and/or classes?
insert(formatted_terms, wrap_in_span(
local linked_term, this_categories = show_linked_term {
full_link(enclitic, data.lemma_face, nil, "show qualifiers"), lemma_classes
termobj = enclitic,
))
face = data.lemma_face,
span_classes = lemma_classes,
ok_to_destructively_modify = data.ok_to_destructively_modify,
overall_lang = data.lang,
text_classes = text_classes
}
if this_categories[1] then
extend(categories, this_categories)
end
insert(formatted_terms, linked_term)
end
end
insert(parts, " (")
insert(parts, " (")
Line 373: Line 461:
insert(parts, "</span>")
insert(parts, "</span>")
end
end
insert(parts, (tagged_inflections {
insert(parts, (export.tagged_inflections {
lang = base_lemma.lemmas[1].lang,
lang = data.lang or base_lemma.lemmas[1].lang,
tags = base_lemma.paramobj.tags,
tags = base_lemma.paramobj.tags,
lemmas = base_lemma.lemmas,
lemmas = base_lemma.lemmas,
Line 382: Line 470:
nocat = true,
nocat = true,
text_classes = data.text_classes,
text_classes = data.text_classes,
ok_to_destructively_modify = ok_to_destructively_modify,
}))
}))
if text_classes then
if text_classes then
Line 396: Line 485:
insert(parts, "</span>")
insert(parts, "</span>")
end
end
return concat(parts)
return concat(parts), categories
end
end
format_form_of = export.format_form_of
format_form_of = export.format_form_of
Line 465: Line 554:
return tag
return tag
end
end
-- Maybe track the expansion if it's not the same as the raw tag.
if do_track and expansion ~= tag and type(expansion) == "string" then
track("tag/" .. tag)
end
return expansion
return expansion
end
end
Line 505: Line 597:
end
end
tag = expansion
tag = expansion
 
if not lookup_tag(tag, lang) and do_track then
-- If after all expansions and normalizations we don't recognize the canonical tag, track it.
track("unknown")
track("unknown/" .. tag)
end
return tag
return tag
end
end
Line 527: Line 623:
tag = lookup_shortcut(tag, lang, do_track)
tag = lookup_shortcut(tag, lang, do_track)
if type(tag) == "table" then
if type(tag) == "table" then
-- Temporary tracking as we will disallow this.
track("list-tag-inside-of-multipart")
-- We found a list-tag shortcut; treat as if colon-separated.
-- We found a list-tag shortcut; treat as if colon-separated.
components = tag
components = tag
Line 534: Line 632:
end
end
local normtags = {}
local normtags = {}
-- Temporary tracking as we will disallow this.
track("two-level-multipart")
for _, component in ipairs(components) do
for _, component in ipairs(components) do
if do_track then
-- There are multiple components; track each of the individual
-- raw tags.
track("tag/" .. component)
end
insert(normtags, normalize_single_tag(component, lang, do_track))
insert(normtags, normalize_single_tag(component, lang, do_track))
end
end
Line 566: Line 671:
local normtags = {}
local normtags = {}
for _, single_tag in ipairs(split_tags) do
for _, single_tag in ipairs(split_tags) do
if do_track then
-- If the tag was a multipart tag, track each of the individual raw tags.
track("tag/" .. single_tag)
end
insert(normtags, normalize_multipart_component(single_tag, lang, do_track))
insert(normtags, normalize_multipart_component(single_tag, lang, do_track))
end
end
Line 611: Line 720:


for _, tag in ipairs(tag_set) do
for _, tag in ipairs(tag_set) do
if do_track then
-- Track the raw tag.
track("tag/" .. tag)
end
-- Expand the tag, which may generate a new tag (either a fully canonicalized tag, a multipart tag, or a list
-- Expand the tag, which may generate a new tag (either a fully canonicalized tag, a multipart tag, or a list
-- of tags).
-- of tags).
Line 623: Line 736:


for _, t in ipairs(tag) do
for _, t in ipairs(tag) do
if do_track then
-- If the tag expands to a list of raw tags, track each of those.
track("tag/" .. t)
end
insert(output_tag_set, normalize_tag(t, lang, do_track))
insert(output_tag_set, normalize_tag(t, lang, do_track))
end
end
Line 637: Line 754:
output_tag_set = {}
output_tag_set = {}
for i, tag in ipairs(tag_set) do
for i, tag in ipairs(tag_set) do
if do_track then
-- Track the raw tag.
track("tag/" .. tag)
end
-- Expand the tag, which may generate a new tag (either a fully canonicalized tag, a multipart tag, or a list
-- Expand the tag, which may generate a new tag (either a fully canonicalized tag, a multipart tag, or a list
-- of tags).
-- of tags).
Line 733: Line 854:
-- passed in must be a string (i.e. it cannot be a list describing a
-- passed in must be a string (i.e. it cannot be a list describing a
-- multipart tag). To handle multipart tags, use get_tag_display_form().
-- multipart tag). To handle multipart tags, use get_tag_display_form().
local function get_single_tag_display_form(normtag, lang)
-- A truthy `nolink` suppresses linking.
local function get_single_tag_display_form(normtag, lang, nolink)
local data = lookup_tag(normtag, lang)
local data = lookup_tag(normtag, lang)
local display = normtag
local display = normtag
Line 740: Line 862:
if data and data.display then
if data and data.display then
display = data.display
display = data.display
if nolink then
display = remove_links(display)
end
end
end


-- If there is a nonempty glossary index, then show a link to it
-- If there is a nonempty glossary index, then show a link to it
local glossary = data and data[(m_data or get_m_data()).GLOSSARY]
if not nolink then
if glossary ~= nil then
local glossary = data and data[(m_data or get_m_data()).GLOSSARY]
if glossary == m_data.WIKT then
if glossary ~= nil then
display = "[[" .. normtag .. "|" .. display .. "]]"
if glossary == m_data.WIKT then
elseif glossary == m_data.WP then
display = "[[wikt:" .. normtag .. "|" .. display .. "]]"
display = "[[w:" .. normtag .. "|" .. display .. "]]"
elseif glossary == m_data.WP then
elseif glossary == m_data.APPENDIX then
display = "[[w:" .. normtag .. "|" .. display .. "]]"
display = "[[Appendix:Glossary#" .. anchor_encode(normtag) .. "|" .. display .. "]]"
elseif glossary == m_data.APPENDIX then
elseif type(glossary) ~= "string" then
display = "[[wikt:Appendix:Glossary#" .. anchor_encode(normtag) .. "|" .. display .. "]]"
error(("Internal error: Wrong type %s for glossary value %s for tag %s"):format(
elseif type(glossary) ~= "string" then
type(glossary), dump(glossary), normtag))
error(("Internal error: Wrong type %s for glossary value %s for tag %s"):format(
else
type(glossary), dump(glossary), normtag))
local link = glossary:match("^wikt:(.*)")
else
if link then
local link = glossary:match("^(wikt:.*)")
display = "[[" .. link .. "|" .. display .. "]]"
end
if not link then
link = glossary:match("^w:(.*)")
if link then
if link then
display = "[[w:" .. link .. "|" .. display .. "]]"
display = "[[" .. link .. "|" .. display .. "]]"
end
if not link then
link = glossary:match("^w:(.*)")
if link then
display = "[[w:" .. link .. "|" .. display .. "]]"
end
end
if not link then
display = "[[wikt:Appendix:Glossary#" .. anchor_encode(glossary) .. "|" .. display .. "]]"
end
end
end
if not link then
display = "[[Appendix:Glossary#" .. anchor_encode(glossary) .. "|" .. display .. "]]"
end
end
end
end
Line 780: Line 907:
more), {"slash"} ("foo/bar"), {"en-dash"} ("foo–bar") or {nil}, which uses the global default found in
more), {"slash"} ("foo/bar"), {"en-dash"} ("foo–bar") or {nil}, which uses the global default found in
{multipart_join_strategy()} in [[Module:form of/functions]]. (NOTE: The global default is {"slash"} and this seems
{multipart_join_strategy()} in [[Module:form of/functions]]. (NOTE: The global default is {"slash"} and this seems
unlikely to change.)]==]
unlikely to change.) A truthy `nolink` suppresses linking.]==]
function export.get_tag_display_form(tagspec, lang, joiner)
function export.get_tag_display_form(tagspec, lang, joiner, nolink)
if type(tagspec) == "string" then
if type(tagspec) == "string" then
return get_single_tag_display_form(tagspec, lang)
return get_single_tag_display_form(tagspec, lang, nolink)
end
end
-- We have a multipart tag. See if there's a display handler to display them specially.
-- We have a multipart tag. See if there's a display handler to display them specially.
Line 789: Line 916:
local displayval = handler(tagspec, joiner)
local displayval = handler(tagspec, joiner)
if displayval then
if displayval then
if nolink then
displayval = remove_links(displayval)
end
return displayval
return displayval
end
end
Line 796: Line 926:
for _, first_level_tag in ipairs(tagspec) do
for _, first_level_tag in ipairs(tagspec) do
if type(first_level_tag) == "string" then
if type(first_level_tag) == "string" then
insert(displayed_tags, get_single_tag_display_form(first_level_tag, lang))
insert(displayed_tags, get_single_tag_display_form(first_level_tag, lang, nolink))
else
else
-- A first-level element of a two-level multipart tag. Currently we just separate the individual components
-- A first-level element of a two-level multipart tag. Currently we just separate the individual components
Line 802: Line 932:
local components = {}
local components = {}
for _, component in ipairs(first_level_tag) do
for _, component in ipairs(first_level_tag) do
insert(components, get_single_tag_display_form(component, lang))
insert(components, get_single_tag_display_form(component, lang, nolink))
end
end
insert(displayed_tags, concat(components, " "))
insert(displayed_tags, concat(components, " "))
Line 814: Line 944:
Given a normalized tag set (i.e. as output by {normalize_tag_set()}; all tags are in canonical form, multipart tags are
Given a normalized tag set (i.e. as output by {normalize_tag_set()}; all tags are in canonical form, multipart tags are
represented as lists, and two-level multipart tags as lists of lists), convert to displayed form (a string). See
represented as lists, and two-level multipart tags as lists of lists), convert to displayed form (a string). See
{get_tag_display_form()} for the meaning of `joiner`.]==]
{get_tag_display_form()} for the meaning of `joiner`. A truthy `nolink` suppresses linking.]==]
function export.get_tag_set_display_form(normalized_tag_set, lang, joiner)
function export.get_tag_set_display_form(normalized_tag_set, lang, joiner, nolink)
local parts = {}
local parts = {}


for _, tagspec in ipairs(normalized_tag_set) do
for _, tagspec in ipairs(normalized_tag_set) do
local to_insert = get_tag_display_form(tagspec, lang, joiner)
local to_insert = get_tag_display_form(tagspec, lang, joiner, nolink)
-- Maybe insert a space before inserting the display form of the tag. We insert a space if
-- Maybe insert a space before inserting the display form of the tag. We insert a space if
-- (a) we're not the first tag; and
-- (a) we're not the first tag; and
Line 1,231: Line 1,361:
* `.nocat`: Suppress computation of categories (even if `.no_format_categories` is not given).
* `.nocat`: Suppress computation of categories (even if `.no_format_categories` is not given).
* `.notext`: Disable display of all tag text and `inflection of` text. (FIXME: Maybe not implemented correctly.)
* `.notext`: Disable display of all tag text and `inflection of` text. (FIXME: Maybe not implemented correctly.)
* `.nolink`: Suppress linking of terms in inflection tags.
* `.capfirst`: Capitalize the first word displayed.
* `.capfirst`: Capitalize the first word displayed.
* `.pretext`: Additional text to display before the inflection tags, but after any top-level labels.
* `.pretext`: Additional text to display before the inflection tags, but after any top-level labels.
Line 1,237: Line 1,368:
   {"form-of-definition use-with-mention"}.
   {"form-of-definition use-with-mention"}.
* `.lemma_classes`: Additional CSS classes used to wrap the lemma links. Default is {"form-of-definition-link"}.
* `.lemma_classes`: Additional CSS classes used to wrap the lemma links. Default is {"form-of-definition-link"}.
`.joiner`: Override the joiner (normally a slash) used to join multipart tags. You should normally not specify this.
* `.joiner`: Override the joiner (normally a slash) used to join multipart tags. You should normally not specify this.
* `.nowrap`: Do not wrap the form-of definitions in a span, unless `.text_classes` is specified.
* `.ok_to_destructively_modify`: If set, data structures (including the nested lemma structures) can be modified
  in-place to save memory; otherwise they will be copied before modifying.


A typical call might look like this (for {{m+|es|amo}}): {
A typical call might look like this (for {{m+|es|amo}}): {
Line 1,291: Line 1,425:
extend(categories, this_categories)
extend(categories, this_categories)
end
end
local cur_infl = get_tag_set_display_form(normalized_tag_set, data.lang, data.joiner)
local cur_infl = get_tag_set_display_form(normalized_tag_set, data.lang, data.joiner, data.nolink)
if #cur_infl > 0 then
if #cur_infl > 0 then
if tag_set.labels then
if tag_set.labels then
Line 1,321: Line 1,455:


local of_text = data.lemmas and " of" or ""
local of_text = data.lemmas and " of" or ""
local formatted_text
local formatted_text, this_categories
if #inflections == 1 then
if #inflections == 1 then
if need_per_tag_set_labels then
if need_per_tag_set_labels then
Line 1,328: Line 1,462:
format_data.text = format_labels(overall_labels, data, data.notext) .. (data.pretext or "") .. (data.notext and "" or
format_data.text = format_labels(overall_labels, data, data.notext) .. (data.pretext or "") .. (data.notext and "" or
((data.capfirst and ucfirst(inflections[1].infl_text) or inflections[1].infl_text) .. of_text))
((data.capfirst and ucfirst(inflections[1].infl_text) or inflections[1].infl_text) .. of_text))
formatted_text = format_form_of(format_data)
formatted_text, this_categories = format_form_of(format_data)
else
else
format_data.text = format_labels(overall_labels, data, data.notext) .. (data.pretext or "") .. (data.notext and "" or
format_data.text = format_labels(overall_labels, data, data.notext) .. (data.pretext or "") .. (data.notext and "" or
((data.capfirst and "Inflection" or "inflection") .. of_text))
((data.capfirst and "Inflection" or "inflection") .. of_text))
format_data.posttext = (data.posttext or "") .. ":"
format_data.posttext = (data.posttext or "") .. ":"
local link = format_form_of(format_data)
local link
link, this_categories = format_form_of(format_data)
local text_classes = data.text_classes
local text_classes = data.text_classes
if text_classes == nil then
if text_classes == nil and not data.nowrap then
text_classes = "form-of-definition use-with-mention"
text_classes = "form-of-definition use-with-mention"
end
end
Line 1,343: Line 1,478:
end
end
formatted_text = link .. concat(inflections)
formatted_text = link .. concat(inflections)
end
if this_categories[1] then
extend(categories, this_categories)
end
end


if not data.no_format_categories then
if not data.no_format_categories then
if #categories > 0 then
if categories[1] then
formatted_text = formatted_text .. format_categories(categories, data.lang,
formatted_text = formatted_text .. format_categories(categories, data.lang,
data.sort, nil, export.force_cat)
data.sort, nil, export.force_cat)
Line 1,354: Line 1,492:
return formatted_text, categories
return formatted_text, categories
end
end
tagged_inflections = export.tagged_inflections


function export.dump_form_of_data(frame)
function export.dump_form_of_data(frame)