Module:links: Difference between revisions

No edit summary
No edit summary
 
(7 intermediate revisions by the same user not shown)
Line 10: Line 10:
[[Module:scripts]]
[[Module:scripts]]
[[Module:languages]] and its submodules
[[Module:languages]] and its submodules
[[Module:getn]]
[[Module:gender and number]]
]=]
]=]


local anchors_module = "Module:anchors"
local anchors_module = "Module:anchors"
local form_of_module = "Module:form of"
local gender_and_number_module = "Module:getn"
local gender_and_number_module = "Module:getn"
local languages_module = "Module:languages"
local languages_module = "Module:languages"
Line 42: Line 43:
local unstrip = mw.text.unstrip
local unstrip = mw.text.unstrip


local NAMESPACE = get_current_title().namespace
local NAMESPACE = get_current_title().nsText


local function anchor_encode(...)
local function anchor_encode(...)
Line 158: Line 159:
umatch = require(string_utilities_module).match
umatch = require(string_utilities_module).match
return umatch(...)
return umatch(...)
end
local m_headword_data
local function get_headword_data()
m_headword_data = load_data("Module:headword/data")
return m_headword_data
end
end


Line 163: Line 170:
-- Unconditionally trimmed charset.
-- Unconditionally trimmed charset.
local always_trim =
local always_trim =
"\194\128-\194\159" .. -- U+0080-009F (C1 control characters)
"\194\128-\194\159" ..   -- U+0080-009F (C1 control characters)
"\194\173" .. -- U+00AD (soft hyphen)
"\194\173" ..           -- U+00AD (soft hyphen)
"\226\128\170-\226\128\174" .. -- U+202A-202E (directionality formatting characters)
"\226\128\170-\226\128\174" .. -- U+202A-202E (directionality formatting characters)
"\226\129\166-\226\129\169" -- U+2066-2069 (directionality formatting characters)
"\226\129\166-\226\129\169" -- U+2066-2069 (directionality formatting characters)


-- Standard trimmed charset.
-- Standard trimmed charset.
local standard_trim = "%s" .. -- (default whitespace charset)
local standard_trim = "%s" .. -- (default whitespace charset)
"\226\128\139-\226\128\141" .. -- U+200B-200D (zero-width spaces)
"\226\128\139-\226\128\141" .. -- U+200B-200D (zero-width spaces)
always_trim
always_trim


Line 228: Line 235:
function export.get_wikilink_parts(text, allow_bad_target)
function export.get_wikilink_parts(text, allow_bad_target)
-- TODO: replace `allow_bad_target` with `allow_unsupported`, with support for links to unsupported titles, including escape sequences.
-- TODO: replace `allow_bad_target` with `allow_unsupported`, with support for links to unsupported titles, including escape sequences.
if ( -- Filters out anything but "[[...]]" with no intermediate "[[" or "]]".
if (                       -- Filters out anything but "[[...]]" with no intermediate "[[" or "]]".
not match(text, "^()%[%[") or -- Faster than sub(text, 1, 2) ~= "[[".
not match(text, "^()%[%[") or -- Faster than sub(text, 1, 2) ~= "[[".
find(text, "[[", 3, true) or
find(text, "[[", 3, true) or
find(text, "]]", 3, true) ~= #text - 1
find(text, "]]", 3, true) ~= #text - 1
) then
) then
return nil, nil
return nil, nil
end
end
Line 249: Line 256:
if title == nil then
if title == nil then
return nil, nil
return nil, nil
-- If the link target starts with "#" then mw.title.new returns a broken
-- If the link target starts with "#" then mw.title.new returns a broken
-- title object, so grab the current title and give it the correct fragment.
-- title object, so grab the current title and give it the correct fragment.
elseif title.prefixedText == "" then
elseif title.prefixedText == "" then
local fragment = title.fragment
local fragment = title.fragment
Line 286: Line 293:
if not close then
if not close then
return get_fragment(text)
return get_fragment(text)
-- If there is one, but it's redundant (i.e. encloses everything with no pipe), remove and process.
-- If there is one, but it's redundant (i.e. encloses everything with no pipe), remove and process.
elseif open == 1 and close == #text - 1 and not find(text, "|", 3, true) then
elseif open == 1 and close == #text - 1 and not find(text, "|", 3, true) then
return get_fragment(sub(text, 3, -3))
return get_fragment(sub(text, 3, -3))
Line 294: Line 301:
end
end


local pos_tags
--[==[
--[==[
Given a link target as passed to `full_link()`, get the actual page that the target refers to. This removes
Given a link target as passed to `full_link()`, get the actual page that the target refers to. This removes
bold, italics, strip markers and HTML; calls `makeEntryName()` for the language in question; converts targets
bold, italics, strip markers and HTML; calls `makeEntryName()` for the language in question; converts targets
beginning with `*` to the Reconstruction namespace; and converts appendix-constructed languages to the Appendix
beginning with `*` to the Reconstruction namespace; and converts appendix-constructed languages to the Appendix
namespace. Returns up to three values: the page, the original target minus any anti-asterisk !!, and a third
namespace. Returns up to three values:
value `true` if the target had a backslash-escaped * in it.
# the actual page to link to, or {nil} to not link to anything;
# how the target should be displayed, if the user didn't explicitly specify any display text; generally the
  same as the original target, but minus any anti-asterisk !!;
# the value `true` if the target had a backslash-escaped * in it (FIXME: explain this more clearly).
]==]
]==]
function export.get_link_page_with_auto_display(target, lang, sc, plain)
function export.get_link_page_with_auto_display(target, lang, sc, plain)
Line 308: Line 317:
return nil
return nil
end
end
 
target = remove_formatting(target)
target = remove_formatting(target)
 
if target:sub(1, 1) == ":" then
if target:sub(1, 1) == ":" then
-- FIXME, the auto_display (second return value) should probably remove the colon
-- FIXME, the auto_display (second return value) should probably remove the colon
return target:sub(2), orig_target
return target:sub(2), orig_target
end
end
 
local prefix = target:match("^(.-):")
local prefix = target:match("^(.-):")
-- Convert any escaped colons
-- Convert any escaped colons
target = target:gsub("\\:", ":")
target = target:gsub("\\:", ":")
if prefix then
if prefix then
-- If this is a link to another namespace or an interwiki link, ensure there's an initial colon and then return what we have (so that it works as a conventional link, and doesn't do anything weird like add the term to a category.)
-- If this is a link to another namespace or an interwiki link, ensure there's an initial colon and then
-- return what we have (so that it works as a conventional link, and doesn't do anything weird like add the term
-- to a category.)
prefix = ulower(trim(prefix))
prefix = ulower(trim(prefix))
if prefix ~= "" and (
if prefix ~= "" and (
load_data("Module:data/namespaces")[prefix] or
load_data("Module:data/namespaces")[prefix] or
load_data("Module:data/interwikis")[prefix]
load_data("Module:data/interwikis")[prefix]
) then
) then
return target, orig_target
return target, orig_target
end
end
Line 346: Line 357:
end
end
target, escaped = target:gsub("^(\\-)\\%*", "%1*")
target, escaped = target:gsub("^(\\-)\\%*", "%1*")
 
if reconstructed == 0 and lang:hasType("reconstructed") and not lang:hasType("conlang") then
orig_target = "*" .. target
reconstructed = 1
end
if not (sc and sc:getCode() ~= "None") then
if not (sc and sc:getCode() ~= "None") then
sc = lang:findBestScript(target)
sc = lang:findBestScript(target)
Line 359: Line 375:


-- Get the entry name for the language.
-- Get the entry name for the language.
target = lang:makeEntryName(target, sc)
target = lang:makeEntryName(target, sc, reconstructed == 1 or lang:hasType("appendix-constructed"))


-- If the link contains unexpanded template parameters, then don't create a link.
-- If the link contains unexpanded template parameters, then don't create a link.
if target:match("{{{.-}}}") then
if target:match("{{{.-}}}") then
-- FIXME: Should we return the original target as the default display value (second return value)?
return nil
return nil
end
end


-- Link to appendix for reconstructed terms and terms in appendix-only languages. Plain links interpret * literally, however.
-- Link to appendix for reconstructed terms and terms in appendix-only languages. Plain links interpret *
if reconstructed == 1 then
-- literally, however.
if lang:getFullCode() == "und" then
if not lang:hasType("conlang") then
return nil
if lang:hasType("appendix-constructed") then
target = "wikt:Appendix:" .. lang:getFullName() .. "/" .. target
elseif reconstructed == 1 then -- asterisk found
if lang:getFullCode() == "und" then
-- Return the original target as default display value. If we don't do this, we wrongly get
-- [Term?] displayed instead.
return nil, orig_target
end
target = "wikt:Reconstruction:" .. lang:getFullName() .. "/" .. target
elseif anti_asterisk ~= 1 and (lang:hasType("reconstructed") or lang:getFamilyCode() == "qfa-sub") then
--error("The specified language " .. lang:getCanonicalName()
--.. " is unattested, while the given term does not begin with '*' to indicate that it is reconstructed.")
orig_target = "*" .. target
end
end
if not lang:hasType("conlang") then
target = "Reconstruction:" .. lang:getFullName() .. "/" .. target
end
-- Reconstructed languages and substrates require an initial *.
elseif anti_asterisk ~= 1 and (lang:hasType("reconstructed") or lang:getFamilyCode() == "qfa-sub") then
error("The specified language " .. lang:getCanonicalName()
.. " is unattested, while the given term does not begin with '*' to indicate that it is reconstructed.")
elseif lang:hasType("appendix-constructed") then
target = "Appendix:" .. lang:getFullName() .. "/" .. target
else
target = target
end
end
target = (lang:hasType("conlang") and "Contionary:" or "wikt:") .. target
target = (lang:hasType("conlang") and "Contionary:" or "wikt:") .. target
 
return target, orig_target, escaped > 0
return target, orig_target, escaped > 0
end
end
Line 415: Line 433:
-- If the target is "" then it's a link like [[#English]], which refers to the current page.
-- If the target is "" then it's a link like [[#English]], which refers to the current page.
if auto_display == "" then
if auto_display == "" then
auto_display = load_data("Module:headword/data").pagename
auto_display = (m_headword_data or get_headword_data()).pagename
end
end


Line 422: Line 440:
auto_display = auto_display:gsub("\\([^\\]*%*)", "%1", 1)
auto_display = auto_display:gsub("\\([^\\]*%*)", "%1", 1)
end
end
 
-- Process the display form.
-- Process the display form.
if link.display then
if link.display then
Line 450: Line 468:
link.display = lang:makeDisplayText(auto_display, sc)
link.display = lang:makeDisplayText(auto_display, sc)
end
end
 
if not link.target then
if not link.target then
return link.display
return link.display
end
end
 
-- If the target is the same as the current page, there is no sense id
-- If the target is the same as the current page, there is no sense id
-- and either the language code is "und" or the current L2 is the current
-- and either the language code is "und" or the current L2 is the current
Line 461: Line 479:
local fragment, current_L2 = link.fragment, get_current_L2()
local fragment, current_L2 = link.fragment, get_current_L2()
if (
if (
fragment and fragment == current_L2 or
fragment and fragment == current_L2 or
not (id or fragment) and (lang:getFullCode() == "und" or lang:getFullName() == current_L2)
not (id or fragment) and (lang:getFullCode() == "und" or lang:getFullName() == current_L2)
) then
) then
return tostring(mw.html.create("strong")
return tostring(mw.html.create("strong")
:addClass("selflink")
:addClass("selflink")
Line 492: Line 510:
end
end
end
end
 
-- Put inward-facing square brackets around a link to isolated spacing character(s).
-- Put inward-facing square brackets around a link to isolated spacing character(s).
if isolated and #link.display > 0 and not umatch(decode_entities(link.display), "%S") then
if isolated and #link.display > 0 and not umatch(decode_entities(link.display), "%S") then
Line 501: Line 519:
return m1 .. encode_entities(m2, "#%&+/:<=>@[\\]_{|}")
return m1 .. encode_entities(m2, "#%&+/:<=>@[\\]_{|}")
end)
end)
 
link.fragment = link.fragment and encode_entities(remove_formatting(link.fragment), "#%&+/:<=>@[\\]_{|}")
link.fragment = link.fragment and encode_entities(remove_formatting(link.fragment), "#%&+/:<=>@[\\]_{|}")
return "[[" .. link.target:gsub("^[^:]", ":%0") .. (link.fragment and "#" .. link.fragment or "") .. "|" .. link.display .. "]]"
return "[[" ..
link.target:gsub("^[^:]", ":%0") .. (link.fragment and "#" .. link.fragment or "") .. "|" .. link.display .. "]]"
end
end


Line 509: Line 528:
-- Split a link into its parts
-- Split a link into its parts
local function parse_link(linktext)
local function parse_link(linktext)
local link = {target = linktext}
local link = { target = linktext }


local target = link.target
local target = link.target
Line 520: Line 539:
-- There's no point in processing these, as they aren't real links.
-- There's no point in processing these, as they aren't real links.
local target_lower = link.target:lower()
local target_lower = link.target:lower()
for _, false_positive in ipairs({"category", "cat", "file", "image"}) do
for _, false_positive in ipairs({ "category", "cat", "file", "image" }) do
if target_lower:match("^" .. false_positive .. ":") then
if target_lower:match("^" .. false_positive .. ":") then
return nil
return nil
Line 565: Line 584:
text = text:gsub("^(\\-)\\%*", "%1*")
text = text:gsub("^(\\-)\\%*", "%1*")
end
end
 
check_params_ignored_when_embedded(alt, lang, id, cats)
check_params_ignored_when_embedded(alt, lang, id, cats)


Line 604: Line 623:
-- If the script uses ^ to capitalize transliterations, make sure that any carets preceding links are on the inside, so that they get processed with the following text.
-- If the script uses ^ to capitalize transliterations, make sure that any carets preceding links are on the inside, so that they get processed with the following text.
if (
if (
text:find("^", nil, true) and
text:find("^", nil, true) and
not sc:hasCapitalization() and
not sc:hasCapitalization() and
sc:isTransliterated()
sc:isTransliterated()
) then
) then
text = escape(text, "^")
text = escape(text, "^")
:gsub("%^\1", "\1%^")
:gsub("%^\1", "\1%^")
Line 630: Line 649:
lang, plain = get_lang("und"), true
lang, plain = get_lang("und"), true
end
end
 
-- Get the link target and display text. If the term is the empty string, treat the input as a link to the current page.
-- Get the link target and display text. If the term is the empty string, treat the input as a link to the current page.
if term == "" then
if term == "" then
Line 670: Line 689:
return nil
return nil
end
end
 
-- If there is no script, get one.
-- If there is no script, get one.
if not sc then
if not sc then
sc = lang:findBestScript(alt or term)
sc = lang:findBestScript(alt or term)
end
end
 
-- Embedded wikilinks need to be processed individually.
-- Embedded wikilinks need to be processed individually.
if term then
if term then
Line 684: Line 703:
term = selective_trim(term)
term = selective_trim(term)
end
end
 
-- If not, make a link using the parameters.
-- If not, make a link using the parameters.
return make_link({
return make_link({
Line 720: Line 739:
function export.language_link(data)
function export.language_link(data)
if type(data) ~= "table" then
if type(data) ~= "table" then
error("The first argument to the function language_link must be a table. See Module:links/documentation for more information.")
error(
"The first argument to the function language_link must be a table. See Module:links/documentation for more information.")
end
end


Line 744: Line 764:
function export.plain_link(data)
function export.plain_link(data)
if type(data) ~= "table" then
if type(data) ~= "table" then
error("The first argument to the function plain_link must be a table. See Module:links/documentation for more information.")
error(
"The first argument to the function plain_link must be a table. See Module:links/documentation for more information.")
end
end


Line 763: Line 784:
function export.embedded_language_links(data)
function export.embedded_language_links(data)
if type(data) ~= "table" then
if type(data) ~= "table" then
error("The first argument to the function embedded_language_links must be a table. See Module:links/documentation for more information.")
error(
"The first argument to the function embedded_language_links must be a table. See Module:links/documentation for more information.")
end
end


local term, lang, sc = data.term, data.lang, data.sc
local term, lang, sc = data.term, data.lang, data.sc
 
-- If we don't have a script, get one.
-- If we don't have a script, get one.
if not sc then
if not sc then
sc = lang:findBestScript(term)
sc = lang:findBestScript(term)
end
end
 
-- Do we have embedded wikilinks? If so, they need to be processed individually.
-- Do we have embedded wikilinks? If so, they need to be processed individually.
local open = find(term, "[[", nil, true)
local open = find(term, "[[", nil, true)
Line 778: Line 800:
return process_embedded_links(term, data.alt, lang, sc, data.id, data.cats, data.no_alt_ast)
return process_embedded_links(term, data.alt, lang, sc, data.id, data.cats, data.no_alt_ast)
end
end
 
-- If not, return the display text.
-- If not, return the display text.
term = selective_trim(term)
term = selective_trim(term)
-- FIXME: Double-escape any percent-signs, because we don't want to treat non-linked text as having percent-encoded characters. This is a hack: percent-decoding should come out of [[Module:languages]] and only be dealt with in this module, as it's specific to links.
-- FIXME: Double-escape any percent-signs, because we don't want to treat non-linked text as having percent-encoded characters. This is a hack: percent-decoding should come out of [[Module:languages]] and only be dealt with in this module, as it's specific to links.
term = term:gsub("%%", "%%25")
term = term:gsub("%%", "%%25")
return (lang:makeDisplayText(term, sc, true))
return lang:makeDisplayText(term, sc, true)
end
end


Line 804: Line 826:
elseif item_type == "pos" then
elseif item_type == "pos" then
tag = { '<span class="ann-pos">', '</span>' }
tag = { '<span class="ann-pos">', '</span>' }
elseif item_type == "non-gloss" then
tag = { '<span class="ann-non-gloss">', '</span>' }
elseif item_type == "annotations" then
elseif item_type == "annotations" then
tag = { '<span class="mention-gloss-paren annotation-paren">(</span>',
tag = { '<span class="mention-gloss-paren annotation-paren">(</span>',
'<span class="mention-gloss-paren annotation-paren">)</span>' }
'<span class="mention-gloss-paren annotation-paren">)</span>' }
elseif item_type == "infl" then
tag = { '<span class="ann-infl">', '</span>' }
end
end


Line 816: Line 842:
end
end


--[==[Formats the annotations that are displayed with a link created by {{code|lua|full_link}}. Annotations are the extra bits of information that are displayed following the linked term, and include things such as gender, transliteration, gloss and so on.  
local pos_tags
 
--[==[Formats the annotations that are displayed with a link created by {{code|lua|full_link}}. Annotations are the extra bits of information that are displayed following the linked term, and include things such as gender, transliteration, gloss and so on.
* The first argument is a table possessing some or all of the following keys:
* The first argument is a table possessing some or all of the following keys:
*:; <code class="n">genders</code>
*:; <code class="n">genders</code>
Line 825: Line 853:
*:: Gloss that translates the term in the link, or gives some other descriptive information.
*:: Gloss that translates the term in the link, or gives some other descriptive information.
*:; <code class="n">pos</code>
*:; <code class="n">pos</code>
*:: Part of speech of the linked term. If the given argument matches one of the templates in [[:Category:Part of speech tags]], then call that to show a part-of-speech tag. Otherwise, just show the given text as it is.
*:: Part of speech of the linked term. If the given argument matches one of the aliases in `pos_aliases` in [[Module:headword/data]], or consists of a part of speech or alias followed by `f` (for a non-lemma form), expand it appropriately. Otherwise, just show the given text as it is.
*:; <code class="n">ng</code>
*:: Arbitrary non-gloss descriptive text for the link. This should be used in preference to putting descriptive text in `gloss` or `pos`.
*:; <code class="n">lit</code>
*:; <code class="n">lit</code>
*:: Literal meaning of the term, if the usual meaning is figurative or idiomatic.
*:: Literal meaning of the term, if the usual meaning is figurative or idiomatic.
*:; <code class="n">infl</code>
*:: Table containing a list of grammar tags in the style of [[Module:form of]] `tagged_inflections`.
*:Any of the above values can be omitted from the <code class="n">info</code> argument. If a completely empty table is given (with no annotations at all), then an empty string is returned.
*:Any of the above values can be omitted from the <code class="n">info</code> argument. If a completely empty table is given (with no annotations at all), then an empty string is returned.
* The second argument is a string. Valid values are listed in [[Module:script utilities/data]] "data.translit" table.]==]
* The second argument is a string. Valid values are listed in [[Module:script utilities/data]] "data.translit" table.]==]
Line 886: Line 918:
end
end


pos_tags = pos_tags or load_data("Module:headword/data").pos_aliases
-- Canonicalize part of speech aliases as well as non-lemma aliases like 'nf' or 'nounf' for "noun form".
insert(annotations, export.mark(pos_tags[data.pos] or data.pos, "pos"))
pos_tags = pos_tags or (m_headword_data or get_headword_data()).pos_aliases
local pos = pos_tags[data.pos]
if not pos and data.pos:find("f$") then
local pos_form = data.pos:sub(1, -2)
-- We only expand something ending in 'f' if the result is a recognized non-lemma POS.
pos_form = (pos_tags[pos_form] or pos_form) .. " form"
if (m_headword_data or get_headword_data()).nonlemmas[pos_form .. "s"] then
pos = pos_form
end
end
insert(annotations, export.mark(pos or data.pos, "pos"))
end
 
-- Inflection data
if data.infl then
local m_form_of = require(form_of_module)
-- Split tag sets manually, since tagged_inflections creates a numbered list, and we do not want that.
local infl_outputs = {}
local tag_sets = m_form_of.split_tag_set(data.infl)
for _, tag_set in ipairs(tag_sets) do
table.insert(infl_outputs,
m_form_of.tagged_inflections({ tags = tag_set, lang = data.lang, nocat = true, nolink = true, nowrap = true }))
end
insert(annotations, export.mark(table.concat(infl_outputs, "; "), "infl"))
end
 
-- Non-gloss text
if data.ng then
insert(annotations, export.mark(data.ng, "non-gloss"))
end
end


Line 893: Line 953:
if data.lit then
if data.lit then
insert(annotations, "literally " .. export.mark(data.lit, "gloss"))
insert(annotations, "literally " .. export.mark(data.lit, "gloss"))
end
-- Provide a hook to insert additional annotations such as nested inflections.
if data.postprocess_annotations then
data.postprocess_annotations {
data = data,
annotations = annotations
}
end
end


Line 948: Line 1,016:
end
end


local function get_class(lang, tr, accel)
local function insert_if_not_blank(list, item)
if not accel then
if item == "" then
return
end
insert(list, item)
end
 
local function get_class(lang, tr, accel, nowrap)
if not accel and not nowrap then
return ""
return ""
end
end
local form = accel.form
local classes = {}
return "form-of lang-" .. lang:getFullCode() .. " " ..
if accel then
(form and encode_accel_param_chars(form) .. "-form-of" or "") .. " " ..
insert(classes, "form-of lang-" .. lang:getFullCode())
(encode_accel_param("gender-", accel.gender)) .. " " ..
local form = accel.form
(encode_accel_param("pos-", accel.pos)) .. " " ..
if form then
(encode_accel_param("transliteration-", accel.translit or (tr ~= "-" and tr or nil))) .. " " ..
insert(classes, encode_accel_param_chars(form) .. "-form-of")
(encode_accel_param("target-", accel.target)) .. " " ..
end
(encode_accel_param("origin-", accel.lemma)) .. " " ..
insert_if_not_blank(classes, encode_accel_param("gender-", accel.gender))
(encode_accel_param("origin_transliteration-", accel.lemma_translit)) .. " " ..
insert_if_not_blank(classes, encode_accel_param("pos-", accel.pos))
(accel.no_store and "form-of-nostore" or "") .. " "
insert_if_not_blank(classes, encode_accel_param("transliteration-", accel.translit or (tr ~= "-" and tr or nil)))
insert_if_not_blank(classes, encode_accel_param("target-", accel.target))
insert_if_not_blank(classes, encode_accel_param("origin-", accel.lemma))
insert_if_not_blank(classes, encode_accel_param("origin_transliteration-", accel.lemma_translit))
if accel.no_store then
insert(classes, "form-of-nostore")
end
end
if nowrap then
insert(classes, nowrap)
end
return concat(classes, " ")
end
end


Line 981: Line 1,067:
local q = data.q
local q = data.q
if type(q) == "string" then
if type(q) == "string" then
q = {q}
q = { q }
end
end
local qq = data.qq
local qq = data.qq
if type(qq) == "string" then
if type(qq) == "string" then
qq = {qq}
qq = { qq }
end
end
if q and q[1] or qq and qq[1] or data.a and data.a[1] or data.aa and data.aa[1] or data.l and data.l[1] or
if q and q[1] or qq and qq[1] or data.a and data.a[1] or data.aa and data.aa[1] or data.l and data.l[1] or
data.ll and data.ll[1] or data.refs and data.refs[1] then
data.ll and data.ll[1] or data.refs and data.refs[1] then
formatted = format_qualifiers{
formatted = format_qualifiers {
lang = data.lang,
lang = data.lang,
text = formatted,
text = formatted,
Line 1,006: Line 1,092:




--[==[Creates a full link, with annotations (see <code class="n">[[#format_link_annotations|format_link_annotations]]</code>), in the style of {{temp|l}} or {{temp|m}}.
--[==[
The first argument, <code class="n">data</code>, must be a table. It contains the various elements that can be supplied as parameters to {{temp|l}} or {{temp|m}}:
Creates a full link, with annotations (see `[[#format_link_annotations|format_link_annotations]]`), in the style of {{tl|l}} or {{tl|m}}.
The first argument, `data`, must be a table. It contains the various elements that can be supplied as parameters to {{tl|l}} or {{tl|m}}:
{ {
{ {
term = entry_to_link_to,
term = entry_to_link_to,
Line 1,015: Line 1,102:
track_sc = boolean,
track_sc = boolean,
no_nonstandard_sc_cat = boolean,
no_nonstandard_sc_cat = boolean,
fragment = link_fragment
fragment = link_fragment,
id = sense_id,
id = sense_id,
genders = { "gender1", "gender2", ... },
genders = { "gender1", "gender2", ... },
tr = transliteration,
tr = transliteration,
respect_link_tr = boolean,
ts = transcription,
ts = transcription,
gloss = gloss,
gloss = gloss,
pos = part_of_speech_tag,
pos = part_of_speech_tag,
ng = non-gloss text,
lit = literal_translation,
lit = literal_translation,
infl = { "form_of_grammar_tag1", "form_of_grammar_tag2", ... },
no_alt_ast = boolean,
no_alt_ast = boolean,
accel = {accelerated_creation_tags},
accel = {accelerated_creation_tags},
interwiki = interwiki,
interwiki = interwiki,
pretext = "text_at_beginning" or nil,
posttext = "text_at_end" or nil,
q = { "left_qualifier1", "left_qualifier2", ...} or "left_qualifier",
q = { "left_qualifier1", "left_qualifier2", ...} or "left_qualifier",
qq = { "right_qualifier1", "right_qualifier2", ...} or "right_qualifier",
qq = { "right_qualifier1", "right_qualifier2", ...} or "right_qualifier",
l = { "left_label1", "left_label2", ...},
ll = { "right_label1", "right_label2", ...},
a = { "left_accent_qualifier1", "left_accent_qualifier2", ...},
aa = { "right_accent_qualifier1", "right_accent_qualifier2", ...},
refs = { "formatted_ref1", "formatted_ref2", ...} or { {text = "text", name = "name", group = "group"}, ... },
refs = { "formatted_ref1", "formatted_ref2", ...} or { {text = "text", name = "name", group = "group"}, ... },
show_qualifiers = boolean,
} }
} }
Any one of the items in the <code class="n">data</code> table may be {{code|lua|nil}}, but an error will be shown if neither <code class="n">term</code> nor <code class="n">alt</code> nor <code class="n">tr</code> is present.
Any one of the items in the `data` table may be {nil}, but an error will be shown if neither `term` nor `alt` nor `tr`
Thus, calling {{code|lua|2=full_link{ term = term, lang = lang, sc = sc } }}, where <code class="n">term</code> is an entry name, <code class="n">lang</code>  is a [[Module:languages#Language objects|language object]] from [[Module:languages]], and <code class="n">sc</code> is a [[Module:scripts#Script objects|script object]] from [[Module:scripts]], will give a plain link similar to the one produced by the template {{temp|l}}, and calling {{code|lua|2=full_link( { term = term, lang = lang, sc = sc }, "term" )}} will give a link similar to the one produced by the template {{temp|m}}.
is present. Thus, calling {full_link{ term = term, lang = lang, sc = sc }}, where `term` is the page to link to (which
may have diacritics that will be stripped and/or embedded bracketed links) and `lang` is a
[[Module:languages#Language objects|language object]] from [[Module:languages]], will give a plain link similar to the
one produced by the template {{tl|l}}, and calling {full_link( { term = term, lang = lang, sc = sc }, "term" )} will
give a link similar to the one produced by the template {{tl|m}}.
 
The function will:
The function will:
* Try to determine the script, based on the characters found in the term or alt argument, if the script was not given. If a script is given and <code class="n">track_sc</code> is {{code|lua|true}}, it will check whether the input script is the same as the one which would have been automatically generated and add the category [[:Category:Terms with redundant script codes]] if yes, or [[:Category:Terms with non-redundant manual script codes]] if no. This should be used when the input script object is directly determined by a template's <code class="n">sc=</code> parameter.
* Try to determine the script, based on the characters found in the `term` or `alt` argument, if the script was not
* Call <code class="n">[[#language_link|language_link]]</code> on the term or alt forms, to remove diacritics in the page name, process any embedded wikilinks and create links to Reconstruction or Appendix pages when necessary.
  given. If a script is given and `track_sc` is {true}, it will check whether the input script is the same as the one
* Call <code class="n">[[Module:script utilities#tag_text]]</code> to add the appropriate language and script tags to the term, and to italicize terms written in the Latin script if necessary. Accelerated creation tags, as used by [[WT:ACCEL]], are included.
  which would have been automatically generated and add the category [[:Category:LANG terms with redundant script codes]]
* Generate a transliteration, based on the alt or term arguments, if the script is not Latin and no transliteration was provided.
  if yes, or [[:Category:LANG terms with non-redundant manual script codes]] if no. This should be used when the input
* Add the annotations (transliteration, gender, gloss etc.) after the link.
  script object is directly determined by a template's `sc` parameter.
* If <code class="n">no_alt_ast</code> is specified, then the alt text does not need to contain an asterisk if the language is reconstructed. This should only be used by modules which really need to allow links to reconstructions that don't display asterisks (e.g. number boxes).
* Call `[[#language_link|language_link]]` on the `term` or `alt` forms, to remove diacritics in the page name, process
* If <code class="n">show_qualifiers</code> is specified, left and right qualifiers and references will be displayed. (This is for compatibility reasons, since a fair amount of code stores qualifiers and/or references in these fields and displays them itself, expecting {{code|lua|full_link()}} to ignore them.)]==]
  any embedded wikilinks and create links to Reconstruction or Appendix pages when necessary.
* Call `[[Module:script utilities#tag_text]]` to add the appropriate language and script tags to the term and
  italicize terms written in the Latin script if necessary. Accelerated creation tags, as used by [[WT:ACCEL]], are
  included.
* Generate a transliteration, based on the `alt` or `term` arguments, if the script is not Latin, no transliteration was
  provided in `tr` and the combination of the term's language and script support automatic transliteration. The
  transliteration itself will be linked if both `.respect_link_tr` is specified and the language of the term has the
  `link_tr` property set for the script of the term; but not otherwise.
* Add the annotations (transliteration, gender, gloss, etc.) after the link.
* If `no_alt_ast` is specified, then the `alt` text does not need to contain an asterisk if the language is
  reconstructed. This should only be used by modules which really need to allow links to reconstructions that don't
  display asterisks (e.g. number boxes).
* If `pretext` or `posttext` is specified, this is text to (respectively) prepend or append to the output, directly
  before processing qualifiers, labels and references. This can be used to add arbitrary extra text inside of the
  qualifiers, labels and references.
* If `show_qualifiers` is specified or the `show_qualifiers` argument is given, then left and right qualifiers, accent
  qualifiers, labels and references will be displayed, otherwise they will be ignored. (This is because a fair amount of
  code stores qualifiers, labels and/or references in these fields and displays them itself, rather than expecting
  {full_link()} to display them.)]==]
function export.full_link(data, face, allow_self_link, show_qualifiers)
function export.full_link(data, face, allow_self_link, show_qualifiers)
-- Prevent data from being destructively modified.
local data = shallow_copy(data)
if type(data) ~= "table" then
if type(data) ~= "table" then
error("The first argument to the function full_link must be a table. "
error("The first argument to the function full_link must be a table. "
.. "See Module:links/documentation for more information.")
.. "See Module:links/documentation for more information.")
end
end
 
-- Prevent data from being destructively modified.
local data = shallow_copy(data)
 
-- FIXME: this shouldn't be added to `data`, as that means the input table needs to be cloned.
-- FIXME: this shouldn't be added to `data`, as that means the input table needs to be cloned.
data.cats = {}
data.cats = {}
 
-- Categorize links to "und".
-- Categorize links to "und".
local lang, cats = data.lang, data.cats
local lang, cats = data.lang, data.cats
Line 1,058: Line 1,178:
end
end


local terms = {true}
local terms = { true }


-- Generate multiple forms if applicable.
-- Generate multiple forms if applicable.
for _, param in ipairs{"term", "alt"} do
for _, param in ipairs { "term", "alt" } do
if type(data[param]) == "string" and data[param]:find("//", nil, true) then
if type(data[param]) == "string" and data[param]:find("//", nil, true) then
data[param] = export.split_on_slashes(data[param])
data[param] = export.split_on_slashes(data[param])
elseif type(data[param]) == "string" and not (type(data.term) == "string" and data.term:find("//", nil, true)) then
elseif type(data[param]) == "string" and not (type(data.term) == "string" and data.term:find("//", nil, true)) then
data[param] = lang:generateForms(data[param])
if not data.no_generate_forms then
data[param] = lang:generateForms(data[param])
else
data[param] = { data[param] }
end
else
else
data[param] = {}
data[param] = {}
Line 1,071: Line 1,195:
end
end


for _, param in ipairs{"sc", "tr", "ts"} do
for _, param in ipairs { "sc", "tr", "ts" } do
data[param] = {data[param]}
data[param] = { data[param] }
end
end


for _, param in ipairs{"term", "alt", "sc", "tr", "ts"} do
for _, param in ipairs { "term", "alt", "sc", "tr", "ts" } do
for i in pairs(data[param]) do
for i in pairs(data[param]) do
terms[i] = true
terms[i] = true
end
end
end
end
 
-- Create the link
-- Create the link
local output = {}
local output = {}
local id, no_alt_ast, srwc, accel, nevercalltr = data.id, data.no_alt_ast, data.suppress_redundant_wikilink_cat, data.accel, data.never_call_transliteration_module
local id, no_alt_ast, srwc, accel, nevercalltr = data.id, data.no_alt_ast, data.suppress_redundant_wikilink_cat,
data.accel, data.never_call_transliteration_module
local link_tr = data.respect_link_tr and lang:link_tr(data.sc[1])


for i in ipairs(terms) do
for i in ipairs(terms) do
Line 1,094: Line 1,220:
-- no_nonstandard_sc_cat is intended for use in [[Module:interproject]]
-- no_nonstandard_sc_cat is intended for use in [[Module:interproject]]
if (
if (
not data.no_nonstandard_sc_cat and
not data.no_nonstandard_sc_cat and
best:getCode() == "None" and
best:getCode() == "None" and
find_best_script_without_lang(display_term):getCode() ~= "None"
find_best_script_without_lang(display_term):getCode() ~= "None"
) then
) then
insert(cats, lang:getFullName() .. " terms in nonstandard scripts")
insert(cats, lang:getFullName() .. " terms in nonstandard scripts")
end
end
if not data.sc[i] then
if not data.sc[i] then
data.sc[i] = best
data.sc[i] = best
-- Track uses of sc parameter.
-- Track uses of sc parameter.
elseif data.track_sc then
elseif data.track_sc then
if data.sc[i]:getCode() == best:getCode() then
if data.sc[i]:getCode() == best:getCode() then
Line 1,133: Line 1,259:
if link then
if link then
-- Add "nowrap" class to prefixes in order to prevent wrapping after the hyphen
-- Add "nowrap" class to prefixes in order to prevent wrapping after the hyphen
local nowrap = ""
local nowrap
local display_term = data.alt[i] or data.term[i]
local display_term = data.alt[i] or data.term[i]
if display_term and (sub(display_term, 1, 1) == "-" or mw.ustring.sub(display_term, 1, 1) == "־") then -- "sub" does not work for the Hebrew-script hyphen
if display_term and (display_term:find("^%-") or display_term:find("^־")) then -- Hebrew maqqef -- FIXME, use hyphens from [[Module:affix]]
nowrap = " nowrap"
nowrap = "nowrap"
end
end
 
link = tag_text(link, lang, data.sc[i], face, get_class(lang, data.tr[i], accel) .. nowrap)
link = tag_text(link, lang, data.sc[i], face, get_class(lang, data.tr[i], accel, nowrap))
else
else
--[[ No term to show.
--[[ No term to show.
Line 1,150: Line 1,276:
remove(output)
remove(output)
break
break
elseif NAMESPACE ~= 10 then -- Template:
elseif NAMESPACE ~= "Template" then
insert(cats, lang:getFullName() .. " term requests")
insert(cats, lang:getFullName() .. " term requests")
end
end
Line 1,160: Line 1,286:
end
end


-- TODO: Currently only handles the first transliteration, pending consensus on how to handle multiple translits for multiple forms, as this is not always desirable (e.g. traditional/simplified Chinese).
-- When suppress_tr is true, do not show or generate any transliteration
if data.tr[1] == "" or data.tr[1] == "-" then
if data.suppress_tr then
data.tr[1] = nil
data.tr[1] = nil
else
else
local phonetic_extraction = load_data("Module:links/data").phonetic_extraction
-- TODO: Currently only handles the first transliteration, pending consensus on how to handle multiple translits for multiple forms, as this is not always desirable (e.g. traditional/simplified Chinese).
phonetic_extraction = phonetic_extraction[lang:getCode()] or phonetic_extraction[lang:getFullCode()]
if data.tr[1] == "" or data.tr[1] == "-" then
data.tr[1] = nil
else
local phonetic_extraction = load_data("Module:links/data").phonetic_extraction
phonetic_extraction = phonetic_extraction[lang:getCode()] or phonetic_extraction[lang:getFullCode()]


if phonetic_extraction then
if phonetic_extraction then
data.tr[1] = data.tr[1] or require(phonetic_extraction).getTranslit(export.remove_links(data.alt[1] or data.term[1]))
data.tr[1] = data.tr[1] or
require(phonetic_extraction).getTranslit(export.remove_links(data.alt[1] or data.term[1]))
elseif (data.term[1] or data.alt[1]) and data.sc[1]:isTransliterated() then
-- Track whenever there is manual translit. The categories below like 'terms with redundant transliterations'
-- aren't sufficient because they only work with reference to automatic translit and won't operate at all in
-- languages without any automatic translit, like Persian and Hebrew.
if data.tr[1] then
local full_code = lang:getFullCode()
end


elseif (data.term[1] or data.alt[1]) and data.sc[1]:isTransliterated() then
if not nevercalltr then
-- Try to generate a transliteration.
local text = data.alt[1] or data.term[1]
if not link_tr then
text = export.remove_links(text, true)
end


if not nevercalltr then
local automated_tr = lang:transliterate(text, data.sc[1])
-- Try to generate a transliteration.
 
local text = data.alt[1] or data.term[1]
if automated_tr then
if not lang:link_tr(data.sc[1]) then
local manual_tr = data.tr[1]
text = export.remove_links(text, true)
 
end
if manual_tr then
if export.remove_links(manual_tr) == export.remove_links(automated_tr) then
local automated_tr, tr_categories
insert(cats, lang:getFullName() .. " terms with redundant transliterations")
automated_tr, data.tr_fail, tr_categories = lang:transliterate(text, data.sc[1])
else
-- Prevents Arabic root categories from flooding the tracking categories.
if automated_tr or data.tr_fail then
if NAMESPACE ~= "Category" then
local manual_tr = data.tr[1]
insert(cats,
lang:getFullName() .. " terms with non-redundant manual transliterations")
if manual_tr then
end
if (export.remove_links(manual_tr) == export.remove_links(automated_tr)) and (not data.tr_fail) then
insert(cats, lang:getFullName() .. " terms with redundant transliterations")
elseif not data.tr_fail then
-- Prevents Arabic root categories from flooding the tracking categories.
if NAMESPACE ~= 14 then -- Category:
insert(cats, lang:getFullName() .. " terms with non-redundant manual transliterations")
end
end
end
end
end
 
if not manual_tr or lang:overrideManualTranslit(data.sc[1]) then
if (not manual_tr) or lang:overrideManualTranslit(data.sc[1]) then
data.tr[1] = automated_tr
data.tr[1] = automated_tr
for _, category in ipairs(tr_categories) do
insert(cats, category)
end
end
end
end
Line 1,208: Line 1,342:


-- Link to the transliteration entry for languages that require this
-- Link to the transliteration entry for languages that require this
if data.tr[1] and lang:link_tr(data.sc[1]) and not (data.tr[1]:match("%[%[(.-)%]%]") or data.tr_fail) then
if data.tr[1] and link_tr and not data.tr[1]:match("%[%[(.-)%]%]") then
data.tr[1] = simple_link(
data.tr[1] = simple_link(
data.tr[1],
data.tr[1],
Line 1,220: Line 1,354:
srwc
srwc
)
)
elseif data.tr[1] and not (lang:link_tr(data.sc[1]) or data.tr_fail) then
elseif data.tr[1] and not link_tr then
-- Remove the pseudo-HTML tags added by remove_links.
-- Remove the pseudo-HTML tags added by remove_links.
data.tr[1] = data.tr[1]:gsub("</?link>", "")
data.tr[1] = data.tr[1]:gsub("</?link>", "")
Line 1,228: Line 1,362:
insert(output, export.format_link_annotations(data, face))
insert(output, export.format_link_annotations(data, face))


local categories = #cats > 0 and format_categories(cats, lang, "-", nil, nil, data.sc) or ""
if data.pretext then
insert(output, 1, data.pretext)
end
if data.posttext then
insert(output, data.posttext)
end
 
local categories = cats[1] and format_categories(cats, lang, "-", nil, nil, data.sc) or ""


output = concat(output)
output = concat(output)
if show_qualifiers then
if show_qualifiers or data.show_qualifiers then
output = add_qualifiers_and_refs_to_term(data, output)
output = add_qualifiers_and_refs_to_term(data, output)
end
end
Line 1,260: Line 1,401:
function(c1, c2, c3)
function(c1, c2, c3)
-- Don't remove files.
-- Don't remove files.
for _, false_positive in ipairs({"file", "image"}) do
for _, false_positive in ipairs({ "file", "image" }) do
if c2:lower():match("^" .. false_positive .. ":") then return c1 .. c2 .. c3 end
if c2:lower():match("^" .. false_positive .. ":") then return c1 .. c2 .. c3 end
end
end
-- Remove categories completely.
-- Remove categories completely.
for _, false_positive in ipairs({"category", "cat"}) do
for _, false_positive in ipairs({ "category", "cat" }) do
if c2:lower():match("^" .. false_positive .. ":") then return "" end
if c2:lower():match("^" .. false_positive .. ":") then return "" end
end
end
Line 1,287: Line 1,428:
error("The first argument to section_link was a " .. type(link) .. ", but it should be a string.")
error("The first argument to section_link was a " .. type(link) .. ", but it should be a string.")
end
end
 
local target, section = get_fragment((link:gsub("_", " ")))
local target, section = get_fragment((link:gsub("_", " ")))
 
if not section then
if not section then
error("No \"#\" delineating a section name")
error("No \"#\" delineating a section name")