Module:links: Difference between revisions

Jump to navigation Jump to search
no edit summary
No edit summary
No edit summary
Line 15: Line 15:
local pron_qualifier_module = "Module:pron qualifier"
local pron_qualifier_module = "Module:pron qualifier"


local anchorEncode = mw.uri.anchorEncode
local anchor_encode = require("Module:memoize")(mw.uri.anchorEncode, true)
local concat = table.concat
local concat = table.concat
local decode_entities = m_str_utils.decode_entities
local decode_entities = m_str_utils.decode_entities
Line 22: Line 22:
local encode_entities = require("Module:string/encode entities") -- Can't yet replace, as the [[Module:string utilities]] version no longer has automatic double-encoding prevention, which requires changes here to account for.
local encode_entities = require("Module:string/encode entities") -- Can't yet replace, as the [[Module:string utilities]] version no longer has automatic double-encoding prevention, which requires changes here to account for.
local insert = table.insert
local insert = table.insert
local ipairs = ipairs
local load_data = mw.loadData
local match = string.match
local match = string.match
local new_title = mw.title.new
local new_title = mw.title.new
local pairs = pairs
local remove = table.remove
local remove = table.remove
local shallowcopy = require("Module:table").shallowCopy
local shallow_copy = require("Module:table").shallowCopy
local split = m_str_utils.split
local split = m_str_utils.split
local sub = string.sub
local sub = string.sub
local toNFC = mw.ustring.toNFC
local toNFC = mw.ustring.toNFC
local tostring = tostring
local trim -- defined below
local trim -- defined below
local type = type
local ulower = m_str_utils.lower
local ulower = m_str_utils.lower
local umatch = m_str_utils.match
local umatch = m_str_utils.match
local unstrip = mw.text.unstrip
local unstrip = mw.text.unstrip
local u = m_str_utils.char
local u = m_str_utils.char
local TEMP_UNDERSCORE = u(0xFFF0)
local TEMP_UNDERSCORE = u(0xFFF0)


Line 104: Line 110:
local function get_fragment(text)
local function get_fragment(text)
text = escape(text, "#")
text = escape(text, "#")
-- Replace numeric character references with the corresponding character ( → '),
-- Replace numeric character references with the corresponding character (&#39; → '),
-- as they contain #, which causes the numeric character reference to be
-- as they contain #, which causes the numeric character reference to be
-- misparsed (wa'a → waa → pagename wa&, fragment 29;a).
-- misparsed (wa&#39;a → wa'a → pagename wa&, fragment 39;a).
text = decode_entities(text)
text = decode_entities(text)
local target, fragment = text:match("^(..-)#(.+)$")
local target, fragment = text:match("^(..-)#(.+)$")
Line 185: Line 191:


elseif target:find("^Reconstruction:") then
elseif target:find("^Reconstruction:") then
return "wikt:" .. target
return target


-- Link to appendix for reconstructed terms and terms in appendix-only languages. Plain links interpret * literally, however.
-- Link to appendix for reconstructed terms and terms in appendix-only languages. Plain links interpret * literally, however.
Line 191: Line 197:
if lang:getFullCode() == "und" then
if lang:getFullCode() == "und" then
return nil
return nil
else
elseif not lang:hasType("conlang") then
target = "wikt:Reconstruction:" .. lang:getFullName() .. "/" .. target
target = "Reconstruction:" .. lang:getFullName() .. "/" .. target
end
end
-- Reconstructed languages and substrates require an initial *.
-- Reconstructed languages and substrates require an initial *.
Line 204: Line 210:
return target
return target
else
else
--[[error("The specified language " .. lang:getCanonicalName()
error("The specified language " .. lang:getCanonicalName()
.. " is unattested, while the given term does not begin with '*' to indicate that it is reconstructed.")]]
.. " is unattested, while the given term does not begin with '*' to indicate that it is reconstructed.")
return "*" .. target
end
end


elseif lang:hasType("appendix-constructed") then
elseif lang:hasType("appendix-constructed") then
target = "wikt:Appendix:" .. lang:getFullName() .. "/" .. target
target = "Appendix:" .. lang:getFullName() .. "/" .. target
end
if not lang:hasType("conlang") then
target = "wikt:" .. target
end
end
 
return target, escaped > 0
return target, escaped > 0
end
end


-- Make a link from a given link's parts
-- Make a link from a given link's parts
local function make_link(link, lang, sc, id, isolated, plain, cats, no_alt_ast) --, to_wikt)
local function make_link(link, lang, sc, id, isolated, plain, cats, no_alt_ast)
-- Convert percent encoding to plaintext.
-- Convert percent encoding to plaintext.
link.target = decode_uri(link.target, "PATH")
link.target = decode_uri(link.target, "PATH")
Line 250: Line 259:
-- If they're different, but the alt text could have been entered as the term parameter without it affecting the target page, then the target parameter is redundant (e.g. {{l|ru|фу|фу́}}).
-- If they're different, but the alt text could have been entered as the term parameter without it affecting the target page, then the target parameter is redundant (e.g. {{l|ru|фу|фу́}}).
-- If `no_alt_ast` is true, use pcall to catch the error which will be thrown if this is a reconstructed lang and the alt text doesn't have *.
-- If `no_alt_ast` is true, use pcall to catch the error which will be thrown if this is a reconstructed lang and the alt text doesn't have *.
if link.display ~= auto_display then
if link.display == auto_display then
insert(cats, lang:getFullName() .. " links with redundant alt parameters")
else
local ok, check
local ok, check
if no_alt_ast then
if no_alt_ast then
Line 257: Line 268:
ok = true
ok = true
check = export.get_link_page(orig_display, lang, sc, plain)
check = export.get_link_page(orig_display, lang, sc, plain)
end
if ok and link.target == check then
insert(cats, lang:getFullName() .. " links with redundant target parameters")
end
end
end
end
Line 272: Line 286:
-- language then return a "self-link" like the software does.
-- language then return a "self-link" like the software does.
if link.target == mw.title.getCurrentTitle().prefixedText then
if link.target == mw.title.getCurrentTitle().prefixedText then
local fragment, current_L2 = link.fragment, require("Module:utilities").get_current_L2()
local fragment, current_L2 = link.fragment, require("Module:pages").get_current_L2()
if (
if (
fragment and fragment == current_L2 or
fragment and fragment == current_L2 or
not (id or fragment) and (lang:getCode() == "und" or lang:getCanonicalName() == current_L2)
not (id or fragment) and (lang:getFullCode() == "und" or lang:getFullName() == current_L2)
) then
) then
return tostring(mw.html.create("strong")
return tostring(mw.html.create("strong")
Line 290: Line 304:
prefix = prefix and ulower(prefix)
prefix = prefix and ulower(prefix)


if prefix ~= "category" and not (prefix and mw.loadData("Module:data/interwikis")[prefix]) then
if prefix ~= "category" and not (prefix and load_data("Module:data/interwikis")[prefix]) then
if (link.fragment or link.target:find("#$")) and not plain then
if (link.fragment or link.target:sub(-1) == "#") and not plain then
if cats then
if cats then
insert(cats, lang:getCanonicalName() .. " links with manual fragments")
insert(cats, lang:getFullName() .. " links with manual fragments")
end
end
end
end


if (not link.fragment) and lang:getCode() ~= "und" then
if not link.fragment then
if id then
if id then
link.fragment = require("Module:senseid").anchor(lang, id)
link.fragment = lang:getFullCode() == "und" and anchor_encode(id) or require("Module:anchors").language_anchor(lang, id)
elseif not (link.target:find("^Appendix:") or link.target:find("^Reconstruction:") or plain) then
elseif lang:getFullCode() ~= "und" and not (link.target:find("^Appendix:") or link.target:find("^Reconstruction:")) then
link.fragment = "" -- lang:getCanonicalName()
link.fragment = anchor_encode(lang:getFullName())
end
end
elseif plain and id then
link.fragment = id
end
end
end
end
Line 312: Line 324:
link.display = "]" .. link.display .. "["
link.display = "]" .. link.display .. "["
end
end
 
link.fragment = link.fragment and encode_entities(remove_formatting(link.fragment), "#%&+/:<=>@[\\]_{|}")
link.target = link.target:gsub("^(:?)(.*)", function(m1, m2)
link.target = link.target:gsub("^(:?)(.*)", function(m1, m2)
return m1 .. encode_entities(m2, "#%&+/:<=>@[\\]_{|}")
return m1 .. encode_entities(m2, "#%&+/:<=>@[\\]_{|}")
end)
end)
link.fragment = "#" .. lang:getCanonicalName()
link.fragment = link.fragment and encode_entities(remove_formatting(link.fragment), "#%&+/:<=>@[\\]_{|}")
 
if lang:hasType("conlang") then
return "[[" .. link.target .. (link.fragment and "#" .. link.fragment or "") .. "|" .. link.display .. "]]"
link.wiki = "Contionary:"
else
link.wiki = "wikt:"
end
if link.target:match("^%*") and lang:hasType("reconstructed") and not lang:hasType("conlang") then
link.target = link.target:gsub("^%*","Reconstruction:" .. lang:getCanonicalName() .. "/")
link.fragment = ""
end
--[[if link.target:match("^%*") and lang:hasType("reconstructed") and to_wikt then
link.target = link.target:gsub("^%*","Reconstruction:" .. lang:getCanonicalName() .. "/")
link.fragment = ""
end]]
--return "[[" .. (to_wikt and "wikt:" or "Contionary:") .. link.target .. link.fragment .. "|" .. link.display .. "]]"
return "[[" .. link.wiki .. link.target .. link.fragment .. "|" .. link.display .. "]]"
end
end


Line 378: Line 371:
local all_reconstructed = false
local all_reconstructed = false
if not plain then
if not plain then
-- anchorEncode removes links etc.
-- anchor_encode removes links etc.
if anchorEncode(text):sub(1, 1) == "*" then
if anchor_encode(text):sub(1, 1) == "*" then
all_reconstructed = true
all_reconstructed = true
end
end
Line 388: Line 381:
if data.alt then
if data.alt then
mw.log("(from Module:links)", "text with embedded wikilinks:", text,
mw.log("(from Module:links)", "text with embedded wikilinks:", text,
"ignored alt:", data.alt, "lang:", data.lang:getCode())
"ignored alt:", data.alt, "lang:", data.lang:getFullCode())
if data.cats then
if data.cats then
insert(data.cats, data.lang:getCanonicalName() .. " links with ignored alt parameters")
insert(data.cats, data.lang:getFullName() .. " links with ignored alt parameters")
end
end
end
end
Line 396: Line 389:
if data.id then
if data.id then
mw.log("(from Module:links)", "text with embedded wikilinks:", text,
mw.log("(from Module:links)", "text with embedded wikilinks:", text,
"ignored id:", data.id, "lang:", data.lang:getCode())
"ignored id:", data.id, "lang:", data.lang:getFullCode())
if data.cats then
if data.cats then
insert(data.cats, data.lang:getCanonicalName() .. " links with ignored id parameters")
insert(data.cats, data.lang:getFullName() .. " links with ignored id parameters")
end
end
end
end
Line 466: Line 459:
end
end
data.term, data.alt = term, pipe and pipe ~= #text and sub(text, pipe + 1) or nil
data.term, data.alt = term, pipe and pipe ~= #text and sub(text, pipe + 1) or nil
if data.cats then
if data.suppress_redundant_wikilink_cat and data.suppress_redundant_wikilink_cat(data.term, data.alt) then
return
end
insert(data.cats, data.lang:getFullName() .. " links with redundant wikilinks")
end
end
end


Line 522: Line 521:
text = text and trim(text)
text = text and trim(text)
data.alt = data.alt and trim(data.alt)
data.alt = data.alt and trim(data.alt)
return make_link({target = text, display = data.alt, fragment = data.fragment}, data.lang, data.sc, data.id, true, nil, data.cats, data.no_alt_ast) --, data.to_wikt)
return make_link({target = text, display = data.alt, fragment = data.fragment}, data.lang, data.sc, data.id, true, nil, data.cats, data.no_alt_ast)
end
end


Line 593: Line 592:
tag = { '<span class="mention-gloss-double-quote">“</span><span class="mention-gloss">',
tag = { '<span class="mention-gloss-double-quote">“</span><span class="mention-gloss">',
'</span><span class="mention-gloss-double-quote">”</span>' }
'</span><span class="mention-gloss-double-quote">”</span>' }
if type(text) == "string" and text:find("^''[^'].*''$") then
-- Temporary tracking for mention glosses that are entirely italicized or bolded, which is probably
-- wrong. (Note that this will also find bolded mention glosses since they use triple apostrophes.)
end
elseif item_type == "tr" then
elseif item_type == "tr" then
if face == "term" then
if face == "term" then
tag = { '<span lang="' .. lang:getCode() .. '" class="tr mention-tr Latn">',
tag = { '<span lang="' .. lang:getFullCode() .. '" class="tr mention-tr Latn">',
'</span>' }
'</span>' }
else
else
tag = { '<span lang="' .. lang:getCode() .. '" class="tr Latn">', '</span>' }
tag = { '<span lang="' .. lang:getFullCode() .. '" class="tr Latn">', '</span>' }
end
end
elseif item_type == "ts" then
elseif item_type == "ts" then
Line 684: Line 687:
end
end


pos_tags = pos_tags or mw.loadData("Module:links/data").pos_tags
pos_tags = pos_tags or load_data("Module:links/data").pos_tags
insert(annotations, export.mark(pos_tags[data.pos] or data.pos, "pos"))
insert(annotations, export.mark(pos_tags[data.pos] or data.pos, "pos"))
end
end
Line 750: Line 753:
sc = script_object,
sc = script_object,
track_sc = boolean,
track_sc = boolean,
no_nonstandard_sc_cat = boolean,
fragment = link_fragment
fragment = link_fragment
id = sense_id,
id = sense_id,
Line 775: Line 779:
* If <code class="n">no_alt_ast</code> is specified, then the alt text does not need to contain an asterisk if the language is reconstructed. This should only be used by modules which really need to allow links to reconstructions that don't display asterisks (e.g. number boxes).
* If <code class="n">no_alt_ast</code> is specified, then the alt text does not need to contain an asterisk if the language is reconstructed. This should only be used by modules which really need to allow links to reconstructions that don't display asterisks (e.g. number boxes).
* If <code class="n">show_qualifiers</code> is specified, left and right qualifiers and references will be displayed. (This is for compatibility reasons, since a fair amount of code stores qualifiers and/or references in these fields and displays them itself, expecting {{code|lua|full_link()}} to ignore them.)]==]
* If <code class="n">show_qualifiers</code> is specified, left and right qualifiers and references will be displayed. (This is for compatibility reasons, since a fair amount of code stores qualifiers and/or references in these fields and displays them itself, expecting {{code|lua|full_link()}} to ignore them.)]==]
function export.full_link(data, face, allow_self_link, show_qualifiers) --, to_wikt)
function export.full_link(data, face, allow_self_link, show_qualifiers)
-- Prevent data from being destructively modified.
-- Prevent data from being destructively modified.
local data = shallowcopy(data)
local data = shallow_copy(data)


if type(data) ~= "table" then
if type(data) ~= "table" then
Line 819: Line 823:
local display_term = data.alt[i] or data.term[i]
local display_term = data.alt[i] or data.term[i]
local best = data.lang:findBestScript(display_term)
local best = data.lang:findBestScript(display_term)
-- no_nonstandard_sc_cat is intended for use in [[Module:interproject]]
if (
if (
not data.no_nonstandard_sc_cat and
best:getCode() == "None" and
best:getCode() == "None" and
require("Module:scripts").findBestScriptWithoutLang(display_term):getCode() ~= "None"
require("Module:scripts").findBestScriptWithoutLang(display_term):getCode() ~= "None"
) then
) then
insert(data.cats, data.lang:getCanonicalName() .. " terms in nonstandard scripts")
insert(data.cats, data.lang:getFullName() .. " terms in nonstandard scripts")
end
end
if not data.sc[i] then
if not data.sc[i] then
Line 830: Line 836:
elseif data.track_sc then
elseif data.track_sc then
if data.sc[i]:getCode() == best:getCode() then
if data.sc[i]:getCode() == best:getCode() then
insert(data.cats, data.lang:getCanonicalName() .. " terms with redundant script codes")
insert(data.cats, data.lang:getFullName() .. " terms with redundant script codes")
else
else
insert(data.cats, data.lang:getCanonicalName() .. " terms with non-redundant manual script codes")
insert(data.cats, data.lang:getFullName() .. " terms with non-redundant manual script codes")
end
end
end
end
Line 908: Line 914:
no_store .. " "
no_store .. " "


class = "form-of lang-" .. data.lang:getCode() .. " " .. accel
class = "form-of lang-" .. data.lang:getFullCode() .. " " .. accel
end
end


Line 929: Line 935:
cats = data.cats,
cats = data.cats,
no_alt_ast = data.no_alt_ast,
no_alt_ast = data.no_alt_ast,
--to_wikt = data.to_wikt
suppress_redundant_wikilink_cat = data.suppress_redundant_wikilink_cat,
}
}
link = require("Module:script utilities").tag_text(
link = require("Module:script utilities").tag_text(
Line 945: Line 951:
break
break
elseif mw.title.getCurrentTitle().nsText ~= "Template" then
elseif mw.title.getCurrentTitle().nsText ~= "Template" then
insert(data.cats, data.lang:getCanonicalName() .. " term requests")
insert(data.cats, data.lang:getFullName() .. " term requests")
end
end
link = "<small>[Term?]</small>"
link = "<small>[Term?]</small>"
Line 951: Line 957:
end
end
insert(output, link)
insert(output, link)
if i < #terms then insert(output, "<span class=\"Zsym mention\" style=\"font-size:100%;\"></span>") end
if i < #terms then insert(output, "<span class=\"Zsym mention\" style=\"font-size:100%;\">&nbsp;/ </span>") end
end
end


Line 957: Line 963:
if data.tr[1] == "" or data.tr[1] == "-" then
if data.tr[1] == "" or data.tr[1] == "-" then
data.tr[1] = nil
data.tr[1] = nil
else
else
local phonetic_extraction = mw.loadData("Module:links/data").phonetic_extraction
local phonetic_extraction = load_data("Module:links/data").phonetic_extraction
phonetic_extraction = phonetic_extraction[data.lang:getCode()] or phonetic_extraction[data.lang:getCode()]
phonetic_extraction = phonetic_extraction[data.lang:getCode()] or phonetic_extraction[data.lang:getFullCode()]


if phonetic_extraction then
if phonetic_extraction then
Line 969: Line 976:
-- languages without any automatic translit, like Persian and Hebrew.
-- languages without any automatic translit, like Persian and Hebrew.


-- Try to generate a transliteration, unless transliteration has been supplied and data.no_check_redundant_translit is
-- Try to generate a transliteration.
-- given. (Checking for redundant transliteration can use up significant amounts of memory so we don't want to do it
local text = data.alt[1] or data.term[1]
-- if memory is tight. `no_check_redundant_translit` is currently set when called ultimately from
if not data.lang:link_tr(data.sc[1]) then
-- {{multitrans|...|no-check-redundant-translit=1}}.)
text = export.remove_links(text, true)
if not (data.tr[1] and data.no_check_redundant_translit) then
end
local text = data.alt[1] or data.term[1]
 
if not data.lang:link_tr(data.sc[1]) then
local automated_tr, tr_categories
text = export.remove_links(text, true)
automated_tr, data.tr_fail, tr_categories = data.lang:transliterate(text, data.sc[1])
end


local automated_tr, tr_categories
if automated_tr or data.tr_fail then
automated_tr, data.tr_fail, tr_categories = data.lang:transliterate(text, data.sc[1])
local manual_tr = data.tr[1]


if automated_tr or data.tr_fail then
if manual_tr then
local manual_tr = data.tr[1]
if (export.remove_links(manual_tr) == export.remove_links(automated_tr)) and (not data.tr_fail) then
insert(data.cats, data.lang:getFullName() .. " terms with redundant transliterations")
if (not manual_tr) or data.lang:overrideManualTranslit(data.sc[1]) then
elseif not data.tr_fail then
data.tr[1] = automated_tr
-- Prevents Arabic root categories from flooding the tracking categories.
for _, category in ipairs(tr_categories) do
if mw.title.getCurrentTitle().nsText ~= "Category" then
insert(data.cats, category)
insert(data.cats, data.lang:getFullName() .. " terms with non-redundant manual transliterations")
end
end
end
end
if (not manual_tr) or data.lang:overrideManualTranslit(data.sc[1]) then
data.tr[1] = automated_tr
for _, category in ipairs(tr_categories) do
insert(data.cats, category)
end
end
end
end
Line 1,001: Line 1,014:
lang = data.lang,
lang = data.lang,
term = data.tr[1],
term = data.tr[1],
suppress_redundant_wikilink_cat = data.suppress_redundant_wikilink_cat,
sc = require("Module:scripts").getByCode("Latn")
sc = require("Module:scripts").getByCode("Latn")
}
}

Navigation menu