47,726
edits
![]() | We're back! Sorry, bad combo of sickness, funeral and a month-long trip abroad. The site is back now. ![]() |
No edit summary |
No edit summary |
||
Line 15: | Line 15: | ||
local pron_qualifier_module = "Module:pron qualifier" | local pron_qualifier_module = "Module:pron qualifier" | ||
local | local anchor_encode = require("Module:memoize")(mw.uri.anchorEncode, true) | ||
local concat = table.concat | local concat = table.concat | ||
local decode_entities = m_str_utils.decode_entities | local decode_entities = m_str_utils.decode_entities | ||
Line 22: | Line 22: | ||
local encode_entities = require("Module:string/encode entities") -- Can't yet replace, as the [[Module:string utilities]] version no longer has automatic double-encoding prevention, which requires changes here to account for. | local encode_entities = require("Module:string/encode entities") -- Can't yet replace, as the [[Module:string utilities]] version no longer has automatic double-encoding prevention, which requires changes here to account for. | ||
local insert = table.insert | local insert = table.insert | ||
local ipairs = ipairs | |||
local load_data = mw.loadData | |||
local match = string.match | local match = string.match | ||
local new_title = mw.title.new | local new_title = mw.title.new | ||
local pairs = pairs | |||
local remove = table.remove | local remove = table.remove | ||
local | local shallow_copy = require("Module:table").shallowCopy | ||
local split = m_str_utils.split | local split = m_str_utils.split | ||
local sub = string.sub | local sub = string.sub | ||
local toNFC = mw.ustring.toNFC | local toNFC = mw.ustring.toNFC | ||
local tostring = tostring | |||
local trim -- defined below | local trim -- defined below | ||
local type = type | |||
local ulower = m_str_utils.lower | local ulower = m_str_utils.lower | ||
local umatch = m_str_utils.match | local umatch = m_str_utils.match | ||
local unstrip = mw.text.unstrip | local unstrip = mw.text.unstrip | ||
local u = m_str_utils.char | local u = m_str_utils.char | ||
local TEMP_UNDERSCORE = u(0xFFF0) | local TEMP_UNDERSCORE = u(0xFFF0) | ||
Line 104: | Line 110: | ||
local function get_fragment(text) | local function get_fragment(text) | ||
text = escape(text, "#") | text = escape(text, "#") | ||
-- Replace numeric character references with the corresponding character (&# | -- Replace numeric character references with the corresponding character (&#39; → '), | ||
-- as they contain #, which causes the numeric character reference to be | -- as they contain #, which causes the numeric character reference to be | ||
-- misparsed (wa'a → wa&# | -- misparsed (wa'a → wa&#39;a → pagename wa&, fragment 39;a). | ||
text = decode_entities(text) | text = decode_entities(text) | ||
local target, fragment = text:match("^(..-)#(.+)$") | local target, fragment = text:match("^(..-)#(.+)$") | ||
Line 185: | Line 191: | ||
elseif target:find("^Reconstruction:") then | elseif target:find("^Reconstruction:") then | ||
return | return target | ||
-- Link to appendix for reconstructed terms and terms in appendix-only languages. Plain links interpret * literally, however. | -- Link to appendix for reconstructed terms and terms in appendix-only languages. Plain links interpret * literally, however. | ||
Line 191: | Line 197: | ||
if lang:getFullCode() == "und" then | if lang:getFullCode() == "und" then | ||
return nil | return nil | ||
elseif not lang:hasType("conlang") then | |||
target = " | target = "Reconstruction:" .. lang:getFullName() .. "/" .. target | ||
end | end | ||
-- Reconstructed languages and substrates require an initial *. | -- Reconstructed languages and substrates require an initial *. | ||
Line 204: | Line 210: | ||
return target | return target | ||
else | else | ||
error("The specified language " .. lang:getCanonicalName() | |||
.. " is unattested, while the given term does not begin with '*' to indicate that it is reconstructed.") | .. " is unattested, while the given term does not begin with '*' to indicate that it is reconstructed.") | ||
end | end | ||
elseif lang:hasType("appendix-constructed") then | elseif lang:hasType("appendix-constructed") then | ||
target = " | target = "Appendix:" .. lang:getFullName() .. "/" .. target | ||
end | |||
if not lang:hasType("conlang") then | |||
target = "wikt:" .. target | |||
end | end | ||
return target, escaped > 0 | return target, escaped > 0 | ||
end | end | ||
-- Make a link from a given link's parts | -- Make a link from a given link's parts | ||
local function make_link(link, lang, sc, id, isolated, plain, cats, no_alt_ast | local function make_link(link, lang, sc, id, isolated, plain, cats, no_alt_ast) | ||
-- Convert percent encoding to plaintext. | -- Convert percent encoding to plaintext. | ||
link.target = decode_uri(link.target, "PATH") | link.target = decode_uri(link.target, "PATH") | ||
Line 250: | Line 259: | ||
-- If they're different, but the alt text could have been entered as the term parameter without it affecting the target page, then the target parameter is redundant (e.g. {{l|ru|фу|фу́}}). | -- If they're different, but the alt text could have been entered as the term parameter without it affecting the target page, then the target parameter is redundant (e.g. {{l|ru|фу|фу́}}). | ||
-- If `no_alt_ast` is true, use pcall to catch the error which will be thrown if this is a reconstructed lang and the alt text doesn't have *. | -- If `no_alt_ast` is true, use pcall to catch the error which will be thrown if this is a reconstructed lang and the alt text doesn't have *. | ||
if link.display | if link.display == auto_display then | ||
insert(cats, lang:getFullName() .. " links with redundant alt parameters") | |||
else | |||
local ok, check | local ok, check | ||
if no_alt_ast then | if no_alt_ast then | ||
Line 257: | Line 268: | ||
ok = true | ok = true | ||
check = export.get_link_page(orig_display, lang, sc, plain) | check = export.get_link_page(orig_display, lang, sc, plain) | ||
end | |||
if ok and link.target == check then | |||
insert(cats, lang:getFullName() .. " links with redundant target parameters") | |||
end | end | ||
end | end | ||
Line 272: | Line 286: | ||
-- language then return a "self-link" like the software does. | -- language then return a "self-link" like the software does. | ||
if link.target == mw.title.getCurrentTitle().prefixedText then | if link.target == mw.title.getCurrentTitle().prefixedText then | ||
local fragment, current_L2 = link.fragment, require("Module: | local fragment, current_L2 = link.fragment, require("Module:pages").get_current_L2() | ||
if ( | if ( | ||
fragment and fragment == current_L2 or | fragment and fragment == current_L2 or | ||
not (id or fragment) and (lang: | not (id or fragment) and (lang:getFullCode() == "und" or lang:getFullName() == current_L2) | ||
) then | ) then | ||
return tostring(mw.html.create("strong") | return tostring(mw.html.create("strong") | ||
Line 290: | Line 304: | ||
prefix = prefix and ulower(prefix) | prefix = prefix and ulower(prefix) | ||
if prefix ~= "category" and not (prefix and | if prefix ~= "category" and not (prefix and load_data("Module:data/interwikis")[prefix]) then | ||
if (link.fragment or link.target: | if (link.fragment or link.target:sub(-1) == "#") and not plain then | ||
if cats then | if cats then | ||
insert(cats, lang: | insert(cats, lang:getFullName() .. " links with manual fragments") | ||
end | end | ||
end | end | ||
if | if not link.fragment then | ||
if id then | if id then | ||
link.fragment = require("Module: | link.fragment = lang:getFullCode() == "und" and anchor_encode(id) or require("Module:anchors").language_anchor(lang, id) | ||
elseif not (link.target:find("^Appendix:") or link.target:find("^Reconstruction:") | elseif lang:getFullCode() ~= "und" and not (link.target:find("^Appendix:") or link.target:find("^Reconstruction:")) then | ||
link.fragment = | link.fragment = anchor_encode(lang:getFullName()) | ||
end | end | ||
end | end | ||
end | end | ||
Line 312: | Line 324: | ||
link.display = "]" .. link.display .. "[" | link.display = "]" .. link.display .. "[" | ||
end | end | ||
link.target = link.target:gsub("^(:?)(.*)", function(m1, m2) | link.target = link.target:gsub("^(:?)(.*)", function(m1, m2) | ||
return m1 .. encode_entities(m2, "#%&+/:<=>@[\\]_{|}") | return m1 .. encode_entities(m2, "#%&+/:<=>@[\\]_{|}") | ||
end) | end) | ||
link.fragment = | link.fragment = link.fragment and encode_entities(remove_formatting(link.fragment), "#%&+/:<=>@[\\]_{|}") | ||
return "[[" .. link.target .. (link.fragment and "#" .. link.fragment or "") .. "|" .. link.display .. "]]" | |||
end | end | ||
Line 378: | Line 371: | ||
local all_reconstructed = false | local all_reconstructed = false | ||
if not plain then | if not plain then | ||
-- | -- anchor_encode removes links etc. | ||
if | if anchor_encode(text):sub(1, 1) == "*" then | ||
all_reconstructed = true | all_reconstructed = true | ||
end | end | ||
Line 388: | Line 381: | ||
if data.alt then | if data.alt then | ||
mw.log("(from Module:links)", "text with embedded wikilinks:", text, | mw.log("(from Module:links)", "text with embedded wikilinks:", text, | ||
"ignored alt:", data.alt, "lang:", data.lang: | "ignored alt:", data.alt, "lang:", data.lang:getFullCode()) | ||
if data.cats then | if data.cats then | ||
insert(data.cats, data.lang: | insert(data.cats, data.lang:getFullName() .. " links with ignored alt parameters") | ||
end | end | ||
end | end | ||
Line 396: | Line 389: | ||
if data.id then | if data.id then | ||
mw.log("(from Module:links)", "text with embedded wikilinks:", text, | mw.log("(from Module:links)", "text with embedded wikilinks:", text, | ||
"ignored id:", data.id, "lang:", data.lang: | "ignored id:", data.id, "lang:", data.lang:getFullCode()) | ||
if data.cats then | if data.cats then | ||
insert(data.cats, data.lang: | insert(data.cats, data.lang:getFullName() .. " links with ignored id parameters") | ||
end | end | ||
end | end | ||
Line 466: | Line 459: | ||
end | end | ||
data.term, data.alt = term, pipe and pipe ~= #text and sub(text, pipe + 1) or nil | data.term, data.alt = term, pipe and pipe ~= #text and sub(text, pipe + 1) or nil | ||
if data.cats then | |||
if data.suppress_redundant_wikilink_cat and data.suppress_redundant_wikilink_cat(data.term, data.alt) then | |||
return | |||
end | |||
insert(data.cats, data.lang:getFullName() .. " links with redundant wikilinks") | |||
end | |||
end | end | ||
Line 522: | Line 521: | ||
text = text and trim(text) | text = text and trim(text) | ||
data.alt = data.alt and trim(data.alt) | data.alt = data.alt and trim(data.alt) | ||
return make_link({target = text, display = data.alt, fragment = data.fragment}, data.lang, data.sc, data.id, true, nil, data.cats, data.no_alt_ast | return make_link({target = text, display = data.alt, fragment = data.fragment}, data.lang, data.sc, data.id, true, nil, data.cats, data.no_alt_ast) | ||
end | end | ||
Line 593: | Line 592: | ||
tag = { '<span class="mention-gloss-double-quote">“</span><span class="mention-gloss">', | tag = { '<span class="mention-gloss-double-quote">“</span><span class="mention-gloss">', | ||
'</span><span class="mention-gloss-double-quote">”</span>' } | '</span><span class="mention-gloss-double-quote">”</span>' } | ||
if type(text) == "string" and text:find("^''[^'].*''$") then | |||
-- Temporary tracking for mention glosses that are entirely italicized or bolded, which is probably | |||
-- wrong. (Note that this will also find bolded mention glosses since they use triple apostrophes.) | |||
end | |||
elseif item_type == "tr" then | elseif item_type == "tr" then | ||
if face == "term" then | if face == "term" then | ||
tag = { '<span lang="' .. lang: | tag = { '<span lang="' .. lang:getFullCode() .. '" class="tr mention-tr Latn">', | ||
'</span>' } | '</span>' } | ||
else | else | ||
tag = { '<span lang="' .. lang: | tag = { '<span lang="' .. lang:getFullCode() .. '" class="tr Latn">', '</span>' } | ||
end | end | ||
elseif item_type == "ts" then | elseif item_type == "ts" then | ||
Line 684: | Line 687: | ||
end | end | ||
pos_tags = pos_tags or | pos_tags = pos_tags or load_data("Module:links/data").pos_tags | ||
insert(annotations, export.mark(pos_tags[data.pos] or data.pos, "pos")) | insert(annotations, export.mark(pos_tags[data.pos] or data.pos, "pos")) | ||
end | end | ||
Line 750: | Line 753: | ||
sc = script_object, | sc = script_object, | ||
track_sc = boolean, | track_sc = boolean, | ||
no_nonstandard_sc_cat = boolean, | |||
fragment = link_fragment | fragment = link_fragment | ||
id = sense_id, | id = sense_id, | ||
Line 775: | Line 779: | ||
* If <code class="n">no_alt_ast</code> is specified, then the alt text does not need to contain an asterisk if the language is reconstructed. This should only be used by modules which really need to allow links to reconstructions that don't display asterisks (e.g. number boxes). | * If <code class="n">no_alt_ast</code> is specified, then the alt text does not need to contain an asterisk if the language is reconstructed. This should only be used by modules which really need to allow links to reconstructions that don't display asterisks (e.g. number boxes). | ||
* If <code class="n">show_qualifiers</code> is specified, left and right qualifiers and references will be displayed. (This is for compatibility reasons, since a fair amount of code stores qualifiers and/or references in these fields and displays them itself, expecting {{code|lua|full_link()}} to ignore them.)]==] | * If <code class="n">show_qualifiers</code> is specified, left and right qualifiers and references will be displayed. (This is for compatibility reasons, since a fair amount of code stores qualifiers and/or references in these fields and displays them itself, expecting {{code|lua|full_link()}} to ignore them.)]==] | ||
function export.full_link(data, face, allow_self_link, show_qualifiers | function export.full_link(data, face, allow_self_link, show_qualifiers) | ||
-- Prevent data from being destructively modified. | -- Prevent data from being destructively modified. | ||
local data = | local data = shallow_copy(data) | ||
if type(data) ~= "table" then | if type(data) ~= "table" then | ||
Line 819: | Line 823: | ||
local display_term = data.alt[i] or data.term[i] | local display_term = data.alt[i] or data.term[i] | ||
local best = data.lang:findBestScript(display_term) | local best = data.lang:findBestScript(display_term) | ||
-- no_nonstandard_sc_cat is intended for use in [[Module:interproject]] | |||
if ( | if ( | ||
not data.no_nonstandard_sc_cat and | |||
best:getCode() == "None" and | best:getCode() == "None" and | ||
require("Module:scripts").findBestScriptWithoutLang(display_term):getCode() ~= "None" | require("Module:scripts").findBestScriptWithoutLang(display_term):getCode() ~= "None" | ||
) then | ) then | ||
insert(data.cats, data.lang: | insert(data.cats, data.lang:getFullName() .. " terms in nonstandard scripts") | ||
end | end | ||
if not data.sc[i] then | if not data.sc[i] then | ||
Line 830: | Line 836: | ||
elseif data.track_sc then | elseif data.track_sc then | ||
if data.sc[i]:getCode() == best:getCode() then | if data.sc[i]:getCode() == best:getCode() then | ||
insert(data.cats, data.lang: | insert(data.cats, data.lang:getFullName() .. " terms with redundant script codes") | ||
else | else | ||
insert(data.cats, data.lang: | insert(data.cats, data.lang:getFullName() .. " terms with non-redundant manual script codes") | ||
end | end | ||
end | end | ||
Line 908: | Line 914: | ||
no_store .. " " | no_store .. " " | ||
class = "form-of lang-" .. data.lang: | class = "form-of lang-" .. data.lang:getFullCode() .. " " .. accel | ||
end | end | ||
Line 929: | Line 935: | ||
cats = data.cats, | cats = data.cats, | ||
no_alt_ast = data.no_alt_ast, | no_alt_ast = data.no_alt_ast, | ||
suppress_redundant_wikilink_cat = data.suppress_redundant_wikilink_cat, | |||
} | } | ||
link = require("Module:script utilities").tag_text( | link = require("Module:script utilities").tag_text( | ||
Line 945: | Line 951: | ||
break | break | ||
elseif mw.title.getCurrentTitle().nsText ~= "Template" then | elseif mw.title.getCurrentTitle().nsText ~= "Template" then | ||
insert(data.cats, data.lang: | insert(data.cats, data.lang:getFullName() .. " term requests") | ||
end | end | ||
link = "<small>[Term?]</small>" | link = "<small>[Term?]</small>" | ||
Line 951: | Line 957: | ||
end | end | ||
insert(output, link) | insert(output, link) | ||
if i < #terms then insert(output, "<span class=\"Zsym mention\" style=\"font-size:100%;\"> | if i < #terms then insert(output, "<span class=\"Zsym mention\" style=\"font-size:100%;\"> / </span>") end | ||
end | end | ||
Line 957: | Line 963: | ||
if data.tr[1] == "" or data.tr[1] == "-" then | if data.tr[1] == "" or data.tr[1] == "-" then | ||
data.tr[1] = nil | data.tr[1] = nil | ||
else | else | ||
local phonetic_extraction = | local phonetic_extraction = load_data("Module:links/data").phonetic_extraction | ||
phonetic_extraction = phonetic_extraction[data.lang:getCode()] or phonetic_extraction[data.lang: | phonetic_extraction = phonetic_extraction[data.lang:getCode()] or phonetic_extraction[data.lang:getFullCode()] | ||
if phonetic_extraction then | if phonetic_extraction then | ||
Line 969: | Line 976: | ||
-- languages without any automatic translit, like Persian and Hebrew. | -- languages without any automatic translit, like Persian and Hebrew. | ||
-- Try to generate a transliteration | -- Try to generate a transliteration. | ||
local text = data.alt[1] or data.term[1] | |||
if not data.lang:link_tr(data.sc[1]) then | |||
text = export.remove_links(text, true) | |||
end | |||
local automated_tr, tr_categories | |||
automated_tr, data.tr_fail, tr_categories = data.lang:transliterate(text, data.sc[1]) | |||
if automated_tr or data.tr_fail then | |||
local manual_tr = data.tr[1] | |||
if | if manual_tr then | ||
if (export.remove_links(manual_tr) == export.remove_links(automated_tr)) and (not data.tr_fail) then | |||
insert(data.cats, data.lang:getFullName() .. " terms with redundant transliterations") | |||
elseif not data.tr_fail then | |||
-- Prevents Arabic root categories from flooding the tracking categories. | |||
if mw.title.getCurrentTitle().nsText ~= "Category" then | |||
insert(data.cats, | insert(data.cats, data.lang:getFullName() .. " terms with non-redundant manual transliterations") | ||
end | end | ||
end | |||
end | |||
if (not manual_tr) or data.lang:overrideManualTranslit(data.sc[1]) then | |||
data.tr[1] = automated_tr | |||
for _, category in ipairs(tr_categories) do | |||
insert(data.cats, category) | |||
end | end | ||
end | end | ||
Line 1,001: | Line 1,014: | ||
lang = data.lang, | lang = data.lang, | ||
term = data.tr[1], | term = data.tr[1], | ||
suppress_redundant_wikilink_cat = data.suppress_redundant_wikilink_cat, | |||
sc = require("Module:scripts").getByCode("Latn") | sc = require("Module:scripts").getByCode("Latn") | ||
} | } |