Module:links: Difference between revisions
No edit summary |
No edit summary |
||
| (40 intermediate revisions by the same user not shown) | |||
| Line 12: | Line 12: | ||
[[Module:gender and number]] | [[Module:gender and number]] | ||
]=] | ]=] | ||
local | |||
local anchors_module = "Module:anchors" | |||
local form_of_module = "Module:form of" | |||
local gender_and_number_module = "Module:getn" | |||
local languages_module = "Module:languages" | |||
local load_module = "Module:load" | |||
local memoize_module = "Module:memoize" | |||
local pages_module = "Module:pages" | |||
local pron_qualifier_module = "Module:pron qualifier" | local pron_qualifier_module = "Module:pron qualifier" | ||
local scripts_module = "Module:scripts" | |||
local script_utilities_module = "Module:script utilities" | |||
local string_encode_entities_module = "Module:string/encode entities" | |||
local string_utilities_module = "Module:string utilities" | |||
local table_module = "Module:table" | |||
local utilities_module = "Module:utilities" | |||
local concat = table.concat | local concat = table.concat | ||
local find = string.find | local find = string.find | ||
local | local get_current_title = mw.title.getCurrentTitle | ||
local insert = table.insert | local insert = table.insert | ||
local ipairs = ipairs | local ipairs = ipairs | ||
local match = string.match | local match = string.match | ||
local new_title = mw.title.new | local new_title = mw.title.new | ||
local pairs = pairs | local pairs = pairs | ||
local remove = table.remove | local remove = table.remove | ||
local sub = string.sub | local sub = string.sub | ||
local toNFC = mw.ustring.toNFC | local toNFC = mw.ustring.toNFC | ||
local tostring = tostring | local tostring = tostring | ||
local type = type | local type = type | ||
local unstrip = mw.text.unstrip | local unstrip = mw.text.unstrip | ||
local | local NAMESPACE = get_current_title().nsText | ||
-- Lazy loader: the first call wraps `mw.uri.anchorEncode` with the function
-- returned by the memoize module (second argument `true`), stores the wrapper
-- in this upvalue so later calls go straight to it, and forwards the arguments.
local function anchor_encode(...)
	local loaded = require(memoize_module)(mw.uri.anchorEncode, true)
	anchor_encode = loaded
	return loaded(...)
end
-- Lazy loader: on first use, replaces this upvalue with `decode_entities` from
-- the string utilities module, then forwards the arguments to it.
local function decode_entities(...)
	local loaded = require(string_utilities_module).decode_entities
	decode_entities = loaded
	return loaded(...)
end
-- Lazy loader: on first use, replaces this upvalue with `decode_uri` from the
-- string utilities module, then forwards the arguments to it.
local function decode_uri(...)
	local loaded = require(string_utilities_module).decode_uri
	decode_uri = loaded
	return loaded(...)
end
-- Can't yet replace, as the [[Module:string utilities]] version no longer has automatic double-encoding prevention, which requires changes here to account for.
-- Lazy loader: on first use, replaces this upvalue with the value returned by
-- requiring the dedicated encode-entities module (the module itself is called
-- as the function), then forwards the arguments to it.
local function encode_entities(...)
	local loaded = require(string_encode_entities_module)
	encode_entities = loaded
	return loaded(...)
end
-- Lazy loader: on first use, replaces this upvalue with `extend` from the
-- table module, then forwards the arguments to it.
local function extend(...)
	local loaded = require(table_module).extend
	extend = loaded
	return loaded(...)
end
-- Lazy loader: on first use, replaces this upvalue with
-- `findBestScriptWithoutLang` from the scripts module, then forwards the
-- arguments to it.
local function find_best_script_without_lang(...)
	local loaded = require(scripts_module).findBestScriptWithoutLang
	find_best_script_without_lang = loaded
	return loaded(...)
end
-- Lazy loader: on first use, replaces this upvalue with `format_categories`
-- from the utilities module, then forwards the arguments to it.
local function format_categories(...)
	local loaded = require(utilities_module).format_categories
	format_categories = loaded
	return loaded(...)
end
-- Lazy loader: on first use, replaces this upvalue with `format_genders` from
-- the gender-and-number module, then forwards the arguments to it.
local function format_genders(...)
	local loaded = require(gender_and_number_module).format_genders
	format_genders = loaded
	return loaded(...)
end
-- Lazy loader: on first use, replaces this upvalue with `format_qualifiers`
-- from the pron-qualifier module, then forwards the arguments to it.
local function format_qualifiers(...)
	local loaded = require(pron_qualifier_module).format_qualifiers
	format_qualifiers = loaded
	return loaded(...)
end
-- Lazy loader: on first use, replaces this upvalue with `get_current_L2` from
-- the pages module, then forwards the arguments to it.
local function get_current_L2(...)
	local loaded = require(pages_module).get_current_L2
	get_current_L2 = loaded
	return loaded(...)
end
-- Lazy loader: on first use, replaces this upvalue with `getByCode` from the
-- languages module, then forwards the arguments to it.
local function get_lang(...)
	local loaded = require(languages_module).getByCode
	get_lang = loaded
	return loaded(...)
end
-- Lazy loader: on first use, replaces this upvalue with `getByCode` from the
-- scripts module, then forwards the arguments to it.
local function get_script(...)
	local loaded = require(scripts_module).getByCode
	get_script = loaded
	return loaded(...)
end
-- Lazy loader: on first use, replaces this upvalue with `language_anchor` from
-- the anchors module, then forwards the arguments to it.
local function language_anchor(...)
	local loaded = require(anchors_module).language_anchor
	language_anchor = loaded
	return loaded(...)
end
-- Lazy loader: on first use, replaces this upvalue with `load_data` from the
-- load module, then forwards the arguments to it.
local function load_data(...)
	local loaded = require(load_module).load_data
	load_data = loaded
	return loaded(...)
end
-- Lazy loader: on first use, replaces this upvalue with `request_script` from
-- the script utilities module, then forwards the arguments to it.
local function request_script(...)
	local loaded = require(script_utilities_module).request_script
	request_script = loaded
	return loaded(...)
end
-- Lazy loader: on first use, replaces this upvalue with `shallowCopy` from the
-- table module, then forwards the arguments to it.
local function shallow_copy(...)
	local loaded = require(table_module).shallowCopy
	shallow_copy = loaded
	return loaded(...)
end
-- Lazy loader: on first use, replaces this upvalue with `split` from the
-- string utilities module, then forwards the arguments to it.
local function split(...)
	local loaded = require(string_utilities_module).split
	split = loaded
	return loaded(...)
end
-- Lazy loader: on first use, replaces this upvalue with `tag_text` from the
-- script utilities module, then forwards the arguments to it.
local function tag_text(...)
	local loaded = require(script_utilities_module).tag_text
	tag_text = loaded
	return loaded(...)
end
-- Lazy loader: on first use, replaces this upvalue with `tag_translit` from
-- the script utilities module, then forwards the arguments to it.
local function tag_translit(...)
	local loaded = require(script_utilities_module).tag_translit
	tag_translit = loaded
	return loaded(...)
end
-- Lazy loader: on first use, replaces this upvalue with `trim` from the string
-- utilities module, then forwards the arguments to it.
local function trim(...)
	local loaded = require(string_utilities_module).trim
	trim = loaded
	return loaded(...)
end
-- Lazy loader: on first use, replaces this upvalue with `char` from the string
-- utilities module, then forwards the arguments to it.
local function u(...)
	local loaded = require(string_utilities_module).char
	u = loaded
	return loaded(...)
end
-- Lazy loader: on first use, replaces this upvalue with `lower` from the
-- string utilities module, then forwards the arguments to it.
local function ulower(...)
	local loaded = require(string_utilities_module).lower
	ulower = loaded
	return loaded(...)
end
-- Lazy loader: on first use, replaces this upvalue with `match` from the
-- string utilities module, then forwards the arguments to it.
local function umatch(...)
	local loaded = require(string_utilities_module).match
	umatch = loaded
	return loaded(...)
end
-- Cache for the headword data table; nil until `get_headword_data` has run.
local m_headword_data

-- Loads "Module:headword/data" through `load_data`, stores the result in
-- `m_headword_data`, and returns it. Callers use the
-- `m_headword_data or get_headword_data()` idiom to avoid reloading.
local function get_headword_data()
	local data = load_data("Module:headword/data")
	m_headword_data = data
	return data
end
-- Trims `text` with one of two charsets. If trimming with the standard charset
-- leaves anything behind, that result is returned; otherwise only the
-- always-trimmed characters are removed, so whitespace-only input keeps its
-- whitespace. The charsets are built on the first call, after which this
-- upvalue is replaced by the real implementation.
local function selective_trim(...)
	-- Characters that are removed unconditionally.
	local forced_set =
		"\194\128-\194\159" .. -- U+0080-009F (C1 control characters)
		"\194\173" .. -- U+00AD (soft hyphen)
		"\226\128\170-\226\128\174" .. -- U+202A-202E (directionality formatting characters)
		"\226\129\166-\226\129\169" -- U+2066-2069 (directionality formatting characters)
	-- Characters that are removed when something else remains afterwards.
	local usual_set = "%s" .. -- (default whitespace charset)
		"\226\128\139-\226\128\141" .. -- U+200B-200D (zero-width spaces)
		forced_set

	local function impl(text)
		if text == "" then
			return text
		end
		local result = trim(text, usual_set)
		if result == "" then
			-- Nothing but trimmable characters: fall back to the narrower charset.
			result = trim(text, forced_set)
		end
		return result
	end

	selective_trim = impl
	return impl(...)
end
| Line 105: | Line 230: | ||
end | end | ||
return text | return text | ||
end | |||
--[==[Splits a wikilink of the form `[[target]]` or `[[target|display]]` into its link target and display text. Returns {nil}, {nil} if `text` is not a single wikilink (i.e. it does not start with `[[`, does not end with its only `]]`, or contains a further `[[`). Without a pipe, the display text is the same as the target. By default the target is returned as a title object, and {nil}, {nil} is returned if no valid title can be made from it; if `allow_bad_target` is set, the target is returned as a raw string with no validity check.]==]
function export.get_wikilink_parts(text, allow_bad_target)
	-- TODO: replace `allow_bad_target` with `allow_unsupported`, with support for links to unsupported titles, including escape sequences.
	-- Reject anything but "[[...]]" with no intermediate "[[" or "]]".
	if not match(text, "^()%[%[") then -- Faster than sub(text, 1, 2) ~= "[[".
		return nil, nil
	end
	if find(text, "[[", 3, true) then
		return nil, nil
	end
	-- The first "]]" must also be the last two characters of the input.
	if find(text, "]]", 3, true) ~= #text - 1 then
		return nil, nil
	end
	local target, display
	local bar = find(text, "|", 3, true)
	if bar then
		target = sub(text, 3, bar - 1)
		display = sub(text, bar + 1, -3)
	else
		target = sub(text, 3, -3)
		display = target
	end
	if allow_bad_target then
		return target, display
	end
	local title = new_title(target)
	-- No title object means the target is invalid.
	if title == nil then
		return nil, nil
	end
	-- If the link target starts with "#" then mw.title.new returns a broken
	-- title object, so grab the current title and give it the correct fragment.
	if title.prefixedText == "" then
		local fragment = title.fragment
		if fragment == "" then -- [[#]] isn't valid
			return nil, nil
		end
		title = get_current_title()
		title.fragment = fragment
	end
	return title, display
end
| Line 114: | Line 276: | ||
-- misparsed (wa'a → wa'a → pagename wa&, fragment 39;a). | -- misparsed (wa'a → wa'a → pagename wa&, fragment 39;a). | ||
text = decode_entities(text) | text = decode_entities(text) | ||
local target, fragment = text:match("^( | local target, fragment = text:match("^(.-)#(.+)$") | ||
target = target or text | target = target or text | ||
target = unescape(target, "#") | target = unescape(target, "#") | ||
| Line 124: | Line 286: | ||
function export.get_fragment(text) | function export.get_fragment(text) | ||
-- If there are no embedded links, process input. | -- If there are no embedded links, process input. | ||
local open = find(text, "[[", | local open = find(text, "[[", nil, true) | ||
if not open then | if not open then | ||
return get_fragment(text) | return get_fragment(text) | ||
| Line 131: | Line 293: | ||
if not close then | if not close then | ||
return get_fragment(text) | return get_fragment(text) | ||
-- If there is one, but it's redundant (i.e. encloses everything with no pipe), remove and process. | |||
elseif open == 1 and close == #text - 1 and not find(text, "|", 3, true) then | elseif open == 1 and close == #text - 1 and not find(text, "|", 3, true) then | ||
return get_fragment(sub(text, 3, -3)) | return get_fragment(sub(text, 3, -3)) | ||
| Line 139: | Line 301: | ||
end | end | ||
--[==[ | |||
function export. | Given a link target as passed to `full_link()`, get the actual page that the target refers to. This removes | ||
bold, italics, strip markers and HTML; calls `makeEntryName()` for the language in question; converts targets | ||
beginning with `*` to the Reconstruction namespace; and converts appendix-constructed languages to the Appendix | |||
namespace. Returns up to three values: | |||
# the actual page to link to, or {nil} to not link to anything; | |||
# how the target should be displayed as, if the user didn't explicitly specify any display text; generally the | |||
same as the original target, but minus any anti-asterisk !!; | |||
# the value `true` if the target had a backslash-escaped * in it (FIXME: explain this more clearly). | |||
]==] | |||
function export.get_link_page_with_auto_display(target, lang, sc, plain) | |||
local orig_target = target | |||
if not target then | if not target then | ||
return nil | return nil | ||
end | end | ||
target = remove_formatting(target) | target = remove_formatting(target) | ||
-- | if target:sub(1, 1) == ":" then | ||
-- FIXME, the auto_display (second return value) should probably remove the colon | |||
-- If this is an a link to another namespace or an interwiki link, ensure there's an initial colon and then return what we have (so that it works as a conventional link, and doesn't do anything weird like add the term to a category.) | return target:sub(2), orig_target | ||
end | |||
if ( | |||
local prefix = target:match("^(.-):") | |||
-- Convert any escaped colons | |||
target = target:gsub("\\:", ":") | |||
return | if prefix then | ||
-- If this is a link to another namespace or an interwiki link, ensure there's an initial colon and then | ||
-- return what we have (so that it works as a conventional link, and doesn't do anything weird like add the term | |||
-- to a category.) | |||
prefix = ulower(trim(prefix)) | |||
if prefix ~= "" and ( | |||
load_data("Module:data/namespaces")[prefix] or | |||
load_data("Module:data/interwikis")[prefix] | |||
) then | |||
return target, orig_target | |||
end | end | ||
end | end | ||
-- Check if the term is reconstructed and remove any asterisk. Otherwise, handle the escapes. | -- Check if the term is reconstructed and remove any asterisk. Also check for anti-asterisk (!!). | ||
local reconstructed, escaped | -- Otherwise, handle the escapes. | ||
local reconstructed, escaped, anti_asterisk | |||
if not plain then | if not plain then | ||
target, reconstructed = target:gsub("^%*(.)", "%1") | target, reconstructed = target:gsub("^%*(.)", "%1") | ||
if reconstructed == 0 then | |||
target, anti_asterisk = target:gsub("^!!(.)", "%1") | |||
if anti_asterisk == 1 then | |||
-- Remove !! from original. FIXME! We do it this way because the call to remove_formatting() above | |||
-- may cause non-initial !! to be interpreted as anti-asterisks. We should surely move the | |||
-- remove_formatting() call later. | |||
orig_target = orig_target:gsub("^!!", "") | |||
end | |||
end | |||
end | end | ||
target, escaped = target:gsub("^(\\-)\\%*", "%1*") | target, escaped = target:gsub("^(\\-)\\%*", "%1*") | ||
if not | if reconstructed == 0 and lang:hasType("reconstructed") and not lang:hasType("conlang") then | ||
orig_target = "*" .. target | |||
reconstructed = 1 | |||
end | |||
if not (sc and sc:getCode() ~= "None") then | |||
sc = lang:findBestScript(target) | sc = lang:findBestScript(target) | ||
end | end | ||
| Line 180: | Line 375: | ||
-- Get the entry name for the language. | -- Get the entry name for the language. | ||
target = lang:makeEntryName(target, sc) | target = lang:makeEntryName(target, sc, reconstructed == 1 or lang:hasType("appendix-constructed")) | ||
-- If the link contains unexpanded template parameters, then don't create a link. | -- If the link contains unexpanded template parameters, then don't create a link. | ||
if target: | if target:match("{{{.-}}}") then | ||
-- FIXME: Should we return the original target as the default display value (second return value)? | |||
return nil | return nil | ||
end | end | ||
-- Link to appendix for reconstructed terms and terms in appendix-only languages. Plain links interpret * | |||
-- literally, however. | |||
if not lang:hasType("conlang") then | |||
if lang:hasType("appendix-constructed") then | |||
target = "wikt:Appendix:" .. lang:getFullName() .. "/" .. target | |||
elseif reconstructed == 1 then -- asterisk found | |||
-- Link to appendix for reconstructed terms and terms in appendix-only languages. Plain links interpret * literally, however. | if lang:getFullCode() == "und" then | ||
-- Return the original target as default display value. If we don't do this, we wrongly get | |||
-- [Term?] displayed instead. | |||
return nil, orig_target | |||
target = " | |||
if lang: | |||
target | |||
end | end | ||
target = "wikt:Reconstruction:" .. lang:getFullName() .. "/" .. target | |||
elseif anti_asterisk ~= 1 and (lang:hasType("reconstructed") or lang:getFamilyCode() == "qfa-sub") then | |||
--error("The specified language " .. lang:getCanonicalName() | |||
--.. " is unattested, while the given term does not begin with '*' to indicate that it is reconstructed.") | |||
orig_target = "*" .. target | |||
end | end | ||
end | end | ||
target = (lang:hasType("conlang") and "Contionary:" or "wikt:") .. target | target = (lang:hasType("conlang") and "Contionary:" or "wikt:") .. target | ||
return target, escaped > 0 | return target, orig_target, escaped > 0 | ||
end | |||
-- Wrapper around `export.get_link_page_with_auto_display()` that drops the
-- automatic display text: returns that function's first result (the page to
-- link to) and its third result (the escaped-asterisk flag).
function export.get_link_page(target, lang, sc, plain)
	local page, _, was_escaped = export.get_link_page_with_auto_display(target, lang, sc, plain)
	return page, was_escaped
end
-- Make a link from a given link's parts | -- Make a link from a given link's parts | ||
local function make_link(link, lang, sc, id, isolated | local function make_link(link, lang, sc, id, isolated, cats, no_alt_ast, plain) | ||
-- Convert percent encoding to plaintext. | -- Convert percent encoding to plaintext. | ||
link.target = decode_uri(link.target, "PATH") | link.target = link.target and decode_uri(link.target, "PATH") | ||
link.fragment = link.fragment and decode_uri(link.fragment, "PATH") | link.fragment = link.fragment and decode_uri(link.fragment, "PATH") | ||
-- Find fragments (if one isn't already set). | -- Find fragments (if one isn't already set). | ||
-- Prevents {{l|en|word#Etymology 2|word}} from linking to [[word#Etymology 2#English]]. | -- Prevents {{l|en|word#Etymology 2|word}} from linking to [[word#Etymology 2#English]]. | ||
| Line 240: | Line 425: | ||
link.target, link.fragment = get_fragment(link.target) | link.target, link.fragment = get_fragment(link.target) | ||
end | end | ||
-- Process the target | |||
local auto_display, escaped | |||
link.target, auto_display, escaped = export.get_link_page_with_auto_display(link.target, lang, sc, plain) | |||
-- Create a default display form. | -- Create a default display form. | ||
-- If the target is "" then it's a link like [[#English]], which refers to the current page. | |||
if auto_display == "" then | |||
-- | auto_display = (m_headword_data or get_headword_data()).pagename | ||
end | |||
-- If the display is the target and the reconstruction * has been escaped, remove the escaping backslash. | -- If the display is the target and the reconstruction * has been escaped, remove the escaping backslash. | ||
| Line 252: | Line 440: | ||
auto_display = auto_display:gsub("\\([^\\]*%*)", "%1", 1) | auto_display = auto_display:gsub("\\([^\\]*%*)", "%1", 1) | ||
end | end | ||
-- Process the display form. | -- Process the display form. | ||
if link.display then | if link.display then | ||
| Line 280: | Line 468: | ||
link.display = lang:makeDisplayText(auto_display, sc) | link.display = lang:makeDisplayText(auto_display, sc) | ||
end | end | ||
if not link.target then | if not link.target then | ||
return link.display | return link.display | ||
end | end | ||
-- If the target is the same as the current page, there is no sense id | -- If the target is the same as the current page, there is no sense id | ||
-- and either the language code is "und" or the current L2 is the current | -- and either the language code is "und" or the current L2 is the current | ||
-- language then return a "self-link" like the software does. | -- language then return a "self-link" like the software does. | ||
if link.target == | if link.target == get_current_title().prefixedText then | ||
local fragment, current_L2 = link.fragment, | local fragment, current_L2 = link.fragment, get_current_L2() | ||
if ( | if ( | ||
fragment and fragment == current_L2 or | |||
not (id or fragment) and (lang:getFullCode() == "und" or lang:getFullName() == current_L2) | |||
) then | |||
return tostring(mw.html.create("strong") | return tostring(mw.html.create("strong") | ||
:addClass("selflink") | :addClass("selflink") | ||
| Line 316: | Line 504: | ||
if not link.fragment then | if not link.fragment then | ||
if id then | if id then | ||
link.fragment = lang:getFullCode() == "und" and anchor_encode(id) or | link.fragment = lang:getFullCode() == "und" and anchor_encode(id) or language_anchor(lang, id) | ||
elseif lang:getFullCode() ~= "und" and not (link.target: | elseif lang:getFullCode() ~= "und" and not (link.target:match("^Appendix:") or link.target:match("^Reconstruction:")) then | ||
link.fragment = anchor_encode(lang:getFullName()) | link.fragment = anchor_encode(lang:getFullName()) | ||
end | end | ||
end | end | ||
end | end | ||
-- Put inward-facing square brackets around a link to isolated spacing character(s). | -- Put inward-facing square brackets around a link to isolated spacing character(s). | ||
if isolated and #link.display > 0 and not umatch(decode_entities(link.display), "%S") then | if isolated and #link.display > 0 and not umatch(decode_entities(link.display), "%S") then | ||
| Line 331: | Line 519: | ||
return m1 .. encode_entities(m2, "#%&+/:<=>@[\\]_{|}") | return m1 .. encode_entities(m2, "#%&+/:<=>@[\\]_{|}") | ||
end) | end) | ||
link.fragment = link.fragment and encode_entities(remove_formatting(link.fragment), "#%&+/:<=>@[\\]_{|}") | link.fragment = link.fragment and encode_entities(remove_formatting(link.fragment), "#%&+/:<=>@[\\]_{|}") | ||
return "[[" .. | |||
link.target:gsub("^[^:]", ":%0") .. (link.fragment and "#" .. link.fragment or "") .. "|" .. link.display .. "]]" | |||
end | end | ||
| Line 341: | Line 528: | ||
-- Split a link into its parts | -- Split a link into its parts | ||
local function parse_link(linktext) | local function parse_link(linktext) | ||
local link = {target = linktext} | local link = { target = linktext } | ||
local target = link.target | local target = link.target | ||
| Line 352: | Line 539: | ||
-- There's no point in processing these, as they aren't real links. | -- There's no point in processing these, as they aren't real links. | ||
local target_lower = link.target:lower() | local target_lower = link.target:lower() | ||
for _, false_positive in ipairs({"category", "cat", "file", "image"}) do | for _, false_positive in ipairs({ "category", "cat", "file", "image" }) do | ||
if target_lower:match("^" .. false_positive .. ":") then return nil end | if target_lower:match("^" .. false_positive .. ":") then | ||
return nil | |||
end | |||
end | end | ||
| Line 365: | Line 554: | ||
return link | return link | ||
end | |||
-- Adds tracking categories to `cats` for parameters that are ignored when the
-- term contains embedded wikilinks: one category if `alt` was given and one if
-- `id` was given, each prefixed with the language's full name. Does nothing
-- when `cats` is not supplied.
local function check_params_ignored_when_embedded(alt, lang, id, cats)
	if not cats then
		return
	end
	if alt then
		insert(cats, lang:getFullName() .. " links with ignored alt parameters")
	end
	if id then
		insert(cats, lang:getFullName() .. " links with ignored id parameters")
	end
end
-- Find embedded links and ensure they link to the correct section. | -- Find embedded links and ensure they link to the correct section. | ||
local function process_embedded_links(text, | local function process_embedded_links(text, alt, lang, sc, id, cats, no_alt_ast, plain) | ||
-- Process the non-linked text. | -- Process the non-linked text. | ||
text = | text = lang:makeDisplayText(text, sc, true) | ||
-- If the text begins with * and another character, then act as if each link begins with *. However, don't do this if the * is contained within a link at the start. E.g. `|*[[foo]]` would set all_reconstructed to true, while `|[[*foo]]` would not. | -- If the text begins with * and another character, then act as if each link begins with *. However, don't do this if the * is contained within a link at the start. E.g. `|*[[foo]]` would set all_reconstructed to true, while `|[[*foo]]` would not. | ||
| Line 383: | Line 585: | ||
end | end | ||
check_params_ignored_when_embedded(alt, lang, id, cats) | |||
local function process_link(space1, linktext, space2) | local function process_link(space1, linktext, space2) | ||
local capture = "[[" .. linktext .. "]]" | local capture = "[[" .. linktext .. "]]" | ||
local link = parse_link(linktext) | local link = parse_link(linktext) | ||
--Return unprocessed false positives untouched (e.g. categories). | -- Return unprocessed false positives untouched (e.g. categories). | ||
if not link then return capture end | if not link then | ||
return capture | |||
end | |||
if all_reconstructed | if all_reconstructed then | ||
if link.target:find("^!!") then | |||
-- Check for anti-asterisk !! at the beginning of a target, indicating that a reconstructed term | |||
-- wants a part of the term to link to a non-reconstructed term, e.g. Old English | |||
-- {{ang-noun|m|head=*[[!!Crist|Cristes]] [[!!mæsseǣfen]]}}. | |||
link.target = link.target:sub(3) | |||
-- Also remove !! from the display, which may have been copied from the target (as in mæsseǣfen in | |||
-- the example above). | |||
link.display = link.display:gsub("^!!", "") | |||
elseif not link.target:match("^%*") then | |||
link.target = "*" .. link.target | |||
end | |||
end | end | ||
linktext = make_link(link, | linktext = make_link(link, lang, sc, id, false, nil, no_alt_ast, plain) | ||
:gsub("^%[%[", "\3") | :gsub("^%[%[", "\3") | ||
:gsub("%]%]$", "\4") | :gsub("%]%]$", "\4") | ||
| Line 423: | Line 622: | ||
:gsub("%]%]", "\2") | :gsub("%]%]", "\2") | ||
-- If the script uses ^ to capitalize transliterations, make sure that any carets preceding links are on the inside, so that they get processed with the following text. | -- If the script uses ^ to capitalize transliterations, make sure that any carets preceding links are on the inside, so that they get processed with the following text. | ||
if text: | if ( | ||
text:find("^", nil, true) and | |||
not sc:hasCapitalization() and | |||
sc:isTransliterated() | |||
) then | |||
text = escape(text, "^") | text = escape(text, "^") | ||
:gsub("%^\1", "\1%^") | :gsub("%^\1", "\1%^") | ||
| Line 437: | Line 640: | ||
return (text | return (text | ||
:gsub("[\1\3]", "[[") | :gsub("[\1\3]", "[[") | ||
:gsub("[\2\4]", "]]")) | :gsub("[\2\4]", "]]") | ||
) | |||
end | end | ||
-- Shared implementation behind `language_link` and `plain_link`.
--   term        link target; "" means the current page, and a value that is
--               itself a single wikilink is unpacked into target/display
--   fragment    fragment passed through to `make_link`
--   alt         display text; trimmed, and treated as absent if empty
--   lang        language object; nil selects "und" and turns on plain mode
--   sc          script object; found via `lang:findBestScript` when absent
--   id, cats, no_alt_ast   passed through to the link builders
--   srwc        optional callback `srwc(term, alt)`; a truthy result
--               suppresses the "links with redundant wikilinks" category
-- Returns the formatted link, the input unchanged for File:/Category:
-- wikilinks, or nil when there is neither a term nor display text.
local function simple_link(term, fragment, alt, lang, sc, id, cats, no_alt_ast, srwc)
	local plain
	if lang == nil then
		lang, plain = get_lang("und"), true
	end

	-- Get the link target and display text. If the term is the empty string, treat the input as a link to the current page.
	if term == "" then
		term = get_current_title().prefixedText
	elseif term then
		local inner_target, inner_display = export.get_wikilink_parts(term, true)
		if inner_target then
			check_params_ignored_when_embedded(alt, lang, id, cats)
			if inner_target == "" then
				-- [[|foo]] links are treated as plaintext "[[|foo]]".
				-- FIXME: Pipes should be handled via a proper escape sequence, as they can occur in unsupported titles.
				term, alt = nil, term
			else
				local title = new_title(inner_target)
				local ns = title and title.namespace
				-- File: and Category: links should be returned as-is.
				if ns == 6 or ns == 14 then
					return term
				end
				term, alt = inner_target, inner_display
				if cats and not (srwc and srwc(term, alt)) then
					insert(cats, lang:getFullName() .. " links with redundant wikilinks")
				end
			end
		end
	end

	if alt then
		alt = selective_trim(alt)
		if alt == "" then
			alt = nil
		end
	end

	-- If there's nothing to process, return nil.
	if not term and not alt then
		return nil
	end

	-- If there is no script, get one.
	if not sc then
		sc = lang:findBestScript(alt or term)
	end

	-- Embedded wikilinks need to be processed individually.
	if term then
		local open = find(term, "[[", nil, true)
		if open and find(term, "]]", open + 2, true) then
			return process_embedded_links(term, alt, lang, sc, id, cats, no_alt_ast, plain)
		end
		term = selective_trim(term)
	end

	-- If not, make a link using the parameters.
	local link = {
		target = term,
		display = alt,
		fragment = fragment
	}
	return make_link(link, lang, sc, id, true, cats, no_alt_ast, plain)
end
| Line 498: | Line 739: | ||
-- Builds a language-aware link from a table of named arguments. Raises an
-- error if `data` is not a table. Reads `term`, `fragment`, `alt`, `lang`,
-- `sc`, `id`, `cats`, `no_alt_ast` and `suppress_redundant_wikilink_cat` from
-- `data` and hands them to `simple_link`. When `cats` is supplied and the
-- language code is "und", "Undetermined language links" is appended to it.
function export.language_link(data)
	if type(data) ~= "table" then
		error(
			"The first argument to the function language_link must be a table. See Module:links/documentation for more information.")
	end

	local lang = data.lang
	local cats = data.cats

	-- Categorize links to "und".
	if cats and lang:getCode() == "und" then
		insert(cats, "Undetermined language links")
	end

	return simple_link(
		data.term,
		data.fragment,
		data.alt,
		lang,
		data.sc,
		data.id,
		cats,
		data.no_alt_ast,
		data.suppress_redundant_wikilink_cat
	)
end
-- Builds a link without a language, from a table of named arguments. Raises an
-- error if `data` is not a table. Passes nil as the language to `simple_link`,
-- which makes it fall back to "und" in plain mode. All other fields (`term`,
-- `fragment`, `alt`, `sc`, `id`, `cats`, `no_alt_ast`,
-- `suppress_redundant_wikilink_cat`) are forwarded unchanged.
function export.plain_link(data)
	if type(data) ~= "table" then
		error(
			"The first argument to the function plain_link must be a table. See Module:links/documentation for more information.")
	end

	local no_lang = nil
	return simple_link(
		data.term,
		data.fragment,
		data.alt,
		no_lang,
		data.sc,
		data.id,
		data.cats,
		data.no_alt_ast,
		data.suppress_redundant_wikilink_cat
	)
end
| Line 567: | Line 784: | ||
--[==[
Processes the wikilinks embedded in `data.term`, or returns display text if it has none.

`data` must be a table; any other type raises an error. The fields read from it are `term`, `lang`, `sc`, `alt`,
`id`, `cats` and `no_alt_ast`.

* If `data.sc` is not given, the script is obtained from `lang:findBestScript(term)`.
* If `term` contains a literal `[[` followed later by a literal `]]`, the result of `process_embedded_links` is
  returned.
* Otherwise `term` is passed through `selective_trim`, every `%` is rewritten as `%25`, and the result of
  `lang:makeDisplayText(term, sc, true)` is returned.]==]
function export.embedded_language_links(data)
	if type(data) ~= "table" then
		error("The first argument to the function embedded_language_links must be a table. See Module:links/documentation for more information.")
	end

	local term, lang, sc = data.term, data.lang, data.sc

	-- If we don't have a script, get one.
	if not sc then
		sc = lang:findBestScript(term)
	end

	-- Do we have embedded wikilinks? If so, they need to be processed individually.
	-- Both searches are plain-text (4th argument true), so the brackets are not treated as pattern magic.
	local open = find(term, "[[", nil, true)
	if open and find(term, "]]", open + 2, true) then
		return process_embedded_links(term, data.alt, lang, sc, data.id, data.cats, data.no_alt_ast)
	end

	-- If not, return the display text.
	term = selective_trim(term)
	-- FIXME: Double-escape any percent-signs, because we don't want to treat non-linked text as having percent-encoded characters. This is a hack: percent-decoding should come out of [[Module:languages]] and only dealt with in this module, as it's specific to links.
	-- Assigning to a single variable discards gsub's second return value (the substitution count).
	term = term:gsub("%%", "%%25")
	return lang:makeDisplayText(term, sc, true)
end
| Line 596: | Line 814: | ||
tag = { '<span class="mention-gloss-double-quote">“</span><span class="mention-gloss">', | tag = { '<span class="mention-gloss-double-quote">“</span><span class="mention-gloss">', | ||
'</span><span class="mention-gloss-double-quote">”</span>' } | '</span><span class="mention-gloss-double-quote">”</span>' } | ||
elseif item_type == "tr" then | elseif item_type == "tr" then | ||
if face == "term" then | if face == "term" then | ||
| Line 612: | Line 826: | ||
elseif item_type == "pos" then | elseif item_type == "pos" then | ||
tag = { '<span class="ann-pos">', '</span>' } | tag = { '<span class="ann-pos">', '</span>' } | ||
elseif item_type == "non-gloss" then | |||
tag = { '<span class="ann-non-gloss">', '</span>' } | |||
elseif item_type == "annotations" then | elseif item_type == "annotations" then | ||
tag = { '<span class="mention-gloss-paren annotation-paren">(</span>', | tag = { '<span class="mention-gloss-paren annotation-paren">(</span>', | ||
'<span class="mention-gloss-paren annotation-paren">)</span>' } | '<span class="mention-gloss-paren annotation-paren">)</span>' } | ||
elseif item_type == "infl" then | |||
tag = { '<span class="ann-infl">', '</span>' } | |||
end | end | ||
| Line 624: | Line 842: | ||
end | end | ||
--[==[Formats the annotations that are displayed with a link created by {{code|lua|full_link}}. Annotations are the extra bits of information that are displayed following the linked term, and include things such as gender, transliteration, gloss and so on. | local pos_tags | ||
--[==[Formats the annotations that are displayed with a link created by {{code|lua|full_link}}. Annotations are the extra bits of information that are displayed following the linked term, and include things such as gender, transliteration, gloss and so on. | |||
* The first argument is a table possessing some or all of the following keys: | * The first argument is a table possessing some or all of the following keys: | ||
*:; <code class="n">genders</code> | *:; <code class="n">genders</code> | ||
| Line 633: | Line 853: | ||
*:: Gloss that translates the term in the link, or gives some other descriptive information. | *:: Gloss that translates the term in the link, or gives some other descriptive information. | ||
*:; <code class="n">pos</code> | *:; <code class="n">pos</code> | ||
*:: Part of speech of the linked term. If the given argument matches one of the | *:: Part of speech of the linked term. If the given argument matches one of the aliases in `pos_aliases` in [[Module:headword/data]], or consists of a part of speech or alias followed by `f` (for a non-lemma form), expand it appropriately. Otherwise, just show the given text as it is. | ||
*:; <code class="n">ng</code> | |||
*:: Arbitrary non-gloss descriptive text for the link. This should be used in preference to putting descriptive text in `gloss` or `pos`. | |||
*:; <code class="n">lit</code> | *:; <code class="n">lit</code> | ||
*:: Literal meaning of the term, if the usual meaning is figurative or idiomatic. | *:: Literal meaning of the term, if the usual meaning is figurative or idiomatic. | ||
*:; <code class="n">infl</code> | |||
*:: Table containing a list of grammar tags in the style of [[Module:form of]] `tagged_inflections`. | |||
*:Any of the above values can be omitted from the <code class="n">info</code> argument. If a completely empty table is given (with no annotations at all), then an empty string is returned. | *:Any of the above values can be omitted from the <code class="n">info</code> argument. If a completely empty table is given (with no annotations at all), then an empty string is returned. | ||
* The second argument is a string. Valid values are listed in [[Module:script utilities/data]] "data.translit" table.]==] | * The second argument is a string. Valid values are listed in [[Module:script utilities/data]] "data.translit" table.]==] | ||
| Line 652: | Line 876: | ||
if data.genders and #data.genders > 0 then | if data.genders and #data.genders > 0 then | ||
local | local genders, gender_cats = format_genders(data.genders, data.lang) | ||
insert(output, " " .. | insert(output, " " .. genders) | ||
if gender_cats then | |||
local cats = data.cats | |||
if cats then | |||
extend(cats, gender_cats) | |||
end | |||
end | |||
end | end | ||
| Line 668: | Line 898: | ||
if data.tr[1] and data.ts[1] then | if data.tr[1] and data.ts[1] then | ||
insert(annotations, | insert(annotations, tag_translit(data.tr[1], data.lang, kind) .. " " .. export.mark(data.ts[1], "ts")) | ||
elseif data.ts[1] then | elseif data.ts[1] then | ||
insert(annotations, export.mark(data.ts[1], "ts")) | insert(annotations, export.mark(data.ts[1], "ts")) | ||
else | else | ||
insert(annotations, | insert(annotations, tag_translit(data.tr[1], data.lang, kind)) | ||
end | end | ||
end | end | ||
| Line 687: | Line 914: | ||
if data.pos then | if data.pos then | ||
-- debug category for pos= containing transcriptions | -- debug category for pos= containing transcriptions | ||
if data.pos: | if data.pos:match("/[^><]-/") then | ||
data.pos = data.pos .. "[[Category:links likely containing transcriptions in pos]]" | data.pos = data.pos .. "[[Category:links likely containing transcriptions in pos]]" | ||
end | end | ||
pos_tags = pos_tags or | -- Canonicalize part of speech aliases as well as non-lemma aliases like 'nf' or 'nounf' for "noun form". | ||
insert(annotations, export.mark( | pos_tags = pos_tags or (m_headword_data or get_headword_data()).pos_aliases | ||
local pos = pos_tags[data.pos] | |||
if not pos and data.pos:find("f$") then | |||
local pos_form = data.pos:sub(1, -2) | |||
-- We only expand something ending in 'f' if the result is a recognized non-lemma POS. | |||
pos_form = (pos_tags[pos_form] or pos_form) .. " form" | |||
if (m_headword_data or get_headword_data()).nonlemmas[pos_form .. "s"] then | |||
pos = pos_form | |||
end | |||
end | |||
insert(annotations, export.mark(pos or data.pos, "pos")) | |||
end | |||
-- Inflection data | |||
if data.infl then | |||
local m_form_of = require(form_of_module) | |||
-- Split tag sets manually, since tagged_inflections creates a numbered list, and we do not want that. | |||
local infl_outputs = {} | |||
local tag_sets = m_form_of.split_tag_set(data.infl) | |||
for _, tag_set in ipairs(tag_sets) do | |||
table.insert(infl_outputs, | |||
m_form_of.tagged_inflections({ tags = tag_set, lang = data.lang, nocat = true, nolink = true, nowrap = true })) | |||
end | |||
insert(annotations, export.mark(table.concat(infl_outputs, "; "), "infl")) | |||
end | |||
-- Non-gloss text | |||
if data.ng then | |||
insert(annotations, export.mark(data.ng, "non-gloss")) | |||
end | end | ||
| Line 698: | Line 953: | ||
if data.lit then | if data.lit then | ||
insert(annotations, "literally " .. export.mark(data.lit, "gloss")) | insert(annotations, "literally " .. export.mark(data.lit, "gloss")) | ||
end | |||
-- Provide a hook to insert additional annotations such as nested inflections. | |||
if data.postprocess_annotations then | |||
data.postprocess_annotations { | |||
data = data, | |||
annotations = annotations | |||
} | |||
end | end | ||
| Line 705: | Line 968: | ||
return concat(output) | return concat(output) | ||
end | |||
-- Encode certain characters to avoid various delimiter-related issues at various stages. We need to encode < and >
-- because they end up forming part of CSS class names inside of <span ...> and will interfere with finding the end
-- of the HTML tag. I first tried converting them to URL encoding, i.e. %3C and %3E; they then appear in the URL as
-- %253C and %253E, which get mapped back to %3C and %3E when passed to [[Module:accel]]. But mapping them to &lt;
-- and &gt; somehow works magically without any further work; they appear in the URL as < and >, and get passed to
-- [[Module:accel]] as < and >. I have no idea who along the chain of calls is doing the encoding and decoding. If
-- someone knows, please modify this comment appropriately!
-- Cache for the character substitution table; stays nil until get_accel_char_map() has been called once.
local accel_char_map

-- Builds the character substitution table, stores it in `accel_char_map` and returns it.
-- "<" and ">" map to the HTML entities "&lt;" and "&gt;" so that no raw angle bracket reaches the CSS class
-- attribute; mapping them to themselves would be a no-op.
-- `u` is defined elsewhere in this module; presumably it converts a code point to a character (TODO confirm).
local function get_accel_char_map()
	accel_char_map = {
		["%"] = ".",
		[" "] = "_",
		["_"] = u(0xFFF0),
		["<"] = "&lt;",
		[">"] = "&gt;",
	}
	return accel_char_map
end
-- Replaces every "%", " ", "<", ">" and "_" in `param` using the lookup table held in `accel_char_map`
-- (built on first use by get_accel_char_map()). Returns the encoded string only; the outer parentheses drop
-- gsub's substitution count.
-- The percent sign is written as "%%" inside the set: a single "%" would act as an escape for the following
-- space, so a literal "%" would never be matched and its table entry would never be applied.
local function encode_accel_param_chars(param)
	return (param:gsub("[%% <>_]", accel_char_map or get_accel_char_map()))
end
-- Encodes one acceleration parameter as a string of the form `prefix .. encoded_value`.
--   prefix: string prepended to the encoded value.
--   param:  nil/false, a string, or a table indexed by positive numbers.
-- Returns "" when `param` is nil or false. A table is first flattened into a single string: positions
-- 1..(largest numeric key) are joined with "*~!", and missing positions become "". The resulting string is then
-- passed through encode_accel_param_chars.
local function encode_accel_param(prefix, param)
	if not param then
		return ""
	end
	if type(param) == "table" then
		local filled_params = {}
		-- There may be gaps in the sequence, especially for translit params, so `#param` cannot be trusted;
		-- find the largest numeric key by scanning every key instead.
		local maxindex = 0
		for k in pairs(param) do
			if type(k) == "number" and k > maxindex then
				maxindex = k
			end
		end
		for i = 1, maxindex do
			filled_params[i] = param[i] or ""
		end
		-- [[Module:accel]] splits these up again.
		param = concat(filled_params, "*~!")
	end
	-- This is decoded again by [[WT:ACCEL]].
	return prefix .. encode_accel_param_chars(param)
end
-- Appends `item` to `list` unless `item` is the empty string, in which case `list` is left untouched.
local function insert_if_not_blank(list, item)
	if item == "" then
		return
	end
	insert(list, item)
end
-- Builds a space-separated string of class names for a link.
--   lang:   language object; only lang:getFullCode() is called, and only when `accel` is given.
--   tr:     transliteration used when `accel.translit` is absent; the value "-" is treated as none.
--   accel:  optional table; the fields read are form, gender, pos, translit, target, lemma, lemma_translit
--           and no_store.
--   nowrap: optional class name appended last, as given.
-- Returns "" when neither `accel` nor `nowrap` is given.
local function get_class(lang, tr, accel, nowrap)
	if not accel and not nowrap then
		return ""
	end
	local classes = {}
	if accel then
		insert(classes, "form-of lang-" .. lang:getFullCode())
		local form = accel.form
		if form then
			insert(classes, encode_accel_param_chars(form) .. "-form-of")
		end
		-- encode_accel_param returns "" for an absent value, so absent fields add no class.
		insert_if_not_blank(classes, encode_accel_param("gender-", accel.gender))
		insert_if_not_blank(classes, encode_accel_param("pos-", accel.pos))
		insert_if_not_blank(classes, encode_accel_param("transliteration-", accel.translit or (tr ~= "-" and tr or nil)))
		insert_if_not_blank(classes, encode_accel_param("target-", accel.target))
		insert_if_not_blank(classes, encode_accel_param("origin-", accel.lemma))
		insert_if_not_blank(classes, encode_accel_param("origin_transliteration-", accel.lemma_translit))
		if accel.no_store then
			insert(classes, "form-of-nostore")
		end
	end
	if nowrap then
		insert(classes, nowrap)
	end
	return concat(classes, " ")
end
| Line 724: | Line 1,067: | ||
local q = data.q | local q = data.q | ||
if type(q) == "string" then | if type(q) == "string" then | ||
q = {q} | q = { q } | ||
end | end | ||
local qq = data.qq | local qq = data.qq | ||
if type(qq) == "string" then | if type(qq) == "string" then | ||
qq = {qq} | qq = { qq } | ||
end | end | ||
if q and q[1] or qq and qq[1] or data.a and data.a[1] or data.aa and data.aa[1] or data.l and data.l[1] or | if q and q[1] or qq and qq[1] or data.a and data.a[1] or data.aa and data.aa[1] or data.l and data.l[1] or | ||
data.ll and data.ll[1] or data.refs and data.refs[1] then | data.ll and data.ll[1] or data.refs and data.refs[1] then | ||
formatted = | formatted = format_qualifiers { | ||
lang = data.lang, | lang = data.lang, | ||
text = formatted, | text = formatted, | ||
| Line 749: | Line 1,092: | ||
--[==[Creates a full link, with annotations (see | --[==[ | ||
The first argument, | Creates a full link, with annotations (see `[[#format_link_annotations|format_link_annotations]]`), in the style of {{tl|l}} or {{tl|m}}. | ||
The first argument, `data`, must be a table. It contains the various elements that can be supplied as parameters to {{tl|l}} or {{tl|m}}: | |||
{ { | { { | ||
term = entry_to_link_to, | term = entry_to_link_to, | ||
| Line 758: | Line 1,102: | ||
track_sc = boolean, | track_sc = boolean, | ||
no_nonstandard_sc_cat = boolean, | no_nonstandard_sc_cat = boolean, | ||
fragment = link_fragment | fragment = link_fragment, | ||
id = sense_id, | id = sense_id, | ||
genders = { "gender1", "gender2", ... }, | genders = { "gender1", "gender2", ... }, | ||
tr = transliteration, | tr = transliteration, | ||
respect_link_tr = boolean, | |||
ts = transcription, | ts = transcription, | ||
gloss = gloss, | gloss = gloss, | ||
pos = part_of_speech_tag, | pos = part_of_speech_tag, | ||
ng = non-gloss text, | |||
lit = literal_translation, | lit = literal_translation, | ||
infl = { "form_of_grammar_tag1", "form_of_grammar_tag2", ... }, | |||
no_alt_ast = boolean, | no_alt_ast = boolean, | ||
accel = {accelerated_creation_tags}, | accel = {accelerated_creation_tags}, | ||
interwiki = interwiki, | interwiki = interwiki, | ||
pretext = "text_at_beginning" or nil, | |||
posttext = "text_at_end" or nil, | |||
q = { "left_qualifier1", "left_qualifier2", ...} or "left_qualifier", | q = { "left_qualifier1", "left_qualifier2", ...} or "left_qualifier", | ||
qq = { "right_qualifier1", "right_qualifier2", ...} or "right_qualifier", | qq = { "right_qualifier1", "right_qualifier2", ...} or "right_qualifier", | ||
l = { "left_label1", "left_label2", ...}, | |||
ll = { "right_label1", "right_label2", ...}, | |||
a = { "left_accent_qualifier1", "left_accent_qualifier2", ...}, | |||
aa = { "right_accent_qualifier1", "right_accent_qualifier2", ...}, | |||
refs = { "formatted_ref1", "formatted_ref2", ...} or { {text = "text", name = "name", group = "group"}, ... }, | refs = { "formatted_ref1", "formatted_ref2", ...} or { {text = "text", name = "name", group = "group"}, ... }, | ||
show_qualifiers = boolean, | |||
} } | } } | ||
Any one of the items in the | Any one of the items in the `data` table may be {nil}, but an error will be shown if neither `term` nor `alt` nor `tr` | ||
Thus, calling { | is present. Thus, calling {full_link{ term = term, lang = lang, sc = sc }}, where `term` is the page to link to (which | ||
may have diacritics that will be stripped and/or embedded bracketed links) and `lang` is a | |||
[[Module:languages#Language objects|language object]] from [[Module:languages]], will give a plain link similar to the | |||
one produced by the template {{tl|l}}, and calling {full_link( { term = term, lang = lang, sc = sc }, "term" )} will | |||
give a link similar to the one produced by the template {{tl|m}}. | |||
The function will: | The function will: | ||
* Try to determine the script, based on the characters found in the term or alt argument, if the script was not given. If a script is given and | * Try to determine the script, based on the characters found in the `term` or `alt` argument, if the script was not | ||
* Call | given. If a script is given and `track_sc` is {true}, it will check whether the input script is the same as the one | ||
* Call | which would have been automatically generated and add the category [[:Category:LANG terms with redundant script codes]] | ||
* Generate a transliteration, based on the alt or term arguments, if the script is not Latin | if yes, or [[:Category:LANG terms with non-redundant manual script codes]] if no. This should be used when the input | ||
* Add the annotations (transliteration, gender, gloss etc.) after the link. | script object is directly determined by a template's `sc` parameter. | ||
* If | * Call `[[#language_link|language_link]]` on the `term` or `alt` forms, to remove diacritics in the page name, process | ||
* If | any embedded wikilinks and create links to Reconstruction or Appendix pages when necessary. | ||
* Call `[[Module:script utilities#tag_text]]` to add the appropriate language and script tags to the term and | |||
italicize terms written in the Latin script if necessary. Accelerated creation tags, as used by [[WT:ACCEL]], are | |||
included. | |||
* Generate a transliteration, based on the `alt` or `term` arguments, if the script is not Latin, no transliteration was | |||
provided in `tr` and the combination of the term's language and script support automatic transliteration. The | |||
transliteration itself will be linked if both `.respect_link_tr` is specified and the language of the term has the | |||
`link_tr` property set for the script of the term; but not otherwise. | |||
* Add the annotations (transliteration, gender, gloss, etc.) after the link. | |||
* If `no_alt_ast` is specified, then the `alt` text does not need to contain an asterisk if the language is | |||
reconstructed. This should only be used by modules which really need to allow links to reconstructions that don't | |||
display asterisks (e.g. number boxes). | |||
* If `pretext` or `posttext` is specified, this is text to (respectively) prepend or append to the output, directly | |||
before processing qualifiers, labels and references. This can be used to add arbitrary extra text inside of the | |||
qualifiers, labels and references. | |||
* If `show_qualifiers` is specified or the `show_qualifiers` argument is given, then left and right qualifiers, accent | |||
qualifiers, labels and references will be displayed, otherwise they will be ignored. (This is because a fair amount of | |||
code stores qualifiers, labels and/or references in these fields and displays them itself, rather than expecting | |||
{full_link()} to display them.)]==] | |||
function export.full_link(data, face, allow_self_link, show_qualifiers) | function export.full_link(data, face, allow_self_link, show_qualifiers) | ||
if type(data) ~= "table" then | |||
error("The first argument to the function full_link must be a table. " | |||
.. "See Module:links/documentation for more information.") | |||
end | |||
-- Prevent data from being destructively modified. | -- Prevent data from being destructively modified. | ||
local data = shallow_copy(data) | local data = shallow_copy(data) | ||
if | -- FIXME: this shouldn't be added to `data`, as that means the input table needs to be cloned. | ||
data.cats = {} | |||
-- Categorize links to "und". | |||
local lang, cats = data.lang, data.cats | |||
if cats and lang:getCode() == "und" then | |||
insert(cats, "Undetermined language links") | |||
end | end | ||
local terms = {true} | local terms = { true } | ||
-- Generate multiple forms if applicable. | -- Generate multiple forms if applicable. | ||
for _, param in ipairs{"term", "alt"} do | for _, param in ipairs { "term", "alt" } do | ||
if type(data[param]) == "string" and data[param]:find("//") then | if type(data[param]) == "string" and data[param]:find("//", nil, true) then | ||
data[param] = export.split_on_slashes(data[param]) | data[param] = export.split_on_slashes(data[param]) | ||
elseif type(data[param]) == "string" and not (type(data.term) == "string" and data.term:find("//")) then | elseif type(data[param]) == "string" and not (type(data.term) == "string" and data.term:find("//", nil, true)) then | ||
data[param] = | if not data.no_generate_forms then | ||
data[param] = lang:generateForms(data[param]) | |||
else | |||
data[param] = { data[param] } | |||
end | |||
else | else | ||
data[param] = {} | data[param] = {} | ||
| Line 805: | Line 1,195: | ||
end | end | ||
for _, param in ipairs{"sc", "tr", "ts"} do | for _, param in ipairs { "sc", "tr", "ts" } do | ||
data[param] = {data[param]} | data[param] = { data[param] } | ||
end | end | ||
for _, param in ipairs{"term", "alt", "sc", "tr", "ts"} do | for _, param in ipairs { "term", "alt", "sc", "tr", "ts" } do | ||
for i in pairs(data[param]) do | for i in pairs(data[param]) do | ||
terms[i] = true | terms[i] = true | ||
end | end | ||
end | end | ||
-- Create the link | -- Create the link | ||
local output = {} | local output = {} | ||
data. | local id, no_alt_ast, srwc, accel, nevercalltr = data.id, data.no_alt_ast, data.suppress_redundant_wikilink_cat, | ||
local | data.accel, data.never_call_transliteration_module | ||
local link_tr = data.respect_link_tr and lang:link_tr(data.sc[1]) | |||
for i in ipairs(terms) do | for i in ipairs(terms) do | ||
local link | |||
-- Is there any text to show? | -- Is there any text to show? | ||
if (data.term[i] or data.alt[i]) then | if (data.term[i] or data.alt[i]) then | ||
-- Try to detect the script if it was not provided | -- Try to detect the script if it was not provided | ||
local display_term = data.alt[i] or data.term[i] | local display_term = data.alt[i] or data.term[i] | ||
local best = | local best = lang:findBestScript(display_term) | ||
-- no_nonstandard_sc_cat is intended for use in [[Module:interproject]] | -- no_nonstandard_sc_cat is intended for use in [[Module:interproject]] | ||
if ( | if ( | ||
not data.no_nonstandard_sc_cat and | |||
best:getCode() == "None" and | |||
find_best_script_without_lang(display_term):getCode() ~= "None" | |||
) then | |||
insert( | insert(cats, lang:getFullName() .. " terms in nonstandard scripts") | ||
end | end | ||
if not data.sc[i] then | if not data.sc[i] then | ||
data.sc[i] = best | data.sc[i] = best | ||
-- Track uses of sc parameter. | |||
elseif data.track_sc then | elseif data.track_sc then | ||
if data.sc[i]:getCode() == best:getCode() then | if data.sc[i]:getCode() == best:getCode() then | ||
insert( | insert(cats, lang:getFullName() .. " terms with redundant script codes") | ||
else | else | ||
insert( | insert(cats, lang:getFullName() .. " terms with non-redundant manual script codes") | ||
end | end | ||
end | end | ||
| Line 849: | Line 1,240: | ||
if data.sc[i]:hasNormalizationFixes() == true then | if data.sc[i]:hasNormalizationFixes() == true then | ||
if (data.term[i] and data.sc[i]:fixDiscouragedSequences(toNFC(data.term[i])) ~= toNFC(data.term[i])) or (data.alt[i] and data.sc[i]:fixDiscouragedSequences(toNFC(data.alt[i])) ~= toNFC(data.alt[i])) then | if (data.term[i] and data.sc[i]:fixDiscouragedSequences(toNFC(data.term[i])) ~= toNFC(data.term[i])) or (data.alt[i] and data.sc[i]:fixDiscouragedSequences(toNFC(data.alt[i])) ~= toNFC(data.alt[i])) then | ||
insert( | insert(cats, "Pages using discouraged character sequences") | ||
end | end | ||
end | end | ||
link = simple_link( | |||
data.term[i], | |||
data.fragment, | |||
data.alt[i], | |||
lang, | |||
data.sc[i], | |||
id, | |||
cats, | |||
no_alt_ast, | |||
srwc | |||
[ | ) | ||
end | |||
-- simple_link can return nil, so check if a link has been generated. | |||
if link then | |||
-- Add "nowrap" class to prefixes in order to prevent wrapping after the hyphen | |||
local nowrap | |||
local | local display_term = data.alt[i] or data.term[i] | ||
if display_term and (display_term:find("^%-") or display_term:find("^־")) then -- Hebrew maqqef -- FIXME, use hyphens from [[Module:affix]] | |||
nowrap = "nowrap" | |||
end | end | ||
link = tag_text(link, lang, data.sc[i], face, get_class(lang, data.tr[i], accel, nowrap)) | |||
else | else | ||
--[[ No term to show. | --[[ No term to show. | ||
Is there at least a transliteration we can work from? ]] | Is there at least a transliteration we can work from? ]] | ||
link = | link = request_script(lang, data.sc[i]) | ||
-- No link to show, and no transliteration either. Show a term request (unless it's a substrate, as they rarely take terms). | -- No link to show, and no transliteration either. Show a term request (unless it's a substrate, as they rarely take terms). | ||
if (link == "" or (not data.tr[i]) or data.tr[i] == "-") and | if (link == "" or (not data.tr[i]) or data.tr[i] == "-") and lang:getFamilyCode() ~= "qfa-sub" then | ||
-- If there are multiple terms, break the loop instead. | -- If there are multiple terms, break the loop instead. | ||
if i > 1 then | if i > 1 then | ||
remove(output) | remove(output) | ||
break | break | ||
elseif | elseif NAMESPACE ~= "Template" then | ||
insert( | insert(cats, lang:getFullName() .. " term requests") | ||
end | end | ||
link = "<small>[Term?]</small>" | link = "<small>[Term?]</small>" | ||
| Line 964: | Line 1,286: | ||
end | end | ||
-- | -- When suppress_tr is true, do not show or generate any transliteration | ||
if data. | if data.suppress_tr then | ||
data.tr[1] = nil | data.tr[1] = nil | ||
else | else | ||
local phonetic_extraction = load_data("Module:links/data").phonetic_extraction | -- TODO: Currently only handles the first transliteration, pending consensus on how to handle multiple translits for multiple forms, as this is not always desirable (e.g. traditional/simplified Chinese). | ||
if data.tr[1] == "" or data.tr[1] == "-" then | |||
data.tr[1] = nil | |||
else | |||
local phonetic_extraction = load_data("Module:links/data").phonetic_extraction | |||
phonetic_extraction = phonetic_extraction[lang:getCode()] or phonetic_extraction[lang:getFullCode()] | |||
if phonetic_extraction then | |||
data.tr[1] = data.tr[1] or | |||
require(phonetic_extraction).getTranslit(export.remove_links(data.alt[1] or data.term[1])) | |||
elseif (data.term[1] or data.alt[1]) and data.sc[1]:isTransliterated() then | |||
-- Track whenever there is manual translit. The categories below like 'terms with redundant transliterations' | |||
-- aren't sufficient because they only work with reference to automatic translit and won't operate at all in | |||
-- languages without any automatic translit, like Persian and Hebrew. | |||
if data.tr[1] then | |||
local full_code = lang:getFullCode() | |||
end | |||
if not nevercalltr then | |||
-- Try to generate a transliteration. | |||
local text = data.alt[1] or data.term[1] | |||
if not link_tr then | |||
text = export.remove_links(text, true) | |||
end | |||
local automated_tr = lang:transliterate(text, data.sc[1]) | |||
if automated_tr then | |||
local manual_tr = data.tr[1] | |||
if manual_tr then | |||
if export.remove_links(manual_tr) == export.remove_links(automated_tr) then | |||
insert(cats, lang:getFullName() .. " terms with redundant transliterations") | |||
else | |||
-- Prevents Arabic root categories from flooding the tracking categories. | |||
if NAMESPACE ~= "Category" then | |||
insert(cats, | |||
lang:getFullName() .. " terms with non-redundant manual transliterations") | |||
end | |||
end | |||
end | |||
if not manual_tr or lang:overrideManualTranslit(data.sc[1]) then | |||
data.tr[1] = automated_tr | |||
end | end | ||
end | end | ||
end | end | ||
| Line 1,014: | Line 1,342: | ||
-- Link to the transliteration entry for languages that require this | -- Link to the transliteration entry for languages that require this | ||
if data.tr[1] and | if data.tr[1] and link_tr and not data.tr[1]:match("%[%[(.-)%]%]") then | ||
data.tr[1] = | data.tr[1] = simple_link( | ||
data.tr[1], | |||
nil, | |||
nil, | |||
lang, | |||
get_script("Latn"), | |||
elseif data.tr[1] and not | nil, | ||
cats, | |||
no_alt_ast, | |||
srwc | |||
) | |||
elseif data.tr[1] and not link_tr then | |||
-- Remove the pseudo-HTML tags added by remove_links. | -- Remove the pseudo-HTML tags added by remove_links. | ||
data.tr[1] = data.tr[1]:gsub("</?link>", "") | data.tr[1] = data.tr[1]:gsub("</?link>", "") | ||
| Line 1,029: | Line 1,362: | ||
insert(output, export.format_link_annotations(data, face)) | insert(output, export.format_link_annotations(data, face)) | ||
if data.pretext then | |||
insert(output, 1, data.pretext) | |||
end | |||
if data.posttext then | |||
insert(output, data.posttext) | |||
end | |||
local categories = cats[1] and format_categories(cats, lang, "-", nil, nil, data.sc) or "" | |||
output = concat(output) | output = concat(output) | ||
if show_qualifiers then | if show_qualifiers or data.show_qualifiers then | ||
output = add_qualifiers_and_refs_to_term(data, output) | output = add_qualifiers_and_refs_to_term(data, output) | ||
end | end | ||
| Line 1,061: | Line 1,401: | ||
function(c1, c2, c3) | function(c1, c2, c3) | ||
-- Don't remove files. | -- Don't remove files. | ||
for _, false_positive in ipairs({"file", "image"}) do | for _, false_positive in ipairs({ "file", "image" }) do | ||
if c2:lower():match("^" .. false_positive .. ":") then return c1 .. c2 .. c3 end | if c2:lower():match("^" .. false_positive .. ":") then return c1 .. c2 .. c3 end | ||
end | end | ||
-- Remove categories completely. | -- Remove categories completely. | ||
for _, false_positive in ipairs({"category", "cat"}) do | for _, false_positive in ipairs({ "category", "cat" }) do | ||
if c2:lower():match("^" .. false_positive .. ":") then return "" end | if c2:lower():match("^" .. false_positive .. ":") then return "" end | ||
end | end | ||
| Line 1,083: | Line 1,423: | ||
return text | return text | ||
end | end | ||
function export.section_link(link) | function export.section_link(link) | ||
| Line 1,103: | Line 1,428: | ||
error("The first argument to section_link was a " .. type(link) .. ", but it should be a string.") | error("The first argument to section_link was a " .. type(link) .. ", but it should be a string.") | ||
end | end | ||
local target, section = get_fragment((link:gsub("_", " "))) | |||
if not section then | |||
if not | error("No \"#\" delineating a section name") | ||
error(" | |||
end | end | ||
return | return simple_link( | ||
target, | |||
section, | |||
target .. " § " .. section | |||
) | |||
end | end | ||
return export | return export | ||