48,403
edits
No edit summary |
No edit summary |
||
| Line 12: | Line 12: | ||
[[Module:gender and number]] | [[Module:gender and number]] | ||
]=] | ]=] | ||
local | |||
local anchors_module = "Module:anchors" | |||
local gender_and_number_module = "Module:gender and number" | |||
local languages_module = "Module:languages" | |||
local load_module = "Module:load" | |||
local memoize_module = "Module:memoize" | |||
local pages_module = "Module:pages" | |||
local pron_qualifier_module = "Module:pron qualifier" | local pron_qualifier_module = "Module:pron qualifier" | ||
local scripts_module = "Module:scripts" | |||
local script_utilities_module = "Module:script utilities" | |||
local string_encode_entities_module = "Module:string/encode entities" | |||
local string_utilities_module = "Module:string utilities" | |||
local table_module = "Module:table" | |||
local utilities_module = "Module:utilities" | |||
local concat = table.concat | local concat = table.concat | ||
local find = string.find | local find = string.find | ||
local | local get_current_title = mw.title.getCurrentTitle | ||
local insert = table.insert | local insert = table.insert | ||
local ipairs = ipairs | local ipairs = ipairs | ||
local match = string.match | local match = string.match | ||
local new_title = mw.title.new | local new_title = mw.title.new | ||
local pairs = pairs | local pairs = pairs | ||
local remove = table.remove | local remove = table.remove | ||
local sub = string.sub | local sub = string.sub | ||
local toNFC = mw.ustring.toNFC | local toNFC = mw.ustring.toNFC | ||
local tostring = tostring | local tostring = tostring | ||
local type = type | local type = type | ||
local unstrip = mw.text.unstrip | local unstrip = mw.text.unstrip | ||
local | local NAMESPACE = get_current_title().namespace | ||
-- One-shot lazy loader: the first call wraps mw.uri.anchorEncode with the function returned by
-- [[Module:memoize]], rebinds this local to the wrapped version, and forwards the arguments.
local function anchor_encode(...)
	local loaded = require(memoize_module)(mw.uri.anchorEncode, true)
	anchor_encode = loaded
	return loaded(...)
end
-- One-shot lazy loader: on first use, fetches `decode_entities` from [[Module:string utilities]],
-- rebinds this local to it, and forwards the arguments.
local function decode_entities(...)
	local loaded = require(string_utilities_module).decode_entities
	decode_entities = loaded
	return loaded(...)
end
-- One-shot lazy loader: on first use, fetches `decode_uri` from [[Module:string utilities]],
-- rebinds this local to it, and forwards the arguments.
local function decode_uri(...)
	local loaded = require(string_utilities_module).decode_uri
	decode_uri = loaded
	return loaded(...)
end
-- Can't yet replace, as the [[Module:string utilities]] version no longer has automatic double-encoding prevention, which requires changes here to account for. | |||
-- One-shot lazy loader: [[Module:string/encode entities]] itself is used as the callable, so the
-- first call rebinds this local to the value returned by `require` and forwards the arguments.
local function encode_entities(...)
	local loaded = require(string_encode_entities_module)
	encode_entities = loaded
	return loaded(...)
end
-- One-shot lazy loader: on first use, fetches `extend` from [[Module:table]], rebinds this local
-- to it, and forwards the arguments.
local function extend(...)
	local loaded = require(table_module).extend
	extend = loaded
	return loaded(...)
end
-- One-shot lazy loader: on first use, fetches `findBestScriptWithoutLang` from [[Module:scripts]],
-- rebinds this local to it, and forwards the arguments.
local function find_best_script_without_lang(...)
	local loaded = require(scripts_module).findBestScriptWithoutLang
	find_best_script_without_lang = loaded
	return loaded(...)
end
-- One-shot lazy loader: on first use, fetches `format_categories` from [[Module:utilities]],
-- rebinds this local to it, and forwards the arguments.
local function format_categories(...)
	local loaded = require(utilities_module).format_categories
	format_categories = loaded
	return loaded(...)
end
-- One-shot lazy loader: on first use, fetches `format_genders` from [[Module:gender and number]],
-- rebinds this local to it, and forwards the arguments.
local function format_genders(...)
	local loaded = require(gender_and_number_module).format_genders
	format_genders = loaded
	return loaded(...)
end
-- One-shot lazy loader: on first use, fetches `format_qualifiers` from [[Module:pron qualifier]],
-- rebinds this local to it, and forwards the arguments.
local function format_qualifiers(...)
	local loaded = require(pron_qualifier_module).format_qualifiers
	format_qualifiers = loaded
	return loaded(...)
end
-- One-shot lazy loader: on first use, fetches `get_current_L2` from [[Module:pages]], rebinds this
-- local to it, and forwards the arguments.
local function get_current_L2(...)
	local loaded = require(pages_module).get_current_L2
	get_current_L2 = loaded
	return loaded(...)
end
-- One-shot lazy loader: on first use, fetches `getByCode` from [[Module:languages]], rebinds this
-- local to it, and forwards the arguments.
local function get_lang(...)
	local loaded = require(languages_module).getByCode
	get_lang = loaded
	return loaded(...)
end
-- One-shot lazy loader: on first use, fetches `getByCode` from [[Module:scripts]], rebinds this
-- local to it, and forwards the arguments.
local function get_script(...)
	local loaded = require(scripts_module).getByCode
	get_script = loaded
	return loaded(...)
end
-- One-shot lazy loader: on first use, fetches `language_anchor` from [[Module:anchors]], rebinds
-- this local to it, and forwards the arguments.
local function language_anchor(...)
	local loaded = require(anchors_module).language_anchor
	language_anchor = loaded
	return loaded(...)
end
-- One-shot lazy loader: on first use, fetches `load_data` from [[Module:load]], rebinds this local
-- to it, and forwards the arguments.
local function load_data(...)
	local loaded = require(load_module).load_data
	load_data = loaded
	return loaded(...)
end
-- One-shot lazy loader: on first use, fetches `request_script` from [[Module:script utilities]],
-- rebinds this local to it, and forwards the arguments.
local function request_script(...)
	local loaded = require(script_utilities_module).request_script
	request_script = loaded
	return loaded(...)
end
-- One-shot lazy loader: on first use, fetches `shallowCopy` from [[Module:table]], rebinds this
-- local to it, and forwards the arguments.
local function shallow_copy(...)
	local loaded = require(table_module).shallowCopy
	shallow_copy = loaded
	return loaded(...)
end
-- One-shot lazy loader: on first use, fetches `split` from [[Module:string utilities]], rebinds
-- this local to it, and forwards the arguments.
local function split(...)
	local loaded = require(string_utilities_module).split
	split = loaded
	return loaded(...)
end
-- One-shot lazy loader: on first use, fetches `tag_text` from [[Module:script utilities]], rebinds
-- this local to it, and forwards the arguments.
local function tag_text(...)
	local loaded = require(script_utilities_module).tag_text
	tag_text = loaded
	return loaded(...)
end
-- One-shot lazy loader: on first use, fetches `tag_translit` from [[Module:script utilities]],
-- rebinds this local to it, and forwards the arguments.
local function tag_translit(...)
	local loaded = require(script_utilities_module).tag_translit
	tag_translit = loaded
	return loaded(...)
end
-- One-shot lazy loader: on first use, fetches `trim` from [[Module:string utilities]], rebinds
-- this local to it, and forwards the arguments.
local function trim(...)
	local loaded = require(string_utilities_module).trim
	trim = loaded
	return loaded(...)
end
-- One-shot lazy loader: on first use, fetches `char` from [[Module:string utilities]], rebinds
-- this local to it, and forwards the arguments.
local function u(...)
	local loaded = require(string_utilities_module).char
	u = loaded
	return loaded(...)
end
-- One-shot lazy loader: on first use, fetches `lower` from [[Module:string utilities]], rebinds
-- this local to it, and forwards the arguments.
local function ulower(...)
	local loaded = require(string_utilities_module).lower
	ulower = loaded
	return loaded(...)
end
-- One-shot lazy loader: on first use, fetches `match` from [[Module:string utilities]], rebinds
-- this local to it, and forwards the arguments.
local function umatch(...)
	local loaded = require(string_utilities_module).match
	umatch = loaded
	return loaded(...)
end
local function selective_trim(...) | |||
-- Unconditionally trimmed charset. | -- Unconditionally trimmed charset. | ||
local always_trim = | local always_trim = | ||
| Line 51: | Line 167: | ||
"\226\128\170-\226\128\174" .. -- U+202A-202E (directionality formatting characters) | "\226\128\170-\226\128\174" .. -- U+202A-202E (directionality formatting characters) | ||
"\226\129\166-\226\129\169" -- U+2066-2069 (directionality formatting characters) | "\226\129\166-\226\129\169" -- U+2066-2069 (directionality formatting characters) | ||
-- Standard trimmed charset. | -- Standard trimmed charset. | ||
local standard_trim = "%s" .. -- (default whitespace charset) | local standard_trim = "%s" .. -- (default whitespace charset) | ||
"\226\128\139-\226\128\141" .. -- U+200B-200D (zero-width spaces) | "\226\128\139-\226\128\141" .. -- U+200B-200D (zero-width spaces) | ||
always_trim | always_trim | ||
-- If there are non-whitespace characters, trim all characters in `standard_trim`. | -- If there are non-whitespace characters, trim all characters in `standard_trim`. | ||
-- Otherwise, only trim the characters in `always_trim`. | -- Otherwise, only trim the characters in `always_trim`. | ||
selective_trim = function(text) | |||
if text == "" then | if text == "" then | ||
return text | return text | ||
end | end | ||
local trimmed = | local trimmed = trim(text, standard_trim) | ||
if trimmed ~= "" then | if trimmed ~= "" then | ||
return trimmed | return trimmed | ||
end | end | ||
return | return trim(text, always_trim) | ||
end | end | ||
return selective_trim(...) | |||
end | end | ||
| Line 105: | Line 223: | ||
end | end | ||
return text | return text | ||
end | |||
--[==[Splits a single wikilink of the form {[[target]]} or {[[target|display]]} into its link target and display text.
Returns {nil, nil} if `text` is not exactly one wikilink. Normally the target is returned as a title object (and
{nil, nil} is returned if no title object can be made from it); if `allow_bad_target` is set, the target is returned
as the raw string instead, with no validity check.]==]
function export.get_wikilink_parts(text, allow_bad_target)
	-- TODO: replace `allow_bad_target` with `allow_unsupported`, with support for links to unsupported titles, including escape sequences.
	-- Reject anything that isn't "[[...]]" with no further "[[" or "]]" inside.
	if not match(text, "^()%[%[") then -- Faster than sub(text, 1, 2) ~= "[[".
		return nil, nil
	elseif find(text, "[[", 3, true) then
		return nil, nil
	elseif find(text, "]]", 3, true) ~= #text - 1 then
		-- The first "]]" after the opening brackets must be the final two characters.
		return nil, nil
	end
	-- Everything before the first pipe is the target; everything after it is the display text.
	local target, display
	local pipe = find(text, "|", 3, true)
	if pipe == nil then
		target = sub(text, 3, -3)
		display = target
	else
		target = sub(text, 3, pipe - 1)
		display = sub(text, pipe + 1, -3)
	end
	if allow_bad_target then
		return target, display
	end
	local title = new_title(target)
	-- No title object means the target is invalid.
	if title == nil then
		return nil, nil
	end
	-- If the link target starts with "#" then mw.title.new returns a broken
	-- title object, so grab the current title and give it the correct fragment.
	if title.prefixedText == "" then
		local fragment = title.fragment
		if fragment == "" then -- [[#]] isn't valid
			return nil, nil
		end
		title = get_current_title()
		title.fragment = fragment
	end
	return title, display
end
| Line 114: | Line 269: | ||
-- misparsed (wa'a → wa'a → pagename wa&, fragment 39;a). | -- misparsed (wa'a → wa'a → pagename wa&, fragment 39;a). | ||
text = decode_entities(text) | text = decode_entities(text) | ||
local target, fragment = text:match("^( | local target, fragment = text:match("^(.-)#(.+)$") | ||
target = target or text | target = target or text | ||
target = unescape(target, "#") | target = unescape(target, "#") | ||
| Line 124: | Line 279: | ||
function export.get_fragment(text) | function export.get_fragment(text) | ||
-- If there are no embedded links, process input. | -- If there are no embedded links, process input. | ||
local open = find(text, "[[", | local open = find(text, "[[", nil, true) | ||
if not open then | if not open then | ||
return get_fragment(text) | return get_fragment(text) | ||
| Line 146: | Line 301: | ||
target = remove_formatting(target) | target = remove_formatting(target) | ||
if target:sub(1, 1) == ":" then | |||
return target:sub(2) | |||
end | |||
local prefix = target:match("^(.-):") | |||
-- Convert any escaped colons | |||
target = target:gsub("\\:", ":") | |||
if prefix then | |||
-- If this is an a link to another namespace or an interwiki link, ensure there's an initial colon and then return what we have (so that it works as a conventional link, and doesn't do anything weird like add the term to a category.) | -- If this is an a link to another namespace or an interwiki link, ensure there's an initial colon and then return what we have (so that it works as a conventional link, and doesn't do anything weird like add the term to a category.) | ||
prefix = ulower(trim(prefix)) | |||
if ( | if prefix ~= "" and ( | ||
load_data("Module:data/namespaces")[prefix] or | load_data("Module:data/namespaces")[prefix] or | ||
load_data("Module:data/interwikis")[prefix] | load_data("Module:data/interwikis")[prefix] | ||
) then | ) then | ||
return | return target | ||
end | end | ||
end | end | ||
| Line 168: | Line 327: | ||
target, escaped = target:gsub("^(\\-)\\%*", "%1*") | target, escaped = target:gsub("^(\\-)\\%*", "%1*") | ||
if not | if not (sc and sc:getCode() ~= "None") then | ||
sc = lang:findBestScript(target) | sc = lang:findBestScript(target) | ||
end | end | ||
| Line 183: | Line 342: | ||
-- If the link contains unexpanded template parameters, then don't create a link. | -- If the link contains unexpanded template parameters, then don't create a link. | ||
if target: | if target:match("{{{.-}}}") then | ||
return nil | return nil | ||
end | end | ||
-- Link to appendix for reconstructed terms and terms in appendix-only languages. Plain links interpret * literally, however. | -- Link to appendix for reconstructed terms and terms in appendix-only languages. Plain links interpret * literally, however. | ||
if reconstructed == 1 then | |||
if lang:getFullCode() == "und" then | if lang:getFullCode() == "und" then | ||
return nil | return nil | ||
| Line 202: | Line 355: | ||
target = "Reconstruction:" .. lang:getFullName() .. "/" .. target | target = "Reconstruction:" .. lang:getFullName() .. "/" .. target | ||
end | end | ||
target = "Reconstruction:" .. lang:getFullName() .. "/" .. target | |||
-- Reconstructed languages and substrates require an initial *. | -- Reconstructed languages and substrates require an initial *. | ||
elseif lang:hasType("reconstructed") or lang:getFamilyCode() == "qfa-sub" then | elseif lang:hasType("reconstructed") or lang:getFamilyCode() == "qfa-sub" then | ||
--error("The specified language " .. lang:getCanonicalName() .. " is unattested, while the given term does not begin with '*' to indicate that it is reconstructed.") | |||
if lang:hasType("conlang") then | |||
target = "*" .. target | |||
elseif not lang:hasType("conlang") then | |||
target = "Reconstruction:" .. lang:getFullName() .. "/" .. target | |||
end | end | ||
elseif lang:hasType("appendix-constructed") then | elseif lang:hasType("appendix-constructed") then | ||
target = "Appendix:" .. lang:getFullName() .. "/" .. target | target = "Appendix:" .. lang:getFullName() .. "/" .. target | ||
else | |||
target = target | |||
end | end | ||
| Line 229: | Line 377: | ||
-- Make a link from a given link's parts | -- Make a link from a given link's parts | ||
local function make_link(link, lang, sc, id, isolated | local function make_link(link, lang, sc, id, isolated, cats, no_alt_ast, plain) | ||
-- Convert percent encoding to plaintext. | -- Convert percent encoding to plaintext. | ||
link.target = decode_uri(link.target, "PATH") | link.target = link.target and decode_uri(link.target, "PATH") | ||
link.fragment = link.fragment and decode_uri(link.fragment, "PATH") | link.fragment = link.fragment and decode_uri(link.fragment, "PATH") | ||
-- Find fragments (if one isn't already set). | -- Find fragments (if one isn't already set). | ||
-- Prevents {{l|en|word#Etymology 2|word}} from linking to [[word#Etymology 2#English]]. | -- Prevents {{l|en|word#Etymology 2|word}} from linking to [[word#Etymology 2#English]]. | ||
| Line 243: | Line 391: | ||
-- Create a default display form. | -- Create a default display form. | ||
local auto_display = link.target | local auto_display = link.target | ||
-- If the target is "" then it's a link like [[#English]], which refers to the current page. | |||
if auto_display == "" then | |||
auto_display = load_data("Module:headword/data").pagename | |||
end | |||
-- Process the target | -- Process the target | ||
| Line 288: | Line 440: | ||
-- and either the language code is "und" or the current L2 is the current | -- and either the language code is "und" or the current L2 is the current | ||
-- language then return a "self-link" like the software does. | -- language then return a "self-link" like the software does. | ||
if link.target == | if link.target == get_current_title().prefixedText then | ||
local fragment, current_L2 = link.fragment, | local fragment, current_L2 = link.fragment, get_current_L2() | ||
if ( | if ( | ||
fragment and fragment == current_L2 or | fragment and fragment == current_L2 or | ||
| Line 316: | Line 468: | ||
if not link.fragment then | if not link.fragment then | ||
if id then | if id then | ||
link.fragment = lang:getFullCode() == "und" and anchor_encode(id) or | link.fragment = lang:getFullCode() == "und" and anchor_encode(id) or language_anchor(lang, id) | ||
elseif lang:getFullCode() ~= "und" and not (link.target: | elseif lang:getFullCode() ~= "und" and not (link.target:match("^Appendix:") or link.target:match("^Reconstruction:")) then | ||
link.fragment = anchor_encode(lang:getFullName()) | link.fragment = anchor_encode(lang:getFullName()) | ||
end | end | ||
| Line 340: | Line 492: | ||
end | end | ||
return "[[" .. link.target .. (link.fragment and "#" .. link.fragment or "") .. "|" .. link.display .. "]]" | return "[[" .. link.target:gsub("^[^:]", ":%0") .. (link.fragment and "#" .. link.fragment or "") .. "|" .. link.display .. "]]" | ||
end | end | ||
| Line 354: | Line 506: | ||
link.display = target | link.display = target | ||
end | end | ||
-- There's no point in processing these, as they aren't real links. | -- There's no point in processing these, as they aren't real links. | ||
local target_lower = link.target:lower() | local target_lower = link.target:lower() | ||
for _, false_positive in ipairs({"category", "cat", "file", "image"}) do | for _, false_positive in ipairs({"category", "cat", "file", "image"}) do | ||
if target_lower:match("^" .. false_positive .. ":") then return nil end | if target_lower:match("^" .. false_positive .. ":") then | ||
return nil | |||
end | |||
end | end | ||
| Line 372: | Line 524: | ||
return link | return link | ||
end | |||
-- Records tracking categories for `alt` and `id` values that are supplied but will be ignored
-- because the term already contains wikilinks. Does nothing when no `cats` list is given.
local function check_params_ignored_when_embedded(alt, lang, id, cats)
	if not cats then
		return
	end
	if alt then
		insert(cats, lang:getFullName() .. " links with ignored alt parameters")
	end
	if id then
		insert(cats, lang:getFullName() .. " links with ignored id parameters")
	end
end
-- Find embedded links and ensure they link to the correct section. | -- Find embedded links and ensure they link to the correct section. | ||
local function process_embedded_links(text, | local function process_embedded_links(text, alt, lang, sc, id, cats, no_alt_ast, plain) | ||
-- Process the non-linked text. | -- Process the non-linked text. | ||
text = | text = lang:makeDisplayText(text, sc, true) | ||
-- If the text begins with * and another character, then act as if each link begins with *. However, don't do this if the * is contained within a link at the start. E.g. `|*[[foo]]` would set all_reconstructed to true, while `|[[*foo]]` would not. | -- If the text begins with * and another character, then act as if each link begins with *. However, don't do this if the * is contained within a link at the start. E.g. `|*[[foo]]` would set all_reconstructed to true, while `|[[*foo]]` would not. | ||
| Line 388: | Line 553: | ||
-- Otherwise, handle any escapes. | -- Otherwise, handle any escapes. | ||
text = text:gsub("^(\\-)\\%*", "%1*") | text = text:gsub("^(\\-)\\%*", "%1*") | ||
end | end | ||
check_params_ignored_when_embedded(alt, lang, id, cats) | |||
local function process_link(space1, linktext, space2) | local function process_link(space1, linktext, space2) | ||
local capture = "[[" .. linktext .. "]]" | local capture = "[[" .. linktext .. "]]" | ||
local link = parse_link(linktext) | local link = parse_link(linktext) | ||
--Return unprocessed false positives untouched (e.g. categories). | -- Return unprocessed false positives untouched (e.g. categories). | ||
if not link then return capture end | if not link then | ||
return capture | |||
end | |||
if all_reconstructed and not link.target:match("^%*") then | |||
link.target = "*" .. link.target | link.target = "*" .. link.target | ||
end | end | ||
linktext = make_link(link, | linktext = make_link(link, lang, sc, id, false, nil, no_alt_ast, plain) | ||
:gsub("^%[%[", "\3") | :gsub("^%[%[", "\3") | ||
:gsub("%]%]$", "\4") | :gsub("%]%]$", "\4") | ||
| Line 430: | Line 582: | ||
:gsub("%]%]", "\2") | :gsub("%]%]", "\2") | ||
-- If the script uses ^ to capitalize transliterations, make sure that any carets preceding links are on the inside, so that they get processed with the following text. | -- If the script uses ^ to capitalize transliterations, make sure that any carets preceding links are on the inside, so that they get processed with the following text. | ||
if text: | if ( | ||
text:find("^", nil, true) and | |||
not sc:hasCapitalization() and | |||
sc:isTransliterated() | |||
) then | |||
text = escape(text, "^") | text = escape(text, "^") | ||
:gsub("%^\1", "\1%^") | :gsub("%^\1", "\1%^") | ||
| Line 444: | Line 600: | ||
return (text | return (text | ||
:gsub("[\1\3]", "[[") | :gsub("[\1\3]", "[[") | ||
:gsub("[\2\4]", "]]")) | :gsub("[\2\4]", "]]") | ||
) | |||
end | end | ||
local function | local function simple_link(term, fragment, alt, lang, sc, id, cats, no_alt_ast, srwc) | ||
local text = | local plain | ||
if lang == nil then | |||
lang, plain = get_lang("und"), true | |||
end | |||
-- Get the link target and display text. If the term is the empty string, treat the input as a link to the current page. | |||
if term == "" then | |||
term = get_current_title().prefixedText | |||
elseif term then | |||
local new_term, new_alt = export.get_wikilink_parts(term, true) | |||
if new_term then | |||
check_params_ignored_when_embedded(alt, lang, id, cats) | |||
-- [[|foo]] links are treated as plaintext "[[|foo]]". | |||
-- FIXME: Pipes should be handled via a proper escape sequence, as they can occur in unsupported titles. | |||
if new_term == "" then | |||
term, alt = nil, term | |||
else | |||
local title = new_title(new_term) | |||
if title then | |||
local ns = title.namespace | |||
-- File: and Category: links should be returned as-is. | |||
if ns == 6 or ns == 14 then | |||
return term | |||
end | |||
end | |||
term, alt = new_term, new_alt | |||
if cats then | |||
if not (srwc and srwc(term, alt)) then | |||
insert(cats, lang:getFullName() .. " links with redundant wikilinks") | |||
end | |||
end | |||
end | |||
end | |||
end | end | ||
if alt then | |||
alt = selective_trim(alt) | |||
if alt == "" then | |||
alt = nil | |||
if | |||
if | |||
end | end | ||
end | end | ||
-- If there's nothing to process, return nil. | |||
if | if not (term or alt) then | ||
if | return nil | ||
return | end | ||
-- If there is no script, get one. | |||
if not sc then | |||
sc = lang:findBestScript(alt or term) | |||
end | |||
-- Embedded wikilinks need to be processed individually. | |||
if term then | |||
local open = find(term, "[[", nil, true) | |||
if open and find(term, "]]", open + 2, true) then | |||
return process_embedded_links(term, alt, lang, sc, id, cats, no_alt_ast, plain) | |||
end | end | ||
term = selective_trim(term) | |||
end | end | ||
-- If not, make a link using the parameters. | |||
return make_link({ | |||
target = term, | |||
display = alt, | |||
fragment = fragment | |||
}, lang, sc, id, true, cats, no_alt_ast, plain) | |||
end | end | ||
| Line 506: | Line 700: | ||
if type(data) ~= "table" then | if type(data) ~= "table" then | ||
error("The first argument to the function language_link must be a table. See Module:links/documentation for more information.") | error("The first argument to the function language_link must be a table. See Module:links/documentation for more information.") | ||
end | end | ||
local | -- Categorize links to "und". | ||
local lang, cats = data.lang, data.cats | |||
if cats and lang:getCode() == "und" then | |||
if | insert(cats, "Undetermined language links") | ||
end | end | ||
return simple_link( | |||
data.term, | |||
data.fragment, | |||
data.alt, | |||
lang, | |||
data.sc, | |||
data.id, | |||
cats, | |||
data.no_alt_ast, | |||
data.suppress_redundant_wikilink_cat | |||
) | |||
end | end | ||
-- Builds a link without language-specific handling: the fields of `data` are passed to
-- `simple_link` with a nil language. Raises an error if `data` is not a table.
function export.plain_link(data)
	if type(data) ~= "table" then
		error("The first argument to the function plain_link must be a table. See Module:links/documentation for more information.")
	end
	local term, fragment, alt = data.term, data.fragment, data.alt
	local sc, id, cats = data.sc, data.id, data.cats
	local no_alt_ast, srwc = data.no_alt_ast, data.suppress_redundant_wikilink_cat
	-- The nil in the language slot is what marks the link as plain.
	return simple_link(term, fragment, alt, nil, sc, id, cats, no_alt_ast, srwc)
end
| Line 574: | Line 742: | ||
-- Processes `data.term` for language `data.lang`: if it contains embedded wikilinks they are handed
-- to `process_embedded_links`; otherwise the trimmed term is returned as display text.
-- Raises an error if `data` is not a table.
function export.embedded_language_links(data)
	if type(data) ~= "table" then
		error("The first argument to the function embedded_language_links must be a table. See Module:links/documentation for more information.")
	end

	local term, lang = data.term, data.lang
	-- If we don't have a script, get one.
	local sc = data.sc or lang:findBestScript(term)

	-- Do we have embedded wikilinks? If so, they need to be processed individually.
	local first_open = find(term, "[[", nil, true)
	local has_link = first_open and find(term, "]]", first_open + 2, true)
	if has_link then
		return process_embedded_links(term, data.alt, lang, sc, data.id, data.cats, data.no_alt_ast)
	end

	-- If not, return the display text.
	-- FIXME: Double-escape any percent-signs, because we don't want to treat non-linked text as having percent-encoded characters. This is a hack: percent-decoding should come out of [[Module:languages]] and only dealt with in this module, as it's specific to links.
	local escaped = selective_trim(term):gsub("%%", "%%25")
	-- Parentheses keep only the first return value of makeDisplayText.
	return (lang:makeDisplayText(escaped, sc, true))
end
| Line 603: | Line 771: | ||
tag = { '<span class="mention-gloss-double-quote">“</span><span class="mention-gloss">', | tag = { '<span class="mention-gloss-double-quote">“</span><span class="mention-gloss">', | ||
'</span><span class="mention-gloss-double-quote">”</span>' } | '</span><span class="mention-gloss-double-quote">”</span>' } | ||
elseif item_type == "tr" then | elseif item_type == "tr" then | ||
if face == "term" then | if face == "term" then | ||
| Line 659: | Line 823: | ||
if data.genders and #data.genders > 0 then | if data.genders and #data.genders > 0 then | ||
local | local genders, gender_cats = format_genders(data.genders, data.lang) | ||
insert(output, " " .. | insert(output, " " .. genders) | ||
if gender_cats then | |||
local cats = data.cats | |||
if cats then | |||
extend(cats, gender_cats) | |||
end | |||
end | |||
end | end | ||
| Line 675: | Line 845: | ||
if data.tr[1] and data.ts[1] then | if data.tr[1] and data.ts[1] then | ||
insert(annotations, | insert(annotations, tag_translit(data.tr[1], data.lang, kind) .. " " .. export.mark(data.ts[1], "ts")) | ||
elseif data.ts[1] then | elseif data.ts[1] then | ||
insert(annotations, export.mark(data.ts[1], "ts")) | insert(annotations, export.mark(data.ts[1], "ts")) | ||
else | else | ||
insert(annotations, | insert(annotations, tag_translit(data.tr[1], data.lang, kind)) | ||
end | end | ||
end | end | ||
| Line 694: | Line 861: | ||
if data.pos then | if data.pos then | ||
-- debug category for pos= containing transcriptions | -- debug category for pos= containing transcriptions | ||
if data.pos: | if data.pos:match("/[^><]-/") then | ||
data.pos = data.pos .. "[[Category:links likely containing transcriptions in pos]]" | data.pos = data.pos .. "[[Category:links likely containing transcriptions in pos]]" | ||
end | end | ||
pos_tags = pos_tags or load_data("Module: | pos_tags = pos_tags or load_data("Module:headword/data").pos_aliases | ||
insert(annotations, export.mark(pos_tags[data.pos] or data.pos, "pos")) | insert(annotations, export.mark(pos_tags[data.pos] or data.pos, "pos")) | ||
end | end | ||
| Line 712: | Line 879: | ||
return concat(output) | return concat(output) | ||
end | |||
-- Encode certain characters to avoid various delimiter-related issues at various stages. We need to encode < and >
-- because they end up forming part of CSS class names inside of <span ...> and will interfere with finding the end
-- of the HTML tag. I first tried converting them to URL encoding, i.e. %3C and %3E; they then appear in the URL as
-- %253C and %253E, which get mapped back to %3C and %3E when passed to [[Module:accel]]. But mapping them to &lt;
-- and &gt; somehow works magically without any further work; they appear in the URL as < and >, and get passed to
-- [[Module:accel]] as < and >. I have no idea who along the chain of calls is doing the encoding and decoding. If
-- someone knows, please modify this comment appropriately!
local accel_char_map

-- Builds the character substitution table on first use, caches it in `accel_char_map`, and returns it.
local function get_accel_char_map()
	accel_char_map = {
		["%"] = ".",
		[" "] = "_",
		["_"] = u(0xFFF0),
		-- These must be the HTML entities: mapping "<" and ">" to themselves would leave the raw
		-- characters in the class attribute, which is exactly what the encoding is meant to prevent.
		["<"] = "&lt;",
		[">"] = "&gt;",
	}
	return accel_char_map
end
-- Replaces every "%", " ", "<", ">" and "_" in `param` with its entry in the accelerator character
-- map (built on first use) and returns the resulting string only.
-- Inside a Lua character set "%" escapes the next character, so a literal percent sign has to be
-- written "%%"; with a single "%" the set would match the space but never "%" itself, leaving the
-- map's "%" entry unused.
local function encode_accel_param_chars(param)
	-- Parentheses drop gsub's second return value (the substitution count).
	return (param:gsub("[%% <>_]", accel_char_map or get_accel_char_map()))
end
-- Turns one accelerator parameter into a `prefix`-tagged, character-encoded string.
-- A false/nil `param` gives ""; a table is first flattened into a single "*~!"-separated string.
local function encode_accel_param(prefix, param)
	if not param then
		return ""
	end
	if type(param) ~= "table" then
		-- This is decoded again by [[WT:ACCEL]].
		return prefix .. encode_accel_param_chars(param)
	end
	-- There may be gaps in the sequence, especially for translit params, so find the highest
	-- numeric key rather than relying on the length operator.
	local highest = 0
	for key in pairs(param) do
		if type(key) == "number" and highest < key then
			highest = key
		end
	end
	-- Fill every gap with an empty string so the positions are preserved.
	local parts = {}
	for index = 1, highest do
		parts[index] = param[index] or ""
	end
	-- [[Module:accel]] splits these up again.
	local joined = concat(parts, "*~!")
	-- This is decoded again by [[WT:ACCEL]].
	return prefix .. encode_accel_param_chars(joined)
end
-- Builds the space-separated class string describing an accelerated link from the fields of
-- `accel`. Returns "" when `accel` is absent. `tr` is used as the transliteration when
-- `accel.translit` is not set, unless it is "-".
local function get_class(lang, tr, accel)
	if not accel then
		return ""
	end
	local form = accel.form
	local pieces = {
		"form-of lang-" .. lang:getFullCode(),
		form and encode_accel_param_chars(form) .. "-form-of" or "",
		-- Outer parentheses keep each call to a single value inside the constructor.
		(encode_accel_param("gender-", accel.gender)),
		(encode_accel_param("pos-", accel.pos)),
		(encode_accel_param("transliteration-", accel.translit or (tr ~= "-" and tr or nil))),
		(encode_accel_param("target-", accel.target)),
		(encode_accel_param("origin-", accel.lemma)),
		(encode_accel_param("origin_transliteration-", accel.lemma_translit)),
		accel.no_store and "form-of-nostore" or "",
	}
	-- Empty pieces still contribute their separator, and the string always ends with a space.
	return concat(pieces, " ") .. " "
end
| Line 739: | Line 968: | ||
if q and q[1] or qq and qq[1] or data.a and data.a[1] or data.aa and data.aa[1] or data.l and data.l[1] or | if q and q[1] or qq and qq[1] or data.a and data.a[1] or data.aa and data.aa[1] or data.l and data.l[1] or | ||
data.ll and data.ll[1] or data.refs and data.refs[1] then | data.ll and data.ll[1] or data.refs and data.refs[1] then | ||
formatted = | formatted = format_qualifiers{ | ||
lang = data.lang, | lang = data.lang, | ||
text = formatted, | text = formatted, | ||
| Line 797: | Line 1,026: | ||
error("The first argument to the function full_link must be a table. " | error("The first argument to the function full_link must be a table. " | ||
.. "See Module:links/documentation for more information.") | .. "See Module:links/documentation for more information.") | ||
end | |||
-- FIXME: this shouldn't be added to `data`, as that means the input table needs to be cloned. | |||
data.cats = {} | |||
-- Categorize links to "und". | |||
local lang, cats = data.lang, data.cats | |||
if cats and lang:getCode() == "und" then | |||
insert(cats, "Undetermined language links") | |||
end | end | ||
| Line 803: | Line 1,041: | ||
-- Generate multiple forms if applicable. | -- Generate multiple forms if applicable. | ||
for _, param in ipairs{"term", "alt"} do | for _, param in ipairs{"term", "alt"} do | ||
if type(data[param]) == "string" and data[param]:find("//") then | if type(data[param]) == "string" and data[param]:find("//", nil, true) then | ||
data[param] = export.split_on_slashes(data[param]) | data[param] = export.split_on_slashes(data[param]) | ||
elseif type(data[param]) == "string" and not (type(data.term) == "string" and data.term:find("//")) then | elseif type(data[param]) == "string" and not (type(data.term) == "string" and data.term:find("//", nil, true)) then | ||
data[param] = | data[param] = lang:generateForms(data[param]) | ||
else | else | ||
data[param] = {} | data[param] = {} | ||
| Line 824: | Line 1,062: | ||
-- Create the link | -- Create the link | ||
local output = {} | local output = {} | ||
data. | local id, no_alt_ast, srwc, accel = data.id, data.no_alt_ast, data.suppress_redundant_wikilink_cat, data.accel | ||
for i in ipairs(terms) do | for i in ipairs(terms) do | ||
local link | |||
-- Is there any text to show? | -- Is there any text to show? | ||
if (data.term[i] or data.alt[i]) then | if (data.term[i] or data.alt[i]) then | ||
-- Try to detect the script if it was not provided | -- Try to detect the script if it was not provided | ||
local display_term = data.alt[i] or data.term[i] | local display_term = data.alt[i] or data.term[i] | ||
local best = | local best = lang:findBestScript(display_term) | ||
-- no_nonstandard_sc_cat is intended for use in [[Module:interproject]] | -- no_nonstandard_sc_cat is intended for use in [[Module:interproject]] | ||
if ( | if ( | ||
not data.no_nonstandard_sc_cat and | not data.no_nonstandard_sc_cat and | ||
best:getCode() == "None" and | best:getCode() == "None" and | ||
find_best_script_without_lang(display_term):getCode() ~= "None" | |||
) then | ) then | ||
insert( | insert(cats, lang:getFullName() .. " terms in nonstandard scripts") | ||
end | end | ||
if not data.sc[i] then | if not data.sc[i] then | ||
data.sc[i] = best | data.sc[i] = best | ||
-- Track uses of sc parameter. | |||
elseif data.track_sc then | |||
if data.sc[i]:getCode() == best:getCode() then | |||
insert(cats, lang:getFullName() .. " terms with redundant script codes") | |||
else | |||
insert(cats, lang:getFullName() .. " terms with non-redundant manual script codes") | |||
end | |||
end | end | ||
| Line 849: | Line 1,093: | ||
if data.sc[i]:hasNormalizationFixes() == true then | if data.sc[i]:hasNormalizationFixes() == true then | ||
if (data.term[i] and data.sc[i]:fixDiscouragedSequences(toNFC(data.term[i])) ~= toNFC(data.term[i])) or (data.alt[i] and data.sc[i]:fixDiscouragedSequences(toNFC(data.alt[i])) ~= toNFC(data.alt[i])) then | if (data.term[i] and data.sc[i]:fixDiscouragedSequences(toNFC(data.term[i])) ~= toNFC(data.term[i])) or (data.alt[i] and data.sc[i]:fixDiscouragedSequences(toNFC(data.alt[i])) ~= toNFC(data.alt[i])) then | ||
insert( | insert(cats, "Pages using discouraged character sequences") | ||
end | end | ||
end | end | ||
link = simple_link( | |||
data.term[i], | |||
data.fragment, | |||
data.alt[i], | |||
lang, | |||
data.sc[i], | |||
id, | |||
cats, | |||
no_alt_ast, | |||
srwc | |||
) | |||
end | |||
-- simple_link can return nil, so check if a link has been generated. | |||
if link then | |||
link = tag_text(link, lang, data.sc[i], face, get_class(lang, data.tr[i], accel)) | |||
else | else | ||
--[[ No term to show. | --[[ No term to show. | ||
Is there at least a transliteration we can work from? ]] | Is there at least a transliteration we can work from? ]] | ||
link = | link = request_script(lang, data.sc[i]) | ||
-- No link to show, and no transliteration either. Show a term request (unless it's a substrate, as they rarely take terms). | -- No link to show, and no transliteration either. Show a term request (unless it's a substrate, as they rarely take terms). | ||
if (link == "" or (not data.tr[i]) or data.tr[i] == "-") and | if (link == "" or (not data.tr[i]) or data.tr[i] == "-") and lang:getFamilyCode() ~= "qfa-sub" then | ||
-- If there are multiple terms, break the loop instead. | -- If there are multiple terms, break the loop instead. | ||
if i > 1 then | if i > 1 then | ||
remove(output) | remove(output) | ||
break | break | ||
elseif | elseif NAMESPACE ~= 10 then -- Template: | ||
insert( | insert(cats, lang:getFullName() .. " term requests") | ||
end | end | ||
link = "<small>[Term?]</small>" | link = "<small>[Term?]</small>" | ||
| Line 967: | Line 1,135: | ||
if data.tr[1] == "" or data.tr[1] == "-" then | if data.tr[1] == "" or data.tr[1] == "-" then | ||
data.tr[1] = nil | data.tr[1] = nil | ||
else | else | ||
local phonetic_extraction = load_data("Module:links/data").phonetic_extraction | local phonetic_extraction = load_data("Module:links/data").phonetic_extraction | ||
phonetic_extraction = phonetic_extraction[ | phonetic_extraction = phonetic_extraction[lang:getCode()] or phonetic_extraction[lang:getFullCode()] | ||
if phonetic_extraction then | if phonetic_extraction then | ||
| Line 979: | Line 1,146: | ||
-- aren't sufficient because they only work with reference to automatic translit and won't operate at all in | -- aren't sufficient because they only work with reference to automatic translit and won't operate at all in | ||
-- languages without any automatic translit, like Persian and Hebrew. | -- languages without any automatic translit, like Persian and Hebrew. | ||
if data.tr[1] then | |||
local full_code = lang:getFullCode() | |||
end | |||
-- Try to generate a transliteration. | -- Try to generate a transliteration. | ||
local text = data.alt[1] or data.term[1] | local text = data.alt[1] or data.term[1] | ||
if not | if not lang:link_tr(data.sc[1]) then | ||
text = export.remove_links(text, true) | text = export.remove_links(text, true) | ||
end | end | ||
local automated_tr, tr_categories | local automated_tr, tr_categories | ||
automated_tr, data.tr_fail, tr_categories = | automated_tr, data.tr_fail, tr_categories = lang:transliterate(text, data.sc[1]) | ||
if automated_tr or data.tr_fail then | if automated_tr or data.tr_fail then | ||
| Line 994: | Line 1,164: | ||
if manual_tr then | if manual_tr then | ||
if (export.remove_links(manual_tr) == export.remove_links(automated_tr)) and (not data.tr_fail) then | if (export.remove_links(manual_tr) == export.remove_links(automated_tr)) and (not data.tr_fail) then | ||
--insert( | insert(cats, lang:getFullName() .. " terms with redundant transliterations") | ||
elseif not data.tr_fail then | |||
-- Prevents Arabic root categories from flooding the tracking categories. | |||
if NAMESPACE ~= 14 then -- Category: | |||
insert(cats, lang:getFullName() .. " terms with non-redundant manual transliterations") | |||
end | |||
end | end | ||
end | end | ||
if (not manual_tr) or | if (not manual_tr) or lang:overrideManualTranslit(data.sc[1]) then | ||
data.tr[1] = automated_tr | data.tr[1] = automated_tr | ||
for _, category in ipairs(tr_categories) do | for _, category in ipairs(tr_categories) do | ||
insert( | insert(cats, category) | ||
end | end | ||
end | end | ||
| Line 1,009: | Line 1,184: | ||
-- Link to the transliteration entry for languages that require this | -- Link to the transliteration entry for languages that require this | ||
if data.tr[1] and | if data.tr[1] and lang:link_tr(data.sc[1]) and not (data.tr[1]:match("%[%[(.-)%]%]") or data.tr_fail) then | ||
data.tr[1] = | data.tr[1] = simple_link( | ||
data.tr[1], | |||
nil, | |||
nil, | |||
lang, | |||
get_script("Latn"), | |||
elseif data.tr[1] and not ( | nil, | ||
cats, | |||
no_alt_ast, | |||
srwc | |||
) | |||
elseif data.tr[1] and not (lang:link_tr(data.sc[1]) or data.tr_fail) then | |||
-- Remove the pseudo-HTML tags added by remove_links. | -- Remove the pseudo-HTML tags added by remove_links. | ||
data.tr[1] = data.tr[1]:gsub("</?link>", "") | data.tr[1] = data.tr[1]:gsub("</?link>", "") | ||
| Line 1,024: | Line 1,204: | ||
insert(output, export.format_link_annotations(data, face)) | insert(output, export.format_link_annotations(data, face)) | ||
local categories = # | local categories = #cats > 0 and format_categories(cats, lang, "-", nil, nil, data.sc) or "" | ||
output = concat(output) | output = concat(output) | ||
| Line 1,030: | Line 1,210: | ||
output = add_qualifiers_and_refs_to_term(data, output) | output = add_qualifiers_and_refs_to_term(data, output) | ||
end | end | ||
return output | return output .. categories | ||
end | end | ||
| Line 1,078: | Line 1,258: | ||
return text | return text | ||
end | end | ||
function export.section_link(link) | function export.section_link(link) | ||
| Line 1,099: | Line 1,264: | ||
end | end | ||
local target, section = get_fragment((link:gsub("_", " "))) | |||
if not | if not section then | ||
error(" | error("No \"#\" delineating a section name") | ||
end | end | ||
return | return simple_link( | ||
target, | |||
section, | |||
target .. " § " .. section | |||
) | |||
end | end | ||
return export | return export | ||