Module:links: Difference between revisions
Jump to navigation
Jump to search
No edit summary Tag: Reverted |
No edit summary Tag: Manual revert |
||
Line 2: | Line 2: | ||
--[=[ | --[=[ | ||
[[Unsupported titles]] | [[Unsupported titles]] and pages with high | ||
memory usage are listed at [[Module:links/data]]. | |||
Other modules used: | Other modules used: | ||
[[Module:script utilities]] | [[Module:script utilities]] | ||
Line 11: | Line 10: | ||
[[Module:languages]] and its submodules | [[Module:languages]] and its submodules | ||
[[Module:gender and number]] | [[Module:gender and number]] | ||
[[Module:utilities]] | |||
[[Module:string]] | |||
]=] | ]=] | ||
-- These are prefixed with u to avoid confusion with the default string methods | -- These are prefixed with u to avoid confusion with the default string methods | ||
-- of the same name. | -- of the same name. | ||
local | local usub = mw.ustring.sub | ||
local table_insert = table.insert | |||
local table_concat = table.concat | |||
local | |||
local ignore_cap = { | |||
local | ["ko"] = true, | ||
} | |||
local | |||
local | local phonetic_extraction = { | ||
["th"] = "Module:th", | |||
["km"] = "Module:km", | |||
} | |||
local | local pos_tags = { | ||
["a"] = "adjective", | |||
["adv"] = "adverb", | |||
["int"] = "interjection", | |||
["n"] = "noun", | |||
["pron"] = "pronoun", | |||
["v"] = "verb", | |||
["vi"] = "intransitive verb", | |||
["vt"] = "transitive verb", | |||
["vti"] = "transitive and intransitive verb", | |||
} | |||
local unsupported_titles | |||
function export.getLinkPage(target, lang) | |||
unsupported_titles = unsupported_titles or mw.loadData("Module:links/data").unsupported_titles | |||
if | if unsupported_titles[target] then | ||
return "Unsupported titles/" .. unsupported_titles[target] | |||
end | end | ||
-- If the link contains unexpanded template parameters, then don't create a link. | -- If the link contains unexpanded template parameters, then don't create a link. | ||
if target:find("{{{") then | if target:find("{{{") then | ||
return nil | return nil | ||
end | end | ||
if target:sub(1, 1) == ":" or target:sub(1, 2) == "w:" or target:sub(1, 10) == "wikipedia:" then | |||
return target | |||
end | |||
-- Remove diacritics from the page name | |||
target = lang:makeEntryName(target) | |||
if target:sub(1, 1) == "/" then | if target:sub(1, 1) == "/" then | ||
return ":" .. target | return ":" .. target | ||
end | end | ||
return target | return target | ||
end | end | ||
-- Make a link from | -- Make a language-specific link from given link's parts | ||
local function | local function makeLangLink(link, lang, id, allow_self_link) | ||
-- | -- Temporary tracking code | ||
link. | local langCode = lang:getCode() | ||
if langCode == "se" or langCode == "sia" or langCode:find("^sm[ajns]$") | |||
or langCode:find("^sj[dektu]$") then | |||
if link.display and link.display:find("'") then | |||
elseif link.target and link.target:find("'") then | |||
end | |||
end | |||
-- Find fragments (when link didn't come from parseLink). | -- Find fragments (when link didn't come from parseLink). | ||
-- Prevents {{l|en|word#Etymology 2|word}} from linking to [[word#Etymology 2#English]]. | -- Prevents {{l|en|word#Etymology 2|word}} from linking to [[word#Etymology 2#English]]. | ||
if link.fragment == nil then | |||
-- Replace numeric character references with the corresponding character (&#39; → '), | |||
-- as they contain #, which causes the numeric character reference to be | |||
-- misparsed (wa'a → wa&#39;a → pagename wa&, fragment 39;a). | |||
link.target = link.target:gsub("&#(%d+);", | |||
function(number) return mw.ustring.char(tonumber(number)) end) | |||
local first, second = link.target:match("^([^#]+)#(.+)$") | |||
if first then | |||
local first, second = link.target:match("^([^#]+)#(.+)$") | link.target, link.fragment = first, second | ||
end | end | ||
end | end | ||
-- If there is no display form, then create a default one | -- If there is no display form, then create a default one | ||
if not link.display then | if not link.display then | ||
link.display = link.target | link.display = link.target | ||
-- Strip the prefix from the displayed form | |||
-- TODO: other interwiki links? | |||
if link.display:sub(1, 1) == ":" and not mw.loadData("Module:links/data").unsupported_titles[link.display] then | |||
link.display = link.display:sub(2) -- remove colon from beginning | |||
else | |||
local prefix = link.display:match("^([^:]+):") | |||
local prefixes = { | |||
w = true, | |||
wikipedia = true, | |||
} | |||
if prefixes[prefix] then | |||
link.display = link.display:sub(#prefix + 2) -- remove prefix plus colon | |||
end | |||
end | |||
end | end | ||
-- Process the target | -- Process the target | ||
link.target = link.target:gsub("^%*", "Reconstruction:" .. lang:getCanonicalName() .. "/") | |||
link.target | link.target = export.getLinkPage(link.target, lang) | ||
link. | |||
if not link.target then | if not link.target then | ||
return link.display | return link.display | ||
Line 185: | Line 125: | ||
-- and linking to the same page hasn't been turned on, then return a "self-link" | -- and linking to the same page hasn't been turned on, then return a "self-link" | ||
-- like the software does. | -- like the software does. | ||
if | if not (allow_self_link or id) and link.target:gsub("^:", "") == mw.title.getCurrentTitle().prefixedText then | ||
return | return "<strong class=\"selflink\">" .. link.display .. "</strong>" | ||
end | end | ||
-- Add fragment | --[[ | ||
local prefix | Add fragment | ||
Do not add a section link to "Undetermined", as such sections do not exist and are invalid. | |||
local | TabbedLanguages handles links without a section by linking to the "last visited" section, | ||
but adding "Undetermined" would break that feature. | |||
For localized prefixes that make syntax error, please use the format: ["xyz"] = true, | |||
]] | |||
local prefix = link.target:match("^:?([^:]+):") | |||
local prefixes = { | |||
if not | w = true, | ||
wikipedia = true, | |||
if | Category = true, | ||
} | |||
if not prefixes[prefix] then | |||
if not link.fragment and lang:getCode() ~= "und" then | |||
if id then | if id then | ||
link.fragment = require("Module: | link.fragment = require("Module:utilities").make_id(lang, id) | ||
elseif not (link.target | elseif not mw.ustring.find(link.target, "^Appendix:") | ||
link.fragment = lang: | and not mw.ustring.find(link.target, "^Reconstruction:") then | ||
link.fragment = lang:getCanonicalName() | |||
end | end | ||
end | end | ||
-- This allows linking to pages like [[sms:a]] without it being treated weirdly. | |||
link.target = link.target:gsub(":", ":") | |||
end | end | ||
return "[[" .. (link.nocont and "wikt:" or "Contionary:") .. link.target .. "#" .. lang:getCanonicalName() .. "|" .. link.display .. "]]" | return "[[" .. (link.nocont and "wikt:" or "Contionary:") .. link.target .. "#" .. lang:getCanonicalName() .. "|" .. link.display .. "]]" | ||
end | end | ||
Line 230: | Line 166: | ||
local link = { target = linktext } | local link = { target = linktext } | ||
local first, second = link.target:match("^([^|]+)|(.+)$") | local first, second = link.target:match("^([^|]+)|(.+)$") | ||
if first then | if first then | ||
Line 245: | Line 172: | ||
else | else | ||
link.display = link.target | link.display = link.target | ||
end | end | ||
Line 259: | Line 180: | ||
link.fragment = second | link.fragment = second | ||
else | else | ||
-- So that | -- So that makeLangLink does not look for a fragment again | ||
link.fragment = false | link.fragment = false | ||
end | end | ||
Line 266: | Line 187: | ||
end | end | ||
-- | -- Creates a basic wikilink to the given term. If the text already contains | ||
-- links, these are replaced with links to the correct section. | |||
function export.language_link(data, allow_self_link) | function export.language_link(data, allow_self_link) | ||
if type(data) ~= "table" then | if type(data) ~= "table" then | ||
error("The first argument to the function language_link must be a table. See Module:links/documentation for more information.") | error("The first argument to the function language_link must be a table. See Module:links/documentation for more information.") | ||
end | end | ||
local text = data.term | local text = data.term | ||
if ignore_cap[data.lang:getCode()] and text then | |||
if | |||
text = text:gsub("%^", "") | text = text:gsub("%^", "") | ||
end | end | ||
-- | -- If the text begins with * and another character, | ||
if text then | -- then act as if each link begins with * | ||
local allReconstructed = false | |||
if text:find("^*.") then | |||
allReconstructed = true | |||
end | end | ||
-- Do we have embedded wikilinks? | -- Do we have embedded wikilinks? | ||
if text and text:find("%[%[.-%]%]") then | if text:find("[[", nil, true) then | ||
if data.alt then | |||
mw.log("(from Module:links)", "text with embedded wikilinks:", text, | |||
text = text | "ignored alt:", data.alt, "lang:", data.lang:getCode()) | ||
end | |||
if data.id then | |||
mw.log("(from Module:links)", "text with embedded wikilinks:", text, | |||
"ignored id:", data.id, "lang:", data.lang:getCode()) | |||
end | end | ||
-- Begins and ends with a wikilink tag | |||
if text:find("^%[%[(.+)%]%]$") then | |||
-- There are no [ ] in between. | |||
-- This makes the wikilink tag redundant. | |||
if text:find("^%[%[[^%[%]]+%]%]$") then | |||
else | |||
local temp = text:gsub("^%[%[(.+)%]%]$", "%1") | |||
temp = temp:gsub("%]%], %[%[", "|") | |||
end | |||
end | |||
text = text:gsub("%[%[([^%]]+)%]%]", | |||
function(linktext) | |||
local link = parseLink(linktext) | |||
if allReconstructed then | |||
link.target = "*" .. link.target | |||
end | |||
return makeLangLink(link, data.lang, data.id, allow_self_link) | |||
end) | |||
-- Remove the extra * at the beginning if it's immediately followed | |||
-- by a link whose display begins with * too | |||
if allReconstructed then | |||
text = text:gsub("^%*%[%[([^|%]]+)|%*", "[[%1|*") | |||
end | |||
else | else | ||
-- There is no embedded wikilink, make a link using the parameters. | |||
text = makeLangLink({ target = text, display = data.alt, nocont = data.nocont}, data.lang, data.id, allow_self_link) | |||
text = | |||
end | end | ||
return text | return text | ||
end | end | ||
Line 479: | Line 265: | ||
elseif itemType == "tr" then | elseif itemType == "tr" then | ||
if face == "term" then | if face == "term" then | ||
tag = { '<span lang="' .. lang: | tag = { '<span lang="' .. lang:getCode() .. '" class="tr mention-tr Latn">', | ||
'</span>' } | '</span>' } | ||
else | else | ||
tag = { '<span lang="' .. lang: | tag = { '<span lang="' .. lang:getCode() .. '" class="tr Latn">', '</span>' } | ||
end | end | ||
elseif itemType == "ts" then | elseif itemType == "ts" then | ||
tag = { '<span class="ts mention-ts Latn">/', '/</span>' } | |||
tag = { '<span class="ts mention-ts Latn">/ | |||
elseif itemType == "pos" then | elseif itemType == "pos" then | ||
tag = { '<span class="ann-pos">', '</span>' } | tag = { '<span class="ann-pos">', '</span>' } | ||
Line 501: | Line 286: | ||
end | end | ||
-- | -- Format the annotations (things following the linked term) | ||
function export.format_link_annotations(data, face) | function export.format_link_annotations(data, face) | ||
local output = {} | local output = {} | ||
Line 520: | Line 292: | ||
-- Interwiki link | -- Interwiki link | ||
if data.interwiki then | if data.interwiki then | ||
table_insert(output, data.interwiki) | |||
end | end | ||
Line 529: | Line 301: | ||
if data.genders and #data.genders > 0 then | if data.genders and #data.genders > 0 then | ||
local m_gen = require("Module: | local m_gen = require("Module:getn") | ||
table_insert(output, " " .. m_gen.format_list(data.genders, data.lang)) | |||
end | end | ||
Line 536: | Line 308: | ||
-- Transliteration and transcription | -- Transliteration and transcription | ||
if data.tr | if data.tr or data.ts then | ||
local kind | local kind | ||
if face == "term" then | if face == "term" then | ||
Line 544: | Line 316: | ||
end | end | ||
if data.tr | if data.tr and data.ts then | ||
table_insert(annotations, | |||
require("Module:script utilities").tag_translit(data.tr | require("Module:script utilities").tag_translit(data.tr, data.lang, kind) | ||
.. " " .. export.mark(data.ts | .. " " .. export.mark(data.ts, "ts")) | ||
elseif data.ts | elseif data.ts then | ||
table_insert(annotations, export.mark(data.ts, "ts")) | |||
else | else | ||
table_insert(annotations, | |||
require("Module:script utilities").tag_translit(data.tr | require("Module:script utilities").tag_translit(data.tr, data.lang, kind)) | ||
end | end | ||
end | end | ||
Line 558: | Line 330: | ||
-- Gloss/translation | -- Gloss/translation | ||
if data.gloss then | if data.gloss then | ||
table_insert(annotations, export.mark(data.gloss, "gloss")) | |||
end | end | ||
Line 568: | Line 340: | ||
end | end | ||
table_insert(annotations, export.mark(pos_tags[data.pos] or data.pos, "pos")) | |||
end | end | ||
-- Literal/sum-of-parts meaning | -- Literal/sum-of-parts meaning | ||
if data.lit then | if data.lit then | ||
table_insert(annotations, "literally " .. export.mark(data.lit, "gloss")) | |||
end | end | ||
if #annotations > 0 then | if #annotations > 0 then | ||
table_insert(output, " " .. export.mark(table_concat(annotations, ", "), "annotations")) | |||
end | end | ||
return | return table_concat(output) | ||
end | end | ||
-- | -- A version of {{l}} or {{m}} that can be called from other modules too | ||
function export.full_link(data, face, allow_self_link, no_check_redundant_translit) | |||
function export.full_link(data, face, allow_self_link, | |||
if type(data) ~= "table" then | if type(data) ~= "table" then | ||
error("The first argument to the function full_link must be a table. " | error("The first argument to the function full_link must be a table. " | ||
Line 671: | Line 362: | ||
end | end | ||
-- Create the link | -- Create the link | ||
local output = {} | local output = {} | ||
Line 706: | Line 367: | ||
local link = "" | local link = "" | ||
local annotations | local annotations | ||
local class = "" | --local m_utilities = require("Module:utilities") | ||
-- Is there any text to show? | |||
if (data.term or data.alt) and data.term ~= "-" then | |||
-- Try to detect the script if it was not provided | |||
if not data.sc then | |||
data.sc = require("Module:scripts").findBestScript(data.alt or data.term, data.lang) | |||
else | |||
-- Track uses of sc parameter | |||
local best = require("Module:scripts").findBestScript(data.alt or data.term, data.lang) | |||
end | |||
local class = "" | |||
local function encode_accel_param(prefix, param) | |||
-- This is decoded again by [[WT:ACCEL]]. | |||
return param and prefix .. param:gsub("%%", "."):gsub(" ", "_") or "" | |||
-- | end | ||
if data.accel then | |||
local form = data.accel.form and data.accel.form .. "-form-of" or "" | |||
local gender = encode_accel_param("gender-", data.accel.gender) | |||
local pos = encode_accel_param("pos-", data.accel.pos) | |||
local translit = encode_accel_param("transliteration-", data.accel.translit) | |||
local lemma = encode_accel_param("origin-", data.accel.lemma) | |||
local lemma_translit = encode_accel_param("origin_transliteration-", data.accel.lemma_translit) | |||
local no_store = data.accel.no_store and "form-of-nostore" or "" | |||
local accel = | |||
form .. " " .. | |||
gender .. " " .. | |||
pos .. " " .. | |||
translit .. " " .. | |||
lemma .. " " .. | |||
lemma_translit .. " " .. | |||
no_store .. " " | |||
class = "form-of lang-" .. data.lang:getCode() .. " " .. accel | |||
end | |||
-- Only make a link if the term has been given, otherwise just show the alt text without a link | |||
link = require("Module:script utilities").tag_text( | |||
data.term and export.language_link(data, allow_self_link) | |||
or data.alt, data.lang, data.sc, face, class) | |||
else | |||
--[[ No term to show. | |||
Is there at least a transliteration we can work from? ]] | |||
link = require("Module:script utilities").request_script(data.lang, data.sc) | |||
if not data.sc then | |||
data.sc = require("Module:scripts").findBestScript(data.alt or data.term, data.lang) | |||
else | else | ||
-- | -- Track uses of sc parameter | ||
local best = require("Module:scripts").findBestScript(data.alt or data.term, data.lang) | |||
end | |||
if link == "" or link == "-" or link == "?" or not data.tr or data.tr == "-" then | |||
-- No link to show, and no transliteration either. Show a term request. | |||
local category = "" | |||
if mw.title.getCurrentTitle().nsText ~= "Template" then | |||
table_insert(categories, "[[Category:" .. data.lang:getCanonicalName() .. " term requests]]") | |||
end | end | ||
link = "<small>[Term?]</small>" | |||
end | end | ||
end | end | ||
table_insert(output, link) | |||
if data.tr == "" or data.tr == "-" then | |||
data.tr = nil | |||
elseif | elseif phonetic_extraction[data.lang:getCode()] then | ||
local m_phonetic = require(phonetic_extraction[data.lang:getCode()]) | |||
data.tr = data.tr or m_phonetic.getTranslit(export.remove_links(data.term)) | |||
-- Try to generate a transliteration, unless transliteration has been supplied and | elseif (data.term or data.alt) and not data.sc:getCode():find("Lati?n") then | ||
-- given. (Checking for redundant transliteration can use up significant amounts of memory so we don't want to do it | |||
-- | -- Try to generate a transliteration, unless transliteration has been supplied and either | ||
-- no_check_redundant_translit is given or we are in a high-memory entry. (Checking for redundant | |||
-- transliteration can use up significant amounts of memory so we don't want to do it if memory | |||
-- is tight. `no_check_redundant_translit` is currently set when called ultimately from | |||
-- {{multitrans|...|no-check-redundant-translit=1}}.) | -- {{multitrans|...|no-check-redundant-translit=1}}.) | ||
if not | if not data.tr then | ||
local | local automated_tr = data.lang:transliterate(export.remove_links(data.alt or data.term), data.sc) | ||
if automated_tr then | |||
local manual_tr = data.tr | |||
if automated_tr | |||
local manual_tr = data.tr | |||
if manual_tr then | if manual_tr then | ||
if | if manual_tr == automated_tr then | ||
table_insert(categories, | |||
"[[Category:Terms with redundant transliterations]]" | |||
.. "[[Category:Terms with redundant transliterations/" .. data.lang:getCode() .. "]]") | |||
else | |||
-- Prevents Arabic root categories from flooding the tracking categories. | -- Prevents Arabic root categories from flooding the tracking categories. | ||
if mw.title.getCurrentTitle().nsText ~= "Category" then | if mw.title.getCurrentTitle().nsText ~= "Category" then | ||
table_insert(categories, | |||
"[[Category:Terms with manual transliterations different from the automated ones]]" | |||
.. "[[Category:Terms with manual transliterations different from the automated ones/" .. data.lang:getCode() .. "]]") | |||
end | end | ||
end | end | ||
Line 855: | Line 474: | ||
if (not manual_tr) or data.lang:overrideManualTranslit() then | if (not manual_tr) or data.lang:overrideManualTranslit() then | ||
data.tr | data.tr = automated_tr | ||
end | end | ||
end | end | ||
end | end | ||
end | end | ||
-- Link to the transliteration entry for languages that require this | -- Link to the transliteration entry for languages that require this | ||
if data.tr | if data.tr and data.lang:link_tr() then | ||
data.tr | data.tr = require("Module:script utilities").tag_text( | ||
elseif data.tr | export.language_link({ lang = data.lang, term = data.tr, nocont = data.nocont}, allow_self_link), | ||
data.lang, data.sc, face, class) | |||
data.tr | --data.tr = export.language_link { lang = data.lang, term = data.tr, nocont = data.nocont} | ||
elseif data.tr then | |||
data.tr = "''" .. data.tr .. "''" | |||
end | end | ||
table_insert(output, export.format_link_annotations(data, face)) | |||
return table_concat(output) .. table_concat(categories) | |||
return output .. categories | |||
end | end | ||
--[ | --[[ Strips links: deletes category links, | ||
the targets of piped links, | |||
and all double square brackets. ]] | |||
function export.remove_links(text) | |||
function export.remove_links(text | |||
if type(text) == "table" then | if type(text) == "table" then | ||
text = text.args[1] | text = text.args[1] | ||
Line 898: | Line 506: | ||
return "" | return "" | ||
end | end | ||
text = mw.ustring.gsub(text, "%[%[Category:[^|%]]-|?[^|%]]-%]%]", "") | |||
text = text:gsub("%[%[[^|%]]-|", "") | |||
text = text:gsub("%[%[", "") | |||
text = text:gsub("%]%]", "") | |||
text = text | |||
return text | return text | ||
end | |||
-- Rewrites every [[...]] wikilink found in `text` by parsing its contents with
-- parseLink and rebuilding it with makeLangLink, using the language object
-- obtained for the code "en". Returns the rewritten string only.
function export.english_links(text)
	local english = require("Module:languages").getByCode("en")

	-- Called once per wikilink with the text between the double brackets.
	-- The arguments after the language are passed through unchanged:
	-- no id, and self-links allowed.
	local function relink(inner)
		local parsed = parseLink(inner)
		return makeLangLink(parsed, english, nil, true, false)
	end

	-- gsub also returns the number of replacements; assigning to a single
	-- local drops that second value so callers receive just the string.
	local relinked = text:gsub("%[%[([^%]]+)%]%]", relink)
	return relinked
end
--[[ Builds a wikilink to a term's language section, wrapped in a <span> that
	carries the script code as its class and the language code as its lang
	attribute. Only the data tables loaded with mw.loadData are consulted.

	Fields read from `data`:
		langCode (required): must be a key of "Module:languages/code to canonical name".
		term     (required): the page to link to.
		scCode   (required): must be a key of "Module:scripts/codes".
		id       (optional): appended to the fragment after mw.uri.encode(id, "WIKI").
		alt      (optional): display text; `term` is shown when absent.

	Side effect: stores the looked-up language name in data.langName.
	Raises an error when a required field is missing or a code is not found. ]]
function export.light_link(data)
	local language_names = mw.loadData("Module:languages/code to canonical name")
	local script_codes = mw.loadData("Module:scripts/codes")

	if data.langCode then
		data.langName = language_names[data.langCode] or error('The language code "' .. data.langCode .. '" is not recognized.')
	else
		error('Language code is required.')
	end

	if not data.term then
		error('Term to link to is required.')
	end

	if data.scCode then
		if not script_codes[data.scCode] then
			-- Report the field that was actually validated. The message used to
			-- concatenate data.sc, which this function never reads; when only
			-- scCode was supplied that raised a nil-concatenation error instead
			-- of this message.
			error('The script code "' .. tostring(data.scCode) .. '" is not recognized.')
		end
	else
		error("The function light_link requires a script code.")
	end

	-- Section anchor: the language name, plus "-<encoded id>" when an id is given.
	local fragment
	if data.id then
		fragment = data.langName .. "-" .. mw.uri.encode(data.id, "WIKI")
	else
		fragment = data.langName
	end

	return table_concat {
		'<span class="', data.scCode, '" lang="', data.langCode,
		'">[[', data.term, "#", fragment, "|", (data.alt or data.term), "]]</span>"
	}
end
--[=[ | --[=[ | ||
For example, Norwegian_Bokm.C3.A5l → Norwegian_Bokmål. 0xC3 and 0xA5 are the | |||
For example, Norwegian_Bokm.C3.A5l → Norwegian_Bokmål. | hexadecimal-base representation of the two bytes used to encode the character | ||
å in the UTF-8 encoding: | |||
11000011 10100101 | |||
Note that the bytes used to represent a character are actually different from | |||
the Unicode codepoint. For å, the codepoint is 0xE5. The bits (digits) that | |||
actually spell the codepoint are found in the brackets: 110[00011] 10[100101]. | |||
For further explanation, see [[w:UTF-8#Description]]. | |||
]=] | ]=] | ||
Line 963: | Line 592: | ||
link = link:gsub("_", " ") | link = link:gsub("_", " ") | ||
local numberSigns = | local numberSigns = require("Module:string").count(link, "#") | ||
if numberSigns > 1 then | if numberSigns > 1 then |