Module:links: Difference between revisions

(94 intermediate revisions by 2 users not shown)

Line 5:

extraction modules and part-of-speech names are listed

at [[Module:links/data]].

Other modules used:

[[Module:script utilities]]

Line 13:

]=]

~~-- These are prefixed with u to avoid confusion with the default~~ string ~~methods~~

local anchors_module = "Module:anchors"

~~-- of the same name.~~

local form_of_module = "Module:form of"

local gender_and_number_module = "Module:getn"

local languages_module = "Module:languages"

local load_module = "Module:load"

local memoize_module = "Module:memoize"

local pages_module = "Module:pages"

local pron_qualifier_module = "Module:pron qualifier"

local scripts_module = "Module:scripts"

local script_utilities_module = "Module:script utilities"

local string_encode_entities_module = "Module:string/encode entities"

local string_utilities_module = "Module:string utilities"

local table_module = "Module:table"

local utilities_module = "Module:utilities"

local concat = table.concat

~~local encode = mw.text.encode~~

local find = string.find

local find = ~~mw.ustring~~.find

local get_current_title = mw.title.getCurrentTitle

local ~~get_entities = require("Module:utilities").get_entities~~

~~local gsub~~ = mw.~~ustring~~.~~gsub~~

local insert = table.insert

local ~~lower~~ = mw.~~ustring~~.~~lower~~

local ipairs = ipairs

local ~~split~~ = mw.~~text~~.~~split~~

local match = string.match

local new_title = mw.title.new

local pairs = pairs

local remove = table.remove

local sub = string.sub

local toNFC = mw.ustring.toNFC

local ~~trim~~ = ~~mw.text.trim~~

local tostring = tostring

local type = type

local unstrip = mw.text.unstrip

local NAMESPACE = get_current_title().nsText

-- Lazy loader: on the first call, rebinds `anchor_encode` to the value returned by calling the [[Module:memoize]]
-- export with `mw.uri.anchorEncode` and `true`, then forwards the arguments to it. Later calls go straight to the
-- rebound function, so the module is only required if it is actually needed.
local function anchor_encode(...)

anchor_encode = require(memoize_module)(mw.uri.anchorEncode, true)

return anchor_encode(...)

end

-- Lazy loader: on the first call, rebinds `decode_entities` to `decode_entities` from [[Module:string utilities]]
-- and forwards the arguments; later calls go straight to the loaded function.
local function decode_entities(...)

decode_entities = require(string_utilities_module).decode_entities

return decode_entities(...)

end

-- Lazy loader: on the first call, rebinds `decode_uri` to `decode_uri` from [[Module:string utilities]] and
-- forwards the arguments; later calls go straight to the loaded function.
local function decode_uri(...)

decode_uri = require(string_utilities_module).decode_uri

return decode_uri(...)

end

-- Can't yet replace, as the [[Module:string utilities]] version no longer has automatic double-encoding prevention, which requires changes here to account for.

-- Lazy loader: on the first call, rebinds `encode_entities` to the value returned by requiring
-- [[Module:string/encode entities]] (the module export itself is called as the function) and forwards the
-- arguments; later calls go straight to the loaded function.
local function encode_entities(...)

encode_entities = require(string_encode_entities_module)

return encode_entities(...)

end

-- Lazy loader: on the first call, rebinds `extend` to `extend` from [[Module:table]] and forwards the arguments;
-- later calls go straight to the loaded function.
local function extend(...)

extend = require(table_module).extend

return extend(...)

end

-- Lazy loader: on the first call, rebinds `find_best_script_without_lang` to `findBestScriptWithoutLang` from
-- [[Module:scripts]] and forwards the arguments; later calls go straight to the loaded function.
local function find_best_script_without_lang(...)

find_best_script_without_lang = require(scripts_module).findBestScriptWithoutLang

return find_best_script_without_lang(...)

end

-- Lazy loader: on the first call, rebinds `format_categories` to `format_categories` from [[Module:utilities]]
-- and forwards the arguments; later calls go straight to the loaded function.
local function format_categories(...)

format_categories = require(utilities_module).format_categories

return format_categories(...)

end

-- Lazy loader: on the first call, rebinds `format_genders` to `format_genders` from [[Module:getn]] and forwards
-- the arguments; later calls go straight to the loaded function.
local function format_genders(...)

format_genders = require(gender_and_number_module).format_genders

return format_genders(...)

end

-- Lazy loader: on the first call, rebinds `format_qualifiers` to `format_qualifiers` from
-- [[Module:pron qualifier]] and forwards the arguments; later calls go straight to the loaded function.
local function format_qualifiers(...)

format_qualifiers = require(pron_qualifier_module).format_qualifiers

return format_qualifiers(...)

end

-- Lazy loader: on the first call, rebinds `get_current_L2` to `get_current_L2` from [[Module:pages]] and forwards
-- the arguments; later calls go straight to the loaded function.
local function get_current_L2(...)

get_current_L2 = require(pages_module).get_current_L2

return get_current_L2(...)

end

-- Lazy loader: on the first call, rebinds `get_lang` to `getByCode` from [[Module:languages]] and forwards the
-- arguments; later calls go straight to the loaded function.
local function get_lang(...)

get_lang = require(languages_module).getByCode

return get_lang(...)

end

-- Lazy loader: on the first call, rebinds `get_script` to `getByCode` from [[Module:scripts]] and forwards the
-- arguments; later calls go straight to the loaded function.
local function get_script(...)

get_script = require(scripts_module).getByCode

return get_script(...)

end

-- Lazy loader: on the first call, rebinds `language_anchor` to `language_anchor` from [[Module:anchors]] and
-- forwards the arguments; later calls go straight to the loaded function.
local function language_anchor(...)

language_anchor = require(anchors_module).language_anchor

return language_anchor(...)

end

-- Lazy loader: on the first call, rebinds `load_data` to `load_data` from [[Module:load]] and forwards the
-- arguments; later calls go straight to the loaded function.
local function load_data(...)

load_data = require(load_module).load_data

return load_data(...)

end

-- Lazy loader: on the first call, rebinds `request_script` to `request_script` from [[Module:script utilities]]
-- and forwards the arguments; later calls go straight to the loaded function.
local function request_script(...)

request_script = require(script_utilities_module).request_script

return request_script(...)

end

-- Lazy loader: on the first call, rebinds `shallow_copy` to `shallowCopy` from [[Module:table]] and forwards the
-- arguments; later calls go straight to the loaded function.
local function shallow_copy(...)

shallow_copy = require(table_module).shallowCopy

return shallow_copy(...)

end

-- Lazy loader: on the first call, rebinds `split` to `split` from [[Module:string utilities]] and forwards the
-- arguments; later calls go straight to the loaded function.
local function split(...)

split = require(string_utilities_module).split

return split(...)

end

-- Lazy loader: on the first call, rebinds `tag_text` to `tag_text` from [[Module:script utilities]] and forwards
-- the arguments; later calls go straight to the loaded function.
local function tag_text(...)

tag_text = require(script_utilities_module).tag_text

return tag_text(...)

end

-- Lazy loader: on the first call, rebinds `tag_translit` to `tag_translit` from [[Module:script utilities]] and
-- forwards the arguments; later calls go straight to the loaded function.
local function tag_translit(...)

tag_translit = require(script_utilities_module).tag_translit

return tag_translit(...)

end

-- Lazy loader: on the first call, rebinds `trim` to `trim` from [[Module:string utilities]] and forwards the
-- arguments; later calls go straight to the loaded function.
local function trim(...)

trim = require(string_utilities_module).trim

return trim(...)

end

-- Lazy loader: on the first call, rebinds `u` to `char` from [[Module:string utilities]] and forwards the
-- arguments; later calls go straight to the loaded function.
local function u(...)

u = require(string_utilities_module).char

return u(...)

end

-- Lazy loader: on the first call, rebinds `ulower` to `lower` from [[Module:string utilities]] and forwards the
-- arguments; later calls go straight to the loaded function.
local function ulower(...)

ulower = require(string_utilities_module).lower

return ulower(...)

end

-- Lazy loader: on the first call, rebinds `umatch` to `match` from [[Module:string utilities]] and forwards the
-- arguments; later calls go straight to the loaded function.
local function umatch(...)

umatch = require(string_utilities_module).match

return umatch(...)

end

local m_headword_data

-- Loads [[Module:headword/data]] through `load_data`, stores it in the file-level `m_headword_data` upvalue so
-- callers can use the `m_headword_data or get_headword_data()` idiom, and returns the loaded value.
local function get_headword_data()
	local data = load_data("Module:headword/data")
	m_headword_data = data
	return data
end

-- Trims `text`, choosing the charset by what the text contains. The first call builds the two charsets once and
-- rebinds `selective_trim` to the closure that does the work; that closure is then called with the same arguments.
local function selective_trim(...)
	-- Characters that are stripped from the ends no matter what.
	local invisible_chars =
		"\194\128-\194\159" .. -- U+0080-009F (C1 control characters)
		"\194\173" .. -- U+00AD (soft hyphen)
		"\226\128\170-\226\128\174" .. -- U+202A-202E (directionality formatting characters)
		"\226\129\166-\226\129\169" -- U+2066-2069 (directionality formatting characters)
	-- The usual charset: default whitespace, zero-width spaces, and everything in `invisible_chars`.
	local usual_chars = "%s" .. -- (default whitespace charset)
		"\226\128\139-\226\128\141" .. -- U+200B-200D (zero-width spaces)
		invisible_chars

	selective_trim = function(text)
		if text ~= "" then
			local stripped = trim(text, usual_chars)
			-- If the usual trim leaves nothing, fall back to the always-trimmed set only, so that purely
			-- whitespace input keeps its whitespace.
			if stripped == "" then
				return trim(text, invisible_chars)
			end
			return stripped
		end
		return text
	end

	return selective_trim(...)
end

local function escape(text, str)

Line 41:

Line 210:

end

-- Remove bold, italics, soft hyphens, strip markers and HTML tags.
-- Takes a string and returns the cleaned string (a single value; the gsub counts are discarded).
local function remove_formatting(str)
	str = str
		-- Bold ('''...'''), keeping any extra apostrophes that belong to the text.
		:gsub("('*)'''(.-'*)'''", "%1%2")
		-- Italics (''...'').
		:gsub("('*)''(.-'*)''", "%1%2")
		-- Soft hyphen (U+00AD), written as an escape so that it stays visible in the source.
		:gsub("\194\173", "")
	-- Strip markers are handled by `unstrip` before HTML tags are removed from its result. The outer
	-- parentheses truncate gsub's two return values to the string alone.
	return (unstrip(str)
		:gsub("<[^<>]+>", ""))
end

--[==[Takes an input and splits on a double slash (taking account of escaping backslashes).]==]

function export.split_on_slashes(text)
	-- Protect backslash-escaped "//" before splitting; fall back to an empty list if `split` returns nothing.
	text = split(escape(text, "//"), "//", true) or {}
	for i, v in ipairs(text) do
		text[i] = unescape(v, "//")
		-- Empty segments are represented as `false` rather than "".
		if v == "" then
			text[i] = false
		end
	end
	return text
end

--[==[Takes a wikilink and outputs the link target and display text. By default, the link target will be returned as a title object, but if `allow_bad_target` is set it will be returned as a string, and no check will be performed as to whether it is a valid link target.]==]

function export.get_wikilink_parts(text, allow_bad_target)
	-- TODO: replace `allow_bad_target` with `allow_unsupported`, with support for links to unsupported titles, including escape sequences.

	-- Accept only a single "[[...]]" spanning the whole input: it must open with "[[", contain no further "[[",
	-- and its first "]]" must be the final two characters.
	if not match(text, "^()%[%[") then -- Faster than sub(text, 1, 2) ~= "[[".
		return nil, nil
	end
	if find(text, "[[", 3, true) then
		return nil, nil
	end
	if find(text, "]]", 3, true) ~= #text - 1 then
		return nil, nil
	end

	-- Split on the first pipe; without one, the display text is the target itself.
	local target, display
	local pipe = find(text, "|", 3, true)
	if pipe then
		target = sub(text, 3, pipe - 1)
		display = sub(text, pipe + 1, -3)
	else
		target = sub(text, 3, -3)
		display = target
	end

	-- Callers that accept unvalidated targets get the raw strings back.
	if allow_bad_target then
		return target, display
	end

	local title = new_title(target)
	-- No title object means the target is invalid.
	if title == nil then
		return nil, nil
	end
	-- If the link target starts with "#" then mw.title.new returns a broken title object, so grab the current
	-- title and give it the correct fragment.
	if title.prefixedText == "" then
		local fragment = title.fragment
		if fragment == "" then -- [[#]] isn't valid
			return nil, nil
		end
		title = get_current_title()
		title.fragment = fragment
	end
	return title, display
end

-- Does the work of export.get_fragment, but can be called directly to avoid unnecessary checks for embedded links.

local function get_fragment(text)
	-- Protect backslash-escaped "#" so that it is not treated as a fragment separator.
	local escaped = escape(text, "#")
	-- Numeric character references (e.g. &#39; for ') contain "#", so they are decoded before splitting;
	-- otherwise wa&#39;a would be misparsed as pagename "wa&" with fragment "39;a".
	local decoded = decode_entities(escaped)
	-- Split at the first "#" that is followed by at least one character.
	local page, anchor = decoded:match("^(.-)#(.+)$")
	if page == nil then
		-- No fragment: the whole (unescaped) text is the target.
		return unescape(decoded, "#"), nil
	end
	local target = unescape(page, "#")
	local fragment = unescape(anchor, "#")
	return target, fragment
end

--[==[Takes a link target and outputs the actual target and the fragment (if any).]==]

function export.get_fragment(text)
	-- Look for an embedded link: a "[[" followed somewhere later by "]]".
	local open = find(text, "[[", nil, true)
	local close = open and find(text, "]]", open + 2, true)
	-- No complete embedded link, so the input can be processed directly.
	if not close then
		return get_fragment(text)
	end
	-- A single link that encloses everything and has no pipe is redundant: strip the brackets and process.
	if open == 1 and close == #text - 1 and not find(text, "|", 3, true) then
		return get_fragment(sub(text, 3, -3))
	end
	-- Anything else contains real embedded links, so the input is returned as it is.
	return text
end

~~local ignore_cap~~

--[==[

~~local pos_tags~~

Given a link target as passed to `full_link()`, get the actual page that the target refers to. This removes

function export.~~getLinkPage~~(target, lang, sc, plain)

bold, italics, strip markers and HTML; calls `makeEntryName()` for the language in question; converts targets

beginning with `*` to the Reconstruction namespace; and converts appendix-constructed languages to the Appendix

namespace. Returns up to three values:

# the actual page to link to, or {nil} to not link to anything;

# how the target should be displayed as, if the user didn't explicitly specify any display text; generally the

same as the original target, but minus any anti-asterisk !!;

# the value `true` if the target had a backslash-escaped * in it (FIXME: explain this more clearly).

]==]

function export.get_link_page_with_auto_display(target, lang, sc, plain)

local orig_target = target

if not target then

return nil

end

~~-- Remove bold, italics, soft hyphens, strip markers and HTML tags.~~

target = remove_formatting(target)

target = target

:~~gsub~~("('*)~~'''(.-'*)'''~~", "~~%1%~~2")

if target:sub(1, 1) == ":" then

:~~gsub~~("('*)~~''(.-'*)''~~", "~~%1%2~~")

local link_target = target:sub(2)

~~:gsub(""~~, ~~"")~~

link_target = (lang:hasType("conlang") and "Contionary:" or "wikt:") .. link_target

~~target~~ = ~~unstrip(~~target)

return link_target, orig_target

:~~gsub~~("<[^~~<>]+>", "~~")

end

-- ~~Check if the target is an interwiki link.~~

local prefix = target:match("^(.-):")

if target:~~match~~(":"~~) and target ~=~~ ":" then

-- Convert any escaped colons

~~local m_utildata = mw.loadData("Module:utilities/data")~~

target = target:gsub("\\:", ":")

-- If this is a link to another namespace or an interwiki link, ensure there's an initial colon and then return what we have (so that it works as a conventional link, and doesn't do anything weird like add the term to a category.)

if prefix then

~~local~~ prefix = ~~target:gsub~~("^:*(.-)~~:.*", lower~~)

-- If this is a link to another namespace or an interwiki link, ensure there's an initial colon and then

if ~~m_utildata.~~namespaces[prefix] or ~~m_utildata.~~interwikis[prefix] then

-- return what we have (so that it works as a conventional link, and doesn't do anything weird like add the term

return ~~":" ..~~ target~~:gsub("^:+", ""), nil~~, {}

-- to a category.)

prefix = ulower(trim(prefix))

if prefix ~= "" and (

load_data("Module:data/namespaces")[prefix] or

load_data("Module:data/interwikis")[prefix]

) then

return target, orig_target

end

~~-- Convert any escaped colons~~

~~target = target:gsub("\\:", ":")~~

end

-- Check if the term is reconstructed and remove any asterisk. Otherwise, handle the escapes.

-- Check if the term is reconstructed and remove any asterisk. Also check for anti-asterisk (!!).

local reconstructed, escaped

-- Otherwise, handle the escapes.

local reconstructed, escaped, anti_asterisk

if not plain then

target, reconstructed = target:gsub("^%*(.)", "%1")

if reconstructed == 0 then

target, anti_asterisk = target:gsub("^!!(.)", "%1")

if anti_asterisk == 1 then

-- Remove !! from original. FIXME! We do it this way because the call to remove_formatting() above

-- may cause non-initial !! to be interpreted as anti-asterisks. We should surely move the

-- remove_formatting() call later.

orig_target = orig_target:gsub("^!!", "")

end

target, escaped = target:gsub("^(\\-)\\%%*", "%1*")

if reconstructed == 0 and lang:hasType("reconstructed") then

orig_target = "*" .. target

reconstructed = 1

end

~~target, escaped = target:gsub("^(\\-)\\%*", "%1*")~~

if not ~~require~~(~~"Module:utilities").check_object("script", true,~~ sc~~) or~~ sc:getCode() == "None" then

if not (sc and sc:getCode() ~= "None") then

sc = lang:findBestScript(target)

end

-- Remove carets if they are used to capitalize parts of transliterations (unless they have been escaped).

if (not sc:hasCapitalization()) and sc:isTransliterated() and target:match("%^") then

Line 94:

Line 374:

target = unescape(target, "^")

end

-- Get the entry name for the language.

target = lang:makeEntryName(target, sc)

target = lang:makeEntryName(target, sc, reconstructed == 1 or lang:hasType("appendix-constructed"))

-- If the link contains unexpanded template parameters, then don't create a link.

if target:~~find~~("{{{") then

if target:match("{{{.-}}}") then

-- FIXME: Should we return the original target as the default display value (second return value)?

return nil

end

~~if target:sub(1, 1) == "/" then~~

-- Link to appendix for reconstructed terms and terms in appendix-only languages. Plain links interpret *

~~return ":" .. target~~

-- literally, however.

if not lang:hasType("conlang") then

~~elseif target:find("^Reconstruction:") then~~

if lang:hasType("appendix-constructed") then

~~return target~~

target = "wikt:Appendix:" .. lang:getFullName() .. "/" .. target

elseif reconstructed == 1 then -- asterisk found

-- Link to appendix for reconstructed terms and terms in appendix-only languages. Plain links interpret * literally, however.

if lang:getFullCode() == "und" then

elseif reconstructed == 1 then

-- Return the original target as default display value. If we don't do this, we wrongly get

if lang:~~getNonEtymologicalCode~~() == "und" then

-- [Term?] displayed instead.

return nil

return nil, orig_target

~~else~~

end

target = "Reconstruction:" .. lang:~~getNonEtymologicalName~~() .. "/" .. target

target = "wikt:Reconstruction:" .. lang:getFullName() .. "/" .. target

elseif anti_asterisk ~= 1 and (lang:hasType("reconstructed") or lang:getFamilyCode() == "qfa-sub") then

--error("The specified language " .. lang:getCanonicalName()

--.. " is unattested, while the given term does not begin with '*' to indicate that it is reconstructed.")

orig_target = "*" .. target

end

~~-- Reconstructed languages and substrates require an initial *.~~

else

~~elseif lang:hasType("~~reconstructed~~") or lang:getFamilyCode()~~ == ~~"qfa-sub"~~ then

if reconstructed == 1 then

~~local check, m_utildata~~ = ~~target:match(~~"~~^:*([^:]~~*):"~~), mw~~.~~loadData("Module:utilities/data")~~

target = "*" .. target

~~check = check and lower(check)~~

~~if m_utildata~~.~~interwikis[check] or m_utildata.namespaces[check] then~~

~~return~~ target

~~else~~

~~error("The specified language " .. lang:getCanonicalName()~~

~~.. " is unattested, while the given word is not marked with '*' to indicate that it is reconstructed.")~~

end

~~elseif lang:hasType("appendix-constructed") then~~

~~target = "Appendix:" .. lang:getNonEtymologicalName() .. "/" .. target~~

end

target = (lang:hasType("conlang") and "Contionary:" or "wikt:") .. target

return target, escaped > 0

return target, orig_target, escaped > 0

end

-- Thin wrapper around `export.get_link_page_with_auto_display` for callers that do not need the automatic display
-- text: returns that function's first and third results (the link page and the escaped flag) and drops the second.
function export.get_link_page(target, lang, sc, plain)
	local page, _, escaped = export.get_link_page_with_auto_display(target, lang, sc, plain)
	return page, escaped
end

-- Make a link from a given link's parts

local function ~~makeLink~~(link, lang, sc, id, ~~allow_self_link~~, ~~isolated~~, plain)

local function make_link(link, lang, sc, id, isolated, cats, no_alt_ast, plain)

-- Convert percent encoding to plaintext.

link.target = mw.~~uri.decode~~(link.target, "PATH")

link.target = link.target and decode_uri(link.target, "PATH")

link.fragment = link.fragment and ~~mw.uri.decode~~(link.fragment, "PATH")

link.fragment = link.fragment and decode_uri(link.fragment, "PATH")

-- Find fragments (~~when link didn~~'t ~~come from parseLink~~).

-- Find fragments (if one isn't already set).

-- Prevents {{l|en|word#Etymology 2|word}} from linking to [[word#Etymology 2#English]].

-- # can be escaped as \#.

if link.target ~~then~~

if link.target and link.fragment == nil then

~~link.target = escape(link.target, "#")~~

link.target, link.fragment = get_fragment(link.target)

if link.fragment == nil then

~~-- Replace numeric character references with the corresponding character ( → '),~~

~~-- as they contain #, which causes the numeric character reference to be~~

~~-- misparsed (wa'a → waa → pagename wa&, fragment 29;a).~~

~~link.target = get_entities(link.target)~~

~~local first, second = link.target:match("^([^#]+)#(.+)$")~~

~~if first then~~

link.target, link.fragment = ~~first, second~~

~~end~~

~~link.target = unescape~~(link.target~~, "#")~~

~~link.fragment = link.fragment and unescape(link.fragment, "#"~~)

end

-- If ~~there~~ is ~~no display form,~~ then ~~create~~ a ~~default one~~.

-- Process the target

if ~~not link.display~~ then

local auto_display, escaped

~~link.display~~ = ~~link~~.~~target~~

link.target, auto_display, escaped = export.get_link_page_with_auto_display(link.target, lang, sc, plain)

-- Create a default display form.

-- If the target is "" then it's a link like [[#English]], which refers to the current page.

if auto_display == "" then

auto_display = (m_headword_data or get_headword_data()).pagename

end

~~local display_is_target = link.display == link.target~~

~~-- Process the target~~

~~local escaped~~

~~link.target, escaped = export.getLinkPage(link.target, lang, sc, plain)~~

-- If the display is the target and the reconstruction * has been escaped, remove the escaping backslash.

if ~~display_is_target and~~ escaped then

if escaped then

~~link.display~~ = ~~link.display~~:gsub("\\([^\\]*%*)", "%1", 1)

auto_display = auto_display:gsub("\\([^\\]*%*)", "%1", 1)

end

-- Process the display form.

link.display = lang:makeDisplayText(link.display, sc, not ~~display_is_target~~)

if link.display then

local orig_display = link.display

link.display = lang:makeDisplayText(link.display, sc, true)

if cats then

auto_display = lang:makeDisplayText(auto_display, sc)

-- If the alt text is the same as what would have been automatically generated, then the alt parameter is redundant (e.g. {{l|en|foo|foo}}, {{l|en|w:foo|foo}}, but not {{l|en|w:foo|w:foo}}).

-- If they're different, but the alt text could have been entered as the term parameter without it affecting the target page, then the target parameter is redundant (e.g. {{l|ru|фу|фу́}}).

-- If `no_alt_ast` is true, use pcall to catch the error which will be thrown if this is a reconstructed lang and the alt text doesn't have *.

if link.display == auto_display then

else

local ok, check

if no_alt_ast then

ok, check = pcall(export.get_link_page, orig_display, lang, sc, plain)

else

ok = true

check = export.get_link_page(orig_display, lang, sc, plain)

end

else

link.display = lang:makeDisplayText(auto_display, sc)

end

if not link.target then

return link.display

end

-- If the target is the same as the current page ~~and~~ there is no sense id

-- If the target is the same as the current page, there is no sense id

-- and ~~linking to~~ the ~~same page hasn't been turned on,~~ then return a "self-link"

-- and either the language code is "und" or the current L2 is the current

-- like the software does.

-- language then return a "self-link" like the software does.

if (not (~~allow_self_link~~ or id)) ~~and link.target~~ == ~~mw.title.getCurrentTitle~~()~~.prefixedText~~ then

if link.target == get_current_title().prefixedText then

return tostring(mw.html.create("strong")

local fragment, current_L2 = link.fragment, get_current_L2()

:addClass("selflink")

if (

:wikitext(link.display))

fragment and fragment == current_L2 or

not (id or fragment) and (lang:getFullCode() == "und" or lang:getFullName() == current_L2)

) then

return tostring(mw.html.create("strong")

:addClass("selflink")

:wikitext(link.display))

end

-- Add fragment. Do not add a section link to "Undetermined", as such sections do not exist and are invalid. TabbedLanguages handles links without a section by linking to the "last visited" section, but adding "Undetermined" would break that feature. For localized prefixes that make syntax error, please use the format: ["xyz"] = true.

-- Add fragment. Do not add a section link to "Undetermined", as such sections do not exist and are invalid.

local prefix~~, lower_prefix~~ = link.target:match("^:*([^:]+):")

-- TabbedLanguages handles links without a section by linking to the "last visited" section, but adding

-- "Undetermined" would break that feature. For localized prefixes that make syntax error, please use the

~~local m_utildata~~

-- format: ["xyz"] = true.

if prefix ~~then~~

local prefix = link.target:match("^:*([^:]+):")

~~lower_prefix~~ = ~~lower~~(prefix)

prefix = prefix and ulower(prefix)

~~m_utildata = mw.loadData~~("Module:~~utilities~~/~~data~~")

~~end~~

if prefix ~= "category" and not (prefix and load_data("Module:data/interwikis")[prefix]) then

if (link.fragment or link.target:sub(-1) == "#") and not plain then

~~if not (m_utildata and m_utildata.interwikis~~[~~lower_prefix~~]) then

if cats then

insert(cats, lang:getFullName() .. " links with manual fragments")

if (~~not~~ link.fragment) and lang:~~getNonEtymologicalCode~~() ~= "~~und~~" then

end

if not link.fragment then

if id then

link.fragment = ~~require~~("~~Module:senseid~~")~~.anchor~~(lang, id)

link.fragment = lang:getFullCode() == "und" and anchor_encode(id) or language_anchor(lang, id)

elseif not (link.target:~~find~~("^Appendix:") or link.target:~~find~~("^Reconstruction:") ~~or plain~~) then

elseif lang:getFullCode() ~= "und" and not (link.target:match("^Appendix:") or link.target:match("^Reconstruction:")) then

link.fragment = lang:~~getNonEtymologicalName~~()

link.fragment = anchor_encode(lang:getFullName())

end

~~elseif plain and id then~~

~~link.fragment = id~~

end

if isolated then

-- Put inward-facing square brackets around a link to isolated spacing character(s).

link.display = ~~mw.loadData(~~"~~Module:links/data~~").~~display_change[~~link.display~~] or link~~.~~display~~

if isolated and #link.display > 0 and not umatch(decode_entities(link.display), "%S") then

link.display = "]" .. link.display .. "["

end

link.target = link.target:gsub("^(:?)(.*)", function(m1, m2)

return m1 .. ~~encode~~(m2, "#%%&+/:<=>@[\\%]_{|}")

return m1 .. encode_entities(m2, "#%&+/:<=>@[\\]_{|}")

end)

link.fragment = link.fragment and ~~encode~~(link.fragment, "#%%&+/:<=>@[\\%]_{|}")

link.fragment = link.fragment and encode_entities(remove_formatting(link.fragment), "#%&+/:<=>@[\\]_{|}")

return "[[" .. link.target .. (link.fragment and "#" .. link.fragment or "") .. "|" .. link.display .. "]]"

return "[[" ..

link.target:gsub("^[^:]", ":%0") .. (link.fragment and "#" .. link.fragment or "") .. "|" .. link.display .. "]]"

end

-- Split a link into its parts

local function ~~parseLink~~(linktext)

local function parse_link(linktext)

local link = { target = linktext }

local ~~first~~, ~~second =~~ link.target:match("^(~~[^|]+~~)|(.+)$")

local target = link.target

-- Prevent characters whose HTML entities are unsupported titles from being incorrectly recognised as the entity if they are in a link being re-parsed (e.g. "&" becomes "&" when returned, but "&" is also an unsupported title. If "&" is given as a link which is then re-parsed, we don't want it to be perceived as "&".)

link.target, link.display = target:match("^(..-)|(.+)$")

if link.target~~:match("&[^;]+;")~~ then

if not link.target then

~~local unsupported_titles = mw.loadData("Module:links/data").unsupported_titles~~

link.target = target

~~if unsupported_titles[second] and unsupported_titles[second] ~= first then~~

link.display = target

link.target = ~~get_entities(link.~~target)

~~first, second =~~ link.target~~:match("^([^|]+)|(.+)$")~~

~~end~~

end

~~if first then~~

~~link.target = first~~

~~link.display = second~~

~~else~~

~~link.display = link.target~~

~~end~~

-- There's no point in processing these, as they aren't real links.

local target_lower = link.target:lower()

for _, ~~falsePositive~~ in ipairs({"category", "cat", "file", "image"}) do

for _, false_positive in ipairs({ "category", "cat", "file", "image" }) do

if target_lower:match("^" .. ~~falsePositive~~ .. ":") then return nil end

if target_lower:match("^" .. false_positive .. ":") then

return nil

end

~~first, second~~ = link.target~~:match("^(~~.~~+)#~~(.~~+)$"~~)

link.display = decode_entities(link.display)

link.target, link.fragment = get_fragment(link.target)

~~if first then~~

-- So that make_link does not look for a fragment again.

~~link.target = first~~

if not link.fragment then

~~link.fragment = second~~

~~else~~

-- So that ~~makeLink~~ does not look for a fragment again

link.fragment = false

end

return link

end

-- Records tracking categories for parameters that are ignored because the term contains embedded wikilinks.
-- `alt` and `id` are the parameter values that will be ignored; `lang` is the language object whose full name
-- prefixes the category; `cats` is the list that category names are appended to (nothing happens if it is nil).
local function check_params_ignored_when_embedded(alt, lang, id, cats)
	if alt then
		if cats then
			insert(cats, lang:getFullName() .. " links with ignored alt parameters")
		end
	end
	if id then
		if cats then
			insert(cats, lang:getFullName() .. " links with ignored id parameters")
		end
	end
end

-- Find embedded links and ensure they link to the correct section.

local function process_embedded_links(text, ~~data~~, ~~allow_self_link~~, plain)

local function process_embedded_links(text, alt, lang, sc, id, cats, no_alt_ast, plain)

-- Process the non-linked text.

text = ~~data.~~lang:makeDisplayText(text, ~~data.~~sc~~[1]~~, true)

text = lang:makeDisplayText(text, sc, true)

-- If the text begins with * and another character, then act as if each link begins with *. However, don't do this if the * is contained within a link at the start. E.g. `|*[[foo]]` would set ~~allReconstructed~~ to true, while `|[[*foo]]` would not.

-- If the text begins with * and another character, then act as if each link begins with *. However, don't do this if the * is contained within a link at the start. E.g. `|*[[foo]]` would set all_reconstructed to true, while `|[[*foo]]` would not.

local ~~allReconstructed~~ = false

local all_reconstructed = false

if not plain then

if ~~require~~(~~"Module:utilities"~~)~~.get_plaintext(text~~:~~gsub~~(~~"%[%[.-%]%]"~~, ~~"."~~)~~):match(~~"^*.") then

-- anchor_encode removes links etc.

~~allReconstructed~~ = true

if anchor_encode(text):sub(1, 1) == "*" then

all_reconstructed = true

end

-- Otherwise, handle any escapes.

text = text:gsub("^(\\-)\\%*", "%1*")

end

~~if data.alt then~~

check_params_ignored_when_embedded(alt, lang, id, cats)

~~mw.log~~(~~"(from Module:links)", "text with embedded wikilinks:", text,~~

~~"ignored alt:", data.~~alt, "lang~~:", data.lang:getNonEtymologicalCode())~~

local function process_link(space1, linktext, space2)

~~end~~

~~if data.id then~~

~~mw.log("(from Module:links)", "text with embedded wikilinks:", text,~~

~~"ignored id:"~~, ~~data.~~id, ~~"lang:", data.lang:getNonEtymologicalCode()~~)

~~end~~

local function ~~processLink~~(space1, linktext, space2)

local capture = "[[" .. linktext .. "]]"

local link = parse_link(linktext)

~~linktext = get_entities(linktext)~~

-- Return unprocessed false positives untouched (e.g. categories).

local link = ~~parseLink~~(linktext)

if not link then

return capture

--Return unprocessed false positives untouched (e.g. categories).

if not link then return capture ~~end~~

~~if allReconstructed and not link.target:find("^%*") then~~

~~link.target = "*" .. link.target~~

end

linktext = ~~makeLink~~(link, ~~data.~~lang, ~~data.~~sc, ~~data.~~id, ~~allow_self_link~~, ~~false~~, plain)

if all_reconstructed then

if link.target:find("^!!") then

-- Check for anti-asterisk !! at the beginning of a target, indicating that a reconstructed term

-- wants a part of the term to link to a non-reconstructed term, e.g. Old English

-- {{ang-noun|m|head=*[[!!Crist|Cristes]] [[!!mæsseǣfen]]}}.

link.target = link.target:sub(3)

-- Also remove !! from the display, which may have been copied from the target (as in mæsseǣfen in

-- the example above).

link.display = link.display:gsub("^!!", "")

elseif not link.target:match("^%*") then

link.target = "*" .. link.target

end

linktext = make_link(link, lang, sc, id, false, nil, no_alt_ast, plain)

:gsub("^%[%[", "\3")

:gsub("%]%]$", "\4")

return space1 .. linktext .. space2

end

-- Use chars 1 and 2 as temporary substitutions, so that we can use charsets. These are converted to chars 3 and 4 by ~~processLink~~, which means we can convert any remaining chars 1 and 2 back to square brackets (i.e. those not part of a link).

-- Use chars 1 and 2 as temporary substitutions, so that we can use charsets. These are converted to chars 3 and 4 by process_link, which means we can convert any remaining chars 1 and 2 back to square brackets (i.e. those not part of a link).

text = text

:gsub("%[%[", "\1")

:gsub("%]%]", "\2")

-- If the script uses ^ to capitalize transliterations, make sure that any carets preceding links are on the inside, so that they get processed with the following text.

if text:~~match~~("%^") and not ~~data.~~sc:hasCapitalization() and ~~data.~~sc:isTransliterated() then

if (

text:find("^", nil, true) and

not sc:hasCapitalization() and

sc:isTransliterated()

) then

text = escape(text, "^")

:gsub("%^\1", "\1%^")

text = unescape(text, "^")

end

text = text:gsub("\1(%s*)([^\1\2]-)(%s*)\2", ~~processLink~~)

text = text:gsub("\1(%s*)([^\1\2]-)(%s*)\2", process_link)

-- Remove the extra * at the beginning of a language link if it's immediately followed by a link whose display begins with * too.

if ~~allReconstructed~~ then

if all_reconstructed then

text = text:gsub("^%*\3([^|\1-\4]+)|%*", "\3%1|*")

end

return (text

:gsub("[\1\3]", "[[")

:gsub("[\2\4]", "]]"))

:gsub("[\2\4]", "]]")

)

end

local function ~~handle_redundant_wikilink~~(~~text~~, alt)

local function simple_link(term, fragment, alt, lang, sc, id, cats, no_alt_ast, srwc)

local ~~temp = text:match("^%[%[(.-)%]%]$")~~

local plain

if ~~not temp~~ then

if lang == nil then

~~return text~~, ~~alt~~

lang, plain = get_lang("und"), true

end

~~local temp_lower~~ = ~~temp:lower~~()

~~for _~~, ~~falsePositive in ipairs~~({"~~category~~", "~~cat~~", ~~"file"~~, ~~"image"}~~) do

-- Get the link target and display text. If the term is the empty string, treat the input as a link to the current page.

if ~~temp_lower~~:~~match~~(~~"^" .. falsePositive~~ .. ":") ~~then~~

if term == "" then

~~return text, alt~~

term = get_current_title().prefixedText

elseif term then

local new_term, new_alt = export.get_wikilink_parts(term, true)

if new_term then

check_params_ignored_when_embedded(alt, lang, id, cats)

-- [[|foo]] links are treated as plaintext "[[|foo]]".

-- FIXME: Pipes should be handled via a proper escape sequence, as they can occur in unsupported titles.

if new_term == "" then

term, alt = nil, term

else

local title = new_title(new_term)

if title then

local ns = title.namespace

-- File: and Category: links should be returned as-is.

if ns == 6 or ns == 14 then

return term

end

term, alt = new_term, new_alt

if cats then

if not (srwc and srwc(term, alt)) then

insert(cats, lang:getFullName() .. " links with redundant wikilinks")

end

~~-- Note: it's possible for "[[" or "]]" to be uninvolved in links, so we need to check for both individually (e.g. "[[aaa]] bb]]" would not have a redundant wikilink).~~

if alt then

if ~~temp and not (temp:find("%[%[") or temp:find("%]%]"))~~ then

alt = selective_trim(alt)

~~text,~~ alt = ~~temp:match~~(~~"^([^|]+)|?(.-)$"~~)

if alt == "" then

alt = nil

end

return ~~text~~, alt

-- If there's nothing to process, return nil.

if not (term or alt) then

return nil

end

-- If there is no script, get one.

if not sc then

sc = lang:findBestScript(alt or term)

end

-- Embedded wikilinks need to be processed individually.

if term then

local open = find(term, "[[", nil, true)

if open and find(term, "]]", open + 2, true) then

return process_embedded_links(term, alt, lang, sc, id, cats, no_alt_ast, plain)

end

term = selective_trim(term)

end

-- If not, make a link using the parameters.

return make_link({

target = term,

display = alt,

fragment = fragment

}, lang, sc, id, true, cats, no_alt_ast, plain)

end

Line 372:

Line 730:

: Sense id string. If this argument is defined, the link will point to a language-specific sense id ({{ll|en|identifier|id=HTML}}) created by the template {{temp|senseid}}. A sense id consists of the language's canonical name, a hyphen (<code>-</code>), and the string that was supplied as the <code class="n">id</code> argument. This is useful when a term has more than one sense in a language. If the <code class="n">term</code> argument contains wikilinks, this argument is ignored. (Links in which the sense id is ignored are tracked with the tracking template {{whatlinkshere|tracking=links/id-ignored}}.)

The second argument is as follows:

; <code class="n">~~allowSelfLink~~</code>

; <code class="n">allow_self_link</code>

: If {{code|lua|true}}, the function will also generate links to the current page. The default ({{code|lua|false}}) will not generate a link but generate a bolded "self link" instead.

The following special options are processed for each link (both simple text and with embedded wikilinks):

Line 380:

Line 738:

** {{temp|l|en|*nix}} links to the nonexistent page [[Reconstruction:English/nix]] (<code class="n">*</code> is interpreted as a reconstruction), but {{temp|l|en|:*nix}} links to [[*nix]].

** {{temp|l|sl|Franche-Comté}} links to the nonexistent page [[Franche-Comte]] (<code>é</code> is converted to <code>e</code> by <code class="n">makeEntryName</code>), but {{temp|l|sl|:Franche-Comté}} links to [[Franche-Comté]].]==]

function export.language_link(data)
	-- `data` is the table of link parameters; reject anything else early with a descriptive message.
	if type(data) ~= "table" then
		error(
			"The first argument to the function language_link must be a table. See Module:links/documentation for more information.")
	end

	-- Categorize links to "und". Categories are only recorded when the caller supplied a `cats` list to collect them.
	local lang, cats = data.lang, data.cats
	if cats and lang:getCode() == "und" then
		insert(cats, "Undetermined language links")
	end

	-- The link itself is built by simple_link from the individual fields of `data`.
	return simple_link(
		data.term,
		data.fragment,
		data.alt,
		lang,
		data.sc,
		data.id,
		cats,
		data.no_alt_ast,
		data.suppress_redundant_wikilink_cat
	)
end

--[==[Variant of {language_link} that passes no language object: the same fields of `data` are forwarded to
{simple_link}, but with {nil} in the language position. `data` must be a table.]==]
function export.plain_link(data)
	if type(data) ~= "table" then
		error(
			"The first argument to the function plain_link must be a table. See Module:links/documentation for more information.")
	end

	return simple_link(
		data.term,
		data.fragment,
		data.alt,
		nil, -- no language object is passed for a plain link
		data.sc,
		data.id,
		data.cats,
		data.no_alt_ast,
		data.suppress_redundant_wikilink_cat
	)
end

--[==[Replace any links with links to the correct section, but don't link the whole text if no embedded links are found. Returns the display text form.]==]

function export.embedded_language_links(data)
	if type(data) ~= "table" then
		error(
			"The first argument to the function embedded_language_links must be a table. See Module:links/documentation for more information.")
	end

	local term, lang, sc = data.term, data.lang, data.sc

	-- If we don't have a script, get one.
	if not sc then
		sc = lang:findBestScript(term)
	end

	-- Do we have embedded wikilinks? If so, they need to be processed individually.
	-- Plain (non-pattern) searches: an opening "[[" followed somewhere later by a closing "]]".
	local open = find(term, "[[", nil, true)
	if open and find(term, "]]", open + 2, true) then
		return process_embedded_links(term, data.alt, lang, sc, data.id, data.cats, data.no_alt_ast)
	end

	-- If not, return the display text.
	term = selective_trim(term)
	-- FIXME: Double-escape any percent-signs, because we don't want to treat non-linked text as having
	-- percent-encoded characters. This is a hack: percent-decoding should come out of [[Module:languages]] and only
	-- dealt with in this module, as it's specific to links.
	term = term:gsub("%%", "%%25")
	return lang:makeDisplayText(term, sc, true)
end

function export.mark(text, ~~itemType~~, face, lang)

function export.mark(text, item_type, face, lang)

local tag = { "", "" }

if ~~itemType~~ == "gloss" then

if item_type == "gloss" then

tag = { '“',

'”' }

elseif ~~itemType~~ == "tr" then

elseif item_type == "tr" then

if face == "term" then

tag = { '',

tag = { '',

'' }

else

tag = { '', '' }

tag = { '', '' }

end

elseif ~~itemType~~ == "ts" then

elseif item_type == "ts" then

-- \226\129\160 = word joiner (zero-width non-breaking space) U+2060

tag = { '/\226\129\160', '\226\129\160/' }

elseif ~~itemType~~ == "pos" then

elseif item_type == "pos" then

tag = { '', '' }

elseif ~~itemType~~ == "annotations" then

elseif item_type == "non-gloss" then

tag = { '', '' }

elseif item_type == "annotations" then

tag = { '(',

')' }

elseif item_type == "infl" then

tag = { '', '' }

end

Line 501:

Line 843:

end

--[==[Formats the annotations that are displayed with a link created by {{code|lua|full_link}}. Annotations are the extra bits of information that are displayed following the linked term, and include things such as gender, transliteration, gloss and so on.

local pos_tags

--[==[Formats the annotations that are displayed with a link created by {{code|lua|full_link}}. Annotations are the extra bits of information that are displayed following the linked term, and include things such as gender, transliteration, gloss and so on.

* The first argument is a table possessing some or all of the following keys:

*:; <code class="n">genders</code>

Line 510:

Line 854:

*:: Gloss that translates the term in the link, or gives some other descriptive information.

*:; <code class="n">pos</code>

*:: Part of speech of the linked term. If the given argument matches one of the ~~templates~~ in [[:~~Category:Part of speech tags~~]], ~~then call that to show~~ a part-of-~~speech tag~~. Otherwise, just show the given text as it is.

*:: Part of speech of the linked term. If the given argument matches one of the aliases in `pos_aliases` in [[Module:headword/data]], or consists of a part of speech or alias followed by `f` (for a non-lemma form), expand it appropriately. Otherwise, just show the given text as it is.

*:; <code class="n">ng</code>

*:: Arbitrary non-gloss descriptive text for the link. This should be used in preference to putting descriptive text in `gloss` or `pos`.

*:; <code class="n">lit</code>

*:: Literal meaning of the term, if the usual meaning is figurative or idiomatic.

*:; <code class="n">infl</code>

*:: Table containing a list of grammar tags in the style of [[Module:form of]] `tagged_inflections`.

*:Any of the above values can be omitted from the <code class="n">info</code> argument. If a completely empty table is given (with no annotations at all), then an empty string is returned.

* The second argument is a string. Valid values are listed in [[Module:script utilities/data]] "data.translit" table.]==]

Line 529:

Line 877:

if data.genders and #data.genders > 0 then

local ~~m_gen~~ = ~~require~~(~~"Module:gender and number"~~)

local genders, gender_cats = format_genders(data.genders, data.lang)

insert(output, " " .. ~~m_gen~~.~~format_list~~(~~data.genders~~, ~~data.lang)~~)

insert(output, " " .. genders)

if gender_cats then

local cats = data.cats

if cats then

extend(cats, gender_cats)

end

Line 545:

Line 899:

if data.tr[1] and data.ts[1] then

insert(annotations,

insert(annotations, tag_translit(data.tr[1], data.lang, kind) .. " " .. export.mark(data.ts[1], "ts"))

~~require("Module:script utilities").~~tag_translit(data.tr[1], data.lang, kind)

.. " " .. export.mark(data.ts[1], "ts"))

elseif data.ts[1] then

insert(annotations, export.mark(data.ts[1], "ts"))

else

insert(annotations,

insert(annotations, tag_translit(data.tr[1], data.lang, kind))

~~require("Module:script utilities").~~tag_translit(data.tr[1], data.lang, kind))

end

Line 564:

Line 915:

if data.pos then

-- debug category for pos= containing transcriptions

if data.pos:~~find~~("/[^><]*/") then

if data.pos:match("/[^><]-/") then

data.pos = data.pos .. "[[Category:links likely containing transcriptions in pos]]"

end

pos_tags = pos_tags or mw.~~loadData~~("~~Module~~:~~links/data~~").~~pos_tags~~

-- Canonicalize part of speech aliases as well as non-lemma aliases like 'nf' or 'nounf' for "noun form".

insert(annotations, export.mark(~~pos_tags[data.~~pos] or data.pos, "pos"))

pos_tags = pos_tags or (m_headword_data or get_headword_data()).pos_aliases

local pos = pos_tags[data.pos]

if not pos and data.pos:find("f$") then

local pos_form = data.pos:sub(1, -2)

-- We only expand something ending in 'f' if the result is a recognized non-lemma POS.

pos_form = (pos_tags[pos_form] or pos_form) .. " form"

if (m_headword_data or get_headword_data()).nonlemmas[pos_form .. "s"] then

pos = pos_form

end

insert(annotations, export.mark(pos or data.pos, "pos"))

end

-- Inflection data

if data.infl then

local m_form_of = require(form_of_module)

-- Split tag sets manually, since tagged_inflections creates a numbered list, and we do not want that.

local infl_outputs = {}

local tag_sets = m_form_of.split_tag_set(data.infl)

for _, tag_set in ipairs(tag_sets) do

table.insert(infl_outputs,

m_form_of.tagged_inflections({ tags = tag_set, lang = data.lang, nocat = true, nolink = true, nowrap = true }))

end

insert(annotations, export.mark(table.concat(infl_outputs, "; "), "infl"))

end

-- Non-gloss text

if data.ng then

insert(annotations, export.mark(data.ng, "non-gloss"))

end

Line 575:

Line 954:

if data.lit then

insert(annotations, "literally " .. export.mark(data.lit, "gloss"))

end

-- Provide a hook to insert additional annotations such as nested inflections.

if data.postprocess_annotations then

data.postprocess_annotations {

data = data,

annotations = annotations

}

end

Line 584:

Line 971:

end

--[==[Creates a full link, with annotations (see ~~<code class="n">~~[[#format_link_annotations|format_link_annotations]]~~</code>~~), in the style of {{~~temp~~|l}} or {{~~temp~~|m}}.

-- Encode certain characters to avoid various delimiter-related issues at various stages. We need to encode < and >

The first argument, ~~<code class="n">~~data~~</code>~~, must be a table. It contains the various elements that can be supplied as parameters to {{~~temp~~|l}} or {{~~temp~~|m}}:

-- because they end up forming part of CSS class names inside of <span ...> and will interfere with finding the end

-- of the HTML tag. I first tried converting them to URL encoding, i.e. %3C and %3E; they then appear in the URL as

-- %253C and %253E, which get mapped back to %3C and %3E when passed to [[Module:accel]]. But mapping them to &lt;

-- and &gt; somehow works magically without any further work; they appear in the URL as < and >, and get passed to

-- [[Module:accel]] as < and >. I have no idea who along the chain of calls is doing the encoding and decoding. If

-- someone knows, please modify this comment appropriately!

-- Lazily-built lookup table used when encoding acceleration parameters into CSS class names. It stays nil until
-- get_accel_char_map() is first called.
local accel_char_map

-- Build the character map, cache it in `accel_char_map` and return it.
local function get_accel_char_map()
	accel_char_map = {
		["%"] = ".",
		[" "] = "_",
		-- A literal underscore is swapped for U+FFF0, since "_" itself is used above to stand for a space.
		["_"] = u(0xFFF0),
		-- Angle brackets must become HTML entities: mapping them to themselves would leave them unencoded inside the
		-- class attribute.
		["<"] = "&lt;",
		[">"] = "&gt;",
	}
	return accel_char_map
end

-- Replace every character of `param` that has an entry in the acceleration character map (percent sign, space,
-- angle brackets, underscore) with its encoded form, and return the resulting string.
local function encode_accel_param_chars(param)
	-- "%%" is required to put a literal percent sign in the set: inside a Lua character class "% " is merely an
	-- escaped space, so the set "[% <>_]" never matched "%" and the map's "%" entry could not be reached.
	-- The outer parentheses discard gsub's second return value (the substitution count).
	return (param:gsub("[%% <>_]", accel_char_map or get_accel_char_map()))
end

-- Encode one acceleration parameter as a CSS class fragment.
-- `prefix` is prepended to the encoded value. `param` may be nil/false (the result is then the empty string), a
-- string, or a table with numeric keys, which is flattened into a single "*~!"-separated string first.
local function encode_accel_param(prefix, param)
	if not param then
		return ""
	end
	if type(param) == "table" then
		local filled_params = {}
		-- There may be gaps in the sequence, especially for translit params, so find the highest numeric key
		-- instead of relying on the length operator.
		local maxindex = 0
		for k in pairs(param) do
			if type(k) == "number" and k > maxindex then
				maxindex = k
			end
		end
		-- Fill the gaps with empty strings so that every element keeps its position.
		for i = 1, maxindex do
			filled_params[i] = param[i] or ""
		end
		-- [[Module:accel]] splits these up again.
		param = concat(filled_params, "*~!")
	end
	-- This is decoded again by [[WT:ACCEL]].
	return prefix .. encode_accel_param_chars(param)
end

-- Append `item` to `list`, except when `item` is the empty string, in which case `list` is left untouched.
local function insert_if_not_blank(list, item)
	if item ~= "" then
		insert(list, item)
	end
end

-- Build the space-separated CSS class string for a link.
-- * `lang`: language object; only consulted (via getFullCode) when `accel` is given.
-- * `tr`: transliteration used for the "transliteration-" class when `accel.translit` is absent; "-" is ignored.
-- * `accel`: optional table of acceleration fields (form, gender, pos, translit, target, lemma, lemma_translit,
--   no_store).
-- * `nowrap`: optional class name appended as-is.
-- Returns the empty string when neither `accel` nor `nowrap` is given.
local function get_class(lang, tr, accel, nowrap)
	if not accel and not nowrap then
		return ""
	end
	local classes = {}
	if accel then
		insert(classes, "form-of lang-" .. lang:getFullCode())
		local form = accel.form
		if form then
			insert(classes, encode_accel_param_chars(form) .. "-form-of")
		end
		-- Each of these encodes to "" when its field is absent, in which case no class is added.
		insert_if_not_blank(classes, encode_accel_param("gender-", accel.gender))
		insert_if_not_blank(classes, encode_accel_param("pos-", accel.pos))
		insert_if_not_blank(classes, encode_accel_param("transliteration-", accel.translit or (tr ~= "-" and tr or nil)))
		insert_if_not_blank(classes, encode_accel_param("target-", accel.target))
		insert_if_not_blank(classes, encode_accel_param("origin-", accel.lemma))
		insert_if_not_blank(classes, encode_accel_param("origin_transliteration-", accel.lemma_translit))
		if accel.no_store then
			insert(classes, "form-of-nostore")
		end
	end
	-- Handled outside the `accel` branch: the guard at the top only returns early when both are absent, so `nowrap`
	-- must also be honoured on its own.
	if nowrap then
		insert(classes, nowrap)
	end
	return concat(classes, " ")
end

-- Wrap an already-formatted term with whatever qualifiers, accent qualifiers, labels and references are present in
-- `data`. The following fields of `data` are read, all of them optional:
-- * `lang`: language object, passed through to the qualifier formatter;
-- * `q` / `qq`: left / right regular qualifiers, each either a single string or an array of strings;
-- * `a` / `aa`: left / right accent qualifiers (arrays);
-- * `l` / `ll`: left / right labels (arrays);
-- * `refs`: array of references.
-- `formatted` is the formatted term. It is returned unchanged when none of the above lists has a first element;
-- otherwise the result of `format_qualifiers` is returned.
local function add_qualifiers_and_refs_to_term(data, formatted)
	-- Normalize single-string qualifiers to one-element arrays.
	local left_quals, right_quals = data.q, data.qq
	if type(left_quals) == "string" then
		left_quals = { left_quals }
	end
	if type(right_quals) == "string" then
		right_quals = { right_quals }
	end

	-- Anything to attach? A list counts only if it exists and has a first element.
	local has_extras = left_quals and left_quals[1]
		or right_quals and right_quals[1]
		or data.a and data.a[1]
		or data.aa and data.aa[1]
		or data.l and data.l[1]
		or data.ll and data.ll[1]
		or data.refs and data.refs[1]
	if not has_extras then
		return formatted
	end

	local with_extras = format_qualifiers {
		lang = data.lang,
		text = formatted,
		q = left_quals,
		qq = right_quals,
		a = data.a,
		aa = data.aa,
		l = data.l,
		ll = data.ll,
		refs = data.refs,
	}
	return with_extras
end

--[==[

Creates a full link, with annotations (see `[[#format_link_annotations|format_link_annotations]]`), in the style of {{tl|l}} or {{tl|m}}.

The first argument, `data`, must be a table. It contains the various elements that can be supplied as parameters to {{tl|l}} or {{tl|m}}:

{ {

term = entry_to_link_to,

Line 591:

Line 1,101:

lang = language_object,

sc = script_object,

track_sc = boolean,

no_nonstandard_sc_cat = boolean,

fragment = link_fragment,

id = sense_id,

genders = { "gender1", "gender2", ... },

tr = transliteration,

respect_link_tr = boolean,

ts = transcription,

gloss = gloss,

pos = part_of_speech_tag,

ng = non-gloss text,

lit = literal_translation,

infl = { "form_of_grammar_tag1", "form_of_grammar_tag2", ... },

no_alt_ast = boolean,

accel = {accelerated_creation_tags},

interwiki = interwiki,

pretext = "text_at_beginning" or nil,

posttext = "text_at_end" or nil,

q = { "left_qualifier1", "left_qualifier2", ...} or "left_qualifier",

qq = { "right_qualifier1", "right_qualifier2", ...} or "right_qualifier",

l = { "left_label1", "left_label2", ...},

ll = { "right_label1", "right_label2", ...},

a = { "left_accent_qualifier1", "left_accent_qualifier2", ...},

aa = { "right_accent_qualifier1", "right_accent_qualifier2", ...},

refs = { "formatted_ref1", "formatted_ref2", ...} or { {text = "text", name = "name", group = "group"}, ... },

show_qualifiers = boolean,

} }

Any one of the items in the ~~<code class="n">~~data~~</code>~~ table may be {~~{code|lua|~~nil}}, but an error will be shown if neither ~~<code class="n">~~term~~</code>~~ nor ~~<code class="n">~~alt~~</code>~~ nor ~~<code class="n">~~tr~~</code>~~ is present.

Any one of the items in the `data` table may be {nil}, but an error will be shown if neither `term` nor `alt` nor `tr`

Thus, calling {~~{code|lua|2=~~full_link{ term = term, lang = lang, sc = sc } }}, where ~~<code class="n">~~term</~~code> is an entry name, <code class="n">~~lang~~</code>~~ is a [[Module:languages#Language objects|language object]] from [[Module:languages~~]], and <code class="n">sc</code> is a [[Module:scripts#Script objects|script object]] from [[Module:scripts~~]], will give a plain link similar to the one produced by the template {{~~temp~~|l}}, and calling {~~{code|lua|2=~~full_link( { term = term, lang = lang, sc = sc }, "term" )}} will give a link similar to the one produced by the template {{~~temp~~|m}}.

is present. Thus, calling {full_link{ term = term, lang = lang, sc = sc }}, where `term` is the page to link to (which

may have diacritics that will be stripped and/or embedded bracketed links) and `lang` is a

[[Module:languages#Language objects|language object]] from [[Module:languages]], will give a plain link similar to the

one produced by the template {{tl|l}}, and calling {full_link( { term = term, lang = lang, sc = sc }, "term" )} will

give a link similar to the one produced by the template {{tl|m}}.

The function will:

* Try to determine the script, based on the characters found in the term or alt argument, if the script was not given.

* Try to determine the script, based on the characters found in the `term` or `alt` argument, if the script was not

* Call ~~<code class="n">~~[[#language_link|language_link]]~~</code>~~ on the term or alt forms, to remove diacritics in the page name, process any embedded wikilinks and create links to Reconstruction or Appendix pages when necessary.

given. If a script is given and `track_sc` is {true}, it will check whether the input script is the same as the one

* Call ~~<code class="n">~~[[Module:script utilities#tag_text]]~~</code>~~ to add the appropriate language and script tags to the term, and to italicize terms written in the Latin script if necessary. Accelerated creation tags, as used by [[WT:ACCEL]], are included.

which would have been automatically generated and add the category [[:Category:LANG terms with redundant script codes]]

* Generate a transliteration, based on the alt or term arguments, if the script is not Latin ~~and~~ no transliteration was provided.

if yes, or [[:Category:LANG terms with non-redundant manual script codes]] if no. This should be used when the input

* Add the annotations (transliteration, gender, gloss etc.) after the link.]==]

script object is directly determined by a template's `sc` parameter.

function export.full_link(data, face, allow_self_link, ~~no_check_redundant_translit)~~

* Call `[[#language_link|language_link]]` on the `term` or `alt` forms, to remove diacritics in the page name, process

~~-- Prevent data from being destructively modified.~~

any embedded wikilinks and create links to Reconstruction or Appendix pages when necessary.

~~local data = require("Module:table").shallowcopy(data~~)

* Call `[[Module:script utilities#tag_text]]` to add the appropriate language and script tags to the term and

italicize terms written in the Latin script if necessary. Accelerated creation tags, as used by [[WT:ACCEL]], are

included.

* Generate a transliteration, based on the `alt` or `term` arguments, if the script is not Latin, no transliteration was

provided in `tr` and the combination of the term's language and script support automatic transliteration. The

transliteration itself will be linked if both `.respect_link_tr` is specified and the language of the term has the

`link_tr` property set for the script of the term; but not otherwise.

* Add the annotations (transliteration, gender, gloss, etc.) after the link.

* If `no_alt_ast` is specified, then the `alt` text does not need to contain an asterisk if the language is

reconstructed. This should only be used by modules which really need to allow links to reconstructions that don't

display asterisks (e.g. number boxes).

* If `pretext` or `posttext` is specified, this is text to (respectively) prepend or append to the output, directly

before processing qualifiers, labels and references. This can be used to add arbitrary extra text inside of the

qualifiers, labels and references.

* If `show_qualifiers` is specified or the `show_qualifiers` argument is given, then left and right qualifiers, accent

qualifiers, labels and references will be displayed, otherwise they will be ignored. (This is because a fair amount of

code stores qualifiers, labels and/or references in these fields and displays them itself, rather than expecting

{full_link()} to display them.)]==]

function export.full_link(data, face, allow_self_link, show_qualifiers)

if type(data) ~= "table" then

error("The first argument to the function full_link must be a table. "

.. "See Module:links/documentation for more information.")

end

local terms = {true}

-- Prevent data from being destructively modified.

local data = shallow_copy(data)

-- FIXME: this shouldn't be added to `data`, as that means the input table needs to be cloned.

data.cats = {}

-- Categorize links to "und".

local lang, cats = data.lang, data.cats

if cats and lang:getCode() == "und" then

insert(cats, "Undetermined language links")

end

local terms = { true }

-- Generate multiple forms if applicable.

for _, param in ipairs{"term", "alt"} do

for _, param in ipairs { "term", "alt" } do

if type(data[param]) == "string" and data[param]:find("//") then

if type(data[param]) == "string" and data[param]:find("//", nil, true) then

data[param] = ~~escape~~(data[param]~~, "//"~~)

data[param] = export.split_on_slashes(data[param])

data[param] = ~~split~~(data~~[param],~~ "//") ~~or {}~~

elseif type(data[param]) == "string" and not (type(data.term) == "string" and data.term:find("//", nil, true)) then

~~for i, subparam in ipairs(~~data~~[param]) do~~

if not data.no_generate_forms then

data[param][i] ~~= unescape(subparam, "//"~~)

data[param] = lang:generateForms(data[param])

~~if subparam == "" then~~

else

data[param][i] ~~= nil~~

data[param] = { data[param] }

~~end~~

end

~~elseif type(data[param]) == "string" and not (type(data.term) == "string" and data.term:find("//")) then~~

~~data[param] = data.lang:generateForms(data[param])~~

else

data[param] = {}

end

for _, param in ipairs{"sc", "tr", "ts"} do

for _, param in ipairs { "sc", "tr", "ts" } do

data[param] = {data[param]}

data[param] = { data[param] }

end

for _, param in ipairs{"term", "alt", "sc", "tr", "ts"} do

for _, param in ipairs { "term", "alt", "sc", "tr", "ts" } do

for i in pairs(data[param]) do

terms[i] = true

end

-- Create the link

local output = {}

local ~~categories~~ = {}

local id, no_alt_ast, srwc, accel, nevercalltr = data.id, data.no_alt_ast, data.suppress_redundant_wikilink_cat,

~~local link = ""~~

data.accel, data.never_call_transliteration_module

~~local annotations~~

local link_tr = data.respect_link_tr and lang:link_tr(data.sc[1])

~~local phonetic_extraction = mw~~.~~loadData("Module:links/~~data").~~phonetic_extraction~~

~~phonetic_extraction~~ = ~~phonetic_extraction[~~data.lang:~~getCode~~(~~)] or phonetic_extraction[~~data.~~lang:getNonEtymologicalCode(~~)]

for i in ipairs(terms) do

local link

-- Is there any text to show?

if (data.term[i] or data.alt[i]) then

-- Try to detect the script if it was not provided

local ~~best~~ = ~~data.lang:findBestScript(~~data.alt[i] or data.term[i])

local display_term = data.alt[i] or data.term[i]

local best = lang:findBestScript(display_term)

-- no_nonstandard_sc_cat is intended for use in [[Module:interproject]]

if (

not data.no_nonstandard_sc_cat and

best:getCode() == "None" and

find_best_script_without_lang(display_term):getCode() ~= "None"

) then

insert(cats, lang:getFullName() .. " terms in nonstandard scripts")

end

if not data.sc[i] then

data.sc[i] = best

-- Track uses of sc parameter.

elseif data.track_sc then

if data.sc[i]:getCode() == best:getCode() then

insert(cats, lang:getFullName() .. " terms with redundant script codes")

else

insert(cats, lang:getFullName() .. " terms with non-redundant manual script codes")

end

-- If using a discouraged character sequence, add to maintenance category

if data.sc[i]:hasNormalizationFixes() == true then

if (data.term[i] and data.sc[i]:fixDiscouragedSequences(toNFC(data.term[i])) ~= toNFC(data.term[i])) or (data.alt[i] and data.sc[i]:fixDiscouragedSequences(toNFC(data.alt[i])) ~= toNFC(data.alt[i])) then

insert(~~categories~~, "Pages using discouraged character sequences")

insert(cats, "Pages using discouraged character sequences")

end

~~local class~~ = ""

link = simple_link(

data.term[i],

~~-- Encode certain characters to avoid various delimiter-related issues at various stages~~. ~~We need to encode < and >~~

data.fragment,

~~-- because they end up forming part of CSS class names inside of <span~~ ..~~.> and will interfere with finding the~~ end

data.alt[i],

-- ~~of the HTML tag. I first tried converting them to URL encoding~~, i.~~e. %3C and %3E; they~~ then ~~appear in the URL as~~

lang,

-- ~~%253C and %253E, which get mapped back to %3C and %3E when passed to [[Module:accel]]. But mapping them~~ to ~~<~~

data.sc[i],

~~-- and > somehow works magically without any further work; they appear~~ in ~~the URL as < and >, and get passed~~ to

id,

~~-- [[Module:accel]] as < and >. I have no idea who along the chain of calls is doing~~ the ~~encoding and decoding. If~~

cats,

~~-- someone knows, please modify this comment appropriately!~~

no_alt_ast,

local ~~encode_accel_char_map~~ = {

srwc

[~~"%"~~] ~~= "~~.",

)

[~~" "~~] ~~= "_",~~

end

["<"~~] =~~ "~~<~~",

-- simple_link can return nil, so check if a link has been generated.

[~~">"~~] ~~= ">",~~

if link then

}

-- Add "nowrap" class to prefixes in order to prevent wrapping after the hyphen

~~local function encode_accel_param_chars(param)~~

local nowrap

~~local retval~~ = ~~param:gsub(~~"~~[% <>]~~"~~, encode_accel_char_map) -- discard second return value~~

local display_term = data.alt[i] or data.term[i]

~~return retval~~

if display_term and (display_term:find("^%-") or display_term:find("^־")) then -- Hebrew maqqef -- FIXME, use hyphens from [[Module:affix]]

nowrap = "nowrap"

end

~~local function encode_accel_param(prefix, param)~~

link = tag_text(link, lang, data.sc[i], face, get_class(lang, data.tr[i], accel, nowrap))

~~if not param then~~

~~return ""~~

~~end~~

~~if type(param)~~ =~~= "table" then~~

~~local filled_params = {}~~

~~-- There may be gaps in the sequence, especially for translit params.~~

~~local maxindex = 0~~

~~for k, v in pairs(param) do~~

~~if type(k) == "number" and k > maxindex then~~

~~maxindex = k~~

~~end~~

~~for i=1,maxindex do~~

~~filled_params[i] = param[i] or ""~~

~~end~~

~~-- [[Module:accel]] splits these up again.~~

~~param = table.concat(filled_params, "*~!")~~

~~end~~

~~-- This is decoded again by [[WT:ACCEL]].~~

~~return prefix .. encode_accel_param_chars(param)~~

~~end~~

~~if data.accel then~~

~~local form = data.accel.form and encode_accel_param_chars(data.accel.form) .. "-form-of" or ""~~

~~local gender = encode_accel_param("gender-", data.accel.gender)~~

~~local pos = encode_accel_param("pos-", data.accel.pos)~~

~~local translit = encode_accel_param("transliteration-",~~

~~data.accel.translit or (data.tr[i] ~= "-" and data.tr[i] or nil))~~

~~local target = encode_accel_param("target-", data.accel.target)~~

~~local lemma = encode_accel_param("origin-", data.accel.lemma)~~

~~local lemma_translit = encode_accel_param("origin_transliteration-", data.accel.lemma_translit)~~

~~local no_store = data.accel.no_store and "form-of-nostore" or ""~~

~~local accel =~~

~~form .. " " ..~~

~~gender .. " " ..~~

~~pos .. " " ..~~

~~translit .. " " ..~~

~~target .. " " ..~~

~~lemma .. " " ..~~

~~lemma_translit .. " " ..~~

~~no_store .. " "~~

~~class = "form-of lang-" .. data.lang:getNonEtymologicalCode~~(~~) .. " " .. accel~~

~~end~~

~~-- Only make a~~ link ~~if the term has been given, otherwise just show the alt text without a link~~

~~local term_data = {term = data.term[i], alt = data.alt[i]~~, ~~lang = data.~~lang, ~~sc =~~ data.sc[i], ~~id = data.id~~, ~~genders = data.genders~~, ~~tr =~~ data.tr[i], ~~ts = data.ts[i], gloss = data.gloss, pos = data.pos, lit = data.lit, accel = data.~~accel, ~~interwiki = data.interwiki}~~

~~link = require("Module:script utilities").tag_text(~~

~~data.term[i] and export.language_link(term_data, allow_self_link~~)

~~or data.alt[i], data.lang, data.sc[i], face, class~~)

else

--[[ No term to show.

Is there at least a transliteration we can work from? ]]

link = ~~require("Module:script utilities").~~request_script(~~data.~~lang, data.sc[i])

link = request_script(lang, data.sc[i])

-- No link to show, and no transliteration either. Show a term request (unless it's a substrate, as they rarely take terms).

if (link == "" or (not data.tr[i]) or data.tr[i] == "-") and ~~data.~~lang:getFamilyCode() ~= "qfa-sub" then

if (link == "" or (not data.tr[i]) or data.tr[i] == "-") and lang:getFamilyCode() ~= "qfa-sub" then

-- ~~No link to show~~, ~~and no transliteration either. Show a term request (unless it's a substrate, as they rarely take terms)~~.

-- If there are multiple terms, break the loop instead.

if ~~mw.title.getCurrentTitle~~()~~.nsText~~ ~= "Template" then

if i > 1 then

insert(~~categories~~, ~~data.~~lang:~~getNonEtymologicalName~~() .. " term requests")

remove(output)

break

elseif NAMESPACE ~= "Template" then

insert(cats, lang:getFullName() .. " term requests")

end

link = "[Term?]"

Line 759:

Line 1,284:

end

insert(output, link)

if i < #terms then insert(output, "／") end

if i < #terms then insert(output, " / ") end

end

-- ~~TODO: Currently only handles the first transliteration~~, ~~pending consensus on how to handle multiple translits for multiple forms, as this is~~ not ~~always desirable (e.g. traditional/simplified Chinese).~~

-- When suppress_tr is true, do not show or generate any transliteration

if data.~~tr[1] == "" or data.tr[1] == "-"~~ then

if data.suppress_tr then

data.tr[1] = nil

else

-- TODO: Currently only handles the first transliteration, pending consensus on how to handle multiple translits for multiple forms, as this is not always desirable (e.g. traditional/simplified Chinese).

if data.tr[1] == "" or data.tr[1] == "-" then

data.tr[1] = nil

else

local phonetic_extraction = load_data("Module:links/data").phonetic_extraction

phonetic_extraction = phonetic_extraction[lang:getCode()] or phonetic_extraction[lang:getFullCode()]

if phonetic_extraction then

data.tr[1] = data.tr[1] or

require(phonetic_extraction).getTranslit(export.remove_links(data.alt[1] or data.term[1]))

elseif (data.term[1] or data.alt[1]) and data.sc[1]:isTransliterated() then

-- Track whenever there is manual translit. The categories below like 'terms with redundant transliterations'

-- aren't sufficient because they only work with reference to automatic translit and won't operate at all in

-- languages without any automatic translit, like Persian and Hebrew.

if data.tr[1] then

local full_code = lang:getFullCode()

end

if not nevercalltr then

-- Try to generate a transliteration.

local text = data.alt[1] or data.term[1]

if not link_tr then

text = export.remove_links(text, true)

end

~~elseif phonetic_extraction then~~

local automated_tr = lang:transliterate(text, data.sc[1])

local ~~m_phonetic~~ = ~~require~~(~~phonetic_extraction)~~

data.tr[1] ~~= data.tr[1] or m_phonetic.getTranslit(export.remove_links(data.alt[1] or data.term[1])~~)

~~elseif (data.term[1] or data.alt[1]) and data.sc[1]:isTransliterated() then~~

if automated_tr then

~~-- Try to generate a transliteration, unless transliteration has been supplied and no_check_redundant_translit is~~

local manual_tr = data.tr[1]

~~-- given. (Checking for redundant transliteration can use up significant amounts of memory so we don't want to do~~

~~-- it if memory is tight. `no_check_redundant_translit` is currently set when called ultimately from~~

~~-- {{multitrans|...|no-check-redundant-translit=1}}.)~~

~~if not (data.tr[1] and no_check_redundant_translit) then~~

~~local text = data.alt[1] or data.term[1]~~

~~if not data.lang:link_tr() then~~

~~text = export.remove_links(text, true)~~

~~end~~

~~local automated_tr, tr_categories~~

~~automated_tr, data.tr_fail, tr_categories = data.lang:transliterate(text, data.sc[1])~~

if automated_tr ~~or data.tr_fail~~ then

local manual_tr = data.tr[1]

if manual_tr then

if (export.remove_links(manual_tr) == export.remove_links(automated_tr~~)) and (not data.tr_fail~~) then

if export.remove_links(manual_tr) == export.remove_links(automated_tr) then

insert(~~categories~~, ~~"Terms with redundant transliterations"~~)

insert(cats, lang:getFullName() .. " terms with redundant transliterations")

~~insert(categories,~~ "~~Terms~~ with redundant transliterations/" ~~.. data.lang:getNonEtymologicalCode()~~)

else

~~elseif not data.tr_fail then~~

-- Prevents Arabic root categories from flooding the tracking categories.

if NAMESPACE ~= "Category" then

if ~~mw.title.getCurrentTitle().nsText~~ ~= "Category" then

insert(cats,

insert(~~categories~~, "~~Terms~~ with manual transliterations ~~different from the automated ones~~")

lang:getFullName() .. " terms with non-redundant manual transliterations")

~~insert(categories, "Terms with manual transliterations different from the automated ones/" .. data.lang:getNonEtymologicalCode())~~

end

~~end~~

if (not manual_tr) or ~~data.~~lang:overrideManualTranslit() then

if not manual_tr or lang:overrideManualTranslit(data.sc[1]) then

data.tr[1] = automated_tr

~~for _, category in ipairs(tr_categories) do~~

end

~~insert(categories, category)~~

end

Line 809:

Line 1,341:

end

-- Link to the transliteration entry for languages that require this

if data.tr[1] and ~~data.lang:~~link_tr() and not (data.tr[1]:match("%[%[(.-)%]%]"~~) or data.tr_fail~~) then

if data.tr[1] and link_tr and not data.tr[1]:match("%[%[(.-)%]%]") then

data.tr[1] = ~~export.language_link{lang = data.lang, term =~~ data.tr[1]}

data.tr[1] = simple_link(

elseif data.tr[1] and not ~~(data.lang:~~link_tr~~() or data.tr_fail)~~ then

data.tr[1],

nil,

lang,

get_script("Latn"),

nil,

cats,

no_alt_ast,

srwc

)

elseif data.tr[1] and not link_tr then

-- Remove the pseudo-HTML tags added by remove_links.

data.tr[1] = data.tr[1]:gsub("</?link>", "")

end

if data.tr[1] and ~~gsub~~(data.tr[1], "[%s%p]~~", "~~")~~:len() == 0~~ then data.tr[1] = nil end

if data.tr[1] and not umatch(data.tr[1], "[^%s%p]") then data.tr[1] = nil end

insert(output, export.format_link_annotations(data, face))

categories = ~~#categories > 0~~ and ~~require("Module:utilities").~~format_categories(~~categories~~, ~~data.~~lang, "-", nil, nil, data.sc) or ""

if data.pretext then

insert(output, 1, data.pretext)

~~return concat~~(output) .. categories

end

if data.posttext then

insert(output, data.posttext)

end

local categories = cats[1] and format_categories(cats, lang, "-", nil, nil, data.sc) or ""

output = concat(output)

if show_qualifiers or data.show_qualifiers then

output = add_qualifiers_and_refs_to_term(data, output)

end

return output .. categories

end

Line 840:

Line 1,393:

return ""

end

text = text

:gsub("%[%[", "\1")

Line 849:

Line 1,402:

function(c1, c2, c3)

-- Don't remove files.

for _, ~~falsePositive~~ in ipairs({"file", "image"}) do

for _, false_positive in ipairs({ "file", "image" }) do

if c2:lower():match("^" .. ~~falsePositive~~ .. ":") then return c1 .. c2 .. c3 end

if c2:lower():match("^" .. false_positive .. ":") then return c1 .. c2 .. c3 end

end

-- Remove categories completely.

for _, ~~falsePositive~~ in ipairs({"category", "cat"}) do

for _, false_positive in ipairs({ "category", "cat" }) do

if c2:lower():match("^" .. ~~falsePositive~~ .. ":") then return "" end

if c2:lower():match("^" .. false_positive .. ":") then return "" end

end

-- In piped links, remove all text before the pipe, unless it's the final character (i.e. the pipe trick), in which case just remove the pipe.

Line 864:

Line 1,417:

end

end)

text = text

:gsub("\1", "[[")

Line 870:

Line 1,423:

return text

~~end~~

~~--[=[~~

~~This decodes old section encodings.~~

~~For example, Norwegian_Bokm.C3.A5l → Norwegian_Bokmål.~~

~~It isn't picky about whether the section encodings represent the UTF-8 encoding~~

~~of a real Unicode character, so it will mangle section names that contain~~

~~a period followed by two uppercase hex characters. At least such section names~~

~~are probably pretty rare.~~

~~Wiktionary adds an additional id="" attribute for sections~~

~~using a legacy encoding, if it is different from the modern minimally modified attribute.~~

~~It is like percent encoding (URI or URL encoding) except with "." instead of "%".~~

~~See [[mw:Manual:$wgFragmentMode]] and the code that does the encoding at~~

~~https://gerrit.wikimedia.org/r/plugins/gitiles/mediawiki/core/+/7bf779524ab1fd8e1d74f79ea4840564d48eea4d/includes/parser/Sanitizer.php#893~~

~~]=]~~

~~-- The character class %x should not be used, as it includes the characters a-f,~~

~~-- which do not occur in these anchor encodings.~~

~~local capitalHex = "[0-9A-F]"~~

~~local function decodeAnchor(anchor)~~

~~return (anchor:gsub("%.(" .. capitalHex .. capitalHex .. ")",~~

~~function(hexByte)~~

~~return string.char(tonumber(hexByte, 16))~~

~~end))~~

end

Line 903:

Line 1,430:

end

~~link~~ = link:gsub("_", " ")

local target, section = get_fragment((link:gsub("_", " ")))

~~local numberSigns = select(2, link:gsub("#", ""~~))

if ~~numberSigns > 1~~ then

if not section then

error("~~The~~ section ~~link should only contain one number sign (#).~~")

error("No \"#\" delineating a section name")

end

~~link = mw.uri.decode~~(~~link, "WIKI")~~

return simple_link(

~~local page, section = link:match("^([^#]*)#(.+)$")~~

target,

~~if page == "" then~~

section,

~~page = nil~~

target .. " § " .. section

~~end~~

)

~~if section then~~

~~section = decodeAnchor(section)~~

~~-- URI-encode (percent-encode) section to allow square brackets and~~

~~-- other dodgy characters in section name.~~

~~-- If not percent-encoded~~, ~~they prevent the parser from creating a link.~~

~~-- Decode percent-encoding in the displayed text~~

~~if page then~~

~~return "[[" .. page .. "#" .. mw.uri.encode(~~section, ~~"WIKI")~~

~~.. "|" .. page .. " § " .. section .. "]]"~~

~~else~~

~~return "[[#" .. mw.uri.encode(section, "WIKI")~~

.. "|§ " .. section ~~.. "]]"~~

~~end~~

~~else~~

~~error("The function “section_link” could not find a number sign marking a section name."~~)

@@ Line 5: / Line 5: @@
 	extraction modules and part-of-speech names are listed
 	at [[Module:links/data]].
 	Other modules used:
 		[[Module:script utilities]]
@@ Line 13: / Line 13: @@
 ]=]
--- These are prefixed with u to avoid confusion with the default string methods
+local anchors_module = "Module:anchors"
--- of the same name.
+local form_of_module = "Module:form of"
+local gender_and_number_module = "Module:getn"
+local languages_module = "Module:languages"
+local load_module = "Module:load"
+local memoize_module = "Module:memoize"
+local pages_module = "Module:pages"
+local pron_qualifier_module = "Module:pron qualifier"
+local scripts_module = "Module:scripts"
+local script_utilities_module = "Module:script utilities"
+local string_encode_entities_module = "Module:string/encode entities"
+local string_utilities_module = "Module:string utilities"
+local table_module = "Module:table"
+local utilities_module = "Module:utilities"
 local concat = table.concat
-local encode = mw.text.encode
+local find = string.find
-local find = mw.ustring.find
+local get_current_title = mw.title.getCurrentTitle
-local get_entities = require("Module:utilities").get_entities
-local gsub = mw.ustring.gsub
 local insert = table.insert
-local lower = mw.ustring.lower
+local ipairs = ipairs
-local split = mw.text.split
+local match = string.match
+local new_title = mw.title.new
+local pairs = pairs
+local remove = table.remove
+local sub = string.sub
 local toNFC = mw.ustring.toNFC
-local trim = mw.text.trim
+local tostring = tostring
+local type = type
 local unstrip = mw.text.unstrip
+local NAMESPACE = get_current_title().nsText
+local function anchor_encode(...)
+	anchor_encode = require(memoize_module)(mw.uri.anchorEncode, true)
+	return anchor_encode(...)
+end
+local function decode_entities(...)
+	decode_entities = require(string_utilities_module).decode_entities
+	return decode_entities(...)
+end
+local function decode_uri(...)
+	decode_uri = require(string_utilities_module).decode_uri
+	return decode_uri(...)
+end
+-- Can't yet replace, as the [[Module:string utilities]] version no longer has automatic double-encoding prevention, which requires changes here to account for.
+local function encode_entities(...)
+	encode_entities = require(string_encode_entities_module)
+	return encode_entities(...)
+end
+local function extend(...)
+	extend = require(table_module).extend
+	return extend(...)
+end
+local function find_best_script_without_lang(...)
+	find_best_script_without_lang = require(scripts_module).findBestScriptWithoutLang
+	return find_best_script_without_lang(...)
+end
+local function format_categories(...)
+	format_categories = require(utilities_module).format_categories
+	return format_categories(...)
+end
+local function format_genders(...)
+	format_genders = require(gender_and_number_module).format_genders
+	return format_genders(...)
+end
+local function format_qualifiers(...)
+	format_qualifiers = require(pron_qualifier_module).format_qualifiers
+	return format_qualifiers(...)
+end
+local function get_current_L2(...)
+	get_current_L2 = require(pages_module).get_current_L2
+	return get_current_L2(...)
+end
+local function get_lang(...)
+	get_lang = require(languages_module).getByCode
+	return get_lang(...)
+end
+local function get_script(...)
+	get_script = require(scripts_module).getByCode
+	return get_script(...)
+end
+local function language_anchor(...)
+	language_anchor = require(anchors_module).language_anchor
+	return language_anchor(...)
+end
+local function load_data(...)
+	load_data = require(load_module).load_data
+	return load_data(...)
+end
+local function request_script(...)
+	request_script = require(script_utilities_module).request_script
+	return request_script(...)
+end
+local function shallow_copy(...)
+	shallow_copy = require(table_module).shallowCopy
+	return shallow_copy(...)
+end
+local function split(...)
+	split = require(string_utilities_module).split
+	return split(...)
+end
+local function tag_text(...)
+	tag_text = require(script_utilities_module).tag_text
+	return tag_text(...)
+end
+local function tag_translit(...)
+	tag_translit = require(script_utilities_module).tag_translit
+	return tag_translit(...)
+end
+local function trim(...)
+	trim = require(string_utilities_module).trim
+	return trim(...)
+end
+local function u(...)
+	u = require(string_utilities_module).char
+	return u(...)
+end
+local function ulower(...)
+	ulower = require(string_utilities_module).lower
+	return ulower(...)
+end
+local function umatch(...)
+	umatch = require(string_utilities_module).match
+	return umatch(...)
+end
+local m_headword_data
+local function get_headword_data()
+	m_headword_data = load_data("Module:headword/data")
+	return m_headword_data
+end
+local function selective_trim(...)
+	-- Unconditionally trimmed charset.
+	local always_trim =
+		"\194\128-\194\159" ..   -- U+0080-009F (C1 control characters)
+		"\194\173" ..            -- U+00AD (soft hyphen)
+		"\226\128\170-\226\128\174" .. -- U+202A-202E (directionality formatting characters)
+		"\226\129\166-\226\129\169" -- U+2066-2069 (directionality formatting characters)
+	-- Standard trimmed charset.
+	local standard_trim = "%s" .. -- (default whitespace charset)
+		"\226\128\139-\226\128\141" .. -- U+200B-200D (zero-width spaces)
+		always_trim
+	-- If there are non-whitespace characters, trim all characters in `standard_trim`.
+	-- Otherwise, only trim the characters in `always_trim`.
+	selective_trim = function(text)
+		if text == "" then
+			return text
+		end
+		local trimmed = trim(text, standard_trim)
+		if trimmed ~= "" then
+			return trimmed
+		end
+		return trim(text, always_trim)
+	end
+	return selective_trim(...)
+end
 local function escape(text, str)
@@ Line 41: / Line 210: @@
 end
--- Trim only if there are non-whitespace characters.
+-- Remove bold, italics, soft hyphens, strip markers and HTML tags.
-local function cond_trim(text)
+local function remove_formatting(str)
-	-- Include all conventional whitespace + zero-width space.
+	str = str
-	if find(text, "[^%s]") then
+		:gsub("('*)'''(.-'*)'''", "%1%2")
-		text = trim(text, "%s")
+		:gsub("('*)''(.-'*)''", "%1%2")
+		:gsub("", "")
+	return (unstrip(str)
+		:gsub("<[^<>]+>", ""))
+end
+--[==[Takes an input and splits on a double slash (taking account of escaping backslashes).]==]
+function export.split_on_slashes(text)
+	text = split(escape(text, "//"), "//", true) or {}
+	for i, v in ipairs(text) do
+		text[i] = unescape(v, "//")
+		if v == "" then
+			text[i] = false
+		end
+	end
+	return text
+end
+--[==[Takes a wikilink and outputs the link target and display text. By default, the link target will be returned as a title object, but if `allow_bad_target` is set it will be returned as a string, and no check will be performed as to whether it is a valid link target.]==]
+function export.get_wikilink_parts(text, allow_bad_target)
+	-- TODO: replace `allow_bad_target` with `allow_unsupported`, with support for links to unsupported titles, including escape sequences.
+	if (                        -- Filters out anything but "[[...]]" with no intermediate "[[" or "]]".
+			not match(text, "^()%[%[") or -- Faster than sub(text, 1, 2) ~= "[[".
+			find(text, "[[", 3, true) or
+			find(text, "]]", 3, true) ~= #text - 1
+		) then
+		return nil, nil
+	end
+	local pipe, title, display = find(text, "|", 3, true)
+	if pipe then
+		title, display = sub(text, 3, pipe - 1), sub(text, pipe + 1, -3)
+	else
+		title = sub(text, 3, -3)
+		display = title
+	end
+	if allow_bad_target then
+		return title, display
+	end
+	title = new_title(title)
+	-- No title object means the target is invalid.
+	if title == nil then
+		return nil, nil
+		-- If the link target starts with "#" then mw.title.new returns a broken
+		-- title object, so grab the current title and give it the correct fragment.
+	elseif title.prefixedText == "" then
+		local fragment = title.fragment
+		if fragment == "" then -- [[#]] isn't valid
+			return nil, nil
+		end
+		title = get_current_title()
+		title.fragment = fragment
+	end
+	return title, display
+end
+-- Does the work of export.get_fragment, but can be called directly to avoid unnecessary checks for embedded links.
+local function get_fragment(text)
+	text = escape(text, "#")
+	-- Replace numeric character references with the corresponding character (&#39; → '),
+	-- as they contain #, which causes the numeric character reference to be
+	-- misparsed (wa'a → wa&#39;a → pagename wa&, fragment 39;a).
+	text = decode_entities(text)
+	local target, fragment = text:match("^(.-)#(.+)$")
+	target = target or text
+	target = unescape(target, "#")
+	fragment = fragment and unescape(fragment, "#")
+	return target, fragment
+end
+--[==[Takes a link target and outputs the actual target and the fragment (if any).]==]
+function export.get_fragment(text)
+	-- If there are no embedded links, process input.
+	local open = find(text, "[[", nil, true)
+	if not open then
+		return get_fragment(text)
 	end
+	local close = find(text, "]]", open + 2, true)
+	if not close then
+		return get_fragment(text)
+		-- If there is one, but it's redundant (i.e. encloses everything with no pipe), remove and process.
+	elseif open == 1 and close == #text - 1 and not find(text, "|", 3, true) then
+		return get_fragment(sub(text, 3, -3))
+	end
+	-- Otherwise, return the input.
 	return text
 end
-local ignore_cap
+--[==[
-local pos_tags
+Given a link target as passed to `full_link()`, get the actual page that the target refers to. This removes
-function export.getLinkPage(target, lang, sc, plain)
+bold, italics, strip markers and HTML; calls `makeEntryName()` for the language in question; converts targets
+beginning with `*` to the Reconstruction namespace; and converts appendix-constructed languages to the Appendix
+namespace. Returns up to three values:
+# the actual page to link to, or {nil} to not link to anything;
+# how the target should be displayed, if the user didn't explicitly specify any display text; generally the
+  same as the original target, but minus any anti-asterisk !!;
+# the value `true` if the target had a backslash-escaped * in it (FIXME: explain this more clearly).
+]==]
+function export.get_link_page_with_auto_display(target, lang, sc, plain)
+	local orig_target = target
 	if not target then
 		return nil
 	end
-	-- Remove bold, italics, soft hyphens, strip markers and HTML tags.
+	target = remove_formatting(target)
-	target = target
-		:gsub("('*)'''(.-'*)'''", "%1%2")
+	if target:sub(1, 1) == ":" then
-		:gsub("('*)''(.-'*)''", "%1%2")
+		local link_target = target:sub(2)
-		:gsub("", "")
+		link_target = (lang:hasType("conlang") and "Contionary:" or "wikt:") .. link_target
-	target = unstrip(target)
+		return link_target, orig_target
-		:gsub("<[^<>]+>", "")
+	end
-	-- Check if the target is an interwiki link.
+	local prefix = target:match("^(.-):")
-	if target:match(":") and target ~= ":" then
+	-- Convert any escaped colons
-		local m_utildata = mw.loadData("Module:utilities/data")
+	target = target:gsub("\\:", ":")
-		-- If this is an a link to another namespace or an interwiki link, ensure there's an initial colon and then return what we have (so that it works as a conventional link, and doesn't do anything weird like add the term to a category.)
+	if prefix then
-		local prefix = target:gsub("^:*(.-):.*", lower)
+		-- If this is a link to another namespace or an interwiki link, ensure there's an initial colon and then
-		if m_utildata.namespaces[prefix] or m_utildata.interwikis[prefix] then
+		-- return what we have (so that it works as a conventional link, and doesn't do anything weird like add the term
-			return ":" .. target:gsub("^:+", ""), nil, {}
+		-- to a category.)
+		prefix = ulower(trim(prefix))
+		if prefix ~= "" and (
+				load_data("Module:data/namespaces")[prefix] or
+				load_data("Module:data/interwikis")[prefix]
+			) then
+			return target, orig_target
 		end
-		-- Convert any escaped colons
-		target = target:gsub("\\:", ":")
 	end
-	-- Check if the term is reconstructed and remove any asterisk. Otherwise, handle the escapes.
+	-- Check if the term is reconstructed and remove any asterisk. Also check for anti-asterisk (!!).
-	local reconstructed, escaped
+	-- Otherwise, handle the escapes.
+	local reconstructed, escaped, anti_asterisk
 	if not plain then
 		target, reconstructed = target:gsub("^%*(.)", "%1")
+		if reconstructed == 0 then
+			target, anti_asterisk = target:gsub("^!!(.)", "%1")
+			if anti_asterisk == 1 then
+				-- Remove !! from original. FIXME! We do it this way because the call to remove_formatting() above
+				-- may cause non-initial !! to be interpreted as anti-asterisks. We should surely move the
+				-- remove_formatting() call later.
+				orig_target = orig_target:gsub("^!!", "")
+			end
+		end
+	end
+	target, escaped = target:gsub("^(\\-)\\%%*", "%1*")
+	if reconstructed == 0 and lang:hasType("reconstructed") then
+		orig_target = "*" .. target
+		reconstructed = 1
 	end
-	target, escaped = target:gsub("^(\\-)\\%*", "%1*")
-	if not require("Module:utilities").check_object("script", true, sc) or sc:getCode() == "None" then
+	if not (sc and sc:getCode() ~= "None") then
 		sc = lang:findBestScript(target)
 	end
 	-- Remove carets if they are used to capitalize parts of transliterations (unless they have been escaped).
 	if (not sc:hasCapitalization()) and sc:isTransliterated() and target:match("%^") then
@@ Line 94: / Line 374: @@
 		target = unescape(target, "^")
 	end
 	-- Get the entry name for the language.
-	target = lang:makeEntryName(target, sc)
+	target = lang:makeEntryName(target, sc, reconstructed == 1 or lang:hasType("appendix-constructed"))
 	-- If the link contains unexpanded template parameters, then don't create a link.
-	if target:find("{{{") then
+	if target:match("{{{.-}}}") then
+		-- FIXME: Should we return the original target as the default display value (second return value)?
 		return nil
 	end
-	if target:sub(1, 1) == "/" then
+	-- Link to appendix for reconstructed terms and terms in appendix-only languages. Plain links interpret *
-		return ":" .. target
+	-- literally, however.
+	if not lang:hasType("conlang") then
-	elseif target:find("^Reconstruction:") then
+		if lang:hasType("appendix-constructed") then
-		return target
+			target = "wikt:Appendix:" .. lang:getFullName() .. "/" .. target
+		elseif reconstructed == 1 then -- asterisk found
-	-- Link to appendix for reconstructed terms and terms in appendix-only languages. Plain links interpret * literally, however.
+			if lang:getFullCode() == "und" then
-	elseif reconstructed == 1 then
+				-- Return the original target as default display value. If we don't do this, we wrongly get
-		if lang:getNonEtymologicalCode() == "und" then
+				-- [Term?] displayed instead.
-			return nil
+				return nil, orig_target
-		else
+			end
-			target = "Reconstruction:" .. lang:getNonEtymologicalName() .. "/" .. target
+			target = "wikt:Reconstruction:" .. lang:getFullName() .. "/" .. target
+		elseif anti_asterisk ~= 1 and (lang:hasType("reconstructed") or lang:getFamilyCode() == "qfa-sub") then
+			--error("The specified language " .. lang:getCanonicalName()
+			--.. " is unattested, while the given term does not begin with '*' to indicate that it is reconstructed.")
+			orig_target = "*" .. target
 		end
-	-- Reconstructed languages and substrates require an initial *.
+	else
-	elseif lang:hasType("reconstructed") or lang:getFamilyCode() == "qfa-sub" then
+		if reconstructed == 1 then
-		local check, m_utildata = target:match("^:*([^:]*):"), mw.loadData("Module:utilities/data")
+			target = "*" .. target
-		check = check and lower(check)
-		if m_utildata.interwikis[check] or m_utildata.namespaces[check] then
-			return target
-		else
-			error("The specified language " .. lang:getCanonicalName()
-				.. " is unattested, while the given word is not marked with '*' to indicate that it is reconstructed.")
 		end
-	elseif lang:hasType("appendix-constructed") then
-		target = "Appendix:" .. lang:getNonEtymologicalName() .. "/" .. target
 	end
+	target = (lang:hasType("conlang") and "Contionary:" or "wikt:") .. target
-	return target, escaped > 0
+	return target, orig_target, escaped > 0
+end
+function export.get_link_page(target, lang, sc, plain)
+	local target, auto_display, escaped = export.get_link_page_with_auto_display(target, lang, sc, plain)
+	return target, escaped
 end
 -- Make a link from a given link's parts
-local function makeLink(link, lang, sc, id, allow_self_link, isolated, plain)
+local function make_link(link, lang, sc, id, isolated, cats, no_alt_ast, plain)
 	-- Convert percent encoding to plaintext.
-	link.target = mw.uri.decode(link.target, "PATH")
+	link.target = link.target and decode_uri(link.target, "PATH")
-	link.fragment = link.fragment and mw.uri.decode(link.fragment, "PATH")
+	link.fragment = link.fragment and decode_uri(link.fragment, "PATH")
-	-- Find fragments (when link didn't come from parseLink).
+	-- Find fragments (if one isn't already set).
 	-- Prevents {{l|en|word#Etymology 2|word}} from linking to [[word#Etymology 2#English]].
 	-- # can be escaped as \#.
-	if link.target then
+	if link.target and link.fragment == nil then
-		link.target = escape(link.target, "#")
+		link.target, link.fragment = get_fragment(link.target)
-		if link.fragment == nil then
-			-- Replace numeric character references with the corresponding character (&#29; → '),
-			-- as they contain #, which causes the numeric character reference to be
-			-- misparsed (wa'a → wa&#29;a → pagename wa&, fragment 29;a).
-			link.target = get_entities(link.target)
-			local first, second = link.target:match("^([^#]+)#(.+)$")
-			if first then
-				link.target, link.fragment = first, second
-			end
-		end
-		link.target = unescape(link.target, "#")
-		link.fragment = link.fragment and unescape(link.fragment, "#")
 	end
-	-- If there is no display form, then create a default one.
+	-- Process the target
-	if not link.display then
+	local auto_display, escaped
-		link.display = link.target
+	link.target, auto_display, escaped = export.get_link_page_with_auto_display(link.target, lang, sc, plain)
+	-- Create a default display form.
+	-- If the target is "" then it's a link like [[#English]], which refers to the current page.
+	if auto_display == "" then
+		auto_display = (m_headword_data or get_headword_data()).pagename
 	end
-	local display_is_target = link.display == link.target
-	-- Process the target
-	local escaped
-	link.target, escaped = export.getLinkPage(link.target, lang, sc, plain)
 	-- If the display is the target and the reconstruction * has been escaped, remove the escaping backslash.
-	if display_is_target and escaped then
+	if escaped then
-		link.display = link.display:gsub("\\([^\\]*%*)", "%1", 1)
+		auto_display = auto_display:gsub("\\([^\\]*%*)", "%1", 1)
 	end
 	-- Process the display form.
-	link.display = lang:makeDisplayText(link.display, sc, not display_is_target)
+	if link.display then
+		local orig_display = link.display
+		link.display = lang:makeDisplayText(link.display, sc, true)
+		if cats then
+			auto_display = lang:makeDisplayText(auto_display, sc)
+			-- If the alt text is the same as what would have been automatically generated, then the alt parameter is redundant (e.g. {{l|en|foo|foo}}, {{l|en|w:foo|foo}}, but not {{l|en|w:foo|w:foo}}).
+			-- If they're different, but the alt text could have been entered as the term parameter without it affecting the target page, then the target parameter is redundant (e.g. {{l|ru|фу|фу́}}).
+			-- If `no_alt_ast` is true, use pcall to catch the error which will be thrown if this is a reconstructed lang and the alt text doesn't have *.
+			if link.display == auto_display then
+			else
+				local ok, check
+				if no_alt_ast then
+					ok, check = pcall(export.get_link_page, orig_display, lang, sc, plain)
+				else
+					ok = true
+					check = export.get_link_page(orig_display, lang, sc, plain)
+				end
+			end
+		end
+	else
+		link.display = lang:makeDisplayText(auto_display, sc)
+	end
 	if not link.target then
 		return link.display
 	end
-	-- If the target is the same as the current page and there is no sense id
+	-- If the target is the same as the current page, there is no sense id
-	-- and linking to the same page hasn't been turned on, then return a "self-link"
+	-- and either the language code is "und" or the current L2 is the current
-	-- like the software does.
+	-- language then return a "self-link" like the software does.
-	if (not (allow_self_link or id)) and link.target == mw.title.getCurrentTitle().prefixedText then
+	if link.target == get_current_title().prefixedText then
-		return tostring(mw.html.create("strong")
+		local fragment, current_L2 = link.fragment, get_current_L2()
-			:addClass("selflink")
+		if (
-			:wikitext(link.display))
+				fragment and fragment == current_L2 or
+				not (id or fragment) and (lang:getFullCode() == "und" or lang:getFullName() == current_L2)
+			) then
+			return tostring(mw.html.create("strong")
+				:addClass("selflink")
+				:wikitext(link.display))
+		end
 	end
-	-- Add fragment. Do not add a section link to "Undetermined", as such sections do not exist and are invalid. TabbedLanguages handles links without a section by linking to the "last visited" section, but adding "Undetermined" would break that feature. For localized prefixes that make syntax error, please use the format: ["xyz"] = true.
+	-- Add fragment. Do not add a section link to "Undetermined", as such sections do not exist and are invalid.
-	local prefix, lower_prefix = link.target:match("^:*([^:]+):")
+	-- TabbedLanguages handles links without a section by linking to the "last visited" section, but adding
+	-- "Undetermined" would break that feature. For localized prefixes that cause a syntax error, please use the
-	local m_utildata
+	-- format: ["xyz"] = true.
-	if prefix then
+	local prefix = link.target:match("^:*([^:]+):")
-		lower_prefix = lower(prefix)
+	prefix = prefix and ulower(prefix)
-		m_utildata = mw.loadData("Module:utilities/data")
-	end
+	if prefix ~= "category" and not (prefix and load_data("Module:data/interwikis")[prefix]) then
+		if (link.fragment or link.target:sub(-1) == "#") and not plain then
-	if not (m_utildata and m_utildata.interwikis[lower_prefix]) then
+			if cats then
+				insert(cats, lang:getFullName() .. " links with manual fragments")
-		if (not link.fragment) and lang:getNonEtymologicalCode() ~= "und" then
+			end
+		end
+		if not link.fragment then
 			if id then
-				link.fragment = require("Module:senseid").anchor(lang, id)
+				link.fragment = lang:getFullCode() == "und" and anchor_encode(id) or language_anchor(lang, id)
-			elseif not (link.target:find("^Appendix:") or link.target:find("^Reconstruction:") or plain) then
+			elseif lang:getFullCode() ~= "und" and not (link.target:match("^Appendix:") or link.target:match("^Reconstruction:")) then
-				link.fragment = lang:getNonEtymologicalName()
+				link.fragment = anchor_encode(lang:getFullName())
 			end
-		elseif plain and id then
-			link.fragment = id
 		end
 	end
-	if isolated then
+	-- Put inward-facing square brackets around a link to isolated spacing character(s).
-		link.display = mw.loadData("Module:links/data").display_change[link.display] or link.display
+	if isolated and #link.display > 0 and not umatch(decode_entities(link.display), "%S") then
+		link.display = "&#x5D;" .. link.display .. "&#x5B;"
 	end
 	link.target = link.target:gsub("^(:?)(.*)", function(m1, m2)
-		return m1 .. encode(m2, "#%%&+/:<=>@[\\%]_{|}")
+		return m1 .. encode_entities(m2, "#%&+/:<=>@[\\]_{|}")
 	end)
-	link.fragment = link.fragment and encode(link.fragment, "#%%&+/:<=>@[\\%]_{|}")
+	link.fragment = link.fragment and encode_entities(remove_formatting(link.fragment), "#%&+/:<=>@[\\]_{|}")
-	return "[[" .. link.target .. (link.fragment and "#" .. link.fragment or "") .. "|" .. link.display .. "]]"
+	return "[[" ..
+	link.target:gsub("^[^:]", ":%0") .. (link.fragment and "#" .. link.fragment or "") .. "|" .. link.display .. "]]"
 end
 -- Split a link into its parts
-local function parseLink(linktext)
+local function parse_link(linktext)
 	local link = { target = linktext }
-	local first, second = link.target:match("^([^|]+)|(.+)$")
+	local target = link.target
-	-- Prevent characters whose HTML entities are unsupported titles from being incorrectly recognised as the entity if they are in a link being re-parsed (e.g. "&" becomes "&amp;" when returned, but "&amp;" is also an unsupported title. If "&" is given as a link which is then re-parsed, we don't want it to be perceived as "&amp;".)
+	link.target, link.display = target:match("^(..-)|(.+)$")
-	if link.target:match("&[^;]+;") then
+	if not link.target then
-		local unsupported_titles = mw.loadData("Module:links/data").unsupported_titles
+		link.target = target
-		if unsupported_titles[second] and unsupported_titles[second] ~= first then
+		link.display = target
-			link.target = get_entities(link.target)
-			first, second = link.target:match("^([^|]+)|(.+)$")
-		end
 	end
-	if first then
-		link.target = first
-		link.display = second
-	else
-		link.display = link.target
-	end
 	-- There's no point in processing these, as they aren't real links.
 	local target_lower = link.target:lower()
-	for _, falsePositive in ipairs({"category", "cat", "file", "image"}) do
+	for _, false_positive in ipairs({ "category", "cat", "file", "image" }) do
-		if target_lower:match("^" .. falsePositive .. ":") then return nil end
+		if target_lower:match("^" .. false_positive .. ":") then
+			return nil
+		end
 	end
-	first, second = link.target:match("^(.+)#(.+)$")
+	link.display = decode_entities(link.display)
+	link.target, link.fragment = get_fragment(link.target)
-	if first then
+	-- So that make_link does not look for a fragment again.
-		link.target = first
+	if not link.fragment then
-		link.fragment = second
-	else
-		-- So that makeLink does not look for a fragment again
 		link.fragment = false
 	end
 	return link
+end
+local function check_params_ignored_when_embedded(alt, lang, id, cats)
+	if alt then
+		if cats then
+			insert(cats, lang:getFullName() .. " links with ignored alt parameters")
+		end
+	end
+	if id then
+		if cats then
+			insert(cats, lang:getFullName() .. " links with ignored id parameters")
+		end
+	end
 end
 -- Find embedded links and ensure they link to the correct section.
-local function process_embedded_links(text, data, allow_self_link, plain)
+local function process_embedded_links(text, alt, lang, sc, id, cats, no_alt_ast, plain)
 	-- Process the non-linked text.
-	text = data.lang:makeDisplayText(text, data.sc[1], true)
+	text = lang:makeDisplayText(text, sc, true)
-	-- If the text begins with * and another character, then act as if each link begins with *. However, don't do this if the * is contained within a link at the start. E.g. `|*[[foo]]` would set allReconstructed to true, while `|[[*foo]]` would not.
+	-- If the text begins with * and another character, then act as if each link begins with *. However, don't do this if the * is contained within a link at the start. E.g. `|*[[foo]]` would set all_reconstructed to true, while `|[[*foo]]` would not.
-	local allReconstructed = false
+	local all_reconstructed = false
 	if not plain then
-		if require("Module:utilities").get_plaintext(text:gsub("%[%[.-%]%]", ".")):match("^*.") then
+		-- anchor_encode removes links etc.
-			allReconstructed = true
+		if anchor_encode(text):sub(1, 1) == "*" then
+			all_reconstructed = true
 		end
 		-- Otherwise, handle any escapes.
 		text = text:gsub("^(\\-)\\%*", "%1*")
 	end
-	if data.alt then
+	check_params_ignored_when_embedded(alt, lang, id, cats)
-		mw.log("(from Module:links)", "text with embedded wikilinks:", text,
-			"ignored alt:", data.alt, "lang:", data.lang:getNonEtymologicalCode())
+	local function process_link(space1, linktext, space2)
-	end
-	if data.id then
-		mw.log("(from Module:links)", "text with embedded wikilinks:", text,
-			"ignored id:", data.id, "lang:", data.lang:getNonEtymologicalCode())
-	end
-	local function processLink(space1, linktext, space2)
 		local capture = "[[" .. linktext .. "]]"
+		local link = parse_link(linktext)
-		linktext = get_entities(linktext)
+		-- Return unprocessed false positives untouched (e.g. categories).
-		local link = parseLink(linktext)
+		if not link then
+			return capture
-		--Return unprocessed false positives untouched (e.g. categories).
-		if not link then return capture end
-		if allReconstructed and not link.target:find("^%*") then
-			link.target = "*" .. link.target
 		end
-		linktext = makeLink(link, data.lang, data.sc, data.id, allow_self_link, false, plain)
+		if all_reconstructed then
+			if link.target:find("^!!") then
+				-- Check for anti-asterisk !! at the beginning of a target, indicating that a reconstructed term
+				-- wants a part of the term to link to a non-reconstructed term, e.g. Old English
+				-- {{ang-noun|m|head=*[[!!Crist|Cristes]] [[!!mæsseǣfen]]}}.
+				link.target = link.target:sub(3)
+				-- Also remove !! from the display, which may have been copied from the target (as in mæsseǣfen in
+				-- the example above).
+				link.display = link.display:gsub("^!!", "")
+			elseif not link.target:match("^%*") then
+				link.target = "*" .. link.target
+			end
+		end
+		linktext = make_link(link, lang, sc, id, false, nil, no_alt_ast, plain)
 			:gsub("^%[%[", "\3")
 			:gsub("%]%]$", "\4")
 		return space1 .. linktext .. space2
 	end
-	-- Use chars 1 and 2 as temporary substitutions, so that we can use charsets. These are converted to chars 3 and 4 by processLink, which means we can convert any remaining chars 1 and 2 back to square brackets (i.e. those not part of a link).
+	-- Use chars 1 and 2 as temporary substitutions, so that we can use charsets. These are converted to chars 3 and 4 by process_link, which means we can convert any remaining chars 1 and 2 back to square brackets (i.e. those not part of a link).
 	text = text
 		:gsub("%[%[", "\1")
 		:gsub("%]%]", "\2")
 	-- If the script uses ^ to capitalize transliterations, make sure that any carets preceding links are on the inside, so that they get processed with the following text.
-	if text:match("%^") and not data.sc:hasCapitalization() and data.sc:isTransliterated() then
+	if (
+			text:find("^", nil, true) and
+			not sc:hasCapitalization() and
+			sc:isTransliterated()
+		) then
 		text = escape(text, "^")
 			:gsub("%^\1", "\1%^")
 		text = unescape(text, "^")
 	end
-	text = text:gsub("\1(%s*)([^\1\2]-)(%s*)\2", processLink)
+	text = text:gsub("\1(%s*)([^\1\2]-)(%s*)\2", process_link)
 	-- Remove the extra * at the beginning of a language link if it's immediately followed by a link whose display begins with * too.
-	if allReconstructed then
+	if all_reconstructed then
 		text = text:gsub("^%*\3([^|\1-\4]+)|%*", "\3%1|*")
 	end
 	return (text
 		:gsub("[\1\3]", "[[")
-		:gsub("[\2\4]", "]]"))
+		:gsub("[\2\4]", "]]")
+	)
 end
-local function handle_redundant_wikilink(text, alt)
+local function simple_link(term, fragment, alt, lang, sc, id, cats, no_alt_ast, srwc)
-	local temp = text:match("^%[%[(.-)%]%]$")
+	local plain
-	if not temp then
+	if lang == nil then
-		return text, alt
+		lang, plain = get_lang("und"), true
 	end
-	local temp_lower = temp:lower()
-	for _, falsePositive in ipairs({"category", "cat", "file", "image"}) do
+	-- Get the link target and display text. If the term is the empty string, treat the input as a link to the current page.
-		if temp_lower:match("^" .. falsePositive .. ":") then
+	if term == "" then
-			return text, alt
+		term = get_current_title().prefixedText
+	elseif term then
+		local new_term, new_alt = export.get_wikilink_parts(term, true)
+		if new_term then
+			check_params_ignored_when_embedded(alt, lang, id, cats)
+			-- [[|foo]] links are treated as plaintext "[[|foo]]".
+			-- FIXME: Pipes should be handled via a proper escape sequence, as they can occur in unsupported titles.
+			if new_term == "" then
+				term, alt = nil, term
+			else
+				local title = new_title(new_term)
+				if title then
+					local ns = title.namespace
+					-- File: and Category: links should be returned as-is.
+					if ns == 6 or ns == 14 then
+						return term
+					end
+				end
+				term, alt = new_term, new_alt
+				if cats then
+					if not (srwc and srwc(term, alt)) then
+						insert(cats, lang:getFullName() .. " links with redundant wikilinks")
+					end
+				end
+			end
 		end
 	end
-	-- Note: it's possible for "[[" or "]]" to be uninvolved in links, so we need to check for both individually (e.g. "[[aaa]] bb]]" would not have a redundant wikilink).
+	if alt then
-	if temp and not (temp:find("%[%[") or temp:find("%]%]")) then
+		alt = selective_trim(alt)
-		text, alt = temp:match("^([^|]+)|?(.-)$")
 		if alt == "" then
 			alt = nil
 		end
 	end
-	return text, alt
+	-- If there's nothing to process, return nil.
+	if not (term or alt) then
+		return nil
+	end
+	-- If there is no script, get one.
+	if not sc then
+		sc = lang:findBestScript(alt or term)
+	end
+	-- Embedded wikilinks need to be processed individually.
+	if term then
+		local open = find(term, "[[", nil, true)
+		if open and find(term, "]]", open + 2, true) then
+			return process_embedded_links(term, alt, lang, sc, id, cats, no_alt_ast, plain)
+		end
+		term = selective_trim(term)
+	end
+	-- If not, make a link using the parameters.
+	return make_link({
+		target = term,
+		display = alt,
+		fragment = fragment
+	}, lang, sc, id, true, cats, no_alt_ast, plain)
 end
@@ Line 372: / Line 730: @@
 : Sense id string. If this argument is defined, the link will point to a language-specific sense id ({{ll|en|identifier|id=HTML}}) created by the template {{temp|senseid}}. A sense id consists of the language's canonical name, a hyphen (<code>-</code>), and the string that was supplied as the <code class="n">id</code> argument. This is useful when a term has more than one sense in a language. If the <code class="n">term</code> argument contains wikilinks, this argument is ignored. (Links in which the sense id is ignored are tracked with the tracking template {{whatlinkshere|tracking=links/id-ignored}}.)
 The second argument is as follows:
-; <code class="n">allowSelfLink</code>
+; <code class="n">allow_self_link</code>
 : If {{code|lua|true}}, the function will also generate links to the current page. The default ({{code|lua|false}}) will not generate a link but generate a bolded "self link" instead.
 The following special options are processed for each link (both simple text and with embedded wikilinks):
@@ Line 380: / Line 738: @@
 ** {{temp|l|en|*nix}} links to the nonexistent page [[Reconstruction:English/nix]] (<code class="n">*</code> is interpreted as a reconstruction), but {{temp|l|en|:*nix}} links to [[*nix]].
 ** {{temp|l|sl|Franche-Comté}} links to the nonexistent page [[Franche-Comte]] (<code>é</code> is converted to <code>e</code> by <code class="n">makeEntryName</code>), but {{temp|l|sl|:Franche-Comté}} links to [[Franche-Comté]].]==]
-function export.language_link(data, allow_self_link)
+function export.language_link(data)
 	if type(data) ~= "table" then
-		error("The first argument to the function language_link must be a table. See Module:links/documentation for more information.")
+		error(
-	-- Nothing to process, return nil.
+		"The first argument to the function language_link must be a table. See Module:links/documentation for more information.")
-	elseif not (data.term or data.alt) then
-		return nil
 	end
-	local text = data.term
+	-- Categorize links to "und".
+	local lang, cats = data.lang, data.cats
-	data.sc = data.sc or data.lang:findBestScript(text)
+	if cats and lang:getCode() == "und" then
+		insert(cats, "Undetermined language links")
-	ignore_cap = ignore_cap or mw.loadData("Module:links/data").ignore_cap
-	if (ignore_cap[data.lang:getCode()] or ignore_cap[data.lang:getNonEtymologicalCode()]) and text then
-		text = text:gsub("%^", "")
 	end
-	-- Do we have a redundant wikilink? If so, remove it.
+	return simple_link(
-	if text then
+		data.term,
-		text, data.alt = handle_redundant_wikilink(text, data.alt)
+		data.fragment,
-	end
+		data.alt,
+		lang,
-	-- Do we have embedded wikilinks?
+		data.sc,
-	if text and text:find("%[%[.-%]%]") then
+		data.id,
-		text = process_embedded_links(text, data, allow_self_link)
+		cats,
-	-- If not, make a link using the parameters.
+		data.no_alt_ast,
-	else
+		data.suppress_redundant_wikilink_cat
-		text = text and cond_trim(text)
+	)
-		data.alt = data.alt and cond_trim(data.alt)
-		text = makeLink({ target = text, display = data.alt }, data.lang, data.sc, data.id, allow_self_link, true)
-	end
-	return text
 end
-function export.plain_link(data, allow_self_link)
+function export.plain_link(data)
 	if type(data) ~= "table" then
-		error("The first argument to the function language_link must be a table. See Module:links/documentation for more information.")
+		error(
-	-- Nothing to process, return nil.
+		"The first argument to the function plain_link must be a table. See Module:links/documentation for more information.")
-	elseif not (data.term or data.alt) then
-		return nil
-	-- Only have alt, just return it.
-	elseif not data.term then
-		return data.alt
 	end
-	local text = data.term
+	return simple_link(
-	if (not data.lang) or data.lang:getNonEtymologicalCode() ~= "und" then
+		data.term,
-		data.lang = require("Module:languages").getByCode("und")
+		data.fragment,
-	end
+		data.alt,
-	data.sc = data.sc or require("Module:scripts").findBestScriptWithoutLang(text)
+		nil,
+		data.sc,
-	-- Do we have a redundant wikilink? If so, remove it.
+		data.id,
-	if text then
+		data.cats,
-		text, data.alt = handle_redundant_wikilink(text, data.alt)
+		data.no_alt_ast,
-	end
+		data.suppress_redundant_wikilink_cat
+	)
-	-- Do we have embedded wikilinks?
-	if text:find("%[%[.-%]%]") then
-		text = process_embedded_links(text, data, allow_self_link, true)
-	-- If not, make a link using the parameters.
-	else
-		text = cond_trim(text)
-		data.alt = data.alt and cond_trim(data.alt)
-		text = makeLink({ target = text, display = data.alt }, data.lang, data.sc, data.id, allow_self_link, true, true)
-	end
-	return text
 end
 --[==[Replace any links with links to the correct section, but don't link the whole text if no embedded links are found. Returns the display text form.]==]
-function export.embedded_language_links(data, allow_self_link)
+function export.embedded_language_links(data)
 	if type(data) ~= "table" then
-		error("The first argument to the function language_link must be a table. See Module:links/documentation for more information.")
+		error(
+		"The first argument to the function embedded_language_links must be a table. See Module:links/documentation for more information.")
+	end
+	local term, lang, sc = data.term, data.lang, data.sc
+	-- If we don't have a script, get one.
+	if not sc then
+		sc = lang:findBestScript(term)
 	end
-	local text = data.term
+	-- Do we have embedded wikilinks? If so, they need to be processed individually.
-	data.sc = data.sc or data.lang:findBestScript(text)
+	local open = find(term, "[[", nil, true)
+	if open and find(term, "]]", open + 2, true) then
-	-- Do we have embedded wikilinks?
+		return process_embedded_links(term, data.alt, lang, sc, data.id, data.cats, data.no_alt_ast)
-	if text:find("%[%[.-%]%]") then
-		text = process_embedded_links(text, data, allow_self_link)
-	else
-		-- If there are no embedded wikilinks, return the display text.
-		text = cond_trim(text)
-		text = (data.lang:makeDisplayText(text, data.sc, true))
 	end
-	return text
+	-- If not, return the display text.
+	term = selective_trim(term)
+	-- FIXME: Double-escape any percent-signs, because we don't want to treat non-linked text as having percent-encoded characters. This is a hack: percent-decoding should come out of [[Module:languages]] and only be dealt with in this module, as it's specific to links.
+	term = term:gsub("%%", "%%25")
+	return lang:makeDisplayText(term, sc, true)
 end
-function export.mark(text, itemType, face, lang)
+function export.mark(text, item_type, face, lang)
 	local tag = { "", "" }
-	if itemType == "gloss" then
+	if item_type == "gloss" then
 		tag = { '<span class="mention-gloss-double-quote">“</span><span class="mention-gloss">',
 			'</span><span class="mention-gloss-double-quote">”</span>' }
-	elseif itemType == "tr" then
+	elseif item_type == "tr" then
 		if face == "term" then
-			tag = { '<span lang="' .. lang:getNonEtymologicalCode() .. '" class="tr mention-tr Latn">',
+			tag = { '<span lang="' .. lang:getFullCode() .. '" class="tr mention-tr Latn">',
 				'</span>' }
 		else
-			tag = { '<span lang="' .. lang:getNonEtymologicalCode() .. '" class="tr Latn">', '</span>' }
+			tag = { '<span lang="' .. lang:getFullCode() .. '" class="tr Latn">', '</span>' }
 		end
-	elseif itemType == "ts" then
+	elseif item_type == "ts" then
 		-- \226\129\160 = word joiner (zero-width non-breaking space) U+2060
 		tag = { '<span class="ts mention-ts Latn">/\226\129\160', '\226\129\160/</span>' }
-	elseif itemType == "pos" then
+	elseif item_type == "pos" then
 		tag = { '<span class="ann-pos">', '</span>' }
-	elseif itemType == "annotations" then
+	elseif item_type == "non-gloss" then
+		tag = { '<span class="ann-non-gloss">', '</span>' }
+	elseif item_type == "annotations" then
 		tag = { '<span class="mention-gloss-paren annotation-paren">(</span>',
 			'<span class="mention-gloss-paren annotation-paren">)</span>' }
+	elseif item_type == "infl" then
+		tag = { '<span class="ann-infl">', '</span>' }
 	end
@@ Line 501: / Line 843: @@
 end
---[==[Formats the annotations that are displayed with a link created by {{code|lua|full_link}}. Annotations are the extra bits of information that are displayed following the linked term, and include things such as gender, transliteration, gloss and so on.
+local pos_tags
+--[==[Formats the annotations that are displayed with a link created by {{code|lua|full_link}}. Annotations are the extra bits of information that are displayed following the linked term, and include things such as gender, transliteration, gloss and so on.
 * The first argument is a table possessing some or all of the following keys:
 *:; <code class="n">genders</code>
@@ Line 510: / Line 854: @@
 *:: Gloss that translates the term in the link, or gives some other descriptive information.
 *:; <code class="n">pos</code>
-*:: Part of speech of the linked term. If the given argument matches one of the templates in [[:Category:Part of speech tags]], then call that to show a part-of-speech tag. Otherwise, just show the given text as it is.
+*:: Part of speech of the linked term. If the given argument matches one of the aliases in `pos_aliases` in [[Module:headword/data]], or consists of a part of speech or alias followed by `f` (for a non-lemma form), expand it appropriately. Otherwise, just show the given text as it is.
+*:; <code class="n">ng</code>
+*:: Arbitrary non-gloss descriptive text for the link. This should be used in preference to putting descriptive text in `gloss` or `pos`.
 *:; <code class="n">lit</code>
 *:: Literal meaning of the term, if the usual meaning is figurative or idiomatic.
+*:; <code class="n">infl</code>
+*:: Table containing a list of grammar tags in the style of [[Module:form of]] `tagged_inflections`.
 *:Any of the above values can be omitted from the <code class="n">info</code> argument. If a completely empty table is given (with no annotations at all), then an empty string is returned.
 * The second argument is a string. Valid values are listed in [[Module:script utilities/data]] "data.translit" table.]==]
@@ Line 529: / Line 877: @@
 	if data.genders and #data.genders > 0 then
-		local m_gen = require("Module:gender and number")
+		local genders, gender_cats = format_genders(data.genders, data.lang)
-		insert(output, "&nbsp;" .. m_gen.format_list(data.genders, data.lang))
+		insert(output, "&nbsp;" .. genders)
+		if gender_cats then
+			local cats = data.cats
+			if cats then
+				extend(cats, gender_cats)
+			end
+		end
 	end
@@ Line 545: / Line 899: @@
 		if data.tr[1] and data.ts[1] then
-			insert(annotations,
+			insert(annotations, tag_translit(data.tr[1], data.lang, kind) .. " " .. export.mark(data.ts[1], "ts"))
-				require("Module:script utilities").tag_translit(data.tr[1], data.lang, kind)
-				.. " " .. export.mark(data.ts[1], "ts"))
 		elseif data.ts[1] then
 			insert(annotations, export.mark(data.ts[1], "ts"))
 		else
-			insert(annotations,
+			insert(annotations, tag_translit(data.tr[1], data.lang, kind))
-				require("Module:script utilities").tag_translit(data.tr[1], data.lang, kind))
 		end
 	end
@@ Line 564: / Line 915: @@
 	if data.pos then
 		-- debug category for pos= containing transcriptions
-		if data.pos:find("/[^><]*/") then
+		if data.pos:match("/[^><]-/") then
 			data.pos = data.pos .. "[[Category:links likely containing transcriptions in pos]]"
 		end
-		pos_tags = pos_tags or mw.loadData("Module:links/data").pos_tags
+		-- Canonicalize part of speech aliases as well as non-lemma aliases like 'nf' or 'nounf' for "noun form".
-		insert(annotations, export.mark(pos_tags[data.pos] or data.pos, "pos"))
+		pos_tags = pos_tags or (m_headword_data or get_headword_data()).pos_aliases
+		local pos = pos_tags[data.pos]
+		if not pos and data.pos:find("f$") then
+			local pos_form = data.pos:sub(1, -2)
+			-- We only expand something ending in 'f' if the result is a recognized non-lemma POS.
+			pos_form = (pos_tags[pos_form] or pos_form) .. " form"
+			if (m_headword_data or get_headword_data()).nonlemmas[pos_form .. "s"] then
+				pos = pos_form
+			end
+		end
+		insert(annotations, export.mark(pos or data.pos, "pos"))
+	end
+	-- Inflection data
+	if data.infl then
+		local m_form_of = require(form_of_module)
+		-- Split tag sets manually, since tagged_inflections creates a numbered list, and we do not want that.
+		local infl_outputs = {}
+		local tag_sets = m_form_of.split_tag_set(data.infl)
+		for _, tag_set in ipairs(tag_sets) do
+			table.insert(infl_outputs,
+				m_form_of.tagged_inflections({ tags = tag_set, lang = data.lang, nocat = true, nolink = true, nowrap = true }))
+		end
+		insert(annotations, export.mark(table.concat(infl_outputs, "; "), "infl"))
+	end
+	-- Non-gloss text
+	if data.ng then
+		insert(annotations, export.mark(data.ng, "non-gloss"))
 	end
@@ Line 575: / Line 954: @@
 	if data.lit then
 		insert(annotations, "literally " .. export.mark(data.lit, "gloss"))
+	end
+	-- Provide a hook to insert additional annotations such as nested inflections.
+	if data.postprocess_annotations then
+		data.postprocess_annotations {
+			data = data,
+			annotations = annotations
+		}
 	end
@@ Line 584: / Line 971: @@
 end
---[==[Creates a full link, with annotations (see <code class="n">[[#format_link_annotations|format_link_annotations]]</code>), in the style of {{temp|l}} or {{temp|m}}.
+-- Encode certain characters to avoid various delimiter-related issues at various stages. We need to encode < and >
-The first argument, <code class="n">data</code>, must be a table. It contains the various elements that can be supplied as parameters to {{temp|l}} or {{temp|m}}:
+-- because they end up forming part of CSS class names inside of <span ...> and will interfere with finding the end
+-- of the HTML tag. I first tried converting them to URL encoding, i.e. %3C and %3E; they then appear in the URL as
+-- %253C and %253E, which get mapped back to %3C and %3E when passed to [[Module:accel]]. But mapping them to &lt;
+-- and &gt; somehow works magically without any further work; they appear in the URL as < and >, and get passed to
+-- [[Module:accel]] as < and >. I have no idea who along the chain of calls is doing the encoding and decoding. If
+-- someone knows, please modify this comment appropriately!
+local accel_char_map -- Cached substitution table; stays nil until get_accel_char_map() has run once.
+local function get_accel_char_map() -- Builds the substitution table, stores it in `accel_char_map` and returns it.
+	accel_char_map = {
+		["%"] = ".",
+		[" "] = "_",
+		["_"] = u(0xFFF0), -- `u` is defined outside this block; presumably it yields the character U+FFF0 — TODO confirm.
+		["<"] = "&lt;",
+		[">"] = "&gt;",
+	}
+	return accel_char_map
+end
+local function encode_accel_param_chars(param) -- Substitutes characters of the string `param` using the accel character map (built on first use).
+	return (param:gsub("[% <>_]", accel_char_map or get_accel_char_map())) -- Outer parentheses drop gsub's match count. NOTE(review): inside a character class "% " is an escaped space, so this set appears not to match a literal "%" and the "%" map entry may never apply — confirm whether "[%% <>_]" was intended.
+end
+local function encode_accel_param(prefix, param) -- Returns `prefix` followed by the encoded `param` (a string or a table of strings), or "" when `param` is nil/false.
+	if not param then
+		return ""
+	end
+	if type(param) == "table" then
+		local filled_params = {}
+		-- There may be gaps in the sequence, especially for translit params, so scan all keys for the largest numeric index.
+		local maxindex = 0
+		for k in pairs(param) do
+			if type(k) == "number" and k > maxindex then
+				maxindex = k
+			end
+		end
+		for i = 1, maxindex do
+			filled_params[i] = param[i] or "" -- Missing (nil/false) entries become "" so that positions are preserved.
+		end
+		-- [[Module:accel]] splits these up again; "*~!" is the separator placed between entries.
+		param = concat(filled_params, "*~!")
+	end
+	-- This is decoded again by [[WT:ACCEL]].
+	return prefix .. encode_accel_param_chars(param)
+end
+local function insert_if_not_blank(list, item) -- Appends `item` to `list` unless `item` is the empty string.
+	if item == "" then
+		return
+	end
+	insert(list, item)
+end
+local function get_class(lang, tr, accel, nowrap) -- Returns a space-separated class string built from the `accel` table and the `nowrap` class name; "" when neither is given.
+	if not accel and not nowrap then
+		return ""
+	end
+	local classes = {}
+	if accel then
+		insert(classes, "form-of lang-" .. lang:getFullCode())
+		local form = accel.form
+		if form then
+			insert(classes, encode_accel_param_chars(form) .. "-form-of")
+		end
+		insert_if_not_blank(classes, encode_accel_param("gender-", accel.gender)) -- encode_accel_param returns "" for absent fields, which are then skipped.
+		insert_if_not_blank(classes, encode_accel_param("pos-", accel.pos))
+		insert_if_not_blank(classes, encode_accel_param("transliteration-", accel.translit or (tr ~= "-" and tr or nil))) -- Falls back to `tr` unless `tr` is "-" (or nil).
+		insert_if_not_blank(classes, encode_accel_param("target-", accel.target))
+		insert_if_not_blank(classes, encode_accel_param("origin-", accel.lemma))
+		insert_if_not_blank(classes, encode_accel_param("origin_transliteration-", accel.lemma_translit))
+		if accel.no_store then
+			insert(classes, "form-of-nostore")
+		end
+	end
+	if nowrap then
+		insert(classes, nowrap) -- `nowrap` is itself the class name to add, not a boolean flag.
+	end
+	return concat(classes, " ")
+end
+-- Add any left or right regular or accent qualifiers, labels or references to a formatted term. `data` is the object
+-- specifying the term, which should optionally contain:
+-- * a language object in `lang`; required if any accent qualifiers or labels are given;
+-- * left regular qualifiers in `q` (an array of strings or a single string); an empty array or blank string will be
+--   ignored;
+-- * right regular qualifiers in `qq` (an array of strings or a single string); an empty array or blank string will be
+--   ignored;
+-- * left accent qualifiers in `a` (an array of strings); an empty array will be ignored;
+-- * right accent qualifiers in `aa` (an array of strings); an empty array will be ignored;
+-- * left labels in `l` (an array of strings); an empty array will be ignored;
+-- * right labels in `ll` (an array of strings); an empty array will be ignored;
+-- * references in `refs`, an array either of strings (formatted reference text) or objects containing fields `text`
+--   (formatted reference text) and optionally `name` and/or `group`.
+-- `formatted` is the formatted version of the term itself. Returns the result of `format_qualifiers` when any of the
+-- above arrays has a first element, otherwise `formatted` unchanged.
+local function add_qualifiers_and_refs_to_term(data, formatted)
+	local q = data.q
+	if type(q) == "string" then
+		q = { q } -- NOTE(review): a blank string still yields a one-element table that passes the check below; presumably format_qualifiers ignores it — confirm.
+	end
+	local qq = data.qq
+	if type(qq) == "string" then
+		qq = { qq }
+	end
+	if q and q[1] or qq and qq[1] or data.a and data.a[1] or data.aa and data.aa[1] or data.l and data.l[1] or
+		data.ll and data.ll[1] or data.refs and data.refs[1] then
+		formatted = format_qualifiers {
+			lang = data.lang,
+			text = formatted,
+			q = q,
+			qq = qq,
+			a = data.a,
+			aa = data.aa,
+			l = data.l,
+			ll = data.ll,
+			refs = data.refs,
+		}
+	end
+	return formatted
+end
+--[==[
+Creates a full link, with annotations (see `[[#format_link_annotations|format_link_annotations]]`), in the style of {{tl|l}} or {{tl|m}}.
+The first argument, `data`, must be a table. It contains the various elements that can be supplied as parameters to {{tl|l}} or {{tl|m}}:
 { {
 	term = entry_to_link_to,
@@ Line 591: / Line 1,101: @@
 	lang = language_object,
 	sc = script_object,
+	track_sc = boolean,
+	no_nonstandard_sc_cat = boolean,
+	fragment = link_fragment,
 	id = sense_id,
 	genders = { "gender1", "gender2", ... },
 	tr = transliteration,
+	respect_link_tr = boolean,
 	ts = transcription,
 	gloss = gloss,
 	pos = part_of_speech_tag,
+	ng = non-gloss text,
 	lit = literal_translation,
+	infl = { "form_of_grammar_tag1", "form_of_grammar_tag2", ... },
+	no_alt_ast = boolean,
 	accel = {accelerated_creation_tags},
 	interwiki = interwiki,
+	pretext = "text_at_beginning" or nil,
+	posttext = "text_at_end" or nil,
+	q = { "left_qualifier1", "left_qualifier2", ...} or "left_qualifier",
+	qq = { "right_qualifier1", "right_qualifier2", ...} or "right_qualifier",
+	l = { "left_label1", "left_label2", ...},
+	ll = { "right_label1", "right_label2", ...},
+	a = { "left_accent_qualifier1", "left_accent_qualifier2", ...},
+	aa = { "right_accent_qualifier1", "right_accent_qualifier2", ...},
+	refs = { "formatted_ref1", "formatted_ref2", ...} or { {text = "text", name = "name", group = "group"}, ... },
+	show_qualifiers = boolean,
 } }
-Any one of the items in the <code class="n">data</code> table may be {{code|lua|nil}}, but an error will be shown if neither <code class="n">term</code> nor <code class="n">alt</code> nor <code class="n">tr</code> is present.
+Any one of the items in the `data` table may be {nil}, but an error will be shown if neither `term` nor `alt` nor `tr`
-Thus, calling {{code|lua|2=full_link{ term = term, lang = lang, sc = sc } }}, where <code class="n">term</code> is an entry name, <code class="n">lang</code>  is a [[Module:languages#Language objects|language object]] from [[Module:languages]], and <code class="n">sc</code> is a [[Module:scripts#Script objects|script object]] from [[Module:scripts]], will give a plain link similar to the one produced by the template {{temp|l}}, and calling {{code|lua|2=full_link( { term = term, lang = lang, sc = sc }, "term" )}} will give a link similar to the one produced by the template {{temp|m}}.
+is present. Thus, calling {full_link{ term = term, lang = lang, sc = sc }}, where `term` is the page to link to (which
+may have diacritics that will be stripped and/or embedded bracketed links), `lang` is a
+[[Module:languages#Language objects|language object]] from [[Module:languages]] and `sc` is a
+[[Module:scripts#Script objects|script object]] from [[Module:scripts]], will give a plain link similar to the one
+produced by the template {{tl|l}}, and calling {full_link( { term = term, lang = lang, sc = sc }, "term" )} will give a
+link similar to the one produced by the template {{tl|m}}.
 The function will:
-* Try to determine the script, based on the characters found in the term or alt argument, if the script was not given.
+* Try to determine the script, based on the characters found in the `term` or `alt` argument, if the script was not
-* Call <code class="n">[[#language_link|language_link]]</code> on the term or alt forms, to remove diacritics in the page name, process any embedded wikilinks and create links to Reconstruction or Appendix pages when necessary.
+  given. If a script is given and `track_sc` is {true}, it will check whether the input script is the same as the one
-* Call <code class="n">[[Module:script utilities#tag_text]]</code> to add the appropriate language and script tags to the term, and to italicize terms written in the Latin script if necessary. Accelerated creation tags, as used by [[WT:ACCEL]], are included.
+  which would have been automatically generated and add the category [[:Category:LANG terms with redundant script codes]]
-* Generate a transliteration, based on the alt or term arguments, if the script is not Latin and no transliteration was provided.
+  if yes, or [[:Category:LANG terms with non-redundant manual script codes]] if no. This should be used when the input
-* Add the annotations (transliteration, gender, gloss etc.) after the link.]==]
+  script object is directly determined by a template's `sc` parameter.
-function export.full_link(data, face, allow_self_link, no_check_redundant_translit)
+* Call `[[#language_link|language_link]]` on the `term` or `alt` forms, to remove diacritics in the page name, process
-	-- Prevent data from being destructively modified.
+  any embedded wikilinks and create links to Reconstruction or Appendix pages when necessary.
-	local data = require("Module:table").shallowcopy(data)
+* Call `[[Module:script utilities#tag_text]]` to add the appropriate language and script tags to the term and
+  italicize terms written in the Latin script if necessary. Accelerated creation tags, as used by [[WT:ACCEL]], are
+  included.
+* Generate a transliteration, based on the `alt` or `term` arguments, if the script is not Latin, no transliteration was
+  provided in `tr` and the combination of the term's language and script supports automatic transliteration. The
+  transliteration itself will be linked only if `respect_link_tr` is specified and the language of the term has the
+  `link_tr` property set for the script of the term.
+* Add the annotations (transliteration, gender, gloss, etc.) after the link.
+* If `no_alt_ast` is specified, then the `alt` text does not need to contain an asterisk if the language is
+  reconstructed. This should only be used by modules which really need to allow links to reconstructions that don't
+  display asterisks (e.g. number boxes).
+* If `pretext` or `posttext` is specified, this is text to (respectively) prepend or append to the output, directly
+  before processing qualifiers, labels and references. This can be used to add arbitrary extra text inside of the
+  qualifiers, labels and references.
+* If `show_qualifiers` is set in `data` or the `show_qualifiers` function argument is given, then left and right
+  qualifiers, accent qualifiers, labels and references will be displayed; otherwise they will be ignored. (This is
+  because a fair amount of code stores qualifiers, labels and/or references in these fields and displays them itself,
+  rather than expecting {full_link()} to display them.)]==]
+function export.full_link(data, face, allow_self_link, show_qualifiers)
 	if type(data) ~= "table" then
 		error("The first argument to the function full_link must be a table. "
 			.. "See Module:links/documentation for more information.")
 	end
-	local terms = {true}
+	-- Prevent data from being destructively modified.
+	local data = shallow_copy(data)
+	-- FIXME: this shouldn't be added to `data`, as that means the input table needs to be cloned.
+	data.cats = {}
+	-- Categorize links to "und".
+	local lang, cats = data.lang, data.cats
+	if cats and lang:getCode() == "und" then
+		insert(cats, "Undetermined language links")
+	end
+	local terms = { true }
 	-- Generate multiple forms if applicable.
-	for _, param in ipairs{"term", "alt"} do
+	for _, param in ipairs { "term", "alt" } do
-		if type(data[param]) == "string" and data[param]:find("//") then
+		if type(data[param]) == "string" and data[param]:find("//", nil, true) then
-			data[param] = escape(data[param], "//")
+			data[param] = export.split_on_slashes(data[param])
-			data[param] = split(data[param], "//") or {}
+		elseif type(data[param]) == "string" and not (type(data.term) == "string" and data.term:find("//", nil, true)) then
-			for i, subparam in ipairs(data[param]) do
+			if not data.no_generate_forms then
-				data[param][i] = unescape(subparam, "//")
+				data[param] = lang:generateForms(data[param])
-				if subparam == "" then
+			else
-					data[param][i] = nil
+				data[param] = { data[param] }
-				end
 			end
-		elseif type(data[param]) == "string" and not (type(data.term) == "string" and data.term:find("//")) then
-			data[param] = data.lang:generateForms(data[param])
 		else
 			data[param] = {}
 		end
 	end
-	for _, param in ipairs{"sc", "tr", "ts"} do
+	for _, param in ipairs { "sc", "tr", "ts" } do
-		data[param] = {data[param]}
+		data[param] = { data[param] }
 	end
-	for _, param in ipairs{"term", "alt", "sc", "tr", "ts"} do
+	for _, param in ipairs { "term", "alt", "sc", "tr", "ts" } do
 		for i in pairs(data[param]) do
 			terms[i] = true
 		end
 	end
 	-- Create the link
 	local output = {}
-	local categories = {}
+	local id, no_alt_ast, srwc, accel, nevercalltr = data.id, data.no_alt_ast, data.suppress_redundant_wikilink_cat,
-	local link = ""
+		data.accel, data.never_call_transliteration_module
-	local annotations
+	local link_tr = data.respect_link_tr and lang:link_tr(data.sc[1])
-	local phonetic_extraction = mw.loadData("Module:links/data").phonetic_extraction
-	phonetic_extraction = phonetic_extraction[data.lang:getCode()] or phonetic_extraction[data.lang:getNonEtymologicalCode()]
 	for i in ipairs(terms) do
+		local link
 		-- Is there any text to show?
 		if (data.term[i] or data.alt[i]) then
 			-- Try to detect the script if it was not provided
-			local best = data.lang:findBestScript(data.alt[i] or data.term[i])
+			local display_term = data.alt[i] or data.term[i]
+			local best = lang:findBestScript(display_term)
+			-- no_nonstandard_sc_cat is intended for use in [[Module:interproject]]
+			if (
+					not data.no_nonstandard_sc_cat and
+					best:getCode() == "None" and
+					find_best_script_without_lang(display_term):getCode() ~= "None"
+				) then
+				insert(cats, lang:getFullName() .. " terms in nonstandard scripts")
+			end
 			if not data.sc[i] then
 				data.sc[i] = best
+				-- Track uses of sc parameter.
+			elseif data.track_sc then
+				if data.sc[i]:getCode() == best:getCode() then
+					insert(cats, lang:getFullName() .. " terms with redundant script codes")
+				else
+					insert(cats, lang:getFullName() .. " terms with non-redundant manual script codes")
+				end
 			end
 			-- If using a discouraged character sequence, add to maintenance category
 			if data.sc[i]:hasNormalizationFixes() == true then
 				if (data.term[i] and data.sc[i]:fixDiscouragedSequences(toNFC(data.term[i])) ~= toNFC(data.term[i])) or (data.alt[i] and data.sc[i]:fixDiscouragedSequences(toNFC(data.alt[i])) ~= toNFC(data.alt[i])) then
-					insert(categories, "Pages using discouraged character sequences")
+					insert(cats, "Pages using discouraged character sequences")
 				end
 			end
-			local class = ""
+			link = simple_link(
+				data.term[i],
-			-- Encode certain characters to avoid various delimiter-related issues at various stages. We need to encode < and >
+				data.fragment,
-			-- because they end up forming part of CSS class names inside of <span ...> and will interfere with finding the end
+				data.alt[i],
-			-- of the HTML tag. I first tried converting them to URL encoding, i.e. %3C and %3E; they then appear in the URL as
+				lang,
-			-- %253C and %253E, which get mapped back to %3C and %3E when passed to [[Module:accel]]. But mapping them to &lt;
+				data.sc[i],
-			-- and &gt; somehow works magically without any further work; they appear in the URL as < and >, and get passed to
+				id,
-			-- [[Module:accel]] as < and >. I have no idea who along the chain of calls is doing the encoding and decoding. If
+				cats,
-			-- someone knows, please modify this comment appropriately!
+				no_alt_ast,
-			local encode_accel_char_map = {
+				srwc
-				["%"] = ".",
+			)
-				[" "] = "_",
+		end
-				["<"] = "&lt;",
+		-- simple_link can return nil, so check if a link has been generated.
-				[">"] = "&gt;",
+		if link then
-			}
+			-- Add "nowrap" class to prefixes in order to prevent wrapping after the hyphen
-			local function encode_accel_param_chars(param)
+			local nowrap
-				local retval = param:gsub("[% <>]", encode_accel_char_map) -- discard second return value
+			local display_term = data.alt[i] or data.term[i]
-				return retval
+			if display_term and (display_term:find("^%-") or display_term:find("^־")) then -- Hebrew maqqef -- FIXME, use hyphens from [[Module:affix]]
+				nowrap = "nowrap"
 			end
-			local function encode_accel_param(prefix, param)
+			link = tag_text(link, lang, data.sc[i], face, get_class(lang, data.tr[i], accel, nowrap))
-				if not param then
-					return ""
-				end
-				if type(param) == "table" then
-					local filled_params = {}
-					-- There may be gaps in the sequence, especially for translit params.
-					local maxindex = 0
-					for k, v in pairs(param) do
-						if type(k) == "number" and k > maxindex then
-							maxindex = k
-						end
-					end
-					for i=1,maxindex do
-						filled_params[i] = param[i] or ""
-					end
-					-- [[Module:accel]] splits these up again.
-					param = table.concat(filled_params, "*~!")
-				end
-				-- This is decoded again by [[WT:ACCEL]].
-				return prefix .. encode_accel_param_chars(param)
-			end
-			if data.accel then
-				local form = data.accel.form and encode_accel_param_chars(data.accel.form) .. "-form-of" or ""
-				local gender = encode_accel_param("gender-", data.accel.gender)
-				local pos = encode_accel_param("pos-", data.accel.pos)
-				local translit = encode_accel_param("transliteration-",
-					data.accel.translit or (data.tr[i] ~= "-" and data.tr[i] or nil))
-				local target = encode_accel_param("target-", data.accel.target)
-				local lemma = encode_accel_param("origin-", data.accel.lemma)
-				local lemma_translit = encode_accel_param("origin_transliteration-", data.accel.lemma_translit)
-				local no_store = data.accel.no_store and "form-of-nostore" or ""
-				local accel =
-					form .. " " ..
-					gender .. " " ..
-					pos .. " " ..
-					translit .. " " ..
-					target .. " " ..
-					lemma .. " " ..
-					lemma_translit .. " " ..
-					no_store .. " "
-				class = "form-of lang-" .. data.lang:getNonEtymologicalCode() .. " " .. accel
-			end
-			-- Only make a link if the term has been given, otherwise just show the alt text without a link
-			local term_data = {term = data.term[i], alt = data.alt[i], lang = data.lang, sc = data.sc[i], id = data.id, genders = data.genders, tr = data.tr[i], ts = data.ts[i], gloss = data.gloss, pos = data.pos, lit = data.lit, accel = data.accel, interwiki = data.interwiki}
-			link = require("Module:script utilities").tag_text(
-				data.term[i] and export.language_link(term_data, allow_self_link)
-				or data.alt[i], data.lang, data.sc[i], face, class)
 		else
 			--[[	No term to show.
 					Is there at least a transliteration we can work from?	]]
-			link = require("Module:script utilities").request_script(data.lang, data.sc[i])
+			link = request_script(lang, data.sc[i])
+			-- No link to show, and no transliteration either. Show a term request (unless it's a substrate, as they rarely take terms).
-			if (link == "" or (not data.tr[i]) or data.tr[i] == "-") and data.lang:getFamilyCode() ~= "qfa-sub" then
+			if (link == "" or (not data.tr[i]) or data.tr[i] == "-") and lang:getFamilyCode() ~= "qfa-sub" then
-				-- No link to show, and no transliteration either. Show a term request (unless it's a substrate, as they rarely take terms).
+				-- If there are multiple terms, break the loop instead.
-				if mw.title.getCurrentTitle().nsText ~= "Template" then
+				if i > 1 then
-					insert(categories, data.lang:getNonEtymologicalName() .. " term requests")
+					remove(output)
+					break
+				elseif NAMESPACE ~= "Template" then
+					insert(cats, lang:getFullName() .. " term requests")
 				end
 				link = "<small>[Term?]</small>"
@@ Line 759: / Line 1,284: @@
 		end
 		insert(output, link)
-		if i < #terms then insert(output, "<span class=\"Zsym mention\" style=\"font-size:100%;\">／</span>") end
+		if i < #terms then insert(output, "<span class=\"Zsym mention\" style=\"font-size:100%;\">&nbsp;/ </span>") end
 	end
-	-- TODO: Currently only handles the first transliteration, pending consensus on how to handle multiple translits for multiple forms, as this is not always desirable (e.g. traditional/simplified Chinese).
+	-- When suppress_tr is true, do not show or generate any transliteration
-	if data.tr[1] == "" or data.tr[1] == "-" then
+	if data.suppress_tr then
 		data.tr[1] = nil
+	else
+		-- TODO: Currently only handles the first transliteration, pending consensus on how to handle multiple translits for multiple forms, as this is not always desirable (e.g. traditional/simplified Chinese).
+		if data.tr[1] == "" or data.tr[1] == "-" then
+			data.tr[1] = nil
+		else
+			local phonetic_extraction = load_data("Module:links/data").phonetic_extraction
+			phonetic_extraction = phonetic_extraction[lang:getCode()] or phonetic_extraction[lang:getFullCode()]
+			if phonetic_extraction then
+				data.tr[1] = data.tr[1] or
+				require(phonetic_extraction).getTranslit(export.remove_links(data.alt[1] or data.term[1]))
+			elseif (data.term[1] or data.alt[1]) and data.sc[1]:isTransliterated() then
+				-- Track whenever there is manual translit. The categories below like 'terms with redundant transliterations'
+				-- aren't sufficient because they only work with reference to automatic translit and won't operate at all in
+				-- languages without any automatic translit, like Persian and Hebrew.
+				if data.tr[1] then
+					local full_code = lang:getFullCode()
+				end
+				if not nevercalltr then
+					-- Try to generate a transliteration.
+					local text = data.alt[1] or data.term[1]
+					if not link_tr then
+						text = export.remove_links(text, true)
+					end
-	elseif phonetic_extraction then
+					local automated_tr = lang:transliterate(text, data.sc[1])
-		local m_phonetic = require(phonetic_extraction)
-		data.tr[1] = data.tr[1] or m_phonetic.getTranslit(export.remove_links(data.alt[1] or data.term[1]))
-	elseif (data.term[1] or data.alt[1]) and data.sc[1]:isTransliterated() then
+					if automated_tr then
-		-- Try to generate a transliteration, unless transliteration has been supplied and no_check_redundant_translit is
+						local manual_tr = data.tr[1]
-		-- given. (Checking for redundant transliteration can use up significant amounts of memory so we don't want to do
-		-- it if memory is tight. `no_check_redundant_translit` is currently set when called ultimately from
-		-- {{multitrans|...|no-check-redundant-translit=1}}.)
-		if not (data.tr[1] and no_check_redundant_translit) then
-			local text = data.alt[1] or data.term[1]
-			if not data.lang:link_tr() then
-				text = export.remove_links(text, true)
-			end
-			local automated_tr, tr_categories
-			automated_tr, data.tr_fail, tr_categories = data.lang:transliterate(text, data.sc[1])
-			if automated_tr or data.tr_fail then
-				local manual_tr = data.tr[1]
-				if manual_tr then
+						if manual_tr then
-					if (export.remove_links(manual_tr) == export.remove_links(automated_tr)) and (not data.tr_fail) then
+							if export.remove_links(manual_tr) == export.remove_links(automated_tr) then
-						insert(categories, "Terms with redundant transliterations")
+								insert(cats, lang:getFullName() .. " terms with redundant transliterations")
-						insert(categories, "Terms with redundant transliterations/" .. data.lang:getNonEtymologicalCode())
+							else
-					elseif not data.tr_fail then
+								-- Prevents Arabic root categories from flooding the tracking categories.
-						-- Prevents Arabic root categories from flooding the tracking categories.
+								if NAMESPACE ~= "Category" then
-						if mw.title.getCurrentTitle().nsText ~= "Category" then
+									insert(cats,
-							insert(categories, "Terms with manual transliterations different from the automated ones")
+										lang:getFullName() .. " terms with non-redundant manual transliterations")
-							insert(categories, "Terms with manual transliterations different from the automated ones/" .. data.lang:getNonEtymologicalCode())
+								end
+							end
 						end
-					end
-				end
-				if (not manual_tr) or data.lang:overrideManualTranslit() then
+						if not manual_tr or lang:overrideManualTranslit(data.sc[1]) then
-					data.tr[1] = automated_tr
+							data.tr[1] = automated_tr
-					for _, category in ipairs(tr_categories) do
+						end
-						insert(categories, category)
 					end
 				end
@@ Line 809: / Line 1,341: @@
 		end
 	end
 	-- Link to the transliteration entry for languages that require this
-	if data.tr[1] and data.lang:link_tr() and not (data.tr[1]:match("%[%[(.-)%]%]") or data.tr_fail) then
+	if data.tr[1] and link_tr and not data.tr[1]:match("%[%[(.-)%]%]") then
-		data.tr[1] = export.language_link{lang = data.lang, term = data.tr[1]}
+		data.tr[1] = simple_link(
-	elseif data.tr[1] and not (data.lang:link_tr() or data.tr_fail) then
+			data.tr[1],
+			nil,
+			nil,
+			lang,
+			get_script("Latn"),
+			nil,
+			cats,
+			no_alt_ast,
+			srwc
+		)
+	elseif data.tr[1] and not link_tr then
 		-- Remove the pseudo-HTML tags added by remove_links.
 		data.tr[1] = data.tr[1]:gsub("</?link>", "")
 	end
-	if data.tr[1] and gsub(data.tr[1], "[%s%p]", ""):len() == 0 then data.tr[1] = nil end
+	if data.tr[1] and not umatch(data.tr[1], "[^%s%p]") then data.tr[1] = nil end
 	insert(output, export.format_link_annotations(data, face))
-	categories = #categories > 0 and require("Module:utilities").format_categories(categories, data.lang, "-", nil, nil, data.sc) or ""
+	if data.pretext then
+		insert(output, 1, data.pretext)
-	return concat(output) .. categories
+	end
+	if data.posttext then
+		insert(output, data.posttext)
+	end
+	local categories = cats[1] and format_categories(cats, lang, "-", nil, nil, data.sc) or ""
+	output = concat(output)
+	if show_qualifiers or data.show_qualifiers then
+		output = add_qualifiers_and_refs_to_term(data, output)
+	end
+	return output .. categories
 end
@@ Line 840: / Line 1,393: @@
 		return ""
 	end
 	text = text
 		:gsub("%[%[", "\1")
@@ Line 849: / Line 1,402: @@
 		function(c1, c2, c3)
 			-- Don't remove files.
-			for _, falsePositive in ipairs({"file", "image"}) do
+			for _, false_positive in ipairs({ "file", "image" }) do
-				if c2:lower():match("^" .. falsePositive .. ":") then return c1 .. c2 .. c3 end
+				if c2:lower():match("^" .. false_positive .. ":") then return c1 .. c2 .. c3 end
 			end
 			-- Remove categories completely.
-			for _, falsePositive in ipairs({"category", "cat"}) do
+			for _, false_positive in ipairs({ "category", "cat" }) do
-				if c2:lower():match("^" .. falsePositive .. ":") then return "" end
+				if c2:lower():match("^" .. false_positive .. ":") then return "" end
 			end
 			-- In piped links, remove all text before the pipe, unless it's the final character (i.e. the pipe trick), in which case just remove the pipe.
@@ Line 864: / Line 1,417: @@
 			end
 		end)
 	text = text
 		:gsub("\1", "[[")
@@ Line 870: / Line 1,423: @@
 	return text
-end
---[=[
-This decodes old section encodings.
-For example, Norwegian_Bokm.C3.A5l → Norwegian_Bokmål.
-It isn't picky about whether the section encodings represent the UTF-8 encoding
-of a real Unicode character, so it will mangle section names that contain
-a period followed by two uppercase hex characters. At least such section names
-are probably pretty rare.
-Wiktionary adds an additional id="" attribute for sections
-using a legacy encoding, if it is different from the modern minimally modified attribute.
-It is like percent encoding (URI or URL encoding) except with "." instead of "%".
-See [[mw:Manual:$wgFragmentMode]] and the code that does the encoding at
-https://gerrit.wikimedia.org/r/plugins/gitiles/mediawiki/core/+/7bf779524ab1fd8e1d74f79ea4840564d48eea4d/includes/parser/Sanitizer.php#893
-]=]
--- The character class %x should not be used, as it includes the characters a-f,
--- which do not occur in these anchor encodings.
-local capitalHex = "[0-9A-F]"
-local function decodeAnchor(anchor)
-	return (anchor:gsub("%.(" .. capitalHex .. capitalHex .. ")",
-		function(hexByte)
-			return string.char(tonumber(hexByte, 16))
-		end))
 end
@@ Line 903: / Line 1,430: @@
 	end
-	link = link:gsub("_", " ")
+	local target, section = get_fragment((link:gsub("_", " ")))
-	local numberSigns = select(2, link:gsub("#", ""))
-	if numberSigns > 1 then
+	if not section then
-		error("The section link should only contain one number sign (#).")
+		error("No \"#\" delineating a section name")
 	end
-	link = mw.uri.decode(link, "WIKI")
+	return simple_link(
-	local page, section = link:match("^([^#]*)#(.+)$")
+		target,
-	if page == "" then
+		section,
-		page = nil
+		target .. " §&nbsp;" .. section
-	end
+	)
-	if section then
-		section = decodeAnchor(section)
-		-- URI-encode (percent-encode) section to allow square brackets and
-		-- other dodgy characters in section name.
-		-- If not percent-encoded, they prevent the parser from creating a link.
-		-- Decode percent-encoding in the displayed text
-		if page then
-			return "[[" .. page .. "#" .. mw.uri.encode(section, "WIKI")
-				.. "|" .. page .. " §&nbsp;" .. section .. "]]"
-		else
-			return "[[#" .. mw.uri.encode(section, "WIKI")
-				.. "|§&nbsp;" .. section .. "]]"
-		end
-	else
-		error("The function “section_link” could not find a number sign marking a section name.")
-	end
 end
 return export