Module:languages: Difference between revisions

no edit summary
No edit summary
No edit summary
Line 60: Line 60:
(indicating a missing code). If `not_real_lang` is given, this check is suppressed.
(indicating a missing code). If `not_real_lang` is given, this check is suppressed.
]=]
]=]
-- Split the text into sections, based on the presence of temporarily substituted formatting characters, then iterate over each one to apply substitutions. This avoids putting PUA characters through language-specific modules, which may be unequipped for them.
--
-- Parameters:
--   text              the string to process.
--   subbedChars       state for temporary substitutions; when falsy, the second round of temporary substitutions is skipped. It is passed to (and replaced by the second return value of) doTempSubstitutions.
--   keepCarets        passed through unchanged to doTempSubstitutions.
--   self              the language object; `self:loadData` and `self._code` are read here.
--   sc, substitution_data, function_name
--                     passed through unchanged to [[Module:languages/doSubstitutions]].
--
-- Returns four values: the processed text (whatever the substitution module returned for the offending section - possibly nil - if a section failed), the failure value reported for that section (nil otherwise), a list of categories with duplicates removed, and the (possibly updated) subbedChars.
local function iterateSectionSubstitutions(text, subbedChars, keepCarets, self, sc, substitution_data, function_name)
	local pe = require("Module:string utilities").pattern_escape
	local fail, cats, sections = nil, {}
	-- See [[Module:languages/data]].
	if not find(text, "\244") or self:loadData("Module:languages/data").contiguous_substitution[self._code] then
		sections = {text}
	else
		sections = split(text, "\244[\128-\143][\128-\191]*", true)
	end
	for _, section in ipairs(sections) do
		-- Don't bother processing empty strings or whitespace (which may also not be handled well by dedicated modules).
		if gsub(section, "%s+", "") ~= "" then
			-- Loaded lazily, so the module is only pulled in when there is something to substitute.
			local sub, sub_fail, sub_cats = require("Module:languages/doSubstitutions")(section, self, sc, substitution_data, function_name)
			-- Second round of temporary substitutions, in case any formatting was added by the main substitution process. However, don't do this if the section contains formatting already (as it would have had to have been escaped to reach this stage, and therefore should be given as raw text).
			if sub and subbedChars then
				local noSub
				for _, pattern in ipairs(require("Module:languages/data/patterns")) do
					if match(section, pattern .. "%z?") then
						noSub = true
						-- One match is enough; the remaining patterns cannot change the outcome.
						break
					end
				end
				if not noSub then
					sub, subbedChars = doTempSubstitutions(sub, subbedChars, keepCarets, true)
				end
			end
			-- On failure, abandon the remaining sections and hand back whatever this section produced.
			if (not sub) or sub_fail then
				text = sub
				fail = sub_fail
				cats = sub_cats or {}
				break
			end
			-- Swap the first occurrence of the original section for its substituted form. `sub` is known to be truthy here (see the check above). The replacement is supplied as a function so that `sub` is inserted verbatim: a pattern escaper is the wrong tool for a replacement string, where only "%" is special, and using one only works by relying on Lua 5.1's lenient handling of "%" followed by a non-digit.
			text = gsub(text, pe(section), function() return sub end, 1)
			if type(sub_cats) == "table" then
				for _, cat in ipairs(sub_cats) do
					insert(cats, cat)
				end
			end
		end
	end
	-- Trim, unless there are only spacing characters, while ignoring any final formatting characters.
	text = text and text:gsub("^([\128-\191\244]*)%s+(%S)", "%1%2")
		:gsub("(%S)%s+([\128-\191\244]*)$", "%1%2")
	-- Remove duplicate categories.
	if #cats > 1 then
		cats = remove_duplicates(cats)
	end
	return text, fail, cats, subbedChars
end


local function normalize(text, sc)
local function normalize(text, sc)