45,647
edits
No edit summary |
No edit summary |
||
Line 60: | Line 60: | ||
(indicating a missing code). If `not_real_lang` is given, this check is suppressed. | (indicating a missing code). If `not_real_lang` is given, this check is suppressed. | ||
]=] | ]=] | ||
-- Split the text into sections, based on the presence of temporarily substituted formatting characters, then iterate over each one to apply substitutions. This avoids putting PUA characters through language-specific modules, which may be unequipped for them.
--
-- Parameters:
--   text: the string to process.
--   subbedChars: state of the temporary substitutions; when truthy, a second round of temporary substitutions is applied to each section's result (and the updated state is returned).
--   keepCarets: passed through unchanged to doTempSubstitutions.
--   self: the object whose `loadData` method and `_code` field are consulted to decide whether the text must be processed as a single contiguous section.
--   sc, substitution_data, function_name: passed through unchanged to [[Module:languages/doSubstitutions]].
--
-- Returns four values: text, fail, cats, subbedChars.
--   * On success, `text` is the input with every processed section replaced by its substitution, trimmed of leading/trailing whitespace; `fail` is nil; `cats` holds the categories collected from all sections (deduplicated).
--   * If any section yields no result or reports a failure, processing stops: `text` is that section's result (possibly nil), `fail` is its failure value, and `cats` contains only that section's categories.
local function iterateSectionSubstitutions(text, subbedChars, keepCarets, self, sc, substitution_data, function_name)
	local pe = require("Module:string utilities").pattern_escape
	local fail, cats, sections = nil, {}
	-- See [[Module:languages/data]].
	if not find(text, "\244") or self:loadData("Module:languages/data").contiguous_substitution[self._code] then
		-- No formatting placeholders (or the language requires contiguous processing): treat the whole text as one section.
		sections = {text}
	else
		sections = split(text, "\244[\128-\143][\128-\191]*", true)
	end
	for _, section in ipairs(sections) do
		-- Don't bother processing empty strings or whitespace (which may also not be handled well by dedicated modules).
		if gsub(section, "%s+", "") ~= "" then
			local sub, sub_fail, sub_cats = require("Module:languages/doSubstitutions")(section, self, sc, substitution_data, function_name)
			-- Second round of temporary substitutions, in case any formatting was added by the main substitution process. However, don't do this if the section contains formatting already (as it would have had to have been escaped to reach this stage, and therefore should be given as raw text).
			if sub and subbedChars then
				local noSub = false
				for _, pattern in ipairs(require("Module:languages/data/patterns")) do
					if match(section, pattern .. "%z?") then
						noSub = true
						-- One match is enough; no need to test the remaining patterns.
						break
					end
				end
				if not noSub then
					sub, subbedChars = doTempSubstitutions(sub, subbedChars, keepCarets, true)
				end
			end
			-- Abort on the first section that produces nothing or signals failure; its result replaces whatever had been accumulated so far.
			if (not sub) or sub_fail then
				text = sub
				fail = sub_fail
				cats = sub_cats or {}
				break
			end
			-- Swap the section for its substitution. The replacement is supplied via a function so that it is inserted verbatim: in a replacement *string* only "%" is special, so running it through the pattern escaper (as was done previously) is incorrect - it only works by accident on Lua 5.1, errors on later versions, and corrupts any character the escaper rewrites into a pattern class.
			-- `sub` is guaranteed truthy here because of the check above.
			-- NOTE(review): this replaces the first occurrence of `section` in the already-modified text; if an earlier section's output happens to contain a later section's text, the wrong span would be replaced - confirm whether that can occur in practice.
			local replacement = sub
			text = gsub(text, pe(section), function()
				return replacement
			end, 1)
			if type(sub_cats) == "table" then
				for _, cat in ipairs(sub_cats) do
					insert(cats, cat)
				end
			end
		end
	end
	-- Trim, unless there are only spacing characters, while ignoring any final formatting characters.
	text = text and text:gsub("^([\128-\191\244]*)%s+(%S)", "%1%2")
		:gsub("(%S)%s+([\128-\191\244]*)$", "%1%2")
	-- Remove duplicate categories.
	if #cats > 1 then
		cats = remove_duplicates(cats)
	end
	return text, fail, cats, subbedChars
end
local function normalize(text, sc) | local function normalize(text, sc) |