45,646
edits
No edit summary |
No edit summary |
||
Line 116: | Line 116: | ||
return text, fail, cats, subbedChars | return text, fail, cats, subbedChars | ||
end | |||
-- Temporarily convert various formatting characters to PUA to prevent them from being disrupted by the substitution process.
--
-- Parameters:
--   text        - the string to process.
--   subbedChars - array of previously substituted strings; new entries are appended (the table is mutated in place).
--   keepCarets  - if truthy, carets ("^"), optionally preceded by backslashes, are also substituted.
--   noTrim      - if falsy, leading and trailing whitespace is substituted as well so it cannot be trimmed later.
--
-- Returns the processed text and the same `subbedChars` table.
--
-- Each substituted capture is replaced by a four-byte placeholder: "\244" followed by three bytes in the range
-- \128-\191, each carrying six bits of the capture's index in `subbedChars`. The second byte is offset by 128 for
-- term dividers and by 136 otherwise, which is what undoTempSubstitutions relies on when matching both variants.
local function doTempSubstitutions(text, subbedChars, keepCarets, noTrim)
	-- Clone so that we don't insert any extra patterns into the table in package.loaded. For some reason, using require seems to keep memory use down; probably because the table is always cloned.
	local patterns = shallowcopy(require("Module:languages/data/patterns"))
	if keepCarets then
		insert(patterns, "((\\+)%^)")
		insert(patterns, "((%^))")
	end
	-- Ensure any whitespace at the beginning and end is temp substituted, to prevent it from being accidentally trimmed. We only want to trim any final spaces added during the substitution process (e.g. by a module), which means we only do this during the first round of temp substitutions.
	if not noTrim then
		insert(patterns, "^([\128-\191\244]*(%s+))")
		insert(patterns, "((%s+)[\128-\191\244]*)$")
	end
	-- Pre-substitution of "[[" and "]]", which makes pattern matching more accurate.
	text = gsub(text, "%f[%[]%[%[", "\1")
		:gsub("%f[%]]%]%]", "\2")
	local i, pe = #subbedChars, require("Module:string utilities").pattern_escape
	for _, pattern in ipairs(patterns) do
		-- Patterns ending in \0 are for things like "[[" or "]]", so the inserted PUA are treated as breaks between terms by modules that scrape info from pages.
		-- The trailing \0 is only a flag: it is stripped from the pattern before matching.
		local term_divider
		pattern = gsub(pattern, "%z$", function(divider)
			term_divider = divider == "\0"
			return ""
		end)
		text = gsub(text, pattern, function(...)
			-- m[1] is the outermost capture (the text that gets rewritten); m[2] onwards are the pieces to hide.
			local m = {...}
			local m1New = m[1]
			for k = 2, #m do
				local n = i + k - 1
				subbedChars[n] = m[k]
				local byte2 = floor(n / 4096) % 64 + (term_divider and 128 or 136)
				local byte3 = floor(n / 64) % 64 + 128
				local byte4 = n % 64 + 128
				-- Only the first occurrence is replaced, so repeated captures are consumed one at a time.
				m1New = gsub(m1New, pe(m[k]), "\244" .. char(byte2) .. char(byte3) .. char(byte4), 1)
			end
			i = i + #m - 1
			return m1New
		end)
	end
	-- Restore any remaining "[[" and "]]". Plain replacement strings are used because "%" followed by a non-digit in a replacement is only tolerated by Lua 5.1.
	text = gsub(text, "\1", "[[")
		:gsub("\2", "]]")
	return text, subbedChars
end
-- Reinsert any formatting that was temporarily substituted.
--
-- Parameters:
--   text        - string containing the four-byte placeholders produced by doTempSubstitutions.
--   subbedChars - array of the original strings, indexed by the number encoded in each placeholder.
--
-- Returns the text with every placeholder replaced by its stored string, and with any remaining "\1" / "\2"
-- markers turned back into "[[" / "]]".
local function undoTempSubstitutions(text, subbedChars)
	for i = 1, #subbedChars do
		-- Rebuild the three index-carrying bytes for entry i (six bits each).
		local byte2 = floor(i / 4096) % 64 + 128
		local byte3 = floor(i / 64) % 64 + 128
		local byte4 = i % 64 + 128
		local original = subbedChars[i]
		-- The second byte may carry either offset (128 or 136), so match both variants.
		-- A function replacement inserts the stored text verbatim, so no "%" escaping is needed.
		text = gsub(text, "\244[" .. char(byte2) .. char(byte2 + 8) .. "]" .. char(byte3) .. char(byte4), function()
			return original
		end)
	end
	-- Plain replacement strings: "%" followed by a non-digit in a replacement is only tolerated by Lua 5.1.
	text = gsub(text, "\1", "[[")
		:gsub("\2", "]]")
	return text
end