Module:script utilities: Difference between revisions

No edit summary
Minor optimisations.
Line 2: Line 2:


local anchors_module = "Module:anchors"
local anchors_module = "Module:anchors"
local debug_track_module = "Module:debug/track"
local links_module = "Module:links"
local links_module = "Module:links"
local munge_text_module = "Module:munge text"
local munge_text_module = "Module:munge text"
Line 50: Line 51:
process_params = require(parameters_module).process
process_params = require(parameters_module).process
return process_params(...)
return process_params(...)
end
-- Lazy loader for the tracking function. The first call requires the module named by
-- `debug_track_module`, rebinds the local `track` to whatever that module returns, and
-- forwards the arguments to it. Later calls made through `track` therefore reach the
-- loaded function directly and never re-enter this stub.
local function track(...)
	local loaded = require(debug_track_module)
	track = loaded
	return loaded(...)
end
end


Line 69: Line 75:
--[==[
--[==[
Loaders for objects, which load data (or some other object) into some variable, which can then be accessed as "foo or get_foo()", where the function get_foo sets the object to "foo" and then returns it. This ensures they are only loaded when needed, and avoids the need to check for the existence of the object each time, since once "foo" has been set, "get_foo" will not be called again.]==]
Loaders for objects, which load data (or some other object) into some variable, which can then be accessed as "foo or get_foo()", where the function get_foo sets the object to "foo" and then returns it. This ensures they are only loaded when needed, and avoids the need to check for the existence of the object each time, since once "foo" has been set, "get_foo" will not be called again.]==]
local m_data
local m_data
local function get_data()
local function get_data()
m_data, get_data = mw.loadData("Module:script utilities/data"), nil
m_data, get_data = mw.loadData("Module:script utilities/data"), nil
return m_data
return m_data
end
end


--[=[
--[=[
Line 84: Line 90:
[[Module:parameters]]
[[Module:parameters]]
[[Module:utilities]]
[[Module:utilities]]
[[Module:debug/track]]
]=]
]=]


Line 138: Line 145:
end
end


local function Kore_ruby(txt)
-- Ustring turns on the codepoint-aware string matching. The basic string function
return (ugsub(txt, "([%-".. get_script("Hani"):getCharacters() .. "]+)%(([%-" .. get_script("Hang"):getCharacters() .. "]+)%)", "<ruby>%1<rp>(</rp><rt>%2</rt><rp>)</rp></ruby>"))
-- should be used for simple sequences of characters, Ustring function for
-- sets – [].
-- Emits the tracking key "script/" .. tracking when `pattern` is found in `text`.
-- `text`: the string to search.
-- `pattern`: the pattern handed to umatch (presumably mw.ustring.match -- confirm against
--   the file header); a nil/false pattern disables the check entirely.
-- `tracking`: suffix appended to "script/" to form the key passed to track().
-- Returns nothing.
local function trackPattern(text, pattern, tracking)
	if not pattern then
		return
	end
	if umatch(text, pattern) then
		track("script/" .. tracking)
	end
end
 
-- Runs language-specific checks on `text` and emits a tracking key (via trackPattern)
-- for each suspicious character or sequence found.
-- `text`: the string to inspect; nothing is done if it is nil/false.
-- `lang`: language object; only lang:getFullCode() is used. Nothing is done if it is nil/false.
-- `sc`: accepted for the callers' benefit but never read by this function.
-- Returns nothing.
local function track_text(text, lang, sc)
	if not (lang and text) then
		return
	end

	local langCode = lang:getFullCode()

	if langCode == "ang" then
		-- [[Special:WhatLinksHere/Wiktionary:Tracking/script/ang/acute]]
		-- Checked on the toNFD() result so that U+0301 is searched for as its own character.
		trackPattern(toNFD(text), u(0x301), "ang/acute")

	elseif langCode == "el" or langCode == "grc" then
		--[=[
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Greek/wrong-phi]]
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Greek/wrong-theta]]
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Greek/wrong-kappa]]
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Greek/wrong-rho]]
		ϑ, ϰ, ϱ, ϕ should generally be replaced with θ, κ, ρ, φ.
		]=]
		trackPattern(text, "ϑ", "Greek/wrong-theta")
		trackPattern(text, "ϰ", "Greek/wrong-kappa")
		trackPattern(text, "ϱ", "Greek/wrong-rho")
		trackPattern(text, "ϕ", "Greek/wrong-phi")

		--[=[
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Ancient Greek/spacing-coronis]]
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Ancient Greek/spacing-smooth-breathing]]
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Ancient Greek/wrong-apostrophe]]
		When spacing coronis and spacing smooth breathing are used as apostrophes,
		they should be replaced with right single quotation marks (’).
		]=]
		if langCode == "grc" then
			local coronis, smooth_breathing = u(0x1FBD), u(0x1FBF)
			trackPattern(text, coronis, "Ancient Greek/spacing-coronis")
			trackPattern(text, smooth_breathing, "Ancient Greek/spacing-smooth-breathing")
			-- trackPattern takes exactly three arguments; the stray fourth one that used
			-- to be passed here was ignored and has been dropped.
			trackPattern(text, "[" .. coronis .. smooth_breathing .. "]", "Ancient Greek/wrong-apostrophe")
		end

	elseif langCode == "ru" then
		-- [[Special:WhatLinksHere/Wiktionary:Tracking/script/Russian/grave-accent]]
		-- Checked on the toNFD() result so that U+0300 is searched for as its own character.
		trackPattern(toNFD(text), u(0x300), "Russian/grave-accent")

	elseif langCode == "cv" then
		-- [[Special:WhatLinksHere/Wiktionary:Tracking/script/Chuvash/latin-homoglyph]]
		trackPattern(text, "[ĂăĔĕÇçŸÿ]", "Chuvash/latin-homoglyph")

	elseif langCode == "bo" then
		-- [[Special:WhatLinksHere/Wiktionary:Tracking/script/Tibetan/trailing-punctuation]]
		-- Second pattern catches the same punctuation sitting just inside a closing "]]".
		trackPattern(text, "[་།]$", "Tibetan/trailing-punctuation")
		trackPattern(text, "[་།]%]%]$", "Tibetan/trailing-punctuation")

	elseif langCode == "th" then
		--[=[
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Thai/broken-ae]]
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Thai/broken-am]]
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Thai/wrong-rue-lue]]
		]=]
		trackPattern(text, "เ".."เ", "Thai/broken-ae")
		trackPattern(text, "ํ[่้๊๋]?า", "Thai/broken-am")
		trackPattern(text, "[ฤฦ]า", "Thai/wrong-rue-lue")

	elseif langCode == "lo" then
		--[=[
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Lao/broken-ae]]
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Lao/broken-am]]
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Lao/possible-broken-ho-no]]
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Lao/possible-broken-ho-mo]]
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Lao/possible-broken-ho-lo]]
		]=]
		trackPattern(text, "ເ".."ເ", "Lao/broken-ae")
		trackPattern(text, "ໍ[່້໊໋]?າ", "Lao/broken-am")
		trackPattern(text, "ຫນ", "Lao/possible-broken-ho-no")
		trackPattern(text, "ຫມ", "Lao/possible-broken-ho-mo")
		trackPattern(text, "ຫລ", "Lao/possible-broken-ho-lo")

	elseif langCode == "khb" then
		--[=[
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Lü/broken-ae]]
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Lü/possible-wrong-sequence]]
		]=]
		trackPattern(text, "ᦵ".."ᦵ", "Lü/broken-ae")
		trackPattern(text, "[ᦀ-ᦫ][ᦵᦶᦷᦺ]", "Lü/possible-wrong-sequence")
	end
end
 
-- Converts "X(Y)" sequences into ruby markup, where X is a run of hyphens and/or
-- characters from the "Hani" script's character set and Y is a run of hyphens and/or
-- characters from the "Hang" script's character set. Each match becomes
-- <ruby>X<rp>(</rp><rt>Y</rt><rp>)</rp></ruby>.
--
-- The function replaces itself on first use: the first call fetches both character
-- sets, builds the pattern once, and rebinds `Kore_ruby` to a closure over it, so later
-- calls neither look the scripts up again nor re-concatenate the pattern string.
--
-- `txt` (first argument): the text to convert.
-- Returns the converted text only (any further ugsub results are discarded).
local function Kore_ruby(...)

	-- Cache character sets on the first call.
	local Hang_chars = get_script("Hang"):getCharacters()
	local Hani_chars = get_script("Hani"):getCharacters()

	-- Both strings are invariant across calls, so build them once here instead of
	-- on every invocation of the inner function.
	local pattern = "([%-" .. Hani_chars .. "]+)%(([%-" .. Hang_chars .. "]+)%)"
	local replacement = "<ruby>%1<rp>(</rp><rt>%2</rt><rp>)</rp></ruby>"

	-- Overwrite with the actual function, which is called directly on subsequent calls.
	function Kore_ruby(txt)
		-- The extra parentheses truncate the result list to the first value.
		return (ugsub(txt, pattern, replacement))
	end

	return Kore_ruby(...)
end
end


Line 163: Line 276:
end
end
end
end
track_text(text, lang, sc)


-- Replace space characters with newlines in Mongolian-script text, which is written top-to-bottom.
-- Replace space characters with newlines in Mongolian-script text, which is written top-to-bottom.
Line 192: Line 307:
-- By the time we are called, > signs have been converted to &gt; by a call to encode_entities() in
-- By the time we are called, > signs have been converted to &gt; by a call to encode_entities() in
-- make_link() in [[Module:links]] (near the bottom of the function).
-- make_link() in [[Module:links]] (near the bottom of the function).
display = display:gsub("&gt;", "")
-- 'g' in Middle Korean is a special sign to treat the following ㅇ sign as /G/ instead of null.
-- 'g' in Middle Korean is a special sign to treat the following ㅇ sign as /G/ instead of null.
display = display:gsub("g", "")
display = display:gsub("&gt;", ""):gsub("g", "")
end
if display:find("<") then
display = munge_text(display, function(txt)
txt = txt:gsub("(.)%-(%-?)(.)", "%1%2%3")
return Kore_ruby(txt)
end)
else
display = display:gsub("(.)%-(%-?)(.)", "%1%2%3")
display = Kore_ruby(display)
end
end
display = display:gsub("(.)%-(%-?)(.)", "%1%2%3")
display = Kore_ruby(display)
text = "[[" .. title .. "|" .. display .. "]]"
text = "[[" .. title .. "|" .. display .. "]]"
else
else
text = munge_text(text, function(txt)
text = munge_text(text, function(txt)
if lang and lang:getCode() == "okm" then
if lang and lang:getCode() == "okm" then
txt = txt:gsub("&gt;", "")
txt = txt:gsub("&gt;", ""):gsub("g", "")
txt = txt:gsub("g", "")
end
end
if txt == text then -- special case for the entire text being plain
if txt == text then -- special case for the entire text being plain
Line 210: Line 330:
txt = txt:gsub("%-(%-?)", "%1")
txt = txt:gsub("%-(%-?)", "%1")
end
end
txt = Kore_ruby(txt)
return Kore_ruby(txt)
return txt
end)
end)
end
end
Line 218: Line 337:
if sc:getCode() == "Image" then
if sc:getCode() == "Image" then
face = nil
face = nil
end
if face == "hypothetical" then
-- [[Special:WhatLinksHere/Wiktionary:Tracking/script-utilities/face/hypothetical]]
track("script-utilities/face/hypothetical")
end
end


Line 294: Line 418:


if attributes then
if attributes then
track("tag_translit/attributes")
insert(opening_tag, attributes)
insert(opening_tag, attributes)
end
end
Line 326: Line 451:


if attributes then
if attributes then
track("tag_transcription/attributes")
insert(opening_tag, attributes)
insert(opening_tag, attributes)
end
end
Line 404: Line 530:
-- If there are no non-Latin scripts, return nothing.
-- If there are no non-Latin scripts, return nothing.
if not has_nonlatin then
if not has_nonlatin and lang:getCode() ~= "und" then
return ""
return ""
end
end