Module:script utilities: Difference between revisions
No edit summary |
Minor optimisations. |
||
| Line 2: | Line 2: | ||
local anchors_module = "Module:anchors" | local anchors_module = "Module:anchors" | ||
local debug_track_module = "Module:debug/track" | |||
local links_module = "Module:links" | local links_module = "Module:links" | ||
local munge_text_module = "Module:munge text" | local munge_text_module = "Module:munge text" | ||
| Line 50: | Line 51: | ||
process_params = require(parameters_module).process | process_params = require(parameters_module).process | ||
return process_params(...) | return process_params(...) | ||
end | |||
-- Lazy loader for the tracking function: the first call requires the module
-- named by debug_track_module, rebinds `track` to what it returns, and
-- forwards the arguments. Later calls go straight to the loaded function.
local function track(...)
	local loaded = require(debug_track_module)
	track = loaded
	return loaded(...)
end
| Line 69: | Line 75: | ||
--[==[ | --[==[ | ||
Loaders for objects, which load data (or some other object) into some variable, which can then be accessed as "foo or get_foo()", where the function get_foo sets the object to "foo" and then returns it. This ensures they are only loaded when needed, and avoids the need to check for the existence of the object each time, since once "foo" has been set, "get_foo" will not be called again.]==] | Loaders for objects, which load data (or some other object) into some variable, which can then be accessed as "foo or get_foo()", where the function get_foo sets the object to "foo" and then returns it. This ensures they are only loaded when needed, and avoids the need to check for the existence of the object each time, since once "foo" has been set, "get_foo" will not be called again.]==] | ||
-- Holds the data table once loaded; meant to be read as "m_data or get_data()".
local m_data
-- One-shot loader: stores the result of mw.loadData in m_data, clears
-- get_data itself, and returns the loaded table.
local function get_data()
	local loaded = mw.loadData("Module:script utilities/data")
	m_data = loaded
	get_data = nil
	return loaded
end
--[=[ | --[=[ | ||
| Line 84: | Line 90: | ||
[[Module:parameters]] | [[Module:parameters]] | ||
[[Module:utilities]] | [[Module:utilities]] | ||
[[Module:debug/track]] | |||
]=] | ]=] | ||
| Line 138: | Line 145: | ||
end | end | ||
local function | -- Ustring turns on the codepoint-aware string matching. The basic string function | ||
-- should be used for simple sequences of characters, Ustring function for | |||
-- sets – []. | |||
-- Adds the tracking key "script/<tracking>" when `pattern` is given and
-- matches somewhere in `text` (matching is done with umatch).
local function trackPattern(text, pattern, tracking)
	if not pattern then
		return
	end
	if umatch(text, pattern) then
		track("script/" .. tracking)
	end
end
-- Runs language-specific checks on `text` and records a tracking key for each
-- problem character or sequence found. The language is selected by
-- lang:getFullCode(). Nothing happens unless both `lang` and `text` are given.
-- Every check goes through trackPattern, which prefixes the key with "script/".
--   text: the string to inspect.
--   lang: language object; only its getFullCode method is used here.
--   sc:   accepted for the caller's convenience but not consulted here.
local function track_text(text, lang, sc)
	if lang and text then
		local langCode = lang:getFullCode()
		-- [[Special:WhatLinksHere/Wiktionary:Tracking/script/ang/acute]]
		if langCode == "ang" then
			-- Decompose first so that an acute on a precomposed letter is exposed
			-- as the separate combining character U+0301.
			local decomposed = toNFD(text)
			local acute = u(0x301)
			trackPattern(decomposed, acute, "ang/acute")
		--[=[
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Greek/wrong-phi]]
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Greek/wrong-theta]]
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Greek/wrong-kappa]]
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Greek/wrong-rho]]
		ϑ, ϰ, ϱ, ϕ should generally be replaced with θ, κ, ρ, φ.
		]=]
		elseif langCode == "el" or langCode == "grc" then
			trackPattern(text, "ϑ", "Greek/wrong-theta")
			trackPattern(text, "ϰ", "Greek/wrong-kappa")
			trackPattern(text, "ϱ", "Greek/wrong-rho")
			trackPattern(text, "ϕ", "Greek/wrong-phi")
			--[=[
			[[Special:WhatLinksHere/Wiktionary:Tracking/script/Ancient Greek/spacing-coronis]]
			[[Special:WhatLinksHere/Wiktionary:Tracking/script/Ancient Greek/spacing-smooth-breathing]]
			[[Special:WhatLinksHere/Wiktionary:Tracking/script/Ancient Greek/wrong-apostrophe]]
			When spacing coronis and spacing smooth breathing are used as apostrophes,
			they should be replaced with right single quotation marks (’).
			]=]
			if langCode == "grc" then
				trackPattern(text, u(0x1FBD), "Ancient Greek/spacing-coronis")
				trackPattern(text, u(0x1FBF), "Ancient Greek/spacing-smooth-breathing")
				-- trackPattern takes exactly three arguments; the bracket set below
				-- must be interpreted as a pattern, so no extra flag is passed.
				trackPattern(text, "[" .. u(0x1FBD) .. u(0x1FBF) .. "]", "Ancient Greek/wrong-apostrophe")
			end
		-- [[Special:WhatLinksHere/Wiktionary:Tracking/script/Russian/grave-accent]]
		elseif langCode == "ru" then
			-- Decompose so that a grave accent shows up as combining U+0300.
			local decomposed = toNFD(text)
			trackPattern(decomposed, u(0x300), "Russian/grave-accent")
		-- [[Special:WhatLinksHere/Wiktionary:Tracking/script/Chuvash/latin-homoglyph]]
		elseif langCode == "cv" then
			trackPattern(text, "[ĂăĔĕÇçŸÿ]", "Chuvash/latin-homoglyph")
		-- [[Special:WhatLinksHere/Wiktionary:Tracking/script/Tibetan/trailing-punctuation]]
		elseif langCode == "bo" then
			-- Checked twice: punctuation at the very end of the text, and
			-- punctuation immediately before a closing "]]" at the end.
			trackPattern(text, "[་།]$", "Tibetan/trailing-punctuation")
			trackPattern(text, "[་།]%]%]$", "Tibetan/trailing-punctuation")
		--[=[
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Thai/broken-ae]]
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Thai/broken-am]]
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Thai/wrong-rue-lue]]
		]=]
		elseif langCode == "th" then
			trackPattern(text, "เ".."เ", "Thai/broken-ae")
			trackPattern(text, "ํ[่้๊๋]?า", "Thai/broken-am")
			trackPattern(text, "[ฤฦ]า", "Thai/wrong-rue-lue")
		--[=[
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Lao/broken-ae]]
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Lao/broken-am]]
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Lao/possible-broken-ho-no]]
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Lao/possible-broken-ho-mo]]
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Lao/possible-broken-ho-lo]]
		]=]
		elseif langCode == "lo" then
			trackPattern(text, "ເ".."ເ", "Lao/broken-ae")
			trackPattern(text, "ໍ[່້໊໋]?າ", "Lao/broken-am")
			trackPattern(text, "ຫນ", "Lao/possible-broken-ho-no")
			trackPattern(text, "ຫມ", "Lao/possible-broken-ho-mo")
			trackPattern(text, "ຫລ", "Lao/possible-broken-ho-lo")
		--[=[
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Lü/broken-ae]]
		[[Special:WhatLinksHere/Wiktionary:Tracking/script/Lü/possible-wrong-sequence]]
		]=]
		elseif langCode == "khb" then
			trackPattern(text, "ᦵ".."ᦵ", "Lü/broken-ae")
			trackPattern(text, "[ᦀ-ᦫ][ᦵᦶᦷᦺ]", "Lü/possible-wrong-sequence")
		end
	end
end
-- Wraps "X(Y)" sequences in <ruby> markup, where X is a run of characters from
-- the "Hani" script's character set (or hyphens) and Y is a run from the
-- "Hang" script's character set (or hyphens).
-- The first call fetches both character sets, replaces Kore_ruby with the
-- real worker, and then delegates to it; later calls hit the worker directly.
local function Kore_ruby(...)
	local hang_set = get_script("Hang"):getCharacters()
	local hani_set = get_script("Hani"):getCharacters()
	local pattern = "([%-".. hani_set .. "]+)%(([%-" .. hang_set .. "]+)%)"
	local replacement = "<ruby>%1<rp>(</rp><rt>%2</rt><rp>)</rp></ruby>"
	Kore_ruby = function(txt)
		-- Parentheses drop ugsub's second return value (the substitution count).
		return (ugsub(txt, pattern, replacement))
	end
	return Kore_ruby(...)
end
| Line 163: | Line 276: | ||
end | end | ||
end | end | ||
track_text(text, lang, sc) | |||
-- Replace space characters with newlines in Mongolian-script text, which is written top-to-bottom. | -- Replace space characters with newlines in Mongolian-script text, which is written top-to-bottom. | ||
| Line 192: | Line 307: | ||
-- By the time we are called, > signs have been converted to &gt; by a call to encode_entities() in | -- By the time we are called, > signs have been converted to &gt; by a call to encode_entities() in | ||
-- make_link() in [[Module:links]] (near the bottom of the function). | -- make_link() in [[Module:links]] (near the bottom of the function). | ||
-- 'g' in Middle Korean is a special sign to treat the following ㅇ sign as /G/ instead of null. | -- 'g' in Middle Korean is a special sign to treat the following ㅇ sign as /G/ instead of null. | ||
display = display:gsub("g", "") | display = display:gsub(">", ""):gsub("g", "") | ||
end | |||
if display:find("<") then | |||
display = munge_text(display, function(txt) | |||
txt = txt:gsub("(.)%-(%-?)(.)", "%1%2%3") | |||
return Kore_ruby(txt) | |||
end) | |||
else | |||
display = display:gsub("(.)%-(%-?)(.)", "%1%2%3") | |||
display = Kore_ruby(display) | |||
end | end | ||
text = "[[" .. title .. "|" .. display .. "]]" | text = "[[" .. title .. "|" .. display .. "]]" | ||
else | else | ||
text = munge_text(text, function(txt) | text = munge_text(text, function(txt) | ||
if lang and lang:getCode() == "okm" then | if lang and lang:getCode() == "okm" then | ||
txt = txt:gsub(">", "") | txt = txt:gsub(">", ""):gsub("g", "") | ||
end | end | ||
if txt == text then -- special case for the entire text being plain | if txt == text then -- special case for the entire text being plain | ||
| Line 210: | Line 330: | ||
txt = txt:gsub("%-(%-?)", "%1") | txt = txt:gsub("%-(%-?)", "%1") | ||
end | end | ||
return Kore_ruby(txt) | |||
end) | end) | ||
end | end | ||
| Line 218: | Line 337: | ||
if sc:getCode() == "Image" then | if sc:getCode() == "Image" then | ||
face = nil | face = nil | ||
end | |||
if face == "hypothetical" then | |||
-- [[Special:WhatLinksHere/Wiktionary:Tracking/script-utilities/face/hypothetical]] | |||
track("script-utilities/face/hypothetical") | |||
end | end | ||
| Line 294: | Line 418: | ||
if attributes then | if attributes then | ||
track("tag_translit/attributes") | |||
insert(opening_tag, attributes) | insert(opening_tag, attributes) | ||
end | end | ||
| Line 326: | Line 451: | ||
if attributes then | if attributes then | ||
track("tag_transcription/attributes") | |||
insert(opening_tag, attributes) | insert(opening_tag, attributes) | ||
end | end | ||
| Line 404: | Line 530: | ||
-- If there are no non-Latin scripts, return nothing. | -- If there are no non-Latin scripts, return nothing. | ||
if not has_nonlatin then | if not has_nonlatin and lang:getCode() ~= "und" then | ||
return "" | return "" | ||
end | end | ||