Module:script utilities: Difference between revisions

no edit summary
No edit summary
No edit summary
Line 41: Line 41:
return export.tag_text(text, lang, sc, face, class)
return export.tag_text(text, lang, sc, face, class)
end
-- Records a tracking entry when `pattern` occurs in `text`.
--
-- Parameters:
--   text     (string)      text to search.
--   pattern  (string|nil)  Lua pattern to look for; when nil, nothing is tracked.
--   tracking (string)      tracking key suffix, e.g. "ang/acute"; the key that
--                          gets tracked is "script/" .. tracking.
--   ustring  (any)         when truthy, match with mw.ustring.find, which is
--                          codepoint-aware; otherwise use the byte-based
--                          string.find.
--
-- The basic string function is enough for a plain sequence of characters.
-- Pass `ustring` for patterns containing sets ([...]) of multi-byte
-- characters, which a byte-based search would treat as sets of single bytes.
--
-- Returns nothing.
local function trackPattern(text, pattern, tracking, ustring)
	local find = ustring and mw.ustring.find or string.find
	if pattern and find(text, pattern) then
		-- NOTE(review): Module:debug's track() is assumed to be the tracker
		-- behind the Template:tracking/script/... pages -- confirm.
		require("Module:debug").track("script/" .. tracking)
	end
end
-- Runs language-specific checks on `text` and hands every check to
-- trackPattern, which receives the text, a pattern and a tracking key.
-- The page associated with each key is noted beside the check as
-- [[Special:WhatLinksHere/Template:tracking/script/<key>]].
--
-- Parameters:
--   text  string to inspect; the function does nothing when it is nil/false.
--   lang  language object exposing :getCode(); the function does nothing
--         when it is nil/false.
--   sc    accepted but not consulted by any of the checks below.
--
-- Returns nothing.
local function track(text, lang, sc)
	local char = mw.ustring.char

	-- Both a language and a text are needed to pick and run the checks.
	if not (lang and text) then
		return
	end

	local code = lang:getCode()

	if code == "ang" then
		-- [[Special:WhatLinksHere/Template:tracking/script/ang/acute]]
		-- Searched in the NFD form of the text for U+0301.
		trackPattern(mw.ustring.toNFD(text), char(0x301), "ang/acute")

	elseif code == "el" or code == "grc" then
		-- [[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-phi]]
		-- [[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-theta]]
		-- [[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-kappa]]
		-- [[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-rho]]
		-- ϑ, ϰ, ϱ, ϕ should generally be replaced with θ, κ, ρ, φ.
		trackPattern(text, "ϑ", "Greek/wrong-theta")
		trackPattern(text, "ϰ", "Greek/wrong-kappa")
		trackPattern(text, "ϱ", "Greek/wrong-rho")
		trackPattern(text, "ϕ", "Greek/wrong-phi")

		if code == "grc" then
			-- [[Special:WhatLinksHere/Template:tracking/script/Ancient Greek/spacing-coronis]]
			-- [[Special:WhatLinksHere/Template:tracking/script/Ancient Greek/spacing-smooth-breathing]]
			-- [[Special:WhatLinksHere/Template:tracking/script/Ancient Greek/wrong-apostrophe]]
			-- When spacing coronis and spacing smooth breathing are used as
			-- apostrophes, they should be replaced with right single
			-- quotation marks (’).
			trackPattern(text, char(0x1FBD), "Ancient Greek/spacing-coronis")
			trackPattern(text, char(0x1FBF), "Ancient Greek/spacing-smooth-breathing")
			trackPattern(text, "[" .. char(0x1FBD) .. char(0x1FBF) .. "]", "Ancient Greek/wrong-apostrophe", true)
		end

	elseif code == "ru" then
		-- [[Special:WhatLinksHere/Template:tracking/script/Russian/grave-accent]]
		-- Searched in the NFD form of the text for U+0300.
		local nfd = mw.ustring.toNFD(text)
		trackPattern(nfd, char(0x300), "Russian/grave-accent")

	elseif code == "bo" then
		-- [[Special:WhatLinksHere/Template:tracking/script/Tibetan/trailing-punctuation]]
		-- Checked at the very end of the text, and again just before a
		-- closing "]]" at the end of the text.
		trackPattern(text, "[་།]$", "Tibetan/trailing-punctuation", true)
		trackPattern(text, "[་།]%]%]$", "Tibetan/trailing-punctuation", true)

	elseif code == "th" then
		-- [[Special:WhatLinksHere/Template:tracking/script/Thai/broken-ae]]
		-- [[Special:WhatLinksHere/Template:tracking/script/Thai/broken-am]]
		-- [[Special:WhatLinksHere/Template:tracking/script/Thai/wrong-rue-lue]]
		trackPattern(text, "เ".."เ", "Thai/broken-ae")
		trackPattern(text, "ํ[่้๊๋]?า", "Thai/broken-am", true)
		trackPattern(text, "[ฤฦ]า", "Thai/wrong-rue-lue", true)

	elseif code == "lo" then
		-- [[Special:WhatLinksHere/Template:tracking/script/Lao/broken-ae]]
		-- [[Special:WhatLinksHere/Template:tracking/script/Lao/broken-am]]
		-- [[Special:WhatLinksHere/Template:tracking/script/Lao/possible-broken-ho-no]]
		-- [[Special:WhatLinksHere/Template:tracking/script/Lao/possible-broken-ho-mo]]
		-- [[Special:WhatLinksHere/Template:tracking/script/Lao/possible-broken-ho-lo]]
		trackPattern(text, "ເ".."ເ", "Lao/broken-ae")
		trackPattern(text, "ໍ[່້໊໋]?າ", "Lao/broken-am", true)
		trackPattern(text, "ຫນ", "Lao/possible-broken-ho-no")
		trackPattern(text, "ຫມ", "Lao/possible-broken-ho-mo")
		trackPattern(text, "ຫລ", "Lao/possible-broken-ho-lo")

	elseif code == "khb" then
		-- [[Special:WhatLinksHere/Template:tracking/script/Lü/broken-ae]]
		-- [[Special:WhatLinksHere/Template:tracking/script/Lü/possible-wrong-sequence]]
		trackPattern(text, "ᦵ".."ᦵ", "Lü/broken-ae")
		trackPattern(text, "[ᦀ-ᦫ][ᦵᦶᦷᦺ]", "Lü/possible-wrong-sequence", true)
	end
end
end


Line 140: Line 48:
sc = require("Module:scripts").findBestScript(text, lang)
sc = require("Module:scripts").findBestScript(text, lang)
end
end
track(text, lang, sc)
-- Replace space characters with newlines in Mongolian-script text, which is written top-to-bottom.
-- Replace space characters with newlines in Mongolian-script text, which is written top-to-bottom.