45,647
edits
No edit summary |
No edit summary |
||
Line 9: | Line 9: | ||
[[Module:languages]] | [[Module:languages]] | ||
[[Module:parameters]] | [[Module:parameters]] | ||
[[Module:utilities]] | [[Module:utilities/format_categories]] | ||
]=] | ]=] | ||
function export.is_Latin_script(sc) | function export.is_Latin_script(sc) | ||
-- Latn, Latf, Latinx, pjt-Latn | -- Latn, Latf, Latinx, pjt-Latn | ||
return | return sc:getCode():find("Lat") and true or false | ||
end | end | ||
Line 43: | Line 43: | ||
end | end | ||
-- | -- Ustring turns on the codepoint-aware string matching. The basic string function | ||
local function | -- should be used for simple sequences of characters, Ustring function for | ||
local | -- sets – []. | ||
local function trackPattern(text, pattern, tracking, ustring) | |||
local find = ustring and mw.ustring.find or string.find | |||
end | |||
local function track(text, lang, sc) | |||
local U = mw.ustring.char | |||
if lang and text then | |||
local langCode = lang:getCode() | |||
-- [[Special:WhatLinksHere/Template:tracking/script/ang/acute]] | |||
if langCode == "ang" then | |||
local decomposed = mw.ustring.toNFD(text) | |||
local acute = U(0x301) | |||
trackPattern(decomposed, acute, "ang/acute") | |||
--[=[ | |||
[[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-phi]] | |||
[[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-theta]] | |||
[[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-kappa]] | |||
[[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-rho]] | |||
ϑ, ϰ, ϱ, ϕ should generally be replaced with θ, κ, ρ, φ. | |||
]=] | |||
elseif langCode == "el" or langCode == "grc" then | |||
trackPattern(text, "ϑ", "Greek/wrong-theta") | |||
trackPattern(text, "ϰ", "Greek/wrong-kappa") | |||
trackPattern(text, "ϱ", "Greek/wrong-rho") | |||
trackPattern(text, "ϕ", "Greek/wrong-phi") | |||
--[=[ | |||
[[Special:WhatLinksHere/Template:tracking/script/Ancient Greek/spacing-coronis]] | |||
[[Special:WhatLinksHere/Template:tracking/script/Ancient Greek/spacing-smooth-breathing]] | |||
[[Special:WhatLinksHere/Template:tracking/script/Ancient Greek/wrong-apostrophe]] | |||
When spacing coronis and spacing smooth breathing are used as apostrophes, | |||
they should be replaced with right single quotation marks (’). | |||
]=] | |||
if langCode == "grc" then | |||
trackPattern(text, U(0x1FBD), "Ancient Greek/spacing-coronis") | |||
trackPattern(text, U(0x1FBF), "Ancient Greek/spacing-smooth-breathing") | |||
trackPattern(text, "[" .. U(0x1FBD) .. U(0x1FBF) .. "]", "Ancient Greek/wrong-apostrophe", true) | |||
end | |||
-- [[Special:WhatLinksHere/Template:tracking/script/Russian/grave-accent]] | |||
elseif langCode == "ru" then | |||
local decomposed = mw.ustring.toNFD(text) | |||
trackPattern(decomposed, U(0x300), "Russian/grave-accent") | |||
-- [[Special:WhatLinksHere/Template:tracking/script/Tibetan/trailing-punctuation]] | |||
elseif langCode == "bo" then | |||
trackPattern(text, "[་།]$", "Tibetan/trailing-punctuation", true) | |||
trackPattern(text, "[་།]%]%]$", "Tibetan/trailing-punctuation", true) | |||
--[=[ | |||
[[Special:WhatLinksHere/Template:tracking/script/Thai/broken-ae]] | |||
[[Special:WhatLinksHere/Template:tracking/script/Thai/broken-am]] | |||
[[Special:WhatLinksHere/Template:tracking/script/Thai/wrong-rue-lue]] | |||
]=] | |||
elseif langCode == "th" then | |||
trackPattern(text, "เ".."เ", "Thai/broken-ae") | |||
trackPattern(text, "ํ[่้๊๋]?า", "Thai/broken-am", true) | |||
trackPattern(text, "[ฤฦ]า", "Thai/wrong-rue-lue", true) | |||
--[=[ | |||
[[Special:WhatLinksHere/Template:tracking/script/Lao/broken-ae]] | |||
[[Special:WhatLinksHere/Template:tracking/script/Lao/broken-am]] | |||
[[Special:WhatLinksHere/Template:tracking/script/Lao/possible-broken-ho-no]] | |||
[[Special:WhatLinksHere/Template:tracking/script/Lao/possible-broken-ho-mo]] | |||
[[Special:WhatLinksHere/Template:tracking/script/Lao/possible-broken-ho-lo]] | |||
]=] | |||
elseif langCode == "lo" then | |||
trackPattern(text, "ເ".."ເ", "Lao/broken-ae") | |||
trackPattern(text, "ໍ[່້໊໋]?າ", "Lao/broken-am", true) | |||
trackPattern(text, "ຫນ", "Lao/possible-broken-ho-no") | |||
trackPattern(text, "ຫມ", "Lao/possible-broken-ho-mo") | |||
trackPattern(text, "ຫລ", "Lao/possible-broken-ho-lo") | |||
--[=[ | |||
-- | [[Special:WhatLinksHere/Template:tracking/script/Lü/broken-ae]] | ||
- | [[Special:WhatLinksHere/Template:tracking/script/Lü/possible-wrong-sequence]] | ||
-- | ]=] | ||
elseif langCode == "khb" then | |||
trackPattern(text, "ᦵ".."ᦵ", "Lü/broken-ae") | |||
trackPattern(text, "[ᦀ-ᦫ][ᦵᦶᦷᦺ]", "Lü/possible-wrong-sequence", true) | |||
end | end | ||
end | end | ||
end | end | ||
Line 84: | Line 139: | ||
if not sc then | if not sc then | ||
sc = require("Module:scripts").findBestScript(text, lang) | sc = require("Module:scripts").findBestScript(text, lang) | ||
end | |||
track(text, lang, sc) | |||
-- Replace space characters with newlines in Mongolian-script text, which is written top-to-bottom. | |||
if sc:getDirection() == "down" and text:find(" ") then | |||
text = require("Module:munge_text")(text, function(txt) | |||
-- having extra parentheses makes sure only the first return value gets through | |||
return (txt:gsub(" +", "<br>")) | |||
end) | |||
end | end | ||
-- Hack Korean text to remove hyphens. This should be handled in a more general fashion, but needs to | -- Hack Korean script text to remove hyphens. | ||
-- XXX: This should be handled in a more general fashion, but needs to | |||
-- be efficient by not doing anything if no hyphens are present, and currently this is the only | -- be efficient by not doing anything if no hyphens are present, and currently this is the only | ||
-- language needing such processing. | -- language needing such processing. | ||
if | -- 20220221: Also convert 漢字(한자) to ruby, instead of needing [[Template:Ruby]]. | ||
text = munge_text(text, function(txt) | if sc:getCode() == "Kore" and (text:find("%-") or text:find("[()]")) then | ||
-- | text = require("Module:munge_text")(text, function(txt) | ||
-- Hani/Hang regex is a reasonable subset of Hani/Hang from [[Module:scripts/data]], | |||
-- last checked on 20220221 | |||
txt = txt:gsub("%-", "") | |||
txt = mw.ustring.gsub(txt, "([一-鿿㐀-䶿𠀀-𰀀-]+)%(([가-힣ᄀ-ᇿꥠ-ꥼힰ-ퟻ]+)%)", "<ruby>%1<rp>(</rp><rt>%2</rt><rp>)</rp></ruby>") | |||
return txt | |||
end) | end) | ||
end | end | ||
Line 249: | Line 319: | ||
return "<small>[" .. disp_script .. " needed]</small>" .. | return "<small>[" .. disp_script .. " needed]</small>" .. | ||
(nocat and "" or require("Module:utilities") | (nocat and "" or require("Module:utilities/format_categories")({category}, lang, sort_key)) | ||
end | end | ||