|
|
Line 9: |
Line 9: |
| [[Module:languages]] | | [[Module:languages]] |
| [[Module:parameters]] | | [[Module:parameters]] |
| [[Module:utilities/format_categories]] | | [[Module:utilities]] |
| ]=] | | ]=] |
|
| |
|
| function export.is_Latin_script(sc) | | function export.is_Latin_script(sc) |
| -- Latn, Latf, Latinx, pjt-Latn | | -- Latn, Latf, Latinx, pjt-Latn |
| return sc:getCode():find("Lat") and true or false | | return mw.ustring.find(require("Module:scripts").getByCode(sc)["_code"], "Lat") and true or false |
| end | | end |
|
| |
|
Line 41: |
Line 41: |
| | | |
| return export.tag_text(text, lang, sc, face, class) | | return export.tag_text(text, lang, sc, face, class) |
| end
| |
|
| |
| -- Ustring turns on the codepoint-aware string matching. The basic string function
| |
| -- should be used for simple sequences of characters, Ustring function for
| |
| -- sets – [].
| |
| local function trackPattern(text, pattern, tracking, ustring)
| |
| local find = ustring and mw.ustring.find or string.find
| |
| end
| |
|
| |
| local function track(text, lang, sc)
| |
| local U = mw.ustring.char
| |
|
| |
| if lang and text then
| |
| local langCode = lang:getCode()
| |
|
| |
| -- [[Special:WhatLinksHere/Template:tracking/script/ang/acute]]
| |
| if langCode == "ang" then
| |
| local decomposed = mw.ustring.toNFD(text)
| |
| local acute = U(0x301)
| |
|
| |
| trackPattern(decomposed, acute, "ang/acute")
| |
|
| |
| --[=[
| |
| [[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-phi]]
| |
| [[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-theta]]
| |
| [[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-kappa]]
| |
| [[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-rho]]
| |
| ϑ, ϰ, ϱ, ϕ should generally be replaced with θ, κ, ρ, φ.
| |
| ]=]
| |
| elseif langCode == "el" or langCode == "grc" then
| |
| trackPattern(text, "ϑ", "Greek/wrong-theta")
| |
| trackPattern(text, "ϰ", "Greek/wrong-kappa")
| |
| trackPattern(text, "ϱ", "Greek/wrong-rho")
| |
| trackPattern(text, "ϕ", "Greek/wrong-phi")
| |
|
| |
| --[=[
| |
| [[Special:WhatLinksHere/Template:tracking/script/Ancient Greek/spacing-coronis]]
| |
| [[Special:WhatLinksHere/Template:tracking/script/Ancient Greek/spacing-smooth-breathing]]
| |
| [[Special:WhatLinksHere/Template:tracking/script/Ancient Greek/wrong-apostrophe]]
| |
| When spacing coronis and spacing smooth breathing are used as apostrophes,
| |
| they should be replaced with right single quotation marks (’).
| |
| ]=]
| |
| if langCode == "grc" then
| |
| trackPattern(text, U(0x1FBD), "Ancient Greek/spacing-coronis")
| |
| trackPattern(text, U(0x1FBF), "Ancient Greek/spacing-smooth-breathing")
| |
| trackPattern(text, "[" .. U(0x1FBD) .. U(0x1FBF) .. "]", "Ancient Greek/wrong-apostrophe", true)
| |
| end
| |
|
| |
| -- [[Special:WhatLinksHere/Template:tracking/script/Russian/grave-accent]]
| |
| elseif langCode == "ru" then
| |
| local decomposed = mw.ustring.toNFD(text)
| |
|
| |
| trackPattern(decomposed, U(0x300), "Russian/grave-accent")
| |
|
| |
| -- [[Special:WhatLinksHere/Template:tracking/script/Tibetan/trailing-punctuation]]
| |
| elseif langCode == "bo" then
| |
| trackPattern(text, "[་།]$", "Tibetan/trailing-punctuation", true)
| |
| trackPattern(text, "[་།]%]%]$", "Tibetan/trailing-punctuation", true)
| |
|
| |
| --[=[
| |
| [[Special:WhatLinksHere/Template:tracking/script/Thai/broken-ae]]
| |
| [[Special:WhatLinksHere/Template:tracking/script/Thai/broken-am]]
| |
| [[Special:WhatLinksHere/Template:tracking/script/Thai/wrong-rue-lue]]
| |
| ]=]
| |
| elseif langCode == "th" then
| |
| trackPattern(text, "เ".."เ", "Thai/broken-ae")
| |
| trackPattern(text, "ํ[่้๊๋]?า", "Thai/broken-am", true)
| |
| trackPattern(text, "[ฤฦ]า", "Thai/wrong-rue-lue", true)
| |
|
| |
| --[=[
| |
| [[Special:WhatLinksHere/Template:tracking/script/Lao/broken-ae]]
| |
| [[Special:WhatLinksHere/Template:tracking/script/Lao/broken-am]]
| |
| [[Special:WhatLinksHere/Template:tracking/script/Lao/possible-broken-ho-no]]
| |
| [[Special:WhatLinksHere/Template:tracking/script/Lao/possible-broken-ho-mo]]
| |
| [[Special:WhatLinksHere/Template:tracking/script/Lao/possible-broken-ho-lo]]
| |
| ]=]
| |
| elseif langCode == "lo" then
| |
| trackPattern(text, "ເ".."ເ", "Lao/broken-ae")
| |
| trackPattern(text, "ໍ[່້໊໋]?າ", "Lao/broken-am", true)
| |
| trackPattern(text, "ຫນ", "Lao/possible-broken-ho-no")
| |
| trackPattern(text, "ຫມ", "Lao/possible-broken-ho-mo")
| |
| trackPattern(text, "ຫລ", "Lao/possible-broken-ho-lo")
| |
|
| |
| --[=[
| |
| [[Special:WhatLinksHere/Template:tracking/script/Lü/broken-ae]]
| |
| [[Special:WhatLinksHere/Template:tracking/script/Lü/possible-wrong-sequence]]
| |
| ]=]
| |
| elseif langCode == "khb" then
| |
| trackPattern(text, "ᦵ".."ᦵ", "Lü/broken-ae")
| |
| trackPattern(text, "[ᦀ-ᦫ][ᦵᦶᦷᦺ]", "Lü/possible-wrong-sequence", true)
| |
| end
| |
| end
| |
| end | | end |
|
| |
|
Line 176: |
Line 84: |
| if not sc then | | if not sc then |
| sc = require("Module:scripts").findBestScript(text, lang) | | sc = require("Module:scripts").findBestScript(text, lang) |
| end
| |
|
| |
| track(text, lang, sc)
| |
|
| |
| -- Replace space characters with newlines in Mongolian-script text, which is written top-to-bottom.
| |
| if sc and sc:getDirection() == "down" and text:find(" ") then
| |
| text = munge_text(text, function(txt)
| |
| -- having extra parentheses makes sure only the first return value gets through
| |
| return (txt:gsub(" +", "<br>"))
| |
| end)
| |
| end | | end |
|
| |
|
| -- Hack Korean, Middle Korean, Jeju text to remove hyphens. | | -- Hack Korean text to remove hyphens. This should be handled in a more general fashion, but needs to |
| -- This should be handled in a more general fashion, but needs to
| |
| -- be efficient by not doing anything if no hyphens are present, and currently this is the only | | -- be efficient by not doing anything if no hyphens are present, and currently this is the only |
| -- language needing such processing. | | -- language needing such processing. |
| local lang_code = lang:getCode() | | if lang:getCode() == "ko" and text:find("%-") then |
| if (lang_code == "ko" or lang_code == "okm" or lang_code == "jje") and text:find("%-") then
| |
| text = munge_text(text, function(txt) | | text = munge_text(text, function(txt) |
| -- having extra parentheses makes sure only the first return value gets through | | -- having extra parentheses makes sure only the first return value gets through |
Line 353: |
Line 249: |
| | | |
| return "<small>[" .. disp_script .. " needed]</small>" .. | | return "<small>[" .. disp_script .. " needed]</small>" .. |
| (nocat and "" or require("Module:utilities/format_categories")({category}, lang, sort_key)) | | (nocat and "" or require("Module:utilities").format_categories({category}, lang, sort_key)) |
| end | | end |
|
| |
|