Module:script utilities: Difference between revisions

Jump to navigation Jump to search
no edit summary
(Created page with "local export = {} --[=[ Modules used: Module:script utilities/data Module:scripts Module:languages Module:parameters Module:utilities [[Module:debu...")
 
No edit summary
(11 intermediate revisions by the same user not shown)
Line 5: Line 5:
[[Module:script utilities/data]]
[[Module:script utilities/data]]
[[Module:scripts]]
[[Module:scripts]]
[[Module:senseid]] (only when an id is present)
[[Module:string utilities]] (only when hyphens in Korean text or spaces in vertical text)
[[Module:languages]]
[[Module:languages]]
[[Module:parameters]]
[[Module:parameters]]
[[Module:utilities]]
[[Module:utilities]]
[[Module:debug]]
]=]
]=]


function export.is_Latin_script(sc)
function export.is_Latin_script(sc)
-- Latn, Latf, Latinx, nv-Latn, pjt-Latn
-- Latn, Latf, Latinx, pjt-Latn
return sc:getCode():find("Lat") and true or false
return mw.ustring.find(require("Module:scripts").getByCode(sc)["_code"], "Lat") and true or false
end
end


Line 42: Line 43:
end
end


-- Ustring turns on the codepoint-aware string matching. The basic string function
-- Apply a function to `text`, but not to the target of wikilinks or to HTML tags.
-- should be used for simple sequences of characters, Ustring function for
local function munge_text(text, fn)
-- sets – [].
local has_html = text:find("<")
local function trackPattern(text, pattern, tracking, ustring)
local has_two_part_link = text:find("%[%[.*|")
local find = ustring and mw.ustring.find or string.find
if not has_html and not has_two_part_link then
if pattern and find(text, pattern) then
return fn(text)
require("Module:debug").track("script/" .. tracking)
end
end
end


local function track(text, lang, sc)
local strutils = require("Module:string utilities")
local U = mw.ustring.char
if lang and text then
local langCode = lang:getCode()
-- [[Special:WhatLinksHere/Template:tracking/script/ang/acute]]
if langCode == "ang" then
local decomposed = mw.ustring.toNFD(text)
local acute = U(0x301)
trackPattern(decomposed, acute, "ang/acute")
--[=[
[[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-phi]]
[[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-theta]]
[[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-kappa]]
[[Special:WhatLinksHere/Template:tracking/script/Greek/wrong-rho]]
ϑ, ϰ, ϱ, ϕ should generally be replaced with θ, κ, ρ, φ.
]=]
elseif langCode == "el" or langCode == "grc" then
trackPattern(text, "ϑ", "Greek/wrong-theta")
trackPattern(text, "ϰ", "Greek/wrong-kappa")
trackPattern(text, "ϱ", "Greek/wrong-rho")
trackPattern(text, "ϕ", "Greek/wrong-phi")
--[=[
[[Special:WhatLinksHere/Template:tracking/script/Ancient Greek/spacing-coronis]]
[[Special:WhatLinksHere/Template:tracking/script/Ancient Greek/spacing-smooth-breathing]]
[[Special:WhatLinksHere/Template:tracking/script/Ancient Greek/wrong-apostrophe]]
When spacing coronis and spacing smooth breathing are used as apostrophes,
they should be replaced with right single quotation marks (’).
]=]
if langCode == "grc" then
trackPattern(text, U(0x1FBD), "Ancient Greek/spacing-coronis")
trackPattern(text, U(0x1FBF), "Ancient Greek/spacing-smooth-breathing")
trackPattern(text, "[" .. U(0x1FBD) .. U(0x1FBF) .. "]", "Ancient Greek/wrong-apostrophe", true)
end
-- [[Special:WhatLinksHere/Template:tracking/script/Russian/grave-accent]]
elseif langCode == "ru" then
local decomposed = mw.ustring.toNFD(text)
trackPattern(decomposed, U(0x300), "Russian/grave-accent")
-- [[Special:WhatLinksHere/Template:tracking/script/Tibetan/trailing-punctuation]]
elseif langCode == "bo" then
trackPattern(text, "[་།]$", "Tibetan/trailing-punctuation", true)
trackPattern(text, "[་།]%]%]$", "Tibetan/trailing-punctuation", true)


--[=[
local function munge_text_with_html(txt)
[[Special:WhatLinksHere/Template:tracking/script/Thai/broken-ae]]
local parts = strutils.capturing_split(txt, "(<[^>]->)")
[[Special:WhatLinksHere/Template:tracking/script/Thai/broken-am]]
for i = 1, #parts, 2 do
[[Special:WhatLinksHere/Template:tracking/script/Thai/wrong-rue-lue]]
parts[i] = fn(parts[i])
]=]
end
elseif langCode == "th" then
return table.concat(parts)
trackPattern(text, "เ".."เ", "Thai/broken-ae")
end
trackPattern(text, "ํ[่้๊๋]?า", "Thai/broken-am", true)
trackPattern(text, "[ฤฦ]า", "Thai/wrong-rue-lue", true)


--[=[
if has_two_part_link then
[[Special:WhatLinksHere/Template:tracking/script/Lao/broken-ae]]
-- The hard case is when both two-part links and HTML tags occur, because crippled Lua patterns
[[Special:WhatLinksHere/Template:tracking/script/Lao/broken-am]]
-- don't support alternation. We need to first split on two-part links (which seem more likely
]=]
-- to occur), then split odd-numbered fragments on HTML tags, then apply the function to
elseif langCode == "lo" then
-- odd-numbered subfragments. This is unlikely to be very efficient, but should occur rarely.
trackPattern(text, "ເ".."ເ", "Lao/broken-ae")
local parts = strutils.capturing_split(text, "(%[%[[^%[%]|]-|)")
trackPattern(text, "ໍ[່້໊໋]?າ", "Lao/broken-am", true)
for i = 1, #parts, 2 do
if has_html then
parts[i] = munge_text_with_html(parts[i])
else
parts[i] = fn(parts[i])
end
end
end
return table.concat(parts)
else -- HTML tags only
return munge_text_with_html(text)
end
end
end
end
Line 127: Line 84:
if not sc then
if not sc then
sc = require("Module:scripts").findBestScript(text, lang)
sc = require("Module:scripts").findBestScript(text, lang)
end
-- Korean-only workaround: strip hyphens from the text. A more general mechanism would be
-- preferable, but Korean is currently the only language that needs this, and the cheap
-- `find` guard keeps the cost at zero for text that contains no hyphen at all.
if lang:getCode() == "ko" and text:find("%-") then
	local function strip_hyphens(fragment)
		-- Assigning to a single local keeps only gsub's first result (the new string)
		-- and drops the replacement count.
		local without_hyphens = fragment:gsub("%-", "")
		return without_hyphens
	end
	text = munge_text(text, strip_hyphens)
end
end
track(text, lang, sc)
-- Replace runs of spaces with line breaks in text whose script direction is "down"
-- (e.g. Mongolian-script text, which is written top-to-bottom).
if sc and sc:getDirection() == "down" then
	--[[ First, escape targets of wikilinks and HTML tags, which should not have
		their spaces replaced with line breaks. Each escaped item is swapped for a
		numbered placeholder and restored afterwards.

		The placeholder is the item's index wrapped in the control characters
		\1 and \2. The closing delimiter is required: without it a placeholder
		followed by a digit in the text (e.g. display text "123" after a link
		target) cannot be told apart from a longer index, and indexes of 10 or
		more cannot be parsed back. The control characters also avoid clashing
		with a literal "$1" in the text.
		NOTE(review): assumes \1 and \2 never occur in incoming wikitext. ]]
	local escaped = {}
	local i = 0
	local escape_format = "\1%d\2"

	-- Replace every match of `pattern` in `text` with `prefix` (if given) plus a
	-- placeholder, remembering the captured item (or whole match) in `escaped`.
	-- Returns only the resulting string.
	local function escape(text, pattern, prefix)
		-- The outer parentheses discard gsub's second result (the match count).
		return (text:gsub(
			pattern,
			function(item)
				i = i + 1
				escaped[i] = item
				return (prefix or "") .. escape_format:format(i)
			end))
	end

	-- Target of a two-part link: "[[target|". Brackets are excluded from the target
	-- so that a match cannot start at one link and run across "]]" into the next
	-- piped link, which would shield the spaces between the two links.
	text = escape(text, "%[%[([^%[%]|]+|)", "[[")
	-- HTML tags.
	text = escape(text, "<[^>]+>")

	text = text:gsub(" +", "<br>")

	-- Unescape whatever was escaped. `%d+` accepts indexes of any length.
	text = text:gsub(
		"\1(%d+)\2",
		function(a)
			return escaped[tonumber(a)]
		end
	)
end
if sc:getCode() == "Imag" then
if sc:getCode() == "Imag" then
face = nil
face = nil
Line 180: Line 111:
local output = {}
local output = {}
if id then
if id then
table.insert(output, 'id="' .. require("Module:utilities").make_id(lang, id) .. '"')
table.insert(output, 'id="' .. require("Module:senseid").anchor(lang, id) .. '"')
end
end
Line 190: Line 121:
return table.concat(output, " ")
return table.concat(output, " ")
end
if face == "hypothetical" then
-- [[Special:WhatLinksHere/Template:tracking/script-utilities/face/hypothetical]]
require("Module:debug").track("script-utilities/face/hypothetical")
end
end
Line 212: Line 138:
end
end


function export.tag_translit(translit, lang, kind, attributes)
function export.tag_translit(translit, lang, kind, attributes, is_manual)
if type(lang) == "table" then
if type(lang) == "table" then
lang = lang.getCode and lang:getCode()
lang = lang.getCode and lang:getCode()
or error("Third argument to tag_translit should be a language code or language object.")
or error("Second argument to tag_translit should be a language code or language object.")
end
end
Line 224: Line 150:
table.insert(opening_tag, data.tag)
table.insert(opening_tag, data.tag)
if lang == "ja" then
if lang == "ja" then
table.insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. 'tr"')
table.insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. (is_manual and "manual-tr " or "") .. 'tr"')
else
else
table.insert(opening_tag, 'lang="' .. lang .. '-Latn"')
table.insert(opening_tag, 'lang="' .. lang .. '-Latn"')
table.insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. 'tr Latn"')
table.insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. (is_manual and "manual-tr " or "") .. 'tr Latn"')
end
end
Line 267: Line 193:


-- Add a notice to request the native script of a word
-- Add a notice to request the native script of a word
function export.request_script(lang, sc)
function export.request_script(lang, sc, usex, nocat, sort_key)
local scripts = lang.getScripts and lang:getScripts() or error('The language "' .. lang:getCode() .. '" does not have the method getScripts. It may be unwritten.')
local scripts = lang.getScripts and lang:getScripts() or error('The language "' .. lang:getCode() .. '" does not have the method getScripts. It may be unwritten.')
Line 308: Line 234:
end
end
-- If there are non-Latin scripts, return nothing.
-- If there are no non-Latin scripts, return nothing.
if not has_nonlatin then
if not has_nonlatin then
return ""
return ""
Line 314: Line 240:
end
end
local category = ""
local category
if mw.title.getCurrentTitle().nsText ~= "Template" then
if usex then
category = "[[Category:" .. lang:getCanonicalName() .. " terms needing " .. cat_script .. " script]]"
category = "Requests for " .. cat_script .. " script in " .. lang:getCanonicalName() .. " usage examples"
else
category = "Requests for " .. cat_script .. " script for " .. lang:getCanonicalName() .. " terms"
end
end
return "<small>[" .. disp_script .. " needed]</small>" .. category
return "<small>[" .. disp_script .. " needed]</small>" ..
(nocat and "" or require("Module:utilities").format_categories({category}, lang, sort_key))
end
end


function export.template_rfscript(frame)
function export.template_rfscript(frame)
local args = frame.args
params = {
local lang = args[1] or error("The first parameter (language code) has not been given")
[1] = { required = true, default = "und" },
local sc = args["sc"]; if sc == "" then sc = nil end
["sc"] = {},
lang = require("Module:languages").getByCode(lang) or error("The language code \"" .. lang .. "\" is not valid.")
["usex"] = { type = "boolean" },
sc = (sc and (require("Module:scripts").getByCode(sc) or error("The script code \"" .. sc .. "\" is not valid.")) or nil)
["nocat"] = { type = "boolean" },
["sort"] = {},
}
local args = require("Module:parameters").process(frame:getParent().args, params)
local ret = export.request_script(lang, sc)
local lang = require("Module:languages").getByCode(args[1], 1)
local sc = args.sc and require("Module:scripts").getByCode(args.sc, true)
 
local ret = export.request_script(lang, sc, args.usex, args.nocat, args.sort)
if ret == "" then
if ret == "" then
Line 358: Line 294:
error(result)
error(result)
else
else
error('The text "' .. originalText .. '" contains the letters "' .. text .. '" that do not belong to the ' .. scriptObject:getCategoryName() .. '.', 2)
error('The text "' .. originalText .. '" contains the letters "' .. text .. '" that do not belong to the ' .. scriptObject:getDisplayForm() .. '.', 2)
end
end
end
end

Navigation menu