Module:script utilities: Difference between revisions
Jump to navigation
Jump to search
No edit summary |
No edit summary |
||
Line 5: | Line 5: | ||
[[Module:script utilities/data]] | [[Module:script utilities/data]] | ||
[[Module:scripts]] | [[Module:scripts]] | ||
[[Module:senseid]] (only when id's present) | |||
[[Module:string utilities]] (only when hyphens in Korean text or spaces in vertical text) | |||
[[Module:languages]] | [[Module:languages]] | ||
[[Module:parameters]] | [[Module:parameters]] | ||
Line 11: | Line 13: | ||
function export.is_Latin_script(sc) | function export.is_Latin_script(sc) | ||
-- Latn, Latf, Latinx | -- Latn, Latf, Latinx, pjt-Latn | ||
return sc:getCode():find("Lat") and true or false | return sc:getCode():find("Lat") and true or false | ||
end | end | ||
Line 39: | Line 41: | ||
return export.tag_text(text, lang, sc, face, class) | return export.tag_text(text, lang, sc, face, class) | ||
end | |||
-- Apply a function to `text`, but not to the target of wikilinks or to HTML tags. | |||
local function munge_text(text, fn) | |||
local has_html = text:find("<") | |||
local has_two_part_link = text:find("%[%[.*|") | |||
if not has_html and not has_two_part_link then | |||
return fn(text) | |||
end | |||
local strutils = require("Module:string utilities") | |||
local function munge_text_with_html(txt) | |||
local parts = strutils.capturing_split(txt, "(<[^>]->)") | |||
for i = 1, #parts, 2 do | |||
parts[i] = fn(parts[i]) | |||
end | |||
return table.concat(parts) | |||
end | |||
if has_two_part_link then | |||
-- The hard case is when both two-part links and HTML tags occur, because crippled Lua patterns | |||
-- don't support alternation. We need to first split on two-part links (which seem more likely | |||
-- to occur), then split odd-numbered fragments on HTML tags, then apply the function to | |||
-- odd-numbered subfragments. This is unlikely to be very efficient, but should occur rarely. | |||
local parts = strutils.capturing_split(text, "(%[%[[^%[%]|]-|)") | |||
for i = 1, #parts, 2 do | |||
if has_html then | |||
parts[i] = munge_text_with_html(parts[i]) | |||
else | |||
parts[i] = fn(parts[i]) | |||
end | |||
end | |||
return table.concat(parts) | |||
else -- HTML tags only | |||
return munge_text_with_html(text) | |||
end | |||
end | end | ||
Line 48: | Line 87: | ||
-- Replace space characters with newlines in Mongolian-script text, which is written top-to-bottom. | -- Replace space characters with newlines in Mongolian-script text, which is written top-to-bottom. | ||
if sc and sc:getDirection() == "down" | if sc and sc:getDirection() == "down" and text:find(" ") then | ||
text = munge_text(text, function(txt) | |||
-- having extra parentheses makes sure only the first return value gets through | |||
return (txt:gsub(" +", "<br>")) | |||
end) | |||
text = | |||
end | end | ||
-- Hack Korean text to remove hyphens. This should be handled in a more general fashion, but needs to | |||
-- be efficient by not doing anything if no hyphens are present, and currently this is the only | |||
-- language needing such processing. | |||
if lang:getCode() == "ko" and text:find("%-") then | |||
text = munge_text(text, function(txt) | |||
-- having extra parentheses makes sure only the first return value gets through | |||
return (txt:gsub("%-", "")) | |||
end) | |||
end | |||
if sc:getCode() == "Imag" then | if sc:getCode() == "Imag" then | ||
face = nil | face = nil | ||
Line 96: | Line 119: | ||
local output = {} | local output = {} | ||
if id then | if id then | ||
table.insert(output, 'id="' .. require("Module: | table.insert(output, 'id="' .. require("Module:senseid").anchor(lang, id) .. '"') | ||
end | end | ||
Line 123: | Line 146: | ||
end | end | ||
function export.tag_translit(translit, lang, kind, attributes) | function export.tag_translit(translit, lang, kind, attributes, is_manual) | ||
if type(lang) == "table" then | if type(lang) == "table" then | ||
lang = lang.getCode and lang:getCode() | lang = lang.getCode and lang:getCode() | ||
or error(" | or error("Second argument to tag_translit should be a language code or language object.") | ||
end | end | ||
Line 135: | Line 158: | ||
table.insert(opening_tag, data.tag) | table.insert(opening_tag, data.tag) | ||
if lang == "ja" then | if lang == "ja" then | ||
table.insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. 'tr"') | table.insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. (is_manual and "manual-tr " or "") .. 'tr"') | ||
else | else | ||
table.insert(opening_tag, 'lang="' .. lang .. '-Latn"') | table.insert(opening_tag, 'lang="' .. lang .. '-Latn"') | ||
table.insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. 'tr Latn"') | table.insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. (is_manual and "manual-tr " or "") .. 'tr Latn"') | ||
end | end | ||
Line 178: | Line 201: | ||
-- Add a notice to request the native script of a word | -- Add a notice to request the native script of a word | ||
function export.request_script(lang, sc) | function export.request_script(lang, sc, usex, nocat, sort_key) | ||
local scripts = lang.getScripts and lang:getScripts() or error('The language "' .. lang:getCode() .. '" does not have the method getScripts. It may be unwritten.') | local scripts = lang.getScripts and lang:getScripts() or error('The language "' .. lang:getCode() .. '" does not have the method getScripts. It may be unwritten.') | ||
Line 219: | Line 242: | ||
end | end | ||
-- If there are non-Latin scripts, return nothing. | -- If there are no non-Latin scripts, return nothing. | ||
if not has_nonlatin then | if not has_nonlatin then | ||
return "" | return "" | ||
Line 225: | Line 248: | ||
end | end | ||
local category | local category | ||
if | if usex then | ||
category = " | category = "Requests for " .. cat_script .. " script in " .. lang:getCanonicalName() .. " usage examples" | ||
else | |||
category = "Requests for " .. cat_script .. " script for " .. lang:getCanonicalName() .. " terms" | |||
end | end | ||
return "<small>[" .. disp_script .. " needed]</small>" .. category | return "<small>[" .. disp_script .. " needed]</small>" .. | ||
(nocat and "" or require("Module:utilities").format_categories({category}, lang, sort_key)) | |||
end | end | ||
function export.template_rfscript(frame) | function export.template_rfscript(frame) | ||
params = { | |||
[1] = { required = true, default = "und" }, | |||
["sc"] = {}, | |||
["usex"] = { type = "boolean" }, | |||
["nocat"] = { type = "boolean" }, | |||
["sort"] = {}, | |||
} | |||
local args = require("Module:parameters").process(frame:getParent().args, params) | |||
local ret = export.request_script(lang, sc) | local lang = require("Module:languages").getByCode(args[1], 1) | ||
local sc = args.sc and require("Module:scripts").getByCode(args.sc, true) | |||
local ret = export.request_script(lang, sc, args.usex, args.nocat, args.sort) | |||
if ret == "" then | if ret == "" then | ||
Line 269: | Line 302: | ||
error(result) | error(result) | ||
else | else | ||
error('The text "' .. originalText .. '" contains the letters "' .. text .. '" that do not belong to the ' .. scriptObject: | error('The text "' .. originalText .. '" contains the letters "' .. text .. '" that do not belong to the ' .. scriptObject:getDisplayForm() .. '.', 2) | ||
end | end | ||
end | end |
Revision as of 15:17, 15 September 2021
- The following documentation is located at Module:script utilities/doc.[edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
-- Table that collects this module's public functions; it is returned at the end of the file.
local export = {}
--[=[
Modules used:
[[Module:script utilities/data]]
[[Module:scripts]]
[[Module:senseid]] (only when id's present)
[[Module:string utilities]] (only when hyphens in Korean text or spaces in vertical text)
[[Module:languages]]
[[Module:parameters]]
[[Module:utilities]]
]=]
-- Report whether the code of the script object `sc` contains the substring "Lat"
-- (this covers Latn, Latf, Latinx and pjt-Latn). Always returns a real boolean.
function export.is_Latin_script(sc)
	local code = sc:getCode()
	-- string.find yields nil when there is no match, a position otherwise.
	return code:find("Lat") ~= nil
end
-- Used by [[Template:lang]]
--
-- Template entry point: reads the parent frame's arguments and tags the text with
-- export.tag_text.
--   1      language code; may be omitted only in the Template namespace, where "und" is used
--   2      the text to tag (may be empty; defaults to "")
--   sc     optional script code; an unknown code raises an error
--   face   optional script face, passed through to tag_text
--   class  optional extra CSS class, passed through to tag_text
function export.lang_t(frame)
	-- `local` keeps the parameter spec from leaking into the global environment.
	local params = {
		[1] = {},
		[2] = { allow_empty = true, default = "" },
		["sc"] = {},
		["face"] = {},
		["class"] = {},
	}

	local args = require("Module:parameters").process(frame:getParent().args, params)

	local lang_code = args[1]
	if not lang_code then
		-- Only template pages themselves may omit the language code.
		if mw.title.getCurrentTitle().nsText == "Template" then
			lang_code = "und"
		else
			error("Language code has not been specified. Please pass parameter 1 to the template.")
		end
	end

	local languages = require("Module:languages")
	-- NOTE(review): languages.err presumably raises for an unknown code -- confirm.
	local lang = languages.getByCode(lang_code) or languages.err(lang_code, 1)

	local sc = args["sc"]
	if sc then
		sc = require("Module:scripts").getByCode(sc)
			or error("The script code \"" .. sc .. "\" is not valid.")
	end

	-- The class must be read from the parsed arguments; a bare `class` here would be an
	-- undefined global (nil) and the template's class= parameter would be silently dropped.
	return export.tag_text(args[2], lang, sc, args["face"], args["class"])
end
-- Run `fn` over `text` while leaving HTML tags and the target half of two-part
-- wikilinks ([[target|display]]) untouched.
--
-- When `text` contains neither "<" nor a two-part link, `fn` is simply called on the
-- whole string. Otherwise the string is cut up with capturing_split from
-- [[Module:string utilities]]; only the odd-numbered pieces (the text between the
-- captured delimiters) are passed through `fn`, and everything is glued back together.
local function munge_text(text, fn)
	local contains_tag = text:find("<")
	local contains_piped_link = text:find("%[%[.*|")
	if not (contains_tag or contains_piped_link) then
		return fn(text)
	end

	local split = require("Module:string utilities").capturing_split

	-- Apply `fn` to everything in `str` that lies outside <...> tags.
	local function transform_outside_tags(str)
		local pieces = split(str, "(<[^>]->)")
		for idx = 1, #pieces, 2 do
			pieces[idx] = fn(pieces[idx])
		end
		return table.concat(pieces)
	end

	if not contains_piped_link then
		-- HTML tags only.
		return transform_outside_tags(text)
	end

	-- Lua patterns have no alternation, so links and tags cannot be matched in a single
	-- pass. Split on the "[[target|" prefixes first, then (only when tags are present
	-- too) split each remaining fragment on tags before applying `fn`.
	local transform = contains_tag and transform_outside_tags or fn
	local pieces = split(text, "(%[%[[^%[%]|]-|)")
	for idx = 1, #pieces, 2 do
		pieces[idx] = transform(pieces[idx])
	end
	return table.concat(pieces)
end
-- Wrap text in the appropriate HTML tags with language and script class.
--   text   the string to wrap
--   lang   language object (its getCode() is used)
--   sc     script object; when absent it is looked up with findBestScript from [[Module:scripts]]
--   face   key into the `faces` table of [[Module:script utilities/data]]; nil selects the "nil" entry
--   class  optional extra CSS class appended after the script/face classes
--   id     optional sense id; turned into an id="..." attribute via [[Module:senseid]]
-- Raises an error when `face` has no entry in the data table.
function export.tag_text(text, lang, sc, face, class, id)
	sc = sc or require("Module:scripts").findBestScript(text, lang)

	-- Replace space characters with newlines in Mongolian-script text, which is written top-to-bottom.
	if sc and sc:getDirection() == "down" and text:find(" ") then
		text = munge_text(text, function(chunk)
			-- assigning to a single local discards gsub's second return value (the count)
			local replaced = chunk:gsub(" +", "<br>")
			return replaced
		end)
	end

	-- Hack Korean text to remove hyphens. This should be handled in a more general fashion, but
	-- needs to be efficient by not doing anything if no hyphens are present, and currently this is
	-- the only language needing such processing.
	if lang:getCode() == "ko" and text:find("%-") then
		text = munge_text(text, function(chunk)
			local stripped = chunk:gsub("%-", "")
			return stripped
		end)
	end

	-- The "Imag" script ignores any requested face.
	if sc:getCode() == "Imag" then
		face = nil
	end

	local face_data = mw.loadData("Module:script utilities/data").faces[face or "nil"]

	-- Suffix appended after right-to-left text that is a translation or ends in punctuation.
	-- NOTE(review): the literal below presumably holds one invisible U+200E LEFT-TO-RIGHT MARK;
	-- confirm with a hex view before editing this line.
	local suffix = ""
	if sc:getDirection() == "rtl" and (face == "translation" or mw.ustring.find(text, "%p$")) then
		suffix = "‎"
	end

	if not face_data then
		error('Invalid script face "' .. face .. '".')
	end

	-- Build the attribute string of the wrapper tag: optional id, then class, then lang.
	-- The varargs are the face-specific classes; the script code always comes first.
	local function attributes(...)
		local attrs = {}
		if id then
			attrs[#attrs + 1] = 'id="' .. require("Module:senseid").anchor(lang, id) .. '"'
		end

		local classes = {...}
		table.insert(classes, 1, sc:getCode())
		if class and class ~= '' then
			classes[#classes + 1] = class
		end
		attrs[#attrs + 1] = 'class="' .. table.concat(classes, ' ') .. '"'

		if lang then
			attrs[#attrs + 1] = 'lang="' .. lang:getCode() .. '"'
		end
		return table.concat(attrs, " ")
	end

	-- Add a script wrapper
	local opening = '<' .. face_data.tag .. ' ' .. attributes(face_data.class) .. '>'
	local closing = '</' .. face_data.tag .. '>'
	return (face_data.prefix or "") .. opening .. text .. closing .. suffix
end
-- Wrap a transliteration in the HTML tag configured for `kind` in the `translit` table of
-- [[Module:script utilities/data]] (the "default" entry when `kind` is nil).
--   translit    the transliteration text
--   lang        language code string, or a language object providing getCode()
--   kind        key into the translit data table
--   attributes  optional extra attribute string appended to the opening tag
--   is_manual   when truthy, the class "manual-tr" is added
function export.tag_translit(translit, lang, kind, attributes, is_manual)
	if type(lang) == "table" then
		local code = lang.getCode and lang:getCode()
		if not code then
			error("Second argument to tag_translit should be a language code or language object.")
		end
		lang = code
	end

	local data = mw.loadData("Module:script utilities/data").translit[kind or "default"]

	-- Classes shared by both branches below: the configured ones, then the manual marker.
	local class_prefix = (data.classes and data.classes .. " " or "") .. (is_manual and "manual-tr " or "")

	local parts = { data.tag }
	if lang == "ja" then
		-- Japanese gets neither a lang attribute nor the Latn class.
		parts[#parts + 1] = 'class="' .. class_prefix .. 'tr"'
	else
		parts[#parts + 1] = 'lang="' .. lang .. '-Latn"'
		parts[#parts + 1] = 'class="' .. class_prefix .. 'tr Latn"'
	end

	if data.dir then
		parts[#parts + 1] = 'dir="' .. data.dir .. '"'
	end

	-- A nil `attributes` leaves the list unchanged.
	parts[#parts + 1] = attributes

	return "<" .. table.concat(parts, " ") .. ">" .. translit .. "</" .. data.tag .. ">"
end
-- Wrap a transcription in the HTML tag configured for `kind` in the `transcription` table of
-- [[Module:script utilities/data]] (the "default" entry when `kind` is nil).
--   transcription  the transcription text
--   lang           language code string, or a language object providing getCode()
--   kind           key into the transcription data table
--   attributes     optional extra attribute string appended to the opening tag
-- Raises an error when `lang` is a table that cannot supply a language code.
function export.tag_transcription(transcription, lang, kind, attributes)
	if type(lang) == "table" then
		-- The message names this function and the real position of `lang` (second argument).
		lang = lang.getCode and lang:getCode()
			or error("Second argument to tag_transcription should be a language code or language object.")
	end

	local data = mw.loadData("Module:script utilities/data").transcription[kind or "default"]

	local opening_tag = {}

	table.insert(opening_tag, data.tag)

	if lang == "ja" then
		-- Japanese gets neither a lang attribute nor the Latn class.
		table.insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. 'ts"')
	else
		table.insert(opening_tag, 'lang="' .. lang .. '-Latn"')
		table.insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. 'ts Latn"')
	end

	if data.dir then
		table.insert(opening_tag, 'dir="' .. data.dir .. '"')
	end

	-- A nil `attributes` leaves the list unchanged.
	table.insert(opening_tag, attributes)

	return "<" .. table.concat(opening_tag, " ") .. ">" .. transcription .. "</" .. data.tag .. ">"
end
-- Add a notice to request the native script of a word
--   lang      language object; must provide getScripts()
--   sc        optional script object being requested
--   usex      when truthy, categorize as a usage-example request rather than a term request
--   nocat     when truthy, return the notice without any category
--   sort_key  passed through to format_categories in [[Module:utilities]]
-- Returns "" when no native-script request applies (the script is Latin, or the language
-- lists no non-Latin script); otherwise the "[... needed]" notice plus the formatted category.
function export.request_script(lang, sc, usex, nocat, sort_key)
	local lang_scripts = lang.getScripts and lang:getScripts()
	if not lang_scripts then
		error('The language "' .. lang:getCode() .. '" does not have the method getScripts. It may be unwritten.')
	end

	-- By default, request for "native" script
	local category_script, display_script = "native", "script"

	-- If the script was not specified, and the language has only one script, use that.
	if not sc and #lang_scripts == 1 then
		sc = lang_scripts[1]
	end

	if sc then
		-- The script is known. If it is Latin, return nothing.
		if export.is_Latin_script(sc) then
			return ""
		end

		-- Name the script in the notice only when it is not the language's first-listed one.
		if sc:getCode() ~= lang_scripts[1]:getCode() then
			display_script = sc:getCanonicalName()
		end

		-- The category needs to be specific to script only if there is chance
		-- of ambiguity. This occurs when lang=und, or when the language has
		-- multiple scripts.
		if lang:getCode() == "und" or lang_scripts[2] then
			category_script = sc:getCanonicalName()
		end
	else
		-- The script is not known.
		-- If there are no non-Latin scripts in the language's list, return nothing.
		local only_latin = true
		for _, candidate in ipairs(lang_scripts) do
			if not export.is_Latin_script(candidate) then
				only_latin = false
				break
			end
		end
		if only_latin then
			return ""
		end
	end

	local lang_name = lang:getCanonicalName()
	local category = usex
		and ("Requests for " .. category_script .. " script in " .. lang_name .. " usage examples")
		or ("Requests for " .. category_script .. " script for " .. lang_name .. " terms")

	local notice = "<small>[" .. display_script .. " needed]</small>"
	if nocat then
		return notice
	end
	return notice .. require("Module:utilities").format_categories({category}, lang, sort_key)
end
-- Template entry point for requesting a native script: reads the parent frame's arguments
-- and returns the notice produced by export.request_script.
--   1      language code (required; "und" is the declared default)
--   sc     optional script code
--   usex   boolean: the request concerns a usage example
--   nocat  boolean: suppress the category
--   sort   optional sort key for the category
-- Raises an error when request_script returns "" (no native-script request applies).
function export.template_rfscript(frame)
	-- `local` keeps the parameter spec from leaking into the global environment.
	local params = {
		[1] = { required = true, default = "und" },
		["sc"] = {},
		["usex"] = { type = "boolean" },
		["nocat"] = { type = "boolean" },
		["sort"] = {},
	}

	local args = require("Module:parameters").process(frame:getParent().args, params)

	local lang = require("Module:languages").getByCode(args[1], 1)
	local sc = args.sc and require("Module:scripts").getByCode(args.sc, true)

	local ret = export.request_script(lang, sc, args.usex, args.nocat, args.sort)

	if ret == "" then
		error("This language is written in the Latin alphabet. It does not need a native script.")
	else
		return ret
	end
end
-- Verify that every letter of `text` belongs to the script with code `scriptCode`.
--   text        the string to check
--   scriptCode  script code looked up through [[Module:scripts]]
--   result      optional custom error message (used only when it is a string)
-- Returns nothing on success. Raises an error when the script code is unknown, or when
-- letters remain after removing the script's own characters.
function export.checkScript(text, scriptCode, result)
	local script = require("Module:scripts").getByCode(scriptCode)
	if not script then
		error('The script code "' .. scriptCode .. '" is not recognized.')
	end

	local ugsub = mw.ustring.gsub

	-- Remove non-letter characters. (Assigning to one local keeps only the resulting string.)
	local letters = ugsub(text, "[%A]", "")
	-- Remove all characters of the script in question.
	local leftover = ugsub(letters, "[" .. script:getCharacters() .. "]", "")

	if leftover == "" then
		return
	end

	if type(result) == "string" then
		error(result)
	end
	-- Level 2 attributes the error to the caller of checkScript.
	error('The text "' .. text .. '" contains the letters "' .. leftover .. '" that do not belong to the ' .. script:getDisplayForm() .. '.', 2)
end
-- The module's value is the table of public functions defined above.
return export