Module:script utilities: Difference between revisions

Jump to navigation Jump to search
no edit summary
No edit summary
No edit summary
 
Line 13: Line 13:
local require = require
local require = require
local toNFD = mw.ustring.toNFD
local toNFD = mw.ustring.toNFD
local dump = mw.dumpObject


--[==[
--[==[
Loaders for functions in other modules, which overwrite themselves with the target function when called. This ensures modules are only loaded when needed, retains the speed/convenience of locally-declared pre-loaded functions, and has no overhead after the first call, since the target functions are called directly in any subsequent calls.]==]
Loaders for functions in other modules, which overwrite themselves with the target function when called. This ensures modules are only loaded when needed, retains the speed/convenience of locally-declared pre-loaded functions, and has no overhead after the first call, since the target functions are called directly in any subsequent calls.]==]
local function embedded_language_links(...)
local function embedded_language_links(...)
embedded_language_links = require(links_module).embedded_language_links
embedded_language_links = require(links_module).embedded_language_links
return embedded_language_links(...)
return embedded_language_links(...)
end
end
 
local function format_categories(...)
local function find_best_script_without_lang(...)
format_categories = require(utilities_module).format_categories
find_best_script_without_lang = require(scripts_module).findBestScriptWithoutLang
return format_categories(...)
return find_best_script_without_lang(...)
end
end
 
local function get_script(...)
local function format_categories(...)
get_script = require(scripts_module).getByCode
format_categories = require(utilities_module).format_categories
return get_script(...)
return format_categories(...)
end
end
 
local function language_anchor(...)
local function get_script(...)
language_anchor = require(anchors_module).language_anchor
get_script = require(scripts_module).getByCode
return language_anchor(...)
return get_script(...)
end
end
 
local function munge_text(...)
local function language_anchor(...)
munge_text = require(munge_text_module)
language_anchor = require(anchors_module).language_anchor
return munge_text(...)
return language_anchor(...)
end
end
 
local function process_params(...)
local function munge_text(...)
process_params = require(parameters_module).process
munge_text = require(munge_text_module)
return process_params(...)
return munge_text(...)
end
end
 
local function u(...)
local function process_params(...)
u = require(string_utilities_module).char
process_params = require(parameters_module).process
return u(...)
return process_params(...)
end
end
 
local function ugsub(...)
local function u(...)
ugsub = require(string_utilities_module).gsub
u = require(string_utilities_module).char
return ugsub(...)
return u(...)
end
end
 
local function umatch(...)
local function ugsub(...)
umatch = require(string_utilities_module).match
ugsub = require(string_utilities_module).gsub
return umatch(...)
return ugsub(...)
end
end
 
-- Self-replacing loader for `match` from the module named by `string_utilities_module`.
-- The first call requires that module, rebinds the local `umatch` to its `match` field and
-- forwards the arguments; every later call through `umatch` reaches the target directly.
local function umatch(...)
	local target = require(string_utilities_module).match
	umatch = target
	return target(...)
end


--[==[
--[==[
Line 107: Line 113:
if sc then
if sc then
-- Track uses of sc parameter.
if sc:getCode() == lang:findBestScript(text):getCode() then
insert(cats, lang:getFullName() .. " terms with redundant script codes")
else
insert(cats, lang:getFullName() .. " terms with non-redundant manual script codes")
end
else
else
sc = lang:findBestScript(text)
sc = lang:findBestScript(text)
Line 124: Line 136:
return export.tag_text(text, lang, sc, face, class) .. cats
return export.tag_text(text, lang, sc, face, class) .. cats
end
end
end
-- Rewrites `txt` so that each run of hyphens/characters from get_script("Hani"):getCharacters()
-- that is immediately followed by a parenthesised run of hyphens/characters from
-- get_script("Hang"):getCharacters() becomes HTML ruby markup: the first run is the ruby base and
-- the parenthesised run is the <rt> annotation, with <rp> fallbacks for the parentheses.
-- Returns only the substituted string.
local function Kore_ruby(txt)
	local hani_chars = get_script("Hani"):getCharacters()
	local hang_chars = get_script("Hang"):getCharacters()
	local pattern = "([%-" .. hani_chars .. "]+)%(([%-" .. hang_chars .. "]+)%)"
	-- Assigning to a single local keeps just the first value returned by ugsub.
	local ruby_text = ugsub(txt, pattern, "<ruby>%1<rp>(</rp><rt>%2</rt><rp>)</rp></ruby>")
	return ruby_text
end
end


Line 141: Line 157:
function export.tag_text(text, lang, sc, face, class, id)
function export.tag_text(text, lang, sc, face, class, id)
if not sc then
if not sc then
sc = lang:findBestScript(text)
if lang then
sc = lang:findBestScript(text)
else
sc = find_best_script_without_lang(text)
end
end
end
 
-- Replace space characters with newlines in Mongolian-script text, which is written top-to-bottom.
-- Replace space characters with newlines in Mongolian-script text, which is written top-to-bottom.
if sc:getDirection():match("vertical") and text:find(" ") then
if sc:getDirection():find("vertical", nil, true) and text:find(" ", nil, true) then
text = munge_text(text, function(txt)
text = munge_text(text, function(txt)
-- having extra parentheses makes sure only the first return value gets through
-- having extra parentheses makes sure only the first return value gets through
Line 157: Line 177:
-- language needing such processing.
-- language needing such processing.
-- 20220221: Also convert 漢字(한자) to ruby, instead of needing [[Template:Ruby]].
-- 20220221: Also convert 漢字(한자) to ruby, instead of needing [[Template:Ruby]].
if sc:getCode() == "Kore" and (text:find("-", 1, true) or text:find("[()]")) then
if sc:getCode() == "Kore" and text:match("[%-()g]") then
text = munge_text(text, function(txt)
local title, display = require("Module:links").get_wikilink_parts(text, true)
txt = txt:gsub("%-(%-?)", "%1")
if title ~= nil then -- special case that the text is a single link, do not munge and preserve affix hyphens
txt = ugsub(txt, "([%-".. get_script("Hani"):getCharacters() .. "]+)%(([%-" .. get_script("Hang"):getCharacters() .. "]+)%)", "<ruby>%1<rp>(</rp><rt>%2</rt><rp>)</rp></ruby>")
if lang and lang:getCode() == "okm" then -- Middle Korean code from [[User:Chom.kwoy]]
return txt
-- Comment from [[User:Lunabunn]]:
end)
-- In Middle Korean orthography, syllable formation is phonemic as opposed to morpheme-boundary-based a la
-- modern Korean. As such, for example, if you were to write nam-i, it would be rendered as na.mi so if you
-- then put na-mi to indicate particle boundaries as in modern Korean, the hyphen would be misplaced.
-- Previously, this was alleviated by specialcasing na--mi but [[User:Theknightwho]] made that resolve to -
-- in the Hangul (previously we used to just delete all -s in Hangul processing), so it broke.
-- [[User:Chom.kwoy]] implemented a different solution, which is writing -> instead using however many >s to
-- shift the hyphen by that number of letters in the romanization.
-- By the time we are called, > signs have been converted to &gt; by a call to encode_entities() in
-- make_link() in [[Module:links]] (near the bottom of the function).
display = display:gsub("&gt;", "")
-- 'g' in Middle Korean is a special sign to treat the following ㅇ sign as /G/ instead of null.
display = display:gsub("g", "")
end
display = display:gsub("(.)%-(%-?)(.)", "%1%2%3")
display = Kore_ruby(display)
text = "[[" .. title .. "|" .. display .. "]]"
else
text = munge_text(text, function(txt)
if lang and lang:getCode() == "okm" then
txt = txt:gsub("&gt;", "")
txt = txt:gsub("g", "")
end
if txt == text then -- special case for the entire text being plain
txt = txt:gsub("(.)%-(%-?)(.)", "%1%2%3")
else
txt = txt:gsub("%-(%-?)", "%1")
end
txt = Kore_ruby(txt)
return txt
end)
end
end
end
 
if sc:getCode() == "Image" then
if sc:getCode() == "Image" then
face = nil
face = nil
end
end


local function class_attr(classes)
local data = (m_data or get_data()).faces[face or "plain"]
-- if the script code is hyphenated (i.e. language code-script code, add the last component as a class as well)
if data == nil then
-- e.g. ota-Arab adds both Arab and ota-Arab as classes
error('Invalid script face "' .. face .. '".')
if sc:getCode():find("-", 1, true) then
insert(classes, 1, (ugsub(sc:getCode(), ".+%-", "")))
insert(classes, 2, sc:getCode())
else
insert(classes, 1, sc:getCode())
end
if class and class ~= '' then
insert(classes, class)
end
return 'class="' .. concat(classes, ' ') .. '"'
end
end
 
local function tag_attr(...)
local tag = data.tag
local output = {}
local opening_tag = {tag}
if id then
 
insert(output, 'id="' .. language_anchor(lang, id) .. '"')
if lang and id then
end
insert(opening_tag, 'id="' .. language_anchor(lang, id) .. '"')
insert(output, class_attr({...}) )
if lang then
-- FIXME: Is it OK to insert the etymology-only lang code and have it fall back to the first part of the
-- lang code (by chopping off the '-...' part)? It seems the :lang() selector does this; not sure about
-- [lang=...] attributes.
insert(output, 'lang="' .. lang:getFullCode() .. '"')
end
return concat(output, " ")
end
end
 
local data = (m_data or get_data()).faces[face or "plain"]
local classes = {data.class}
-- if the script code is hyphenated (i.e. language code-script code, add the last component as a class as well)
-- Add a script wrapper
-- e.g. ota-Arab adds both Arab and ota-Arab as classes
if data then
if sc:getCode():find("-", nil, true) then
return ( data.prefix or "" ) .. '<' .. data.tag .. ' ' .. tag_attr(data.class) .. '>' .. text .. '</' .. data.tag .. '>'
insert(classes, 1, (ugsub(sc:getCode(), ".+%-", "")))
insert(classes, 2, sc:getCode())
else
else
error('Invalid script face "' .. face .. '".')
insert(classes, 1, sc:getCode())
end
if class and class ~= '' then
insert(classes, class)
end
insert(opening_tag, 'class="' .. concat(classes, ' ') .. '"')
 
-- FIXME: Is it OK to insert the etymology-only lang code and have it fall back to the first part of the
-- lang code (by chopping off the '-...' part)? It seems the :lang() selector does this; not sure about
-- [lang=...] attributes.
if lang then
insert(opening_tag, 'lang="' .. lang:getFullCode() .. '"')
end
end
-- Add a script wrapper
return (data.prefix or "") .. "<" .. concat(opening_tag, " ") .. ">" .. text .. "</" .. tag .. ">"
end
end


Line 229: Line 274:
or error("Second argument to tag_translit should be a language code or language object.")
or error("Second argument to tag_translit should be a language code or language object.")
end
end
 
local data = (m_data or get_data()).translit[kind or "default"]
local data = (m_data or get_data()).translit[kind or "default"]
 
local opening_tag = {}
local tag = data.tag
local opening_tag = {tag}
insert(opening_tag, data.tag)
 
local class = data.class
if lang == "ja" then
if lang == "ja" then
insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. (is_manual and "manual-tr " or "") .. 'tr"')
insert(opening_tag, 'class="' .. (class and (class .. " ") or "") .. (is_manual and "manual-tr " or "") .. 'tr"')
else
else
insert(opening_tag, 'lang="' .. lang .. '-Latn"')
insert(opening_tag, 'lang="' .. lang .. '-Latn"')
insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. (is_manual and "manual-tr " or "") .. 'tr Latn"')
insert(opening_tag, 'class="' .. (class and (class .. " ") or "") .. (is_manual and "manual-tr " or "") .. 'tr Latn"')
end
 
local dir = data.dir
if dir then
insert(opening_tag, 'dir="' .. dir .. '"')
end
end
 
if data.dir then
if attributes then
insert(opening_tag, 'dir="' .. data.dir .. '"')
insert(opening_tag, attributes)
end
end
 
insert(opening_tag, attributes)
return "<" .. concat(opening_tag, " ") .. ">" .. translit .. "</" .. tag .. ">"
return "<" .. concat(opening_tag, " ") .. ">" .. translit .. "</" .. data.tag .. ">"
end
end


Line 257: Line 306:
or error("Second argument to tag_transcription should be a language code or language object.")
or error("Second argument to tag_transcription should be a language code or language object.")
end
end
 
local data = (m_data or get_data()).transcription[kind or "default"]
local data = (m_data or get_data()).transcription[kind or "default"]
 
local opening_tag = {}
local tag = data.tag
local opening_tag = {tag}
insert(opening_tag, data.tag)
 
local class = data.class
if lang == "ja" then
if lang == "ja" then
insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. 'ts"')
insert(opening_tag, 'class="' .. (class and (class .. " ") or "") .. 'ts"')
else
else
insert(opening_tag, 'lang="' .. lang .. '-Latn"')
insert(opening_tag, 'lang="' .. lang .. '-Latn"')
insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. 'ts Latn"')
insert(opening_tag, 'class="' .. (class and (class .. " ") or "") .. 'ts Latn"')
end
 
local dir = data.dir
if dir then
insert(opening_tag, 'dir="' .. dir .. '"')
end
 
if attributes then
insert(opening_tag, attributes)
end
 
return "<" .. concat(opening_tag, " ") .. ">" .. transcription .. "</" .. tag .. ">"
end
 
--[==[Tags {def} as a definition.
The <code>def</code> parameter must be one of the following:
; {{code|lua|"gloss"}}
: The text is wrapped in {{code|html|2=<span class="mention-gloss">...</span>}}.
; {{code|lua|"non-gloss"}}
: The text is wrapped in {{code|html|2=<span class="use-with-mention">...</span>}}.
The optional <code>attributes</code> parameter is used to specify additional HTML attributes for the tag.]==]
function export.tag_definition(def, kind, attributes)
local data = (m_data or get_data()).definition[kind]
if data == nil then
error("Second argument to tag_definition should specify the kind of definition from the list in [[Module:script utilities/data]].")
end
 
local tag = data.tag
local opening_tag = {tag}
 
local class = data.class
if class then
insert(opening_tag, 'class="' .. class .. '"')
end
end
 
if data.dir then
if attributes then
insert(opening_tag, 'dir="' .. data.dir .. '"')
insert(opening_tag, attributes)
end
end
 
insert(opening_tag, attributes)
return "<" .. concat(opening_tag, " ") .. ">" .. def .. "</" .. tag .. ">"
return "<" .. concat(opening_tag, " ") .. ">" .. transcription .. "</" .. data.tag .. ">"
end
end


Line 338: Line 419:


--[==[This is used by {{temp|rfscript}}. See there for more information.]==]
--[==[This is used by {{temp|rfscript}}. See there for more information.]==]
do
function export.template_rfscript(frame)
local function get_args(frame)
local boolean = {type = "boolean"}
local boolean = {type = "boolean"}
local args = process_params(frame:getParent().args, {
return process_params(frame:getParent().args, {
[1] = {required = true, type = "language", default = "und"},
[1] = {required = true, type = "language", default = "und"},
["sc"] = {type = "script"},
["sc"] = {type = "script"},
["usex"] = boolean,
["usex"] = boolean,
["quote"] = boolean,
["quote"] = boolean,
["nocat"] = boolean,
["nocat"] = boolean,
["sort"] = true,
["sort"] = true,
})
})
end
local ret = export.request_script(args[1], args["sc"], args.quote and "quote" or args.usex, args.nocat, args.sort)
function export.template_rfscript(frame)
if ret == "" then
local args = get_args(frame)
error("This language is written in the Latin alphabet. It does not need a native script.")
local ret = export.request_script(args[1], args["sc"], args.quote and "quote" or args.usex, args.nocat, args.sort)
if ret == "" then
error("This language is written in the Latin alphabet. It does not need a native script.")
else
return ret
end
end
end
return ret
end
end


Navigation menu