47,697
edits
![]() | We're back! Sorry, bad combo of sickness, funeral and a month-long trip abroad. The site is back now. ![]() |
No edit summary |
No edit summary |
||
Line 1: | Line 1: | ||
local export = {} | local export = {} | ||
local anchors_module = "Module:anchors" | |||
local links_module = "Module:links" | |||
local munge_text_module = "Module:munge text" | |||
local parameters_module = "Module:parameters" | |||
local scripts_module = "Module:scripts" | |||
local string_utilities_module = "Module:string utilities" | |||
local utilities_module = "Module:utilities" | |||
local concat = table.concat | |||
local insert = table.insert | |||
local require = require | |||
local toNFD = mw.ustring.toNFD | |||
--[==[ | |||
Loaders for functions in other modules, which overwrite themselves with the target function when called. This ensures modules are only loaded when needed, retains the speed/convenience of locally-declared pre-loaded functions, and has no overhead after the first call, since the target functions are called directly in any subsequent calls.]==] | |||
local function embedded_language_links(...) | |||
embedded_language_links = require(links_module).embedded_language_links | |||
return embedded_language_links(...) | |||
end | |||
local function format_categories(...) | |||
format_categories = require(utilities_module).format_categories | |||
return format_categories(...) | |||
end | |||
local function get_script(...) | |||
get_script = require(scripts_module).getByCode | |||
return get_script(...) | |||
end | |||
local function language_anchor(...) | |||
language_anchor = require(anchors_module).language_anchor | |||
return language_anchor(...) | |||
end | |||
local function munge_text(...) | |||
munge_text = require(munge_text_module) | |||
return munge_text(...) | |||
end | |||
local function process_params(...) | |||
process_params = require(parameters_module).process | |||
return process_params(...) | |||
end | |||
local function u(...) | |||
u = require(string_utilities_module).char | |||
return u(...) | |||
end | |||
local function ugsub(...) | |||
ugsub = require(string_utilities_module).gsub | |||
return ugsub(...) | |||
end | |||
local function umatch(...) | |||
umatch = require(string_utilities_module).match | |||
return umatch(...) | |||
end | |||
--[==[ | |||
Loaders for objects, which load data (or some other object) into some variable, which can then be accessed as "foo or get_foo()", where the function get_foo sets the object to "foo" and then returns it. This ensures they are only loaded when needed, and avoids the need to check for the existence of the object each time, since once "foo" has been set, "get_foo" will not be called again.]==] | |||
local m_data | |||
local function get_data() | |||
m_data, get_data = mw.loadData("Module:script utilities/data"), nil | |||
return m_data | |||
end | |||
--[=[ | --[=[ | ||
Line 5: | Line 73: | ||
[[Module:script utilities/data]] | [[Module:script utilities/data]] | ||
[[Module:scripts]] | [[Module:scripts]] | ||
[[Module: | [[Module:anchors]] (only when IDs present) | ||
[[Module:string utilities]] (only when hyphens in Korean text or spaces in vertical text) | [[Module:string utilities]] (only when hyphens in Korean text or spaces in vertical text) | ||
[[Module:languages]] | [[Module:languages]] | ||
[[Module:parameters]] | [[Module:parameters]] | ||
[[Module:utilities | [[Module:utilities]] | ||
]=] | ]=] | ||
function export.is_Latin_script(sc) | function export.is_Latin_script(sc) | ||
-- Latn, Latf, | -- Latn, Latf, Latg, pjt-Latn | ||
return sc:getCode():find("Lat") and true or false | return sc:getCode():find("Lat") and true or false | ||
end | end | ||
-- | --[==[{{temp|#invoke:script utilities|lang_t}} | ||
function | This is used by {{temp|lang}} to wrap portions of text in a language tag. See there for more information.]==] | ||
do | |||
local function get_args(frame) | |||
return process_params(frame:getParent().args, { | |||
[1] = {required = true, type = "language", default = "und"}, | |||
[2] = {required = true, allow_empty = true, default = ""}, | |||
["sc"] = {type = "script"}, | |||
["face"] = true, | |||
["class"] = true, | |||
}) | |||
end | |||
function export.lang_t(frame) | |||
local args = get_args(frame) | |||
local lang = args[1] | |||
local sc = args["sc"] | |||
local text = args[2] | |||
local cats = {} | |||
if sc then | |||
else | |||
sc = lang:findBestScript(text) | |||
end | |||
text = embedded_language_links{ | |||
term = text, | |||
lang = lang, | |||
sc = sc | |||
} | |||
cats = #cats > 0 and format_categories(cats, lang, "-", nil, nil, sc) or "" | |||
local face = args["face"] | |||
local class = args["class"] | |||
return export.tag_text(text, lang, sc, face, class) .. cats | |||
end | end | ||
end | end | ||
-- | --[==[Wraps the given text in HTML tags with appropriate CSS classes (see [[WT:CSS]]) for the [[Module:languages#Language objects|language]] and script. This is required for all non-English text on Wiktionary. | ||
The actual tags and CSS classes that are added are determined by the <code>face</code> parameter. It can be one of the following: | |||
; {{code|lua|"term"}} | |||
: The text is wrapped in {{code|html|2=<i class="(sc) mention" lang="(lang)">...</i>}}. | |||
; {{code|lua|"head"}} | |||
: The text is wrapped in {{code|html|2=<strong class="(sc) headword" lang="(lang)">...</strong>}}. | |||
; {{code|lua|"hypothetical"}} | |||
: The text is wrapped in {{code|html|2=<span class="hypothetical-star">*</span><i class="(sc) hypothetical" lang="(lang)">...</i>}}. | |||
; {{code|lua|"bold"}} | |||
: The text is wrapped in {{code|html|2=<b class="(sc)" lang="(lang)">...</b>}}. | |||
; {{code|lua|nil}} | |||
: The text is wrapped in {{code|html|2=<span class="(sc)" lang="(lang)">...</span>}}. | |||
The optional <code>class</code> parameter can be used to specify an additional CSS class to be added to the tag.]==] | |||
function export.tag_text(text, lang, sc, face, class, id) | function export.tag_text(text, lang, sc, face, class, id) | ||
if not sc then | if not sc then | ||
sc = | sc = lang:findBestScript(text) | ||
end | end | ||
-- Replace space characters with newlines in Mongolian-script text, which is written top-to-bottom. | -- Replace space characters with newlines in Mongolian-script text, which is written top-to-bottom. | ||
if sc:getDirection():match("vertical") and text:find(" ") then | |||
text = | text = munge_text(text, function(txt) | ||
-- having extra parentheses makes sure only the first return value gets through | -- having extra parentheses makes sure only the first return value gets through | ||
return (txt:gsub(" +", "<br>")) | return (txt:gsub(" +", "<br>")) | ||
end) | end) | ||
end | end | ||
-- Hack Korean script text to remove hyphens. | -- Hack Korean script text to remove hyphens. | ||
-- | -- FIXME: This should be handled in a more general fashion, but needs to | ||
-- be efficient by not doing anything if no hyphens are present, and currently this is the only | -- be efficient by not doing anything if no hyphens are present, and currently this is the only | ||
-- language needing such processing. | -- language needing such processing. | ||
-- 20220221: Also convert 漢字(한자) to ruby, instead of needing [[Template:Ruby]]. | -- 20220221: Also convert 漢字(한자) to ruby, instead of needing [[Template:Ruby]]. | ||
if sc:getCode() == "Kore" and (text:find(" | if sc:getCode() == "Kore" and (text:find("-", 1, true) or text:find("[()]")) then | ||
text = | text = munge_text(text, function(txt) | ||
txt = txt:gsub("%-(%-?)", "%1") | |||
txt = ugsub(txt, "([%-".. get_script("Hani"):getCharacters() .. "]+)%(([%-" .. get_script("Hang"):getCharacters() .. "]+)%)", "<ruby>%1<rp>(</rp><rt>%2</rt><rp>)</rp></ruby>") | |||
txt = txt:gsub("%-", "") | |||
txt = | |||
return txt | return txt | ||
end) | end) | ||
end | end | ||
if sc:getCode() == " | if sc:getCode() == "Image" then | ||
face = nil | face = nil | ||
end | end | ||
local function class_attr(classes) | local function class_attr(classes) | ||
-- If the script code is hyphenated (i.e. language code-script code), add the last component as a class as well, | |||
-- e.g. ota-Arab adds both Arab and ota-Arab as classes | |||
if sc:getCode():find("-", 1, true) then | |||
insert(classes, 1, (ugsub(sc:getCode(), ".+%-", ""))) | |||
insert(classes, 2, sc:getCode()) | |||
else | |||
insert(classes, 1, sc:getCode()) | |||
end | |||
if class and class ~= '' then | if class and class ~= '' then | ||
insert(classes, class) | |||
end | end | ||
return 'class="' . | return 'class="' .. concat(classes, ' ') .. '"' | ||
end | end | ||
Line 95: | Line 187: | ||
local output = {} | local output = {} | ||
if id then | if id then | ||
insert(output, 'id="' .. language_anchor(lang, id) .. '"') | |||
end | end | ||
insert(output, class_attr({...}) ) | |||
if lang then | if lang then | ||
-- FIXME: Is it OK to insert the etymology-only lang code and have it fall back to the first part of the | |||
-- lang code (by chopping off the '-...' part)? It seems the :lang() selector does this; not sure about | |||
-- [lang=...] attributes. | |||
insert(output, 'lang="' .. lang:getFullCode() .. '"') | |||
end | end | ||
return | return concat(output, " ") | ||
end | end | ||
local data = | local data = (m_data or get_data()).faces[face or "plain"] | ||
-- Add a script wrapper | -- Add a script wrapper | ||
if data then | if data then | ||
return ( data.prefix or "" ) .. '<' .. data.tag .. ' ' .. tag_attr(data.class) .. '>' .. text .. '</' .. data.tag .. '>' | return ( data.prefix or "" ) .. '<' .. data.tag .. ' ' .. tag_attr(data.class) .. '>' .. text .. '</' .. data.tag .. '>' | ||
else | else | ||
error('Invalid script face "' .. face .. '".') | error('Invalid script face "' .. face .. '".') | ||
Line 122: | Line 212: | ||
end | end | ||
--[==[Tags the transliteration for given text {translit} and language {lang}. It will add the language, script subtag (as defined in [https://www.rfc-editor.org/rfc/bcp/bcp47.txt BCP 47 2.2.3]) and [https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes/dir dir] (directional) attributes as needed. | |||
The optional <code>kind</code> parameter can be one of the following: | |||
; {{code|lua|"term"}} | |||
: tag transliteration for {{temp|mention}} | |||
; {{code|lua|"usex"}} | |||
: tag transliteration for {{temp|usex}} | |||
; {{code|lua|"head"}} | |||
: tag transliteration for {{temp|head}} | |||
; {{code|lua|"default"}} | |||
: default | |||
The optional <code>attributes</code> parameter is used to specify additional HTML attributes for the tag.]==] | |||
function export.tag_translit(translit, lang, kind, attributes, is_manual) | function export.tag_translit(translit, lang, kind, attributes, is_manual) | ||
if type(lang) == "table" then | if type(lang) == "table" then | ||
lang = lang. | -- FIXME: Do better support for etym languages; see https://www.rfc-editor.org/rfc/bcp/bcp47.txt | ||
lang = lang.getFullCode and lang:getFullCode() | |||
or error("Second argument to tag_translit should be a language code or language object.") | or error("Second argument to tag_translit should be a language code or language object.") | ||
end | end | ||
local data = | local data = (m_data or get_data()).translit[kind or "default"] | ||
local opening_tag = {} | local opening_tag = {} | ||
insert(opening_tag, data.tag) | |||
if lang == "ja" then | if lang == "ja" then | ||
insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. (is_manual and "manual-tr " or "") .. 'tr"') | |||
else | else | ||
insert(opening_tag, 'lang="' .. lang .. '-Latn"') | |||
insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. (is_manual and "manual-tr " or "") .. 'tr Latn"') | |||
end | end | ||
if data.dir then | if data.dir then | ||
insert(opening_tag, 'dir="' .. data.dir .. '"') | |||
end | end | ||
insert(opening_tag, attributes) | |||
return "<" . | return "<" .. concat(opening_tag, " ") .. ">" .. translit .. "</" .. data.tag .. ">" | ||
end | end | ||
function export.tag_transcription(transcription, lang, kind, attributes) | function export.tag_transcription(transcription, lang, kind, attributes) | ||
if type(lang) == "table" then | if type(lang) == "table" then | ||
lang = lang. | -- FIXME: Do better support for etym languages; see https://www.rfc-editor.org/rfc/bcp/bcp47.txt | ||
or error(" | lang = lang.getFullCode and lang:getFullCode() | ||
or error("Second argument to tag_transcription should be a language code or language object.") | |||
end | end | ||
local data = | local data = (m_data or get_data()).transcription[kind or "default"] | ||
local opening_tag = {} | local opening_tag = {} | ||
insert(opening_tag, data.tag) | |||
if lang == "ja" then | if lang == "ja" then | ||
insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. 'ts"') | |||
else | else | ||
insert(opening_tag, 'lang="' .. lang .. '-Latn"') | |||
insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. 'ts Latn"') | |||
end | end | ||
if data.dir then | if data.dir then | ||
insert(opening_tag, 'dir="' .. data.dir .. '"') | |||
end | end | ||
insert(opening_tag, attributes) | |||
return "<" . | return "<" .. concat(opening_tag, " ") .. ">" .. transcription .. "</" .. data.tag .. ">" | ||
end | end | ||
-- | --[==[Generates a request to provide a term in its native script, if it is missing. This is used by the {{temp|rfscript}} template as well as by the functions in [[Module:links]]. | ||
The function will add entries to one of the subcategories of [[:Category:Requests for native script by language]], and do several checks on the given language and script. In particular: | |||
* If the script was given, a subcategory named "Requests for (script) script" is added, but only if the language has more than one script. Otherwise, the main "Requests for native script" category is used. | |||
* Nothing is added at all if the language has no scripts other than Latin and its varieties.]==] | |||
function export.request_script(lang, sc, usex, nocat, sort_key) | function export.request_script(lang, sc, usex, nocat, sort_key) | ||
local scripts = lang.getScripts and lang:getScripts() or error('The language "' .. lang:getCode() .. '" does not have the method getScripts. It may be unwritten.') | local scripts = lang.getScripts and lang:getScripts() or error('The language "' .. lang:getCode() .. '" does not have the method getScripts. It may be unwritten.') | ||
Line 190: | Line 296: | ||
-- Is the script known? | -- Is the script known? | ||
if sc then | if sc and sc:getCode() ~= "None" then | ||
-- If the script is Latin, return nothing. | -- If the script is Latin, return nothing. | ||
if export.is_Latin_script(sc) then | if export.is_Latin_script(sc) then | ||
Line 196: | Line 302: | ||
end | end | ||
if sc:getCode() ~= scripts[1]:getCode() then | if (not scripts[1]) or sc:getCode() ~= scripts[1]:getCode() then | ||
disp_script = sc:getCanonicalName() | disp_script = sc:getCanonicalName() | ||
end | end | ||
-- The category needs to be specific to script only if there is chance | -- The category needs to be specific to script only if there is a chance of ambiguity. This occurs when the language has multiple scripts (or with codes such as "und"). | ||
if (not scripts[1]) or scripts[2] then | |||
if | |||
cat_script = sc:getCanonicalName() | cat_script = sc:getCanonicalName() | ||
end | end | ||
Line 211: | Line 315: | ||
local has_nonlatin = false | local has_nonlatin = false | ||
for | for _, val in ipairs(scripts) do | ||
if not export.is_Latin_script(val) then | if not export.is_Latin_script(val) then | ||
has_nonlatin = true | has_nonlatin = true | ||
Line 223: | Line 327: | ||
end | end | ||
end | end | ||
-- Etymology languages have their own categories, whose parents are the regular language. | |||
return "<small>[" .. disp_script .. " needed]</small>" .. (nocat and "" or | |||
format_categories("Requests for " .. cat_script .. " script " .. | |||
(usex and "in" or "for") .. " " .. lang:getCanonicalName() .. " " .. | |||
(usex == "quote" and "quotations" or usex and "usage examples" or "terms"), | |||
lang, sort_key | |||
) | |||
) | |||
end | end | ||
function | --[==[This is used by {{temp|rfscript}}. See there for more information.]==] | ||
do | |||
[1] = { required = true, default = "und" }, | local function get_args(frame) | ||
local boolean = {type = "boolean"} | |||
return process_params(frame:getParent().args, { | |||
[1] = {required = true, type = "language", default = "und"}, | |||
["sc"] = {type = "script"}, | |||
["usex"] = boolean, | |||
["quote"] = boolean, | |||
["nocat"] = boolean, | |||
["sort"] = true, | |||
}) | |||
end | |||
local args = | function export.template_rfscript(frame) | ||
local args = get_args(frame) | |||
local ret = export.request_script(args[1], args["sc"], args.quote and "quote" or args.usex, args.nocat, args.sort) | |||
if ret == "" then | |||
error("This language is written in the Latin alphabet. It does not need a native script.") | |||
else | |||
return ret | |||
end | |||
end | end | ||
end | end | ||
function export.checkScript(text, scriptCode, result) | function export.checkScript(text, scriptCode, result) | ||
local scriptObject = | local scriptObject = get_script(scriptCode) | ||
if not scriptObject then | if not scriptObject then | ||
Line 269: | Line 374: | ||
-- Remove non-letter characters. | -- Remove non-letter characters. | ||
text = | text = ugsub(text, "%A+", "") | ||
-- Remove all characters of the script in question. | -- Remove all characters of the script in question. | ||
text = | text = ugsub(text, "[" .. scriptObject:getCharacters() .. "]+", "") | ||
if text ~= "" then | if text ~= "" then |