Module:script utilities: Difference between revisions

From Linguifex
Jump to navigation Jump to search
No edit summary
No edit summary
 
(9 intermediate revisions by the same user not shown)
Line 1: Line 1:
local export = {}
local export = {}
local anchors_module = "Module:anchors"
local links_module = "Module:links"
local munge_text_module = "Module:munge text"
local parameters_module = "Module:parameters"
local scripts_module = "Module:scripts"
local string_utilities_module = "Module:string utilities"
local utilities_module = "Module:utilities"
local concat = table.concat
local insert = table.insert
local require = require
local toNFD = mw.ustring.toNFD
local dump = mw.dumpObject
--[==[
Loaders for functions in other modules, which overwrite themselves with the target function when called. This ensures modules are only loaded when needed, retains the speed/convenience of locally-declared pre-loaded functions, and has no overhead after the first call, since the target functions are called directly in any subsequent calls.]==]
local function embedded_language_links(...)
embedded_language_links = require(links_module).embedded_language_links
return embedded_language_links(...)
end
local function find_best_script_without_lang(...)
find_best_script_without_lang = require(scripts_module).findBestScriptWithoutLang
return find_best_script_without_lang(...)
end
local function format_categories(...)
format_categories = require(utilities_module).format_categories
return format_categories(...)
end
local function get_script(...)
get_script = require(scripts_module).getByCode
return get_script(...)
end
local function language_anchor(...)
language_anchor = require(anchors_module).language_anchor
return language_anchor(...)
end
local function munge_text(...)
munge_text = require(munge_text_module)
return munge_text(...)
end
local function process_params(...)
process_params = require(parameters_module).process
return process_params(...)
end
local function u(...)
u = require(string_utilities_module).char
return u(...)
end
local function ugsub(...)
ugsub = require(string_utilities_module).gsub
return ugsub(...)
end
local function umatch(...)
umatch = require(string_utilities_module).match
return umatch(...)
end
--[==[
Loaders for objects, which load data (or some other object) into some variable, which can then be accessed as "foo or get_foo()", where the function get_foo sets the object to "foo" and then returns it. This ensures they are only loaded when needed, and avoids the need to check for the existence of the object each time, since once "foo" has been set, "get_foo" will not be called again.]==]
local m_data
local function get_data()
m_data, get_data = mw.loadData("Module:script utilities/data"), nil
return m_data
end


--[=[
--[=[
Line 5: Line 79:
[[Module:script utilities/data]]
[[Module:script utilities/data]]
[[Module:scripts]]
[[Module:scripts]]
[[Module:senseid]] (only when id's present)
[[Module:anchors]] (only when IDs present)
[[Module:string utilities]] (only when hyphens in Korean text or spaces in vertical text)
[[Module:string utilities]] (only when hyphens in Korean text or spaces in vertical text)
[[Module:languages]]
[[Module:languages]]
[[Module:parameters]]
[[Module:parameters]]
[[Module:utilities/format_categories]]
[[Module:utilities]]
]=]
]=]


function export.is_Latin_script(sc)
function export.is_Latin_script(sc)
-- Latn, Latf, Latinx, pjt-Latn
-- Latn, Latf, Latg, pjt-Latn
return sc:getCode():find("Lat") and true or false
return sc:getCode():find("Lat") and true or false
end
end


-- Used by [[Template:lang]]
--[==[{{temp|#invoke:script utilities|lang_t}}
function export.lang_t(frame)
This is used by {{temp|lang}} to wrap portions of text in a language tag. See there for more information.]==]
params = {
do
[1] = {},
local function get_args(frame)
[2] = { allow_empty = true, default = "" },
return process_params(frame:getParent().args, {
["sc"] = {},
[1] = {required = true, type = "language", default = "und"},
["face"] = {},
[2] = {required = true, allow_empty = true, default = ""},
["class"] = {},
["sc"] = {type = "script"},
}
["face"] = true,
["class"] = true,
})
end
local args = require("Module:parameters").process(frame:getParent().args, params)
function export.lang_t(frame)
local NAMESPACE = mw.title.getCurrentTitle().nsText
local args = get_args(frame)
local lang = args[1] or (NAMESPACE == "Template" and "und") or error("Language code has not been specified. Please pass parameter 1 to the template.")
local lang = args[1]
lang = require("Module:languages").getByCode(lang) or require("Module:languages").err(lang, 1)
local sc = args["sc"]
local text = args[2]
local text = args[2]
local cats = {}
local sc = args["sc"]
if sc then
sc = (sc and (require("Module:scripts").getByCode(sc) or error("The script code \"" .. sc .. "\" is not valid.")) or nil)
-- Track uses of sc parameter.
if sc:getCode() == lang:findBestScript(text):getCode() then
local face = args["face"]
insert(cats, lang:getFullName() .. " terms with redundant script codes")
else
if face == "term_i" and sc == "Latn" then
insert(cats, lang:getFullName() .. " terms with non-redundant manual script codes")
face = nil
end
else
sc = lang:findBestScript(text)
end
text = embedded_language_links{
term = text,
lang = lang,
sc = sc
}
cats = #cats > 0 and format_categories(cats, lang, "-", nil, nil, sc) or ""
local face = args["face"]
local class = args["class"]
return export.tag_text(text, lang, sc, face, class) .. cats
end
end
return export.tag_text(text, lang, sc, face, class)
end
end


-- Wrap text in the appropriate HTML tags with language and script class.
local function Kore_ruby(txt)
return (ugsub(txt, "([%-".. get_script("Hani"):getCharacters() .. "]+)%(([%-" .. get_script("Hang"):getCharacters() .. "]+)%)", "<ruby>%1<rp>(</rp><rt>%2</rt><rp>)</rp></ruby>"))
end
 
--[==[Wraps the given text in HTML tags with appropriate CSS classes (see [[WT:CSS]]) for the [[Module:languages#Language objects|language]] and script. This is required for all non-English text on Wiktionary.
The actual tags and CSS classes that are added are determined by the <code>face</code> parameter. It can be one of the following:
; {{code|lua|"term"}}
: The text is wrapped in {{code|html|2=<i class="(sc) mention" lang="(lang)">...</i>}}.
; {{code|lua|"head"}}
: The text is wrapped in {{code|html|2=<strong class="(sc) headword" lang="(lang)">...</strong>}}.
; {{code|lua|"hypothetical"}}
: The text is wrapped in {{code|html|2=<span class="hypothetical-star">*</span><i class="(sc) hypothetical" lang="(lang)">...</i>}}.
; {{code|lua|"bold"}}
: The text is wrapped in {{code|html|2=<b class="(sc)" lang="(lang)">...</b>}}.
; {{code|lua|nil}}
: The text is wrapped in {{code|html|2=<span class="(sc)" lang="(lang)">...</span>}}.
The optional <code>class</code> parameter can be used to specify an additional CSS class to be added to the tag.]==]
function export.tag_text(text, lang, sc, face, class, id)
function export.tag_text(text, lang, sc, face, class, id)
if not sc then
if not sc then
sc = require("Module:scripts").findBestScript(text, lang)
if lang then
sc = lang:findBestScript(text)
else
sc = find_best_script_without_lang(text)
end
end
end
 
-- Replace space characters with newlines in Mongolian-script text, which is written top-to-bottom.
-- Replace space characters with newlines in Mongolian-script text, which is written top-to-bottom.
if sc:getDirection() == "down" and text:find(" ") then
if sc:getDirection():find("vertical", nil, true) and text:find(" ", nil, true) then
text = require("Module:munge_text")(text, function(txt)
text = munge_text(text, function(txt)
-- having extra parentheses makes sure only the first return value gets through
-- having extra parentheses makes sure only the first return value gets through
return (txt:gsub(" +", "<br>"))
return (txt:gsub(" +", "<br>"))
Line 62: Line 173:


-- Hack Korean script text to remove hyphens.
-- Hack Korean script text to remove hyphens.
-- XXX: This should be handled in a more general fashion, but needs to
-- FIXME: This should be handled in a more general fashion, but needs to
-- be efficient by not doing anything if no hyphens are present, and currently this is the only
-- be efficient by not doing anything if no hyphens are present, and currently this is the only
-- language needing such processing.
-- language needing such processing.
-- 20220221: Also convert 漢字(한자) to ruby, instead of needing [[Template:Ruby]].
-- 20220221: Also convert 漢字(한자) to ruby, instead of needing [[Template:Ruby]].
if sc:getCode() == "Kore" and (text:find("%-") or text:find("[()]")) then
if sc:getCode() == "Kore" and text:match("[%-()g]") then
text = require("Module:munge_text")(text, function(txt)
local title, display = require("Module:links").get_wikilink_parts(text, true)
-- Hani/Hang regex is a reasonable subset of Hani/Hang from [[Module:scripts/data]],
if title ~= nil then -- special case that the text is a single link, do not munge and preserve affix hyphens
-- last checked on 20220221
if lang and lang:getCode() == "okm" then -- Middle Korean code from [[User:Chom.kwoy]]
txt = txt:gsub("%-", "")
-- Comment from [[User:Lunabunn]]:
txt = mw.ustring.gsub(txt, "([一-鿿㐀-䶿𠀀-𮯯𰀀-𱍏]+)%(([가-힣ᄀ-ᇿꥠ-ꥼힰ-ퟻ]+)%)", "<ruby>%1<rp>(</rp><rt>%2</rt><rp>)</rp></ruby>")
-- In Middle Korean orthography, syllable formation is phonemic as opposed to morpheme-boundary-based a la
return txt
-- modern Korean. As such, for example, if you were to write nam-i, it would be rendered as na.mi so if you
end)
-- then put na-mi to indicate particle boundaries as in modern Korean, the hyphen would be misplaced.
-- Previously, this was alleviated by specialcasing na--mi but [[User:Theknightwho]] made that resolve to -
-- in the Hangul (previously we used to just delete all -s in Hangul processing), so it broke.
-- [[User:Chom.kwoy]] implemented a different solution, which is writing -> instead using however many >s to
-- shift the hyphen by that number of letters in the romanization.
-- By the time we are called, > signs have been converted to &gt; by a call to encode_entities() in
-- make_link() in [[Module:links]] (near the bottom of the function).
display = display:gsub("&gt;", "")
-- 'g' in Middle Korean is a special sign to treat the following ㅇ sign as /G/ instead of null.
display = display:gsub("g", "")
end
display = display:gsub("(.)%-(%-?)(.)", "%1%2%3")
display = Kore_ruby(display)
text = "[[" .. title .. "|" .. display .. "]]"
else
text = munge_text(text, function(txt)
if lang and lang:getCode() == "okm" then
txt = txt:gsub("&gt;", "")
txt = txt:gsub("g", "")
end
if txt == text then -- special case for the entire text being plain
txt = txt:gsub("(.)%-(%-?)(.)", "%1%2%3")
else
txt = txt:gsub("%-(%-?)", "%1")
end
txt = Kore_ruby(txt)
return txt
end)
end
end
end
 
if sc:getCode() == "Imag" then
if sc:getCode() == "Image" then
face = nil
face = nil
end
end


local function class_attr(classes)
local data = (m_data or get_data()).faces[face or "plain"]
table.insert(classes, 1, sc:getCode())
if data == nil then
if class and class ~= '' then
error('Invalid script face "' .. face .. '".')
table.insert(classes, class)
end
end
 
return 'class="' .. table.concat(classes, ' ') .. '"'
local tag = data.tag
local opening_tag = {tag}
 
if lang and id then
insert(opening_tag, 'id="' .. language_anchor(lang, id) .. '"')
end
 
local classes = {data.class}
-- if the script code is hyphenated (i.e. language code-script code, add the last component as a class as well)
-- e.g. ota-Arab adds both Arab and ota-Arab as classes
if sc:getCode():find("-", nil, true) then
insert(classes, 1, (ugsub(sc:getCode(), ".+%-", "")))
insert(classes, 2, sc:getCode())
else
insert(classes, 1, sc:getCode())
end
end
if class and class ~= '' then
local function tag_attr(...)
insert(classes, class)
local output = {}
if id then
table.insert(output, 'id="' .. require("Module:senseid").anchor(lang, id) .. '"')
end
table.insert(output, class_attr({...}) )
if lang then
table.insert(output, 'lang="' .. lang:getCode() .. '"')
end
return table.concat(output, " ")
end
end
insert(opening_tag, 'class="' .. concat(classes, ' ') .. '"')
local data = mw.loadData("Module:script utilities/data").faces[face or "nil"]
 
-- FIXME: Is it OK to insert the etymology-only lang code and have it fall back to the first part of the
local post = ""
-- lang code (by chopping off the '-...' part)? It seems the :lang() selector does this; not sure about
if sc:getDirection() == "rtl" and (face == "translation" or mw.ustring.find(text, "%p$")) then
-- [lang=...] attributes.
post = "&lrm;"
if lang then
insert(opening_tag, 'lang="' .. lang:getFullCode() .. '"')
end
end
 
-- Add a script wrapper
-- Add a script wrapper
if data then
return (data.prefix or "") .. "<" .. concat(opening_tag, " ") .. ">" .. text .. "</" .. tag .. ">"
return ( data.prefix or "" ) .. '<' .. data.tag .. ' ' .. tag_attr(data.class) .. '>' .. text .. '</' .. data.tag .. '>' .. post
else
error('Invalid script face "' .. face .. '".')
end
end
end


--[==[Tags the transliteration for given text {translit} and language {lang}. It will add the language, script subtag (as defined in [https://www.rfc-editor.org/rfc/bcp/bcp47.txt BCP 47 2.2.3]) and [https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes/dir dir] (directional) attributes as needed.
The optional <code>kind</code> parameter can be one of the following:
; {{code|lua|"term"}}
: tag transliteration for {{temp|mention}}
; {{code|lua|"usex"}}
: tag transliteration for {{temp|usex}}
; {{code|lua|"head"}}
: tag transliteration for {{temp|head}}
; {{code|lua|"default"}}
: default
The optional <code>attributes</code> parameter is used to specify additional HTML attributes for the tag.]==]
function export.tag_translit(translit, lang, kind, attributes, is_manual)
function export.tag_translit(translit, lang, kind, attributes, is_manual)
if type(lang) == "table" then
if type(lang) == "table" then
lang = lang.getCode and lang:getCode()
-- FIXME: Do better support for etym languages; see https://www.rfc-editor.org/rfc/bcp/bcp47.txt
lang = lang.getFullCode and lang:getFullCode()
or error("Second argument to tag_translit should be a language code or language object.")
or error("Second argument to tag_translit should be a language code or language object.")
end
end
 
local data = mw.loadData("Module:script utilities/data").translit[kind or "default"]
local data = (m_data or get_data()).translit[kind or "default"]
 
local opening_tag = {}
local tag = data.tag
local opening_tag = {tag}
table.insert(opening_tag, data.tag)
 
local class = data.class
if lang == "ja" then
if lang == "ja" then
table.insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. (is_manual and "manual-tr " or "") .. 'tr"')
insert(opening_tag, 'class="' .. (class and (class .. " ") or "") .. (is_manual and "manual-tr " or "") .. 'tr"')
else
else
table.insert(opening_tag, 'lang="' .. lang .. '-Latn"')
insert(opening_tag, 'lang="' .. lang .. '-Latn"')
table.insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. (is_manual and "manual-tr " or "") .. 'tr Latn"')
insert(opening_tag, 'class="' .. (class and (class .. " ") or "") .. (is_manual and "manual-tr " or "") .. 'tr Latn"')
end
 
local dir = data.dir
if dir then
insert(opening_tag, 'dir="' .. dir .. '"')
end
end
 
if data.dir then
if attributes then
table.insert(opening_tag, 'dir="' .. data.dir .. '"')
insert(opening_tag, attributes)
end
end
 
table.insert(opening_tag, attributes)
return "<" .. concat(opening_tag, " ") .. ">" .. translit .. "</" .. tag .. ">"
return "<" .. table.concat(opening_tag, " ") .. ">" .. translit .. "</" .. data.tag .. ">"
end
end


function export.tag_transcription(transcription, lang, kind, attributes)
function export.tag_transcription(transcription, lang, kind, attributes)
if type(lang) == "table" then
if type(lang) == "table" then
lang = lang.getCode and lang:getCode()
-- FIXME: Do better support for etym languages; see https://www.rfc-editor.org/rfc/bcp/bcp47.txt
or error("Third argument to tag_translit should be a language code or language object.")
lang = lang.getFullCode and lang:getFullCode()
or error("Second argument to tag_transcription should be a language code or language object.")
end
end
 
local data = mw.loadData("Module:script utilities/data").transcription[kind or "default"]
local data = (m_data or get_data()).transcription[kind or "default"]
 
local opening_tag = {}
local tag = data.tag
local opening_tag = {tag}
table.insert(opening_tag, data.tag)
 
local class = data.class
if lang == "ja" then
if lang == "ja" then
table.insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. 'ts"')
insert(opening_tag, 'class="' .. (class and (class .. " ") or "") .. 'ts"')
else
else
table.insert(opening_tag, 'lang="' .. lang .. '-Latn"')
insert(opening_tag, 'lang="' .. lang .. '-Latn"')
table.insert(opening_tag, 'class="' .. (data.classes and data.classes .. " " or "") .. 'ts Latn"')
insert(opening_tag, 'class="' .. (class and (class .. " ") or "") .. 'ts Latn"')
end
 
local dir = data.dir
if dir then
insert(opening_tag, 'dir="' .. dir .. '"')
end
end
 
if data.dir then
if attributes then
table.insert(opening_tag, 'dir="' .. data.dir .. '"')
insert(opening_tag, attributes)
end
 
return "<" .. concat(opening_tag, " ") .. ">" .. transcription .. "</" .. tag .. ">"
end
 
--[==[Tags {def} as a definition.
The <code>def</code> parameter must be one of the following:
; {{code|lua|"gloss"}}
: The text is wrapped in {{code|html|2=<span class="(mention-gloss">...</span>}}.
; {{code|lua|"non-gloss"}}
: The text is wrapped in {{code|html|2=<span class="use-with-mention">...</span>}}.
The optional <code>attributes</code> parameter is used to specify additional HTML attributes for the tag.]==]
function export.tag_definition(def, kind, attributes)
local data = (m_data or get_data()).definition[kind]
if data == nil then
error("Second argument to tag_definition should specify the kind of definition from the list in [[Module:script utilities/data]].")
end
 
local tag = data.tag
local opening_tag = {tag}
 
local class = data.class
if class then
insert(opening_tag, 'class="' .. class .. '"')
end
 
if attributes then
insert(opening_tag, attributes)
end
end
 
table.insert(opening_tag, attributes)
return "<" .. concat(opening_tag, " ") .. ">" .. def .. "</" .. tag .. ">"
return "<" .. table.concat(opening_tag, " ") .. ">" .. transcription .. "</" .. data.tag .. ">"
end
end


-- Add a notice to request the native script of a word
--[==[Generates a request to provide a term in its native script, if it is missing. This is used by the {{temp|rfscript}} template as well as by the functions in [[Module:links]].
The function will add entries to one of the subcategories of [[:Category:Requests for native script by language]], and do several checks on the given language and script. In particular:
* If the script was given, a subcategory named "Requests for (script) script" is added, but only if the language has more than one script. Otherwise, the main "Requests for native script" category is used.
* Nothing is added at all if the language has no scripts other than Latin and its varieties.]==]
function export.request_script(lang, sc, usex, nocat, sort_key)
function export.request_script(lang, sc, usex, nocat, sort_key)
local scripts = lang.getScripts and lang:getScripts() or error('The language "' .. lang:getCode() .. '" does not have the method getScripts. It may be unwritten.')
local scripts = lang.getScripts and lang:getScripts() or error('The language "' .. lang:getCode() .. '" does not have the method getScripts. It may be unwritten.')
Line 186: Line 377:
-- Is the script known?
-- Is the script known?
if sc then
if sc and sc:getCode() ~= "None" then
-- If the script is Latin, return nothing.
-- If the script is Latin, return nothing.
if export.is_Latin_script(sc) then
if export.is_Latin_script(sc) then
Line 192: Line 383:
end
end
if sc:getCode() ~= scripts[1]:getCode() then
if (not scripts[1]) or sc:getCode() ~= scripts[1]:getCode() then
disp_script = sc:getCanonicalName()
disp_script = sc:getCanonicalName()
end
end
-- The category needs to be specific to script only if there is chance
-- The category needs to be specific to script only if there is chance of ambiguity. This occurs when when the language has multiple scripts (or with codes such as "und").
-- of ambiguity. This occurs when lang=und, or when the language has
if (not scripts[1]) or scripts[2] then
-- multiple scripts.
if lang:getCode() == "und" or scripts[2] then
cat_script = sc:getCanonicalName()
cat_script = sc:getCanonicalName()
end
end
Line 207: Line 396:
local has_nonlatin = false
local has_nonlatin = false
for i, val in ipairs(scripts) do
for _, val in ipairs(scripts) do
if not export.is_Latin_script(val) then
if not export.is_Latin_script(val) then
has_nonlatin = true
has_nonlatin = true
Line 219: Line 408:
end
end
end
end
-- Etymology languages have their own categories, whose parents are the regular language.
local category
return "<small>[" .. disp_script .. " needed]</small>" .. (nocat and "" or
format_categories("Requests for " .. cat_script .. " script " ..
if usex then
(usex and "in" or "for") .. " " .. lang:getCanonicalName() .. " " ..
category = "Requests for " .. cat_script .. " script in " .. lang:getCanonicalName() .. " usage examples"
(usex == "quote" and "quotations" or usex and "usage examples" or "terms"),
else
lang, sort_key
category = "Requests for " .. cat_script .. " script for " .. lang:getCanonicalName() .. " terms"
)
end
)
return "<small>[" .. disp_script .. " needed]</small>" ..
(nocat and "" or require("Module:utilities/format_categories")({category}, lang, sort_key))
end
end


--[==[This is used by {{temp|rfscript}}. See there for more information.]==]
function export.template_rfscript(frame)
function export.template_rfscript(frame)
params = {
local boolean = {type = "boolean"}
[1] = { required = true, default = "und" },
local args = process_params(frame:getParent().args, {
["sc"] = {},
[1] = {required = true, type = "language", default = "und"},
["usex"] = { type = "boolean" },
["sc"] = {type = "script"},
["nocat"] = { type = "boolean" },
["usex"] = boolean,
["sort"] = {},
["quote"] = boolean,
}
["nocat"] = boolean,
["sort"] = true,
})
local args = require("Module:parameters").process(frame:getParent().args, params)
local ret = export.request_script(args[1], args["sc"], args.quote and "quote" or args.usex, args.nocat, args.sort)
local lang = require("Module:languages").getByCode(args[1], 1)
local sc = args.sc and require("Module:scripts").getByCode(args.sc, true)
 
local ret = export.request_script(lang, sc, args.usex, args.nocat, args.sort)
if ret == "" then
if ret == "" then
error("This language is written in the Latin alphabet. It does not need a native script.")
error("This language is written in the Latin alphabet. It does not need a native script.")
else
return ret
end
end
return ret
end
end


function export.checkScript(text, scriptCode, result)
function export.checkScript(text, scriptCode, result)
local scriptObject = require("Module:scripts").getByCode(scriptCode)
local scriptObject = get_script(scriptCode)
if not scriptObject then
if not scriptObject then
Line 265: Line 448:
-- Remove non-letter characters.
-- Remove non-letter characters.
text = mw.ustring.gsub(text, "[%A]", "")
text = ugsub(text, "%A+", "")
-- Remove all characters of the script in question.
-- Remove all characters of the script in question.
text = mw.ustring.gsub(text, "[" .. scriptObject:getCharacters() .. "]", "")
text = ugsub(text, "[" .. scriptObject:getCharacters() .. "]+", "")
if text ~= "" then
if text ~= "" then

Latest revision as of 16:28, 17 April 2025



local export = {}

local anchors_module = "Module:anchors"
local links_module = "Module:links"
local munge_text_module = "Module:munge text"
local parameters_module = "Module:parameters"
local scripts_module = "Module:scripts"
local string_utilities_module = "Module:string utilities"
local utilities_module = "Module:utilities"

local concat = table.concat
local insert = table.insert
local require = require
local toNFD = mw.ustring.toNFD
local dump = mw.dumpObject

--[==[
Loaders for functions in other modules, which overwrite themselves with the target function when called. This ensures modules are only loaded when needed, retains the speed/convenience of locally-declared pre-loaded functions, and has no overhead after the first call, since the target functions are called directly in any subsequent calls.]==]
-- Lazy loader: the first call pulls in [[Module:links]] and rebinds this local to the real function.
local function embedded_language_links(...)
	local links = require(links_module)
	embedded_language_links = links.embedded_language_links
	return embedded_language_links(...)
end

-- Lazy loader: the first call pulls in [[Module:scripts]] and rebinds this local to findBestScriptWithoutLang.
local function find_best_script_without_lang(...)
	local scripts = require(scripts_module)
	find_best_script_without_lang = scripts.findBestScriptWithoutLang
	return find_best_script_without_lang(...)
end

-- Lazy loader: the first call pulls in [[Module:utilities]] and rebinds this local to the real function.
local function format_categories(...)
	local utilities = require(utilities_module)
	format_categories = utilities.format_categories
	return format_categories(...)
end

-- Lazy loader: the first call pulls in [[Module:scripts]] and rebinds this local to getByCode.
local function get_script(...)
	local scripts = require(scripts_module)
	get_script = scripts.getByCode
	return get_script(...)
end

-- Lazy loader: the first call pulls in [[Module:anchors]] and rebinds this local to the real function.
local function language_anchor(...)
	local anchors = require(anchors_module)
	language_anchor = anchors.language_anchor
	return language_anchor(...)
end

-- Lazy loader: [[Module:munge text]] returns a function directly, so the module value itself
-- becomes the new binding of this local on the first call.
local function munge_text(...)
	local loaded = require(munge_text_module)
	munge_text = loaded
	return munge_text(...)
end

-- Lazy loader: the first call pulls in [[Module:parameters]] and rebinds this local to its process function.
local function process_params(...)
	local parameters = require(parameters_module)
	process_params = parameters.process
	return process_params(...)
end

-- Lazy loader: the first call pulls in [[Module:string utilities]] and rebinds this local to its char function.
local function u(...)
	local string_utilities = require(string_utilities_module)
	u = string_utilities.char
	return u(...)
end

-- Lazy loader: the first call pulls in [[Module:string utilities]] and rebinds this local to its gsub function.
local function ugsub(...)
	local string_utilities = require(string_utilities_module)
	ugsub = string_utilities.gsub
	return ugsub(...)
end

-- Lazy loader: the first call pulls in [[Module:string utilities]] and rebinds this local to its match function.
local function umatch(...)
	local string_utilities = require(string_utilities_module)
	umatch = string_utilities.match
	return umatch(...)
end

--[==[
Loaders for objects, which load data (or some other object) into some variable, which can then be accessed as "foo or get_foo()", where the function get_foo sets the object to "foo" and then returns it. This ensures they are only loaded when needed, and avoids the need to check for the existence of the object each time, since once "foo" has been set, "get_foo" will not be called again.]==]
	-- Cached contents of [[Module:script utilities/data]]; stays nil until first needed.
	local m_data
	-- Loads the data table into m_data and then discards itself, so callers
	-- written as "m_data or get_data()" only ever reach this function once.
	local function get_data()
		local loaded = mw.loadData("Module:script utilities/data")
		m_data = loaded
		get_data = nil
		return loaded
	end

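--[=[
	Modules used:
	[[Module:script utilities/data]]
	[[Module:scripts]]
	[[Module:anchors]] (only when IDs present)
	[[Module:links]] (embedded language links in lang_t; wikilink parsing for Korean text)
	[[Module:munge text]] (only when hyphens in Korean text or spaces in vertical text)
	[[Module:string utilities]] (only when hyphens in Korean text or spaces in vertical text)
	[[Module:languages]]
	[[Module:parameters]]
	[[Module:utilities]]
]=]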

-- Reports whether a script object is a Latin variant, judged by whether its
-- code contains "Lat" (Latn, Latf, Latg, pjt-Latn). Always returns a boolean.
function export.is_Latin_script(sc)
	local code = sc:getCode()
	if code:find("Lat") then
		return true
	end
	return false
end

--[==[{{temp|#invoke:script utilities|lang_t}}
This is used by {{temp|lang}} to wrap portions of text in a language tag. See there for more information.]==]
do
	-- Parses the arguments of the template that invoked us: 1= language,
	-- 2= text (may be empty), plus the optional sc=, face= and class=.
	local function get_args(frame)
		local spec = {
			[1] = {required = true, type = "language", default = "und"},
			[2] = {required = true, allow_empty = true, default = ""},
			["sc"] = {type = "script"},
			["face"] = true,
			["class"] = true,
		}
		return process_params(frame:getParent().args, spec)
	end

	-- Template entry point: tags the text for its language and script, and
	-- appends a tracking category whenever sc= was given explicitly.
	function export.lang_t(frame)
		local args = get_args(frame)
		local lang, text, sc = args[1], args[2], args["sc"]

		-- At most one tracking category is produced, and only for a manual sc=.
		local tracking_cat
		if not sc then
			sc = lang:findBestScript(text)
		elseif sc:getCode() == lang:findBestScript(text):getCode() then
			tracking_cat = lang:getFullName() .. " terms with redundant script codes"
		else
			tracking_cat = lang:getFullName() .. " terms with non-redundant manual script codes"
		end

		local linked = embedded_language_links{
			term = text,
			lang = lang,
			sc = sc
		}

		local cat_markup = ""
		if tracking_cat then
			cat_markup = format_categories({tracking_cat}, lang, "-", nil, nil, sc) or ""
		end

		return export.tag_text(linked, lang, sc, args["face"], args["class"]) .. cat_markup
	end
end

-- Rewrites runs of the form Hani(Hang), e.g. 漢字(한자), as <ruby> markup with
-- the parenthesised Hang part as the annotation. Hyphens are allowed in both
-- runs. Returns only the substituted string, not the replacement count.
local function Kore_ruby(txt)
	local hani_chars = get_script("Hani"):getCharacters()
	local hang_chars = get_script("Hang"):getCharacters()
	local pattern = "([%-" .. hani_chars .. "]+)%(([%-" .. hang_chars .. "]+)%)"
	local annotated = ugsub(txt, pattern, "<ruby>%1<rp>(</rp><rt>%2</rt><rp>)</rp></ruby>")
	return annotated
end

--[==[Wraps the given text in HTML tags with appropriate CSS classes (see [[WT:CSS]]) for the [[Module:languages#Language objects|language]] and script. This is required for all non-English text on Wiktionary.
The actual tags and CSS classes that are added are determined by the <code>face</code> parameter. It can be one of the following:
; {{code|lua|"term"}}
: The text is wrapped in {{code|html|2=<i class="(sc) mention" lang="(lang)">...</i>}}.
; {{code|lua|"head"}}
: The text is wrapped in {{code|html|2=<strong class="(sc) headword" lang="(lang)">...</strong>}}.
; {{code|lua|"hypothetical"}}
: The text is wrapped in {{code|html|2=<span class="hypothetical-star">*</span><i class="(sc) hypothetical" lang="(lang)">...</i>}}.
; {{code|lua|"bold"}}
: The text is wrapped in {{code|html|2=<b class="(sc)" lang="(lang)">...</b>}}.
; {{code|lua|nil}}
: The text is wrapped in {{code|html|2=<span class="(sc)" lang="(lang)">...</span>}}.
Any other value of <code>face</code> that has no entry in the face data raises an error. If the script code is {{code|lua|"Image"}}, <code>face</code> is ignored and treated as {{code|lua|nil}}.
If <code>sc</code> is omitted, the script is detected from the text, using <code>lang</code> if it is given.
The optional <code>class</code> parameter can be used to specify an additional CSS class to be added to the tag.
If both <code>lang</code> and <code>id</code> are given, an <code>id</code> attribute generated by <code>language_anchor(lang, id)</code> is added to the tag.]==]
function export.tag_text(text, lang, sc, face, class, id)
	-- No script supplied: detect it, preferring the language-aware detection when a language is available.
	if not sc then
		if lang then
			sc = lang:findBestScript(text)
		else
			sc = find_best_script_without_lang(text)
		end
	end

	-- Replace runs of space characters with <br> in text whose script direction contains "vertical"
	-- (e.g. Mongolian-script text, which is written top-to-bottom). Skipped when there is no space at all.
	if sc:getDirection():find("vertical", nil, true) and text:find(" ", nil, true) then
		text = munge_text(text, function(txt)
			-- having extra parentheses makes sure only the first return value gets through
			return (txt:gsub(" +", "<br>"))
		end)
	end

	-- Hack Korean script text to remove hyphens.
	-- FIXME: This should be handled in a more general fashion, but needs to
	-- be efficient by not doing anything if no hyphens are present, and currently this is the only
	-- language needing such processing.
	-- 20220221: Also convert 漢字(한자) to ruby, instead of needing [[Template:Ruby]].
	-- The cheap match below skips all of this unless the text contains a hyphen, a parenthesis or a "g".
	if sc:getCode() == "Kore" and text:match("[%-()g]") then
		-- NOTE(review): presumably returns the link target and display text when `text` consists of exactly one
		-- wikilink, and nil otherwise -- confirm against [[Module:links]].
		local title, display = require("Module:links").get_wikilink_parts(text, true)
		if title ~= nil then -- special case that the text is a single link, do not munge and preserve affix hyphens
			if lang and lang:getCode() == "okm" then -- Middle Korean code from [[User:Chom.kwoy]]
				-- Comment from [[User:Lunabunn]]:
				-- In Middle Korean orthography, syllable formation is phonemic as opposed to morpheme-boundary-based a la
				-- modern Korean. As such, for example, if you were to write nam-i, it would be rendered as na.mi so if you
				-- then put na-mi to indicate particle boundaries as in modern Korean, the hyphen would be misplaced.
				-- Previously, this was alleviated by specialcasing na--mi but [[User:Theknightwho]] made that resolve to -
				-- in the Hangul (previously we used to just delete all -s in Hangul processing), so it broke.
				-- [[User:Chom.kwoy]] implemented a different solution, which is writing -> instead using however many >s to
				-- shift the hyphen by that number of letters in the romanization.
				
				-- By the time we are called, > signs have been converted to &gt; by a call to encode_entities() in
				-- make_link() in [[Module:links]] (near the bottom of the function).
				-- This removal has to happen before the 'g' removal below, which would otherwise hit the "g" of "&gt;".
				display = display:gsub("&gt;", "")
				-- 'g' in Middle Korean is a special sign to treat the following ㅇ sign as /G/ instead of null.
				display = display:gsub("g", "")
			end
			-- Remove a hyphen that has something on both sides; if it is immediately followed by a second hyphen,
			-- that second hyphen is kept. A hyphen at the very start or end of the display text is left alone.
			display = display:gsub("(.)%-(%-?)(.)", "%1%2%3")
			display = Kore_ruby(display)
			-- Rebuild the link with the original target and the cleaned-up display text.
			text = "[[" .. title .. "|" .. display .. "]]"
		else
			text = munge_text(text, function(txt)
				if lang and lang:getCode() == "okm" then
					-- Same Middle Korean clean-up as in the single-link branch above.
					txt = txt:gsub("&gt;", "")
					txt = txt:gsub("g", "")
				end
				-- NOTE(review): for "okm" this comparison runs after the removals above, so a plain text that
				-- contained "&gt;" or "g" no longer equals `text` and takes the else branch -- confirm this is intended.
				if txt == text then -- special case for the entire text being plain
					txt = txt:gsub("(.)%-(%-?)(.)", "%1%2%3")
				else
					-- Remove every hyphen, keeping the second one of a doubled pair.
					txt = txt:gsub("%-(%-?)", "%1")
				end
				txt = Kore_ruby(txt)
				return txt
			end)
		end
	end

	-- The "Image" script always uses the plain face, whatever the caller asked for.
	if sc:getCode() == "Image" then
		face = nil
	end

	-- Look up the tag/class/prefix data for the face; a nil face uses the "plain" entry.
	local data = (m_data or get_data()).faces[face or "plain"]
	if data == nil then
		error('Invalid script face "' .. face .. '".')
	end

	-- opening_tag collects the tag name followed by its attributes; it is joined with spaces at the end.
	local tag = data.tag
	local opening_tag = {tag}

	if lang and id then
		insert(opening_tag, 'id="' .. language_anchor(lang, id) .. '"')
	end

	-- Class order: script class(es) first, then the face's own class (if any), then the caller's extra class.
	local classes = {data.class}
	-- If the script code is hyphenated (i.e. "language code-script code"), add the part after the last hyphen
	-- as a class as well, before the full code,
	-- e.g. ota-Arab adds both Arab and ota-Arab as classes
	if sc:getCode():find("-", nil, true) then
		insert(classes, 1, (ugsub(sc:getCode(), ".+%-", "")))
		insert(classes, 2, sc:getCode())
	else
		insert(classes, 1, sc:getCode())
	end
	if class and class ~= '' then
		insert(classes, class)
	end
	insert(opening_tag, 'class="' .. concat(classes, ' ') .. '"')

	-- FIXME: Is it OK to insert the etymology-only lang code and have it fall back to the first part of the
	-- lang code (by chopping off the '-...' part)? It seems the :lang() selector does this; not sure about
	-- [lang=...] attributes.
	if lang then
		insert(opening_tag, 'lang="' .. lang:getFullCode() .. '"')
	end

	-- Add a script wrapper: optional face prefix, opening tag with attributes, the text, then the closing tag.
	return (data.prefix or "") .. "<" .. concat(opening_tag, " ") .. ">" .. text .. "</" .. tag .. ">"
end

--[==[Tags the transliteration for given text {translit} and language {lang}. It will add the language, script subtag (as defined in [https://www.rfc-editor.org/rfc/bcp/bcp47.txt BCP 47 2.2.3]) and [https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes/dir dir] (directional) attributes as needed.
{lang} may be a language code or a language object; a language object is replaced by its full code, and any other table raises an error.
The optional <code>kind</code> parameter can be one of the following:
; {{code|lua|"term"}}
: tag transliteration for {{temp|mention}}
; {{code|lua|"usex"}}
: tag transliteration for {{temp|usex}}
; {{code|lua|"head"}}
: tag transliteration for {{temp|head}}
; {{code|lua|"default"}}
: default
A <code>kind</code> with no entry in the transliteration data raises an error.
The optional <code>attributes</code> parameter is used to specify additional HTML attributes for the tag.
If <code>is_manual</code> is set, the CSS class <code>manual-tr</code> is added to the tag.]==]
function export.tag_translit(translit, lang, kind, attributes, is_manual)
	if type(lang) == "table" then
		-- FIXME: Do better support for etym languages; see https://www.rfc-editor.org/rfc/bcp/bcp47.txt
		lang = lang.getFullCode and lang:getFullCode()
			or error("Second argument to tag_translit should be a language code or language object.")
	end

	local data = (m_data or get_data()).translit[kind or "default"]
	-- Report an unknown kind explicitly rather than failing on `data.tag` with an
	-- "attempt to index a nil value" error.
	if data == nil then
		error('Invalid transliteration kind "' .. tostring(kind) .. '".')
	end

	local tag = data.tag
	local opening_tag = {tag}

	-- Classes common to both branches: the kind's own class (if any), "manual-tr" when requested, then "tr".
	local class = data.class
	local classes = (class and (class .. " ") or "") .. (is_manual and "manual-tr " or "") .. "tr"
	if lang == "ja" then
		-- Japanese gets neither a lang attribute nor the "Latn" class.
		insert(opening_tag, 'class="' .. classes .. '"')
	else
		insert(opening_tag, 'lang="' .. lang .. '-Latn"')
		insert(opening_tag, 'class="' .. classes .. ' Latn"')
	end

	local dir = data.dir
	if dir then
		insert(opening_tag, 'dir="' .. dir .. '"')
	end

	-- Caller-supplied attributes are appended verbatim after the generated ones.
	if attributes then
		insert(opening_tag, attributes)
	end

	return "<" .. concat(opening_tag, " ") .. ">" .. translit .. "</" .. tag .. ">"
end

--[==[Tags the transcription {transcription} for language {lang}, in the same way that {tag_translit} tags a transliteration, but using the CSS class <code>ts</code> and the transcription data.
{lang} may be a language code or a language object; a language object is replaced by its full code, and any other table raises an error.
The optional <code>kind</code> parameter selects an entry of the transcription data and defaults to {{code|lua|"default"}}; a <code>kind</code> with no entry raises an error.
The optional <code>attributes</code> parameter is used to specify additional HTML attributes for the tag.]==]
function export.tag_transcription(transcription, lang, kind, attributes)
	if type(lang) == "table" then
		-- FIXME: Do better support for etym languages; see https://www.rfc-editor.org/rfc/bcp/bcp47.txt
		lang = lang.getFullCode and lang:getFullCode()
			or error("Second argument to tag_transcription should be a language code or language object.")
	end

	local data = (m_data or get_data()).transcription[kind or "default"]
	-- Report an unknown kind explicitly rather than failing on `data.tag` with an
	-- "attempt to index a nil value" error.
	if data == nil then
		error('Invalid transcription kind "' .. tostring(kind) .. '".')
	end

	local tag = data.tag
	local opening_tag = {tag}

	-- Classes common to both branches: the kind's own class (if any), then "ts".
	local class = data.class
	local classes = (class and (class .. " ") or "") .. "ts"
	if lang == "ja" then
		-- Japanese gets neither a lang attribute nor the "Latn" class.
		insert(opening_tag, 'class="' .. classes .. '"')
	else
		insert(opening_tag, 'lang="' .. lang .. '-Latn"')
		insert(opening_tag, 'class="' .. classes .. ' Latn"')
	end

	local dir = data.dir
	if dir then
		insert(opening_tag, 'dir="' .. dir .. '"')
	end

	-- Caller-supplied attributes are appended verbatim after the generated ones.
	if attributes then
		insert(opening_tag, attributes)
	end

	return "<" .. concat(opening_tag, " ") .. ">" .. transcription .. "</" .. tag .. ">"
end

--[==[Wraps the definition text {def} in an HTML tag chosen by {kind}.
The <code>kind</code> parameter must be a kind of definition listed in [[Module:script utilities/data]], such as:
; {{code|lua|"gloss"}}
: The text is wrapped in {{code|html|2=<span class="mention-gloss">...</span>}}.
; {{code|lua|"non-gloss"}}
: The text is wrapped in {{code|html|2=<span class="use-with-mention">...</span>}}.
An unrecognised <code>kind</code> raises an error.
The optional <code>attributes</code> parameter is used to specify additional HTML attributes for the tag.]==]
function export.tag_definition(def, kind, attributes)
	local kind_data = (m_data or get_data()).definition[kind]
	if kind_data == nil then
		error("Second argument to tag_definition should specify the kind of definition from the list in [[Module:script utilities/data]].")
	end

	local tag_name = kind_data.tag
	-- Tag name first, then each attribute; the pieces are joined with single spaces below.
	local pieces = {tag_name}

	local css_class = kind_data.class
	if css_class then
		pieces[#pieces + 1] = 'class="' .. css_class .. '"'
	end

	-- Caller-supplied attributes go in verbatim, after the class.
	if attributes then
		pieces[#pieces + 1] = attributes
	end

	local opening = "<" .. concat(pieces, " ") .. ">"
	return opening .. def .. "</" .. tag_name .. ">"
end

--[==[Generates a request to provide a term in its native script, if it is missing. This is used by the {{temp|rfscript}} template as well as by the functions in [[Module:links]].
The result is a small "[... needed]" notice, followed (unless {nocat} is set) by a category in one of the subcategories of [[:Category:Requests for native script by language]]. Several checks are done on the given language and script. In particular:
* If the script was given, a subcategory named "Requests for (script) script" is used, but only if the language has more than one script (or none listed). Otherwise, the main "Requests for native script" category is used.
* An empty string is returned if the given script is a Latin script, or if no script is known and the language has no scripts other than Latin and its varieties.
{usex} selects the kind of request: {{code|lua|"quote"}} for quotations, any other true value for usage examples, and a false value for terms. {sort_key} is passed on to the category formatter.]==]
function export.request_script(lang, sc, usex, nocat, sort_key)
	local scripts = lang.getScripts and lang:getScripts()
	if not scripts then
		error('The language "' .. lang:getCode() .. '" does not have the method getScripts. It may be unwritten.')
	end

	-- Defaults: the notice reads "script needed" and the category asks for "native" script.
	local disp_script, cat_script = "script", "native"

	-- With no explicit script, a language that has exactly one script uses that one.
	if not sc and #scripts == 1 then
		sc = scripts[1]
	end

	if not sc or sc:getCode() == "None" then
		-- The script is not known: a request only makes sense if the language lists at least one
		-- non-Latin script.
		local only_latin = true
		for _, candidate in ipairs(scripts) do
			if not export.is_Latin_script(candidate) then
				only_latin = false
				break
			end
		end
		if only_latin then
			return ""
		end
	elseif export.is_Latin_script(sc) then
		-- A known Latin script never needs a native-script request.
		return ""
	else
		local primary = scripts[1]

		-- Name the script in the notice unless it is the language's first-listed script.
		if not (primary and sc:getCode() == primary:getCode()) then
			disp_script = sc:getCanonicalName()
		end

		-- The category needs to be specific to the script only if there is a chance of ambiguity. This
		-- occurs when the language has multiple scripts (or with codes such as "und").
		if not primary or scripts[2] then
			cat_script = sc:getCanonicalName()
		end
	end

	local notice = "<small>[" .. disp_script .. " needed]</small>"
	if nocat then
		return notice
	end

	local preposition = usex and "in" or "for"
	local subject
	if usex == "quote" then
		subject = "quotations"
	elseif usex then
		subject = "usage examples"
	else
		subject = "terms"
	end

	-- Etymology languages have their own categories, whose parents are the regular language.
	local category = "Requests for " .. cat_script .. " script " ..
		preposition .. " " .. lang:getCanonicalName() .. " " .. subject
	return notice .. format_categories(category, lang, sort_key)
end

--[==[Entry point for {{temp|rfscript}}; see there for more information.
Reads the parent frame's arguments (<code>1</code> for the language, plus <code>sc</code>, <code>usex</code>, <code>quote</code>, <code>nocat</code> and <code>sort</code>), passes them to {request_script} and returns its result. <code>quote</code> takes precedence over <code>usex</code>. An error is raised if {request_script} returns an empty string.]==]
function export.template_rfscript(frame)
	local flag = {type = "boolean"}
	local spec = {
		[1] = {required = true, type = "language", default = "und"},
		["sc"] = {type = "script"},
		["usex"] = flag,
		["quote"] = flag,
		["nocat"] = flag,
		["sort"] = true,
	}
	local args = process_params(frame:getParent().args, spec)

	-- "quote" wins over "usex"; otherwise the usex flag is passed through as it is.
	local usex = args.quote and "quote" or args.usex
	local request = export.request_script(args[1], args["sc"], usex, args.nocat, args.sort)

	if request ~= "" then
		return request
	end
	error("This language is written in the Latin alphabet. It does not need a native script.")
end

--[==[Checks that every letter in {text} belongs to the script with code {scriptCode}, and raises an error otherwise. Nothing is returned.
Non-letter characters in {text} are ignored. If {result} is a string, it is used as the error message; otherwise a message quoting the text and the offending letters is generated. An error is also raised if {scriptCode} is not a recognized script code.]==]
function export.checkScript(text, scriptCode, result)
	local script = get_script(scriptCode)
	if not script then
		error('The script code "' .. scriptCode .. '" is not recognized.')
	end

	-- Strip everything that is not a letter, then every character of the script in question; whatever
	-- is left consists of letters from some other script.
	local leftover = ugsub(text, "%A+", "")
	leftover = ugsub(leftover, "[" .. script:getCharacters() .. "]+", "")

	if leftover == "" then
		return
	end

	if type(result) == "string" then
		error(result)
	end
	error('The text "' .. text .. '" contains the letters "' .. leftover .. '" that do not belong to the ' .. script:getDisplayForm() .. '.', 2)
end

return export