48,407
edits
No edit summary |
No edit summary |
||
| Line 13: | Line 13: | ||
local require = require | local require = require | ||
local toNFD = mw.ustring.toNFD | local toNFD = mw.ustring.toNFD | ||
local dump = mw.dumpObject | |||
--[==[ | --[==[ | ||
Loaders for functions in other modules, which overwrite themselves with the target function when called. This ensures modules are only loaded when needed, retains the speed/convenience of locally-declared pre-loaded functions, and has no overhead after the first call, since the target functions are called directly in any subsequent calls.]==] | Loaders for functions in other modules, which overwrite themselves with the target function when called. This ensures modules are only loaded when needed, retains the speed/convenience of locally-declared pre-loaded functions, and has no overhead after the first call, since the target functions are called directly in any subsequent calls.]==] | ||
-- Lazy loader: on first use, fetches `embedded_language_links` from the module named by `links_module`, rebinds this
-- local to it so later calls go straight to the target, and forwards the current arguments.
local function embedded_language_links(...)
	local target = require(links_module).embedded_language_links
	embedded_language_links = target
	return target(...)
end
-- Lazy loader: on first use, fetches `findBestScriptWithoutLang` from the module named by `scripts_module`, rebinds
-- this local to it, and forwards the current arguments.
local function find_best_script_without_lang(...)
	local target = require(scripts_module).findBestScriptWithoutLang
	find_best_script_without_lang = target
	return target(...)
end
-- Lazy loader: on first use, fetches `format_categories` from the module named by `utilities_module`, rebinds this
-- local to it, and forwards the current arguments.
local function format_categories(...)
	local target = require(utilities_module).format_categories
	format_categories = target
	return target(...)
end
-- Lazy loader: on first use, fetches `getByCode` from the module named by `scripts_module`, rebinds this local to it,
-- and forwards the current arguments.
local function get_script(...)
	local target = require(scripts_module).getByCode
	get_script = target
	return target(...)
end
-- Lazy loader: on first use, fetches `language_anchor` from the module named by `anchors_module`, rebinds this local
-- to it, and forwards the current arguments.
local function language_anchor(...)
	local target = require(anchors_module).language_anchor
	language_anchor = target
	return target(...)
end
-- Lazy loader: on first use, loads the module named by `munge_text_module` (whose return value is itself the callable),
-- rebinds this local to it, and forwards the current arguments.
local function munge_text(...)
	local target = require(munge_text_module)
	munge_text = target
	return target(...)
end
-- Lazy loader: on first use, fetches `process` from the module named by `parameters_module`, rebinds this local to it,
-- and forwards the current arguments.
local function process_params(...)
	local target = require(parameters_module).process
	process_params = target
	return target(...)
end
-- Lazy loader: on first use, fetches `char` from the module named by `string_utilities_module`, rebinds this local to
-- it, and forwards the current arguments.
local function u(...)
	local target = require(string_utilities_module).char
	u = target
	return target(...)
end
-- Lazy loader: on first use, fetches `gsub` from the module named by `string_utilities_module`, rebinds this local to
-- it, and forwards the current arguments.
local function ugsub(...)
	local target = require(string_utilities_module).gsub
	ugsub = target
	return target(...)
end
-- Lazy loader: on first use, fetches `match` from the module named by `string_utilities_module`, rebinds this local to
-- it, and forwards the current arguments.
local function umatch(...)
	local target = require(string_utilities_module).match
	umatch = target
	return target(...)
end
--[==[ | --[==[ | ||
| Line 107: | Line 113: | ||
if sc then | if sc then | ||
-- Track uses of sc parameter. | |||
if sc:getCode() == lang:findBestScript(text):getCode() then | |||
insert(cats, lang:getFullName() .. " terms with redundant script codes") | |||
else | |||
insert(cats, lang:getFullName() .. " terms with non-redundant manual script codes") | |||
end | |||
else | else | ||
sc = lang:findBestScript(text) | sc = lang:findBestScript(text) | ||
| Line 124: | Line 136: | ||
return export.tag_text(text, lang, sc, face, class) .. cats | return export.tag_text(text, lang, sc, face, class) .. cats | ||
end | end | ||
end | |||
-- Rewrites every "X(Y)" sequence in `txt` as HTML ruby markup, where X is a run of hyphens and/or characters of the
-- "Hani" script and Y is a run of hyphens and/or characters of the "Hang" script. X becomes the ruby base and Y the
-- <rt> annotation; the parentheses are kept inside <rp> fallbacks. Only the substituted string is returned.
local function Kore_ruby(txt)
	-- Character-class runs built from each script's character set; a literal hyphen is allowed in both.
	local base_run = "[%-" .. get_script("Hani"):getCharacters() .. "]+"
	local gloss_run = "[%-" .. get_script("Hang"):getCharacters() .. "]+"
	local pattern = "(" .. base_run .. ")%((" .. gloss_run .. ")%)"
	-- The outer parentheses discard the substitution count returned by ugsub.
	return (ugsub(txt, pattern, "<ruby>%1<rp>(</rp><rt>%2</rt><rp>)</rp></ruby>"))
end
| Line 141: | Line 157: | ||
function export.tag_text(text, lang, sc, face, class, id) | function export.tag_text(text, lang, sc, face, class, id) | ||
if not sc then | if not sc then | ||
sc = lang:findBestScript(text) | if lang then | ||
sc = lang:findBestScript(text) | |||
else | |||
sc = find_best_script_without_lang(text) | |||
end | |||
end | end | ||
-- Replace space characters with newlines in Mongolian-script text, which is written top-to-bottom. | -- Replace space characters with newlines in Mongolian-script text, which is written top-to-bottom. | ||
if sc:getDirection(): | if sc:getDirection():find("vertical", nil, true) and text:find(" ", nil, true) then | ||
text = munge_text(text, function(txt) | text = munge_text(text, function(txt) | ||
-- having extra parentheses makes sure only the first return value gets through | -- having extra parentheses makes sure only the first return value gets through | ||
| Line 157: | Line 177: | ||
-- language needing such processing. | -- language needing such processing. | ||
-- 20220221: Also convert 漢字(한자) to ruby, instead of needing [[Template:Ruby]]. | -- 20220221: Also convert 漢字(한자) to ruby, instead of needing [[Template:Ruby]]. | ||
if sc:getCode() == "Kore" and | if sc:getCode() == "Kore" and text:match("[%-()g]") then | ||
local title, display = require("Module:links").get_wikilink_parts(text, true) | |||
if title ~= nil then -- special case that the text is a single link, do not munge and preserve affix hyphens | |||
if lang and lang:getCode() == "okm" then -- Middle Korean code from [[User:Chom.kwoy]] | |||
-- Comment from [[User:Lunabunn]]: | |||
end | -- In Middle Korean orthography, syllable formation is phonemic as opposed to morpheme-boundary-based a la | ||
-- modern Korean. As such, for example, if you were to write nam-i, it would be rendered as na.mi so if you | |||
-- then put na-mi to indicate particle boundaries as in modern Korean, the hyphen would be misplaced. | |||
-- Previously, this was alleviated by specialcasing na--mi but [[User:Theknightwho]] made that resolve to - | |||
-- in the Hangul (previously we used to just delete all -s in Hangul processing), so it broke. | |||
-- [[User:Chom.kwoy]] implemented a different solution, which is writing -> instead using however many >s to | |||
-- shift the hyphen by that number of letters in the romanization. | |||
-- By the time we are called, > signs have been converted to &gt; by a call to encode_entities() in | |||
-- make_link() in [[Module:links]] (near the bottom of the function). | |||
display = display:gsub(">", "") | |||
-- 'g' in Middle Korean is a special sign to treat the following ㅇ sign as /G/ instead of null. | |||
display = display:gsub("g", "") | |||
end | |||
display = display:gsub("(.)%-(%-?)(.)", "%1%2%3") | |||
display = Kore_ruby(display) | |||
text = "[[" .. title .. "|" .. display .. "]]" | |||
else | |||
text = munge_text(text, function(txt) | |||
if lang and lang:getCode() == "okm" then | |||
txt = txt:gsub(">", "") | |||
txt = txt:gsub("g", "") | |||
end | |||
if txt == text then -- special case for the entire text being plain | |||
txt = txt:gsub("(.)%-(%-?)(.)", "%1%2%3") | |||
else | |||
txt = txt:gsub("%-(%-?)", "%1") | |||
end | |||
txt = Kore_ruby(txt) | |||
return txt | |||
end) | |||
end | |||
end | end | ||
if sc:getCode() == "Image" then | if sc:getCode() == "Image" then | ||
face = nil | face = nil | ||
end | end | ||
local | local data = (m_data or get_data()).faces[face or "plain"] | ||
if data == nil then | |||
error('Invalid script face "' .. face .. '".') | |||
end | end | ||
local | local tag = data.tag | ||
local opening_tag = {tag} | |||
if lang and id then | |||
insert(opening_tag, 'id="' .. language_anchor(lang, id) .. '"') | |||
end | end | ||
local data | local classes = {data.class} | ||
-- if the script code is hyphenated (i.e. language code-script code), add the last component as a class as well | |||
-- | -- e.g. ota-Arab adds both Arab and ota-Arab as classes | ||
if | if sc:getCode():find("-", nil, true) then | ||
insert(classes, 1, (ugsub(sc:getCode(), ".+%-", ""))) | |||
insert(classes, 2, sc:getCode()) | |||
else | else | ||
insert(classes, 1, sc:getCode()) | |||
end | |||
if class and class ~= '' then | |||
insert(classes, class) | |||
end | |||
insert(opening_tag, 'class="' .. concat(classes, ' ') .. '"') | |||
-- FIXME: Is it OK to insert the etymology-only lang code and have it fall back to the first part of the | |||
-- lang code (by chopping off the '-...' part)? It seems the :lang() selector does this; not sure about | |||
-- [lang=...] attributes. | |||
if lang then | |||
insert(opening_tag, 'lang="' .. lang:getFullCode() .. '"') | |||
end | end | ||
-- Add a script wrapper | |||
return (data.prefix or "") .. "<" .. concat(opening_tag, " ") .. ">" .. text .. "</" .. tag .. ">" | |||
end | end | ||
| Line 229: | Line 274: | ||
or error("Second argument to tag_translit should be a language code or language object.") | or error("Second argument to tag_translit should be a language code or language object.") | ||
end | end | ||
local data = (m_data or get_data()).translit[kind or "default"] | local data = (m_data or get_data()).translit[kind or "default"] | ||
local opening_tag = {} | local tag = data.tag | ||
local opening_tag = {tag} | |||
local class = data.class | |||
if lang == "ja" then | if lang == "ja" then | ||
insert(opening_tag, 'class="' .. ( | insert(opening_tag, 'class="' .. (class and (class .. " ") or "") .. (is_manual and "manual-tr " or "") .. 'tr"') | ||
else | else | ||
insert(opening_tag, 'lang="' .. lang .. '-Latn"') | insert(opening_tag, 'lang="' .. lang .. '-Latn"') | ||
insert(opening_tag, 'class="' .. ( | insert(opening_tag, 'class="' .. (class and (class .. " ") or "") .. (is_manual and "manual-tr " or "") .. 'tr Latn"') | ||
end | |||
local dir = data.dir | |||
if dir then | |||
insert(opening_tag, 'dir="' .. dir .. '"') | |||
end | end | ||
if | if attributes then | ||
insert(opening_tag, | insert(opening_tag, attributes) | ||
end | end | ||
return "<" .. concat(opening_tag, " ") .. ">" .. translit .. "</" .. tag .. ">" | |||
return "<" .. concat(opening_tag, " ") .. ">" .. translit .. "</" . | |||
end | end | ||
| Line 257: | Line 306: | ||
or error("Second argument to tag_transcription should be a language code or language object.") | or error("Second argument to tag_transcription should be a language code or language object.") | ||
end | end | ||
local data = (m_data or get_data()).transcription[kind or "default"] | local data = (m_data or get_data()).transcription[kind or "default"] | ||
local opening_tag = {} | local tag = data.tag | ||
local opening_tag = {tag} | |||
local class = data.class | |||
if lang == "ja" then | if lang == "ja" then | ||
insert(opening_tag, 'class="' .. ( | insert(opening_tag, 'class="' .. (class and (class .. " ") or "") .. 'ts"') | ||
else | else | ||
insert(opening_tag, 'lang="' .. lang .. '-Latn"') | insert(opening_tag, 'lang="' .. lang .. '-Latn"') | ||
insert(opening_tag, 'class="' .. (data. | insert(opening_tag, 'class="' .. (class and (class .. " ") or "") .. 'ts Latn"') | ||
end | |||
local dir = data.dir | |||
if dir then | |||
insert(opening_tag, 'dir="' .. dir .. '"') | |||
end | |||
if attributes then | |||
insert(opening_tag, attributes) | |||
end | |||
return "<" .. concat(opening_tag, " ") .. ">" .. transcription .. "</" .. tag .. ">" | |||
end | |||
--[==[Tags {def} as a definition.
The <code>kind</code> parameter selects an entry of the <code>definition</code> table in the script utilities data; an error is thrown if there is no such entry. The kinds documented for this function are:
; {{code|lua|"gloss"}}
: The text is wrapped in {{code|html|2=<span class="mention-gloss">...</span>}}.
; {{code|lua|"non-gloss"}}
: The text is wrapped in {{code|html|2=<span class="use-with-mention">...</span>}}.
The tag name and the optional class are taken from the selected data entry.
The optional <code>attributes</code> parameter is used to specify additional HTML attributes for the tag; it is inserted verbatim into the opening tag.]==]
function export.tag_definition(def, kind, attributes)
	local spec = (m_data or get_data()).definition[kind]
	if spec == nil then
		error("Second argument to tag_definition should specify the kind of definition from the list in [[Module:script utilities/data]].")
	end
	local tag, class = spec.tag, spec.class
	-- Pieces of the opening tag, joined with single spaces below: the tag name first, then any attributes.
	local parts = {tag}
	if class then
		insert(parts, 'class="' .. class .. '"')
	end
	if attributes then
		insert(parts, attributes)
	end
	return "<" .. concat(parts, " ") .. ">" .. def .. "</" .. tag .. ">"
end
| Line 338: | Line 419: | ||
--[==[This is used by {{temp|rfscript}}. See there for more information.
Reads the arguments of the parent frame, validates them with the parameter processor, and passes them to {export.request_script}. If {{para|quote}} is set, the string {"quote"} is passed as the third argument; otherwise the value of {{para|usex}} is passed. An error is thrown when {export.request_script} returns the empty string.]==]
function export.template_rfscript(frame)
	local boolean = {type = "boolean"}
	local param_spec = {
		[1] = {required = true, type = "language", default = "und"},
		["sc"] = {type = "script"},
		["usex"] = boolean,
		["quote"] = boolean,
		["nocat"] = boolean,
		["sort"] = true,
	}
	local args = process_params(frame:getParent().args, param_spec)
	-- `quote` takes precedence over `usex` when both are given.
	local usage
	if args.quote then
		usage = "quote"
	else
		usage = args.usex
	end
	local ret = export.request_script(args[1], args["sc"], usage, args.nocat, args.sort)
	if ret == "" then
		error("This language is written in the Latin alphabet. It does not need a native script.")
	end
	return ret
end