Module:languages: Difference between revisions

Jump to navigation Jump to search
no edit summary
No edit summary
No edit summary
Line 74: Line 74:
their "bare display" form by taking the right part of two-part links and removing double brackets), but when this
their "bare display" form by taking the right part of two-part links and removing double brackets), but when this
happens is unclear to me [FIXME]. Some languages have a chop-up-and-paste-together scheme that sends parts of the
happens is unclear to me [FIXME]. Some languages have a chop-up-and-paste-together scheme that sends parts of the
text through the transliterate mechanism, and for others (those listed in {contiguous_substitution} in
text through the transliterate mechanism, and for others (those listed with "cont" in {substitution} in
[[Module:languages/data]]) they receive the full input text, but preprocessed in certain ways. (The wisdom of this is
[[Module:languages/data]]) they receive the full input text, but preprocessed in certain ways. (The wisdom of this is
still unclear to me.)
still unclear to me.)
Line 110: Line 110:
local export = {}
local export = {}


local debug_track_module = "Module:debug/track"
local etymology_languages_data_module = "Module:etymology languages/data"
local families_module = "Module:families"
local families_module = "Module:families"
local json_module = "Module:JSON"
local json_module = "Module:JSON"
local language_like_module = "Module:language-like"
local language_like_module = "Module:language-like"
local languages_data_module = "Module:languages/data"
local languages_data_patterns_module = "Module:languages/data/patterns"
local links_data_module = "Module:links/data"
local load_module = "Module:load"
local load_module = "Module:load"
local patterns_module = "Module:patterns"
local scripts_module = "Module:scripts"
local scripts_module = "Module:scripts"
local scripts_data_module = "Module:scripts/data"
local string_encode_entities_module = "Module:string/encode entities"
local string_encode_entities_module = "Module:string/encode entities"
local string_utilities_module = "Module:string utilities"
local string_utilities_module = "Module:string utilities"
Line 137: Line 144:
local insert = table.insert
local insert = table.insert
local ipairs = ipairs
local ipairs = ipairs
local is_known_language_tag = mw.language.isKnownLanguageTag
local make_object -- Defined below.
local make_object -- Defined below.
local match = string.match
local match = string.match
Line 145: Line 153:
local select = select
local select = select
local setmetatable = setmetatable
local setmetatable = setmetatable
local sub = string.sub
local type = type
local type = type
local unstrip = mw.text.unstrip
local unstrip = mw.text.unstrip
Line 157: Line 166:
check_object = require(utilities_module).check_object
check_object = require(utilities_module).check_object
return check_object(...)
return check_object(...)
end
-- Lazy loader: the first call requires the module named by `debug_track_module` and rebinds this local to
-- whatever that module returns, so subsequent calls bypass the loader and go straight to the real function.
local function debug_track(...)
	local tracker = require(debug_track_module)
	debug_track = tracker
	return tracker(...)
end
end


Line 225: Line 239:


local function pattern_escape(...)
local function pattern_escape(...)
pattern_escape = require(string_utilities_module).pattern_escape
pattern_escape = require(patterns_module).pattern_escape
return pattern_escape(...)
return pattern_escape(...)
end
end
Line 235: Line 249:


local function replacement_escape(...)
local function replacement_escape(...)
replacement_escape = require(string_utilities_module).replacement_escape
replacement_escape = require(patterns_module).replacement_escape
return replacement_escape(...)
return replacement_escape(...)
end
end
Line 288: Line 302:
return uupper(...)
return uupper(...)
end
end
-- Record a tracking hit for `page`, namespaced under "languages/". Always returns true.
local function track(page)
	local key = "languages/" .. page
	debug_track(key)
	return true
end


local function normalize_code(code)
local function normalize_code(code)
return load_data("Module:languages/data").aliases[code] or code
return load_data(languages_data_module).aliases[code] or code
end
 
-- Run `check(self, input)` on each of the variadic inputs in order, returning the first non-nil result.
-- Returns false if no inputs were given at all, and `default` if every check returned nil.
-- `select("#", ...)` is used for the count so that nil arguments in the middle of the list are still visited.
local function check_inputs(self, check, default, ...)
	local count = select("#", ...)
	if count == 0 then
		return false
	end
	for i = 1, count do
		-- The extra parentheses truncate select's results to just the i-th input.
		local result = check(self, (select(i, ...)))
		if result ~= nil then
			return result
		end
	end
	return default
end
 
-- Build the wikilink "[[target|display]]". For languages whose family code is "qfa-sub", a leading
-- "the " or "a " in `display` is moved outside the link, so only the rest of the name is linked.
local function make_link(self, target, display)
	local article, label = nil, display
	if self:getFamilyCode() == "qfa-sub" then
		local found, rest = display:match("^(the )(.*)")
		if not found then
			found, rest = display:match("^(a )(.*)")
		end
		if found then
			article, label = found, rest
		end
	end
	return (article or "") .. "[[" .. target .. "|" .. label .. "]]"
end
end


Line 305: Line 355:
local function doTempSubstitutions(text, subbedChars, keepCarets, noTrim)
local function doTempSubstitutions(text, subbedChars, keepCarets, noTrim)
-- Clone so that we don't insert any extra patterns into the table in package.loaded. For some reason, using require seems to keep memory use down; probably because the table is always cloned.
-- Clone so that we don't insert any extra patterns into the table in package.loaded. For some reason, using require seems to keep memory use down; probably because the table is always cloned.
local patterns = shallow_copy(require("Module:languages/data/patterns"))
local patterns = shallow_copy(require(languages_data_patterns_module))
if keepCarets then
if keepCarets then
insert(patterns, "((\\+)%^)")
insert(patterns, "((\\+)%^)")
Line 362: Line 412:
local function checkNoEntities(self, text)
local function checkNoEntities(self, text)
local textNoEnc = decode_entities(text)
local textNoEnc = decode_entities(text)
if textNoEnc ~= text and load_data("Module:links/data").unsupported_titles[text] then
if textNoEnc ~= text and load_data(links_data_module).unsupported_titles[text] then
return text
return text
else
else
Line 373: Line 423:
if not check_object("script", true, sc) or sc:getCode() == "None" then
if not check_object("script", true, sc) or sc:getCode() == "None" then
return self:findBestScript(text)
return self:findBestScript(text)
else
return sc
end
end
return sc
end
end


Line 435: Line 484:
local module = safe_require("Module:" .. substitution_data)
local module = safe_require("Module:" .. substitution_data)
if module then
if module then
-- TODO: translit functions should take objects, not codes.
-- TODO: translit functions should be called with form NFD.
if function_name == "tr" then
if function_name == "tr" then
text, fail, cats = module[function_name](text, self:getCode(), sc:getCode())
text, fail, cats = module[function_name](text, self._code, sc:getCode())
else
else
text, fail, cats = module[function_name](sc:toFixedNFD(text), self:getCode(), sc:getCode())
text, fail, cats = module[function_name](sc:toFixedNFD(text), self, sc)
end
-- TODO: get rid of the `fail` and `cats` return values.
if fail ~= nil then
track("fail")
track("fail/" .. self._code)
end
if cats ~= nil then
track("cats")
track("cats/" .. self._code)
end
end
else
else
Line 454: Line 514:


-- Split the text into sections, based on the presence of temporarily substituted formatting characters, then iterate over each one to apply substitutions. This avoids putting PUA characters through language-specific modules, which may be unequipped for them.
-- Split the text into sections, based on the presence of temporarily substituted formatting characters, then iterate over each one to apply substitutions. This avoids putting PUA characters through language-specific modules, which may be unequipped for them.
local function iterateSectionSubstitutions(text, subbedChars, keepCarets, self, sc, substitution_data, function_name)
local function iterateSectionSubstitutions(self, text, sc, subbedChars, keepCarets, substitution_data, function_name)
local fail, cats, sections = nil, {}
local fail, cats, sections = nil, {}
-- See [[Module:languages/data]].
-- See [[Module:languages/data]].
if not find(text, "\244") or load_data("Module:languages/data").contiguous_substitution[self._code] then
if not find(text, "\244") or (load_data(languages_data_module).substitution[self._code] == "cont") then
sections = {text}
sections = {text}
else
else
Line 469: Line 529:
if sub and subbedChars then
if sub and subbedChars then
local noSub
local noSub
for _, pattern in ipairs(require("Module:languages/data/patterns")) do
for _, pattern in ipairs(require(languages_data_patterns_module)) do
if match(section, pattern .. "%z?") then
if match(section, pattern .. "%z?") then
noSub = true
noSub = true
Line 557: Line 617:
-- Add article and " substrate" to substrates that lack them.
-- Add article and " substrate" to substrates that lack them.
if self:getFamilyCode() == "qfa-sub" then
if self:getFamilyCode() == "qfa-sub" then
if not (match(form, "^[Tt]he ") or match(form, "^[Aa] ")) then
if not (sub(form, 1, 4) == "the " or sub(form, 1, 2) == "a ") then
form = "a " .. form
form = "a " .. form
end
end
if not match(form, "[Ss]ubstrate") then
if not match(form, " [Ss]ubstrate") then
form = form .. " substrate"
form = form .. " substrate"
end
end
Line 668: Line 728:
Language.hasType = require(language_like_module).hasType
Language.hasType = require(language_like_module).hasType
return self:hasType(...)
return self:hasType(...)
end
-- Returns the `main_category` value from the language's data, or "lemma" when none is set.
function Language:getMainCategoryName()
	local category = self._data.main_category
	if category then
		return category
	end
	return "lemma"
end
end


Line 693: Line 749:
if wm_langs == nil then
if wm_langs == nil then
wm_langs = self._data.wikimedia_codes
wm_langs = self._data.wikimedia_codes
wm_langs = wm_langs and split(wm_langs, ",", true, true) or {self._code}
if wm_langs then
wm_langs = split(wm_langs, ",", true, true)
else
local code = self._code
if is_known_language_tag(code) then
wm_langs = {code}
else
-- Inherit, but only if no codes are specified in the data *and*
-- the language code isn't a valid Wikimedia language code.
local parent = self:getParent()
wm_langs = parent and parent:getWikimediaLanguageCodes() or {}
end
end
self._wikimediaLanguageCodes = wm_langs
self._wikimediaLanguageCodes = wm_langs
end
end
Line 717: Line 785:


function Language:makeWikipediaLink()
function Language:makeWikipediaLink()
return "[[w:" .. self:getWikipediaArticle() .. "|" .. self:getCanonicalName() .. "]]"
return make_link(self, "w:" .. self:getWikipediaArticle(), self:getCanonicalName())
end
end


Line 738: Line 806:
local codes = self:getScriptCodes()
local codes = self:getScriptCodes()
if codes[1] == "All" then
if codes[1] == "All" then
scripts = load_data("Module:scripts/data")
scripts = load_data(scripts_data_module)
else
else
scripts = {}
scripts = {}
Line 809: Line 877:
text = get_plaintext(text)
text = get_plaintext(text)


-- Remove all spaces and any ASCII punctuation. Some non-ASCII punctuation is script-specific, so can't be removed.
text = ugsub(text, "[%s!\"#%%&'()*,%-./:;?@[\\%]_{}]+", "")
if #text == 0 then
if #text == 0 then
return get_script("None")
return get_script("None")
Line 815: Line 885:
-- Try to match every script against the text,
-- Try to match every script against the text,
-- and return the one with the most matching characters.
-- and return the one with the most matching characters.
local bestscript
local bestcount, bestscript, length = 0
local bestcount, length = 0, 0
for i = 1, codes_len do
for i = 1, codes_len do
local sc = codes[i]
local sc = codes[i]
Line 906: Line 975:
end
end


--[==[Check whether the language belongs to `family` (which can be a family code or object). A list of objects can be given in place of `family`; in that case, return true if the language belongs to any of the specified families. Note that some languages (in particular, certain creoles) can have multiple immediate ancestors potentially belonging to different families; in that case, return true if the language belongs to any of the specified families.]==]
do
function Language:inFamily(...)
local function check_family(self, family)
--check_object("family", nil, ...)
for _, family in ipairs{...} do
if type(family) == "table" then
if type(family) == "table" then
family = family:getCode()
family = family:getCode()
end
end
local self_family_code = self:getFamilyCode()
if self:getFamilyCode() == family then
if self_family_code == nil then
return false
elseif self_family_code == family then
return true
return true
end
end
Line 923: Line 987:
return true
return true
-- If the family isn't a real family (e.g. creoles) check any ancestors.
-- If the family isn't a real family (e.g. creoles) check any ancestors.
elseif self_family:getFamilyCode() == "qfa-not" then
elseif self_family:inFamily("qfa-not") then
local ancestors = self:getAncestors()
local ancestors = self:getAncestors()
for _, ancestor in ipairs(ancestors) do
for _, ancestor in ipairs(ancestors) do
Line 932: Line 996:
end
end
end
end
return false
 
end
--[==[Check whether the language belongs to `family` (which can be a family code or object). A list of objects can be given in place of `family`; in that case, return true if the language belongs to any of the specified families. Note that some languages (in particular, certain creoles) can have multiple immediate ancestors potentially belonging to different families; in that case, return true if the language belongs to any of the specified families.]==]
function Language:inFamily(...)
if self:getFamilyCode() == nil then
return false
end
return check_inputs(self, check_family, false, ...)
end
end


function Language:getParent()
function Language:getParent()
Line 982: Line 1,053:
end
end


function Language:hasParent(...)
do
--check_object("language", nil, ...)
local function check_lang(self, lang)
for _, otherlang in ipairs{...} do
for _, parent in ipairs(self:getParentChain()) do
for _, parent in ipairs(self:getParentChain()) do
if (type(otherlang) == "string" and otherlang or otherlang:getCode()) == parent:getCode() then
if (type(lang) == "string" and lang or lang:getCode()) == parent:getCode() then
return true
return true
end
end
end
end
end
end
return false
 
function Language:hasParent(...)
return check_inputs(self, check_lang, false, ...)
end
end
end


Line 1,119: Line 1,192:
--[==[Given a list of language objects or codes, returns true if at least one of them is an ancestor. This includes any etymology-only children of that ancestor. If the language's ancestor(s) are etymology-only languages, it will also return true for those language parent(s) (e.g. if Vulgar Latin is the ancestor, it will also return true for its parent, Latin). However, a parent is excluded from this if the ancestor is also ancestral to that parent (e.g. if Classical Persian is the ancestor, Persian would return false, because Classical Persian is also ancestral to Persian).]==]
--[==[Given a list of language objects or codes, returns true if at least one of them is an ancestor. This includes any etymology-only children of that ancestor. If the language's ancestor(s) are etymology-only languages, it will also return true for those language parent(s) (e.g. if Vulgar Latin is the ancestor, it will also return true for its parent, Latin). However, a parent is excluded from this if the ancestor is also ancestral to that parent (e.g. if Classical Persian is the ancestor, Persian would return false, because Classical Persian is also ancestral to Persian).]==]
function Language:hasAncestor(...)
function Language:hasAncestor(...)
--check_object("language", nil, ...)
local function iterateOverAncestorTree(node, func, parent_check)
local function iterateOverAncestorTree(node, func, parent_check)
local ancestors = node:getAncestors()
local ancestors = node:getAncestors()
Line 1,184: Line 1,255:
end
end


function Language:getAncestorChain()
do
-- Recursively build the ancestor tree for `lang`. Each node is a table {lang = <code>, ancestors = <list of
-- nodes>}; `ancestors` is left nil when the language has no ancestors. `memo` maps language codes to nodes
-- that have already been created, so a language reached more than once shares a single node.
local function construct_node(lang, memo)
	local node = {lang = lang:getCode()}
	-- Register the node before descending, so that it can be found in `memo` while its ancestors are built.
	memo[lang:getCode()] = node
	local parents
	for _, ancestor in ipairs(lang:getAncestors()) do
		local parent_node = memo[ancestor:getCode()] or construct_node(ancestor, memo)
		parents = parents or {}
		insert(parents, parent_node)
	end
	node.ancestors = parents
	return node
end
 
-- Returns the ancestor tree for this language as built by construct_node, computing it on first use and
-- caching it on the object in `_ancestorChain`.
-- NOTE(review): getAncestorChainOld appears to cache in the same `_ancestorChain` field; confirm the two
-- methods cannot hand each other's cached value back to a caller.
function Language:getAncestorChain()
	local cached = self._ancestorChain
	if cached ~= nil then
		return cached
	end
	local tree = construct_node(self, {})
	self._ancestorChain = tree
	return tree
end
end
 
function Language:getAncestorChainOld()
local chain = self._ancestorChain
local chain = self._ancestorChain
if chain == nil then
if chain == nil then
Line 1,192: Line 1,287:
local ancestors = step:getAncestors()
local ancestors = step:getAncestors()
step = #ancestors == 1 and ancestors[1] or nil
step = #ancestors == 1 and ancestors[1] or nil
if not step then break end
if not step then
insert(chain, 1, step)
break
end
insert(chain, step)
end
end
self._ancestorChain = chain
self._ancestorChain = chain
Line 1,260: Line 1,357:
end
end


function Language:hasDescendant(...)
do
for _, lang in ipairs{...} do
local function check_lang(self, lang)
if type(lang) == "string" then
if type(lang) == "string" then
lang = get_by_code(lang, nil, true)
lang = get_by_code(lang, nil, true)
Line 1,269: Line 1,366:
end
end
end
end
return false
 
function Language:hasDescendant(...)
return check_inputs(self, check_lang, false, ...)
end
end
end


local function fetch_children(self, fmt)
local function fetch_children(self, fmt)
local m_etym_data = require("Module:etymology languages/data")
local m_etym_data = require(etymology_languages_data_module)
local self_code, children = self._code, {}
local self_code, children = self._code, {}
for code, lang in pairs(m_etym_data) do
for code, lang in pairs(m_etym_data) do
Line 1,340: Line 1,440:
if name == nil then
if name == nil then
name = self:getCanonicalName()
name = self:getCanonicalName()
-- If a substrate, omit any leading article.
if self:getFamilyCode() == "qfa-sub" then
name = name:gsub("^the ", ""):gsub("^a ", "")
end
-- Only add " language" if a full language.
-- Only add " language" if a full language.
if self:hasType("full") then
if self:hasType("full") then
Line 1,357: Line 1,461:
--[==[Creates a link to the category; the link text is the canonical name.]==]
--[==[Creates a link to the category; the link text is the canonical name.]==]
function Language:makeCategoryLink()
function Language:makeCategoryLink()
return "[[:Category:" .. self:getCategoryName() .. "|" .. self:getDisplayForm() .. "]]"
return make_link(self, ":Category:" .. self:getCategoryName(), self:getDisplayForm())
end
end


Line 1,409: Line 1,513:


-- Check if the text is a listed unsupported title.
-- Check if the text is a listed unsupported title.
local unsupportedTitles = load_data("Module:links/data").unsupported_titles
local unsupportedTitles = load_data(links_data_module).unsupported_titles
if unsupportedTitles[text] then
if unsupportedTitles[text] then
return "Unsupported titles/" .. unsupportedTitles[text], nil, {}
return "Unsupported titles/" .. unsupportedTitles[text], nil, {}
Line 1,418: Line 1,522:
local fail, cats
local fail, cats
text = normalize(text, sc)
text = normalize(text, sc)
text, fail, cats = iterateSectionSubstitutions(text, nil, nil, self, sc, self._data.entry_name, "makeEntryName")
text, fail, cats = iterateSectionSubstitutions(self, text, sc, nil, nil, self._data.entry_name, "makeEntryName")


text = umatch(text, "^[¿¡]?(.-[^%s%p].-)%s*[؟?!;՛՜ ՞ ՟?!︖︕।॥။၊་།]?$") or text
text = umatch(text, "^[¿¡]?(.-[^%s%p].-)%s*[؟?!;՛՜ ՞ ՟?!︖︕।॥။၊་།]?$") or text
Line 1,425: Line 1,529:
-- Escape unsupported characters so they can be used in titles. ` is used as a delimiter for this, so a raw use of it in an unsupported title is also escaped here to prevent interference; this is only done with unsupported titles, though, so inclusion won't in itself mean a title is treated as unsupported (which is why it's excluded from the earlier test).
-- Escape unsupported characters so they can be used in titles. ` is used as a delimiter for this, so a raw use of it in an unsupported title is also escaped here to prevent interference; this is only done with unsupported titles, though, so inclusion won't in itself mean a title is treated as unsupported (which is why it's excluded from the earlier test).
if unsupported then
if unsupported then
local unsupported_characters = load_data("Module:links/data").unsupported_characters
local unsupported_characters = load_data(links_data_module).unsupported_characters
text = text:gsub("[#<>%[%]_`{|}\239]\191?\189?", unsupported_characters)
text = text:gsub("[#<>%[%]_`{|}\239]\191?\189?", unsupported_characters)
:gsub("%f[^%z/]%.%.?%f[%z/]", function(m)
:gsub("%f[^%z/]%.%.?%f[%z/]", function(m)
Line 1,446: Line 1,550:
end
end
sc = checkScript(text, self, sc)
sc = checkScript(text, self, sc)
return require("Module:" .. self._data.generate_forms).generateForms(text, self._code, sc:getCode())
return require("Module:" .. self._data.generate_forms).generateForms(text, self, sc)
end
end


Line 1,454: Line 1,558:
if (not text) or text == "" then
if (not text) or text == "" then
return text, nil, {}
return text, nil, {}
end
if match(text, "<[^<>]+>") then
track("track HTML tag")
end
end
-- Remove directional characters, soft hyphens, strip markers and HTML tags.
-- Remove directional characters, soft hyphens, strip markers and HTML tags.
Line 1,484: Line 1,591:


local sort_key = self._data.sort_key
local sort_key = self._data.sort_key
text, fail, cats = iterateSectionSubstitutions(text, nil, nil, self, sc, sort_key, "makeSortKey")
text, fail, cats = iterateSectionSubstitutions(self, text, sc, nil, nil, sort_key, "makeSortKey")


if not sc:sortByScraping() then
if not sc:sortByScraping() then
Line 1,513: Line 1,620:
local fail, cats
local fail, cats
text = normalize(text, sc)
text = normalize(text, sc)
text, fail, cats, subbedChars = iterateSectionSubstitutions(text, subbedChars, keepCarets, self, sc, self._data.display_text, "makeDisplayText")
text, fail, cats, subbedChars = iterateSectionSubstitutions(self, text, sc, subbedChars, keepCarets, self._data.display_text, "makeDisplayText")


text = removeCarets(text, sc)
text = removeCarets(text, sc)
Line 1,528: Line 1,635:
return gsub(m1, "\244[\128-\191]*", "")
return gsub(m1, "\244[\128-\191]*", "")
end)
end)
if not prefix or prefix == text then
-- Check if the prefix is an interwiki, though ignore capitalised Wiktionary:, which is a namespace.
break
if not prefix or prefix == text or prefix == "Wiktionary"
end
or not (load_data("Module:data/interwikis")[ulower(prefix)] or prefix == "") then
local lower_prefix = ulower(prefix)
if not (load_data("Module:data/interwikis")[lower_prefix] or prefix == "") then
break
break
end
end
Line 1,546: Line 1,651:
:gsub("\4", ":")
:gsub("\4", ":")
end
end
 
if not self:hasType("conlang") then
text = gsub(text,"^%*", "")
end
text = gsub(text,"^%*%*", "*")
return text, fail, cats, subbedChars
return text, fail, cats, subbedChars
end
end
Line 1,590: Line 1,689:
if not (sc:isTransliterated() or module_override) then
if not (sc:isTransliterated() or module_override) then
-- temporary tracking to see if/when this gets triggered
-- temporary tracking to see if/when this gets triggered
track("non-transliterable")
track("non-transliterable/" .. self._code)
track("non-transliterable/" .. sc:getCode())
track("non-transliterable/" .. sc:getCode() .. "/" .. self._code)
return nil, true, {}
return nil, true, {}
end
end
Line 1,595: Line 1,698:
-- Remove any strip markers.
-- Remove any strip markers.
text = unstrip(text)
text = unstrip(text)
-- Do not process the formatting into PUA characters for certain languages.
local processed = load_data(languages_data_module).substitution[self._code] ~= "none"


-- Get the display text with the keepCarets flag set.
-- Get the display text with the keepCarets flag set.
local fail, cats, subbedChars
local fail, cats, subbedChars
text, fail, cats, subbedChars = processDisplayText(text, self, sc, true)
if processed then
text, fail, cats, subbedChars = processDisplayText(text, self, sc, true)
end


-- Transliterate (using the module override if applicable).
-- Transliterate (using the module override if applicable).
text, fail, cats, subbedChars = iterateSectionSubstitutions(text, subbedChars, true, self, sc, module_override or self._data.translit, "tr")
text, fail, cats, subbedChars = iterateSectionSubstitutions(self, text, sc, subbedChars, true, module_override or self._data.translit, "tr")


if not text then
if not text then
Line 1,618: Line 1,726:
end
end


text = escape_risky_characters(text)
if processed then
text = undoTempSubstitutions(text, subbedChars)
text = escape_risky_characters(text)
text = undoTempSubstitutions(text, subbedChars)
end


-- If the script does not use capitalization, then capitalize any letters of the transliteration which are immediately preceded by a caret (and remove the caret).
-- If the script does not use capitalization, then capitalize any letters of the transliteration which are immediately preceded by a caret (and remove the caret).
Line 1,626: Line 1,736:
return m1 .. uupper(m2)
return m1 .. uupper(m2)
end)
end)
end
-- Track module overrides.
if module_override ~= nil then
track("module_override")
end
end


Line 1,706: Line 1,821:
function export.getDataModuleName(code)
function export.getDataModuleName(code)
local letter = match(code, "^(%l)%l%l?$")
local letter = match(code, "^(%l)%l%l?$")
return letter == nil and "languages/data/exceptional" or
return "Module:" .. (
letter == nil and "languages/data/exceptional" or
#code == 2 and "languages/data/2" or
#code == 2 and "languages/data/2" or
"languages/data/3/" .. letter
"languages/data/3/" .. letter
)
end
end
get_data_module_name = export.getDataModuleName
get_data_module_name = export.getDataModuleName
Line 1,726: Line 1,843:
varieties = "unique",
varieties = "unique",
wikipedia_article = "unique",
wikipedia_article = "unique",
wikimedia_codes = "unique"
}
}
 
local function __index(self, k)
local function __index(self, k)
local stack, key_type = getmetatable(self), key_types[k]
local stack, key_type = getmetatable(self), key_types[k]
Line 1,766: Line 1,884:
end
end
end
end
 
local function __newindex()
local function __newindex()
error("table is read-only")
error("table is read-only")
end
end
 
local function __pairs(self)
local function __pairs(self)
-- Iterate down the stack, caching keys to avoid duplicate returns.
-- Iterate down the stack, caching keys to avoid duplicate returns.
Line 1,802: Line 1,920:
end
end
end
end
 
local __ipairs = require(table_module).indexIpairs
local __ipairs = require(table_module).indexIpairs
 
function make_stack(data)
function make_stack(data)
local stack = {
local stack = {
Line 1,817: Line 1,935:
return setmetatable({}, stack), stack
return setmetatable({}, stack), stack
end
end
 
return make_stack(data)
return make_stack(data)
end
end
 
local function get_stack(data)
local function get_stack(data)
local stack = getmetatable(data)
local stack = getmetatable(data)
return stack and type(stack) == "table" and stack[make_stack] and stack or nil
return stack and type(stack) == "table" and stack[make_stack] and stack or nil
end
end
 
--[==[
--[==[
<span style="color: #BA0000">This function is not for use in entries or other content pages.</span>
<span style="color: #BA0000">This function is not for use in entries or other content pages.</span>
Line 1,860: Line 1,978:
return data
return data
end
end
 
function Language:loadInExtraData()
function Language:loadInExtraData()
-- Only full languages have extra data.
-- Only full languages have extra data.
Line 1,880: Line 1,998:
local modulename = get_extra_data_module_name(code)
local modulename = get_extra_data_module_name(code)
-- No data cached as false.
-- No data cached as false.
stack[0] = modulename and load_data("Module:" .. modulename)[code] or false
stack[0] = modulename and load_data(modulename)[code] or false
end
end
 
--[==[Returns the name of the module containing the language's data. For etymology-only languages this is the etymology languages data module; otherwise it is the module that {getDataModuleName} gives for the language's main code (or for its own code, if no main code is set). The result is cached on the object.]==]
function Language:getDataModuleName()
	local cached = self._dataModuleName
	if cached ~= nil then
		return cached
	end
	local name
	if self:hasType("etymology-only") then
		name = etymology_languages_data_module
	else
		name = get_data_module_name(self._mainCode or self._code)
	end
	self._dataModuleName = name
	return name
end
 
--[==[Returns the name of the module containing the language's extra data, or {nil} if there is none. Etymology-only languages never have one; for other languages the name is looked up from the language's main code (or its own code, if no main code is set). The result is cached on the object, with "no module" cached as {false}.]==]
function Language:getExtraDataModuleName()
	local name = self._extraDataModuleName
	if name == nil then
		-- `false` marks "looked up, nothing found", so the lookup is not repeated.
		name = false
		if not self:hasType("etymology-only") then
			name = get_extra_data_module_name(self._mainCode or self._code) or false
		end
		self._extraDataModuleName = name
	end
	if name then
		return name
	end
	return nil
end
 
function export.makeObject(code, data, dontCanonicalizeAliases)
function export.makeObject(code, data, dontCanonicalizeAliases)
local data_type = type(data)
local data_type = type(data)
Line 1,907: Line 2,046:
lang._mainCode = code
lang._mainCode = code
end
end
 
local parent_data = parent._data
local parent_data = parent._data
if parent_data == nil then
if parent_data == nil then
Line 1,924: Line 2,063:
end
end
lang._data = data
lang._data = data
 
return setmetatable(lang, parent)
return setmetatable(lang, parent)
end
end
Line 1,933: Line 2,072:
function export.getByCode(code, paramForError, allowEtymLang, allowFamily)
function export.getByCode(code, paramForError, allowEtymLang, allowFamily)
-- Track uses of paramForError, ultimately so it can be removed, as error-handling should be done by [[Module:parameters]], not here.
-- Track uses of paramForError, ultimately so it can be removed, as error-handling should be done by [[Module:parameters]], not here.
 
if paramForError ~= nil then
track("paramForError")
end
if type(code) ~= "string" then
if type(code) ~= "string" then
local typ
local typ
Line 1,948: Line 2,089:
end
end


local m_data = load_data("Module:languages/data")
local m_data = load_data(languages_data_module)
if m_data.aliases[code] or m_data.track[code] then
track(code)
end


local norm_code = normalize_code(code)
local norm_code = normalize_code(code)
local modulename = get_data_module_name(norm_code)
 
-- If modulename is nil, the code is invalid.
if modulename == nil then
return nil
end
-- Get the data, checking for etymology-only languages if allowEtymLang is set.
-- Get the data, checking for etymology-only languages if allowEtymLang is set.
local data = load_data("Module:" .. modulename)[norm_code] or
local data = load_data(get_data_module_name(norm_code))[norm_code] or
allowEtymLang and load_data("Module:etymology languages/data")[norm_code]
allowEtymLang and load_data(etymology_languages_data_module)[norm_code]
 
-- If no data was found and allowFamily is set, check the family data. If the main family data was found, make the object with [[Module:families]] instead, as family objects have different methods. However, if it's an etymology-only family, use make_object in this module (which handles object inheritance), and the family-specific methods will be inherited from the parent object.
-- If no data was found and allowFamily is set, check the family data. If the main family data was found, make the object with [[Module:families]] instead, as family objects have different methods. However, if it's an etymology-only family, use make_object in this module (which handles object inheritance), and the family-specific methods will be inherited from the parent object.
if data == nil and allowFamily then
if data == nil and allowFamily then
data = load_data("Module:families/data")[norm_code]
data = load_data("Module:families/data")[norm_code]
if data ~= nil then
if data ~= nil then
return make_family_object(norm_code, data)
if data.parent == nil then
elseif allowEtymLang then
return make_family_object(norm_code, data)
data = load_data("Module:families/data/etymology")[norm_code]
elseif not allowEtymLang then
data = nil
end
end
end
end
end
Line 2,013: Line 2,154:
end
end


--[==[Used by [[Module:languages/data/2]] (et al.) and [[Module:etymology languages/data]], [[Module:families/data]], [[Module:families/data/etymology]], [[Module:scripts/data]] and [[Module:writing systems/data]] to finalize the data into the format that is actually returned.]==]
--[==[Used by [[Module:languages/data/2]] (et al.) and [[Module:etymology languages/data]], [[Module:families/data]], [[Module:scripts/data]] and [[Module:writing systems/data]] to finalize the data into the format that is actually returned.]==]
function export.finalizeData(data, main_type, variety)
function export.finalizeData(data, main_type, variety)
local fields = {"type"}
local fields = {"type"}
Line 2,031: Line 2,172:
entity.parent, entity[3], entity.family = entity[3], entity.family
entity.parent, entity[3], entity.family = entity[3], entity.family
-- Give the type "regular" iff not a variety and no other types are assigned.
-- Give the type "regular" iff not a variety and no other types are assigned.
elseif not entity.type then
elseif not (entity.type or entity.parent) then
entity.type = "regular"
entity.type = "regular"
end
end

Navigation menu