48,355
edits
No edit summary |
No edit summary |
||
| Line 74: | Line 74: | ||
their "bare display" form by taking the right part of two-part links and removing double brackets), but when this | their "bare display" form by taking the right part of two-part links and removing double brackets), but when this | ||
happens is unclear to me [FIXME]. Some languages have a chop-up-and-paste-together scheme that sends parts of the | happens is unclear to me [FIXME]. Some languages have a chop-up-and-paste-together scheme that sends parts of the | ||
text through the transliterate mechanism, and for others (those listed in { | text through the transliterate mechanism, and for others (those listed with "cont" in {substitution} in | ||
[[Module:languages/data]]) they receive the full input text, but preprocessed in certain ways. (The wisdom of this is | [[Module:languages/data]]) they receive the full input text, but preprocessed in certain ways. (The wisdom of this is | ||
still unclear to me.) | still unclear to me.) | ||
| Line 110: | Line 110: | ||
local export = {} | local export = {} | ||
local debug_track_module = "Module:debug/track" | |||
local etymology_languages_data_module = "Module:etymology languages/data" | |||
local families_module = "Module:families" | local families_module = "Module:families" | ||
local json_module = "Module:JSON" | local json_module = "Module:JSON" | ||
local language_like_module = "Module:language-like" | local language_like_module = "Module:language-like" | ||
local languages_data_module = "Module:languages/data" | |||
local languages_data_patterns_module = "Module:languages/data/patterns" | |||
local links_data_module = "Module:links/data" | |||
local load_module = "Module:load" | local load_module = "Module:load" | ||
local patterns_module = "Module:patterns" | |||
local scripts_module = "Module:scripts" | local scripts_module = "Module:scripts" | ||
local scripts_data_module = "Module:scripts/data" | |||
local string_encode_entities_module = "Module:string/encode entities" | local string_encode_entities_module = "Module:string/encode entities" | ||
local string_utilities_module = "Module:string utilities" | local string_utilities_module = "Module:string utilities" | ||
| Line 137: | Line 144: | ||
local insert = table.insert | local insert = table.insert | ||
local ipairs = ipairs | local ipairs = ipairs | ||
local is_known_language_tag = mw.language.isKnownLanguageTag | |||
local make_object -- Defined below. | local make_object -- Defined below. | ||
local match = string.match | local match = string.match | ||
| Line 145: | Line 153: | ||
local select = select | local select = select | ||
local setmetatable = setmetatable | local setmetatable = setmetatable | ||
local sub = string.sub | |||
local type = type | local type = type | ||
local unstrip = mw.text.unstrip | local unstrip = mw.text.unstrip | ||
| Line 157: | Line 166: | ||
check_object = require(utilities_module).check_object | check_object = require(utilities_module).check_object | ||
return check_object(...) | return check_object(...) | ||
end | |||
local function debug_track(...) | |||
debug_track = require(debug_track_module) | |||
return debug_track(...) | |||
end | end | ||
| Line 225: | Line 239: | ||
local function pattern_escape(...) | local function pattern_escape(...) | ||
pattern_escape = require( | pattern_escape = require(patterns_module).pattern_escape | ||
return pattern_escape(...) | return pattern_escape(...) | ||
end | end | ||
| Line 235: | Line 249: | ||
local function replacement_escape(...) | local function replacement_escape(...) | ||
replacement_escape = require( | replacement_escape = require(patterns_module).replacement_escape | ||
return replacement_escape(...) | return replacement_escape(...) | ||
end | end | ||
| Line 288: | Line 302: | ||
return uupper(...) | return uupper(...) | ||
end | end | ||
local function track(page) | |||
debug_track("languages/" .. page) | |||
return true | |||
end | |||
local function normalize_code(code) | local function normalize_code(code) | ||
return load_data( | return load_data(languages_data_module).aliases[code] or code | ||
end | |||
local function check_inputs(self, check, default, ...) | |||
local n = select("#", ...) | |||
if n == 0 then | |||
return false | |||
end | |||
local ret = check(self, (...)) | |||
if ret ~= nil then | |||
return ret | |||
elseif n > 1 then | |||
local inputs = {...} | |||
for i = 2, n do | |||
ret = check(self, inputs[i]) | |||
if ret ~= nil then | |||
return ret | |||
end | |||
end | |||
end | |||
return default | |||
end | |||
local function make_link(self, target, display) | |||
local prefix, main | |||
if self:getFamilyCode() == "qfa-sub" then | |||
prefix, main = display:match("^(the )(.*)") | |||
if not prefix then | |||
prefix, main = display:match("^(a )(.*)") | |||
end | |||
end | |||
return (prefix or "") .. "[[" .. target .. "|" .. (main or display) .. "]]" | |||
end | end | ||
| Line 305: | Line 355: | ||
local function doTempSubstitutions(text, subbedChars, keepCarets, noTrim) | local function doTempSubstitutions(text, subbedChars, keepCarets, noTrim) | ||
-- Clone so that we don't insert any extra patterns into the table in package.loaded. For some reason, using require seems to keep memory use down; probably because the table is always cloned. | -- Clone so that we don't insert any extra patterns into the table in package.loaded. For some reason, using require seems to keep memory use down; probably because the table is always cloned. | ||
local patterns = shallow_copy(require( | local patterns = shallow_copy(require(languages_data_patterns_module)) | ||
if keepCarets then | if keepCarets then | ||
insert(patterns, "((\\+)%^)") | insert(patterns, "((\\+)%^)") | ||
| Line 362: | Line 412: | ||
local function checkNoEntities(self, text) | local function checkNoEntities(self, text) | ||
local textNoEnc = decode_entities(text) | local textNoEnc = decode_entities(text) | ||
if textNoEnc ~= text and load_data( | if textNoEnc ~= text and load_data(links_data_module).unsupported_titles[text] then | ||
return text | return text | ||
else | else | ||
| Line 373: | Line 423: | ||
if not check_object("script", true, sc) or sc:getCode() == "None" then | if not check_object("script", true, sc) or sc:getCode() == "None" then | ||
return self:findBestScript(text) | return self:findBestScript(text) | ||
end | end | ||
return sc | |||
end | end | ||
| Line 435: | Line 484: | ||
local module = safe_require("Module:" .. substitution_data) | local module = safe_require("Module:" .. substitution_data) | ||
if module then | if module then | ||
-- TODO: translit functions should take objects, not codes. | |||
-- TODO: translit functions should be called with form NFD. | |||
if function_name == "tr" then | if function_name == "tr" then | ||
text, fail, cats = module[function_name](text, self | text, fail, cats = module[function_name](text, self._code, sc:getCode()) | ||
else | else | ||
text, fail, cats = module[function_name](sc:toFixedNFD(text), self: | text, fail, cats = module[function_name](sc:toFixedNFD(text), self, sc) | ||
end | |||
-- TODO: get rid of the `fail` and `cats` return values. | |||
if fail ~= nil then | |||
track("fail") | |||
track("fail/" .. self._code) | |||
end | |||
if cats ~= nil then | |||
track("cats") | |||
track("cats/" .. self._code) | |||
end | end | ||
else | else | ||
| Line 454: | Line 514: | ||
-- Split the text into sections, based on the presence of temporarily substituted formatting characters, then iterate over each one to apply substitutions. This avoids putting PUA characters through language-specific modules, which may be unequipped for them. | -- Split the text into sections, based on the presence of temporarily substituted formatting characters, then iterate over each one to apply substitutions. This avoids putting PUA characters through language-specific modules, which may be unequipped for them. | ||
local function iterateSectionSubstitutions(text, subbedChars, keepCarets | local function iterateSectionSubstitutions(self, text, sc, subbedChars, keepCarets, substitution_data, function_name) | ||
local fail, cats, sections = nil, {} | local fail, cats, sections = nil, {} | ||
-- See [[Module:languages/data]]. | -- See [[Module:languages/data]]. | ||
if not find(text, "\244") or load_data( | if not find(text, "\244") or (load_data(languages_data_module).substitution[self._code] == "cont") then | ||
sections = {text} | sections = {text} | ||
else | else | ||
| Line 469: | Line 529: | ||
if sub and subbedChars then | if sub and subbedChars then | ||
local noSub | local noSub | ||
for _, pattern in ipairs(require( | for _, pattern in ipairs(require(languages_data_patterns_module)) do | ||
if match(section, pattern .. "%z?") then | if match(section, pattern .. "%z?") then | ||
noSub = true | noSub = true | ||
| Line 557: | Line 617: | ||
-- Add article and " substrate" to substrates that lack them. | -- Add article and " substrate" to substrates that lack them. | ||
if self:getFamilyCode() == "qfa-sub" then | if self:getFamilyCode() == "qfa-sub" then | ||
if not ( | if not (sub(form, 1, 4) == "the " or sub(form, 1, 2) == "a ") then | ||
form = "a " .. form | form = "a " .. form | ||
end | end | ||
if not match(form, "[Ss]ubstrate") then | if not match(form, " [Ss]ubstrate") then | ||
form = form .. " substrate" | form = form .. " substrate" | ||
end | end | ||
| Line 668: | Line 728: | ||
Language.hasType = require(language_like_module).hasType | Language.hasType = require(language_like_module).hasType | ||
return self:hasType(...) | return self:hasType(...) | ||
end | end | ||
| Line 693: | Line 749: | ||
if wm_langs == nil then | if wm_langs == nil then | ||
wm_langs = self._data.wikimedia_codes | wm_langs = self._data.wikimedia_codes | ||
wm_langs = | if wm_langs then | ||
wm_langs = split(wm_langs, ",", true, true) | |||
else | |||
local code = self._code | |||
if is_known_language_tag(code) then | |||
wm_langs = {code} | |||
else | |||
-- Inherit, but only if no codes are specified in the data *and* | |||
-- the language code isn't a valid Wikimedia language code. | |||
local parent = self:getParent() | |||
wm_langs = parent and parent:getWikimediaLanguageCodes() or {} | |||
end | |||
end | |||
self._wikimediaLanguageCodes = wm_langs | self._wikimediaLanguageCodes = wm_langs | ||
end | end | ||
| Line 717: | Line 785: | ||
function Language:makeWikipediaLink() | function Language:makeWikipediaLink() | ||
return " | return make_link(self, "w:" .. self:getWikipediaArticle(), self:getCanonicalName()) | ||
end | end | ||
| Line 738: | Line 806: | ||
local codes = self:getScriptCodes() | local codes = self:getScriptCodes() | ||
if codes[1] == "All" then | if codes[1] == "All" then | ||
scripts = load_data( | scripts = load_data(scripts_data_module) | ||
else | else | ||
scripts = {} | scripts = {} | ||
| Line 809: | Line 877: | ||
text = get_plaintext(text) | text = get_plaintext(text) | ||
-- Remove all spaces and any ASCII punctuation. Some non-ASCII punctuation is script-specific, so can't be removed. | |||
text = ugsub(text, "[%s!\"#%%&'()*,%-./:;?@[\\%]_{}]+", "") | |||
if #text == 0 then | if #text == 0 then | ||
return get_script("None") | return get_script("None") | ||
| Line 815: | Line 885: | ||
-- Try to match every script against the text, | -- Try to match every script against the text, | ||
-- and return the one with the most matching characters. | -- and return the one with the most matching characters. | ||
local bestscript | local bestcount, bestscript, length = 0 | ||
for i = 1, codes_len do | for i = 1, codes_len do | ||
local sc = codes[i] | local sc = codes[i] | ||
| Line 906: | Line 975: | ||
end | end | ||
do | |||
function | local function check_family(self, family) | ||
if type(family) == "table" then | if type(family) == "table" then | ||
family = family:getCode() | family = family:getCode() | ||
end | end | ||
if self:getFamilyCode() == family then | |||
return true | return true | ||
end | end | ||
| Line 923: | Line 987: | ||
return true | return true | ||
-- If the family isn't a real family (e.g. creoles) check any ancestors. | -- If the family isn't a real family (e.g. creoles) check any ancestors. | ||
elseif self_family: | elseif self_family:inFamily("qfa-not") then | ||
local ancestors = self:getAncestors() | local ancestors = self:getAncestors() | ||
for _, ancestor in ipairs(ancestors) do | for _, ancestor in ipairs(ancestors) do | ||
| Line 932: | Line 996: | ||
end | end | ||
end | end | ||
return false | |||
end | --[==[Check whether the language belongs to `family` (which can be a family code or object). A list of objects can be given in place of `family`; in that case, return true if the language belongs to any of the specified families. Note that some languages (in particular, certain creoles) can have multiple immediate ancestors potentially belonging to different families; in that case, return true if the language belongs to any of the specified families.]==] | ||
function Language:inFamily(...) | |||
if self:getFamilyCode() == nil then | |||
return false | |||
end | |||
return check_inputs(self, check_family, false, ...) | |||
end | |||
end | |||
function Language:getParent() | function Language:getParent() | ||
| Line 982: | Line 1,053: | ||
end | end | ||
do | |||
local function check_lang(self, lang) | |||
for _, parent in ipairs(self:getParentChain()) do | for _, parent in ipairs(self:getParentChain()) do | ||
if (type( | if (type(lang) == "string" and lang or lang:getCode()) == parent:getCode() then | ||
return true | return true | ||
end | end | ||
end | end | ||
end | end | ||
return false | |||
function Language:hasParent(...) | |||
return check_inputs(self, check_lang, false, ...) | |||
end | |||
end | end | ||
| Line 1,119: | Line 1,192: | ||
--[==[Given a list of language objects or codes, returns true if at least one of them is an ancestor. This includes any etymology-only children of that ancestor. If the language's ancestor(s) are etymology-only languages, it will also return true for those language parent(s) (e.g. if Vulgar Latin is the ancestor, it will also return true for its parent, Latin). However, a parent is excluded from this if the ancestor is also ancestral to that parent (e.g. if Classical Persian is the ancestor, Persian would return false, because Classical Persian is also ancestral to Persian).]==] | --[==[Given a list of language objects or codes, returns true if at least one of them is an ancestor. This includes any etymology-only children of that ancestor. If the language's ancestor(s) are etymology-only languages, it will also return true for those language parent(s) (e.g. if Vulgar Latin is the ancestor, it will also return true for its parent, Latin). However, a parent is excluded from this if the ancestor is also ancestral to that parent (e.g. if Classical Persian is the ancestor, Persian would return false, because Classical Persian is also ancestral to Persian).]==] | ||
function Language:hasAncestor(...) | function Language:hasAncestor(...) | ||
local function iterateOverAncestorTree(node, func, parent_check) | local function iterateOverAncestorTree(node, func, parent_check) | ||
local ancestors = node:getAncestors() | local ancestors = node:getAncestors() | ||
| Line 1,184: | Line 1,255: | ||
end | end | ||
function Language:getAncestorChain() | do | ||
local function construct_node(lang, memo) | |||
local branch, ancestors = {lang = lang:getCode()} | |||
memo[lang:getCode()] = branch | |||
for _, ancestor in ipairs(lang:getAncestors()) do | |||
if ancestors == nil then | |||
ancestors = {} | |||
end | |||
insert(ancestors, memo[ancestor:getCode()] or construct_node(ancestor, memo)) | |||
end | |||
branch.ancestors = ancestors | |||
return branch | |||
end | |||
function Language:getAncestorChain() | |||
local chain = self._ancestorChain | |||
if chain == nil then | |||
chain = construct_node(self, {}) | |||
self._ancestorChain = chain | |||
end | |||
return chain | |||
end | |||
end | |||
function Language:getAncestorChainOld() | |||
local chain = self._ancestorChain | local chain = self._ancestorChain | ||
if chain == nil then | if chain == nil then | ||
| Line 1,192: | Line 1,287: | ||
local ancestors = step:getAncestors() | local ancestors = step:getAncestors() | ||
step = #ancestors == 1 and ancestors[1] or nil | step = #ancestors == 1 and ancestors[1] or nil | ||
if not step then break end | if not step then | ||
insert(chain | break | ||
end | |||
insert(chain, step) | |||
end | end | ||
self._ancestorChain = chain | self._ancestorChain = chain | ||
| Line 1,260: | Line 1,357: | ||
end | end | ||
function | do | ||
local function check_lang(self, lang) | |||
if type(lang) == "string" then | if type(lang) == "string" then | ||
lang = get_by_code(lang, nil, true) | lang = get_by_code(lang, nil, true) | ||
| Line 1,269: | Line 1,366: | ||
end | end | ||
end | end | ||
return false | |||
function Language:hasDescendant(...) | |||
return check_inputs(self, check_lang, false, ...) | |||
end | |||
end | end | ||
local function fetch_children(self, fmt) | local function fetch_children(self, fmt) | ||
local m_etym_data = require( | local m_etym_data = require(etymology_languages_data_module) | ||
local self_code, children = self._code, {} | local self_code, children = self._code, {} | ||
for code, lang in pairs(m_etym_data) do | for code, lang in pairs(m_etym_data) do | ||
| Line 1,340: | Line 1,440: | ||
if name == nil then | if name == nil then | ||
name = self:getCanonicalName() | name = self:getCanonicalName() | ||
-- If a substrate, omit any leading article. | |||
if self:getFamilyCode() == "qfa-sub" then | |||
name = name:gsub("^the ", ""):gsub("^a ", "") | |||
end | |||
-- Only add " language" if a full language. | -- Only add " language" if a full language. | ||
if self:hasType("full") then | if self:hasType("full") then | ||
| Line 1,357: | Line 1,461: | ||
--[==[Creates a link to the category; the link text is the canonical name.]==] | --[==[Creates a link to the category; the link text is the canonical name.]==] | ||
function Language:makeCategoryLink() | function Language:makeCategoryLink() | ||
return " | return make_link(self, ":Category:" .. self:getCategoryName(), self:getDisplayForm()) | ||
end | end | ||
| Line 1,409: | Line 1,513: | ||
-- Check if the text is a listed unsupported title. | -- Check if the text is a listed unsupported title. | ||
local unsupportedTitles = load_data( | local unsupportedTitles = load_data(links_data_module).unsupported_titles | ||
if unsupportedTitles[text] then | if unsupportedTitles[text] then | ||
return "Unsupported titles/" .. unsupportedTitles[text], nil, {} | return "Unsupported titles/" .. unsupportedTitles[text], nil, {} | ||
| Line 1,418: | Line 1,522: | ||
local fail, cats | local fail, cats | ||
text = normalize(text, sc) | text = normalize(text, sc) | ||
text, fail, cats = iterateSectionSubstitutions(text, nil, nil | text, fail, cats = iterateSectionSubstitutions(self, text, sc, nil, nil, self._data.entry_name, "makeEntryName") | ||
text = umatch(text, "^[¿¡]?(.-[^%s%p].-)%s*[؟?!;՛՜ ՞ ՟?!︖︕।॥။၊་།]?$") or text | text = umatch(text, "^[¿¡]?(.-[^%s%p].-)%s*[؟?!;՛՜ ՞ ՟?!︖︕।॥။၊་།]?$") or text | ||
| Line 1,425: | Line 1,529: | ||
-- Escape unsupported characters so they can be used in titles. ` is used as a delimiter for this, so a raw use of it in an unsupported title is also escaped here to prevent interference; this is only done with unsupported titles, though, so inclusion won't in itself mean a title is treated as unsupported (which is why it's excluded from the earlier test). | -- Escape unsupported characters so they can be used in titles. ` is used as a delimiter for this, so a raw use of it in an unsupported title is also escaped here to prevent interference; this is only done with unsupported titles, though, so inclusion won't in itself mean a title is treated as unsupported (which is why it's excluded from the earlier test). | ||
if unsupported then | if unsupported then | ||
local unsupported_characters = load_data( | local unsupported_characters = load_data(links_data_module).unsupported_characters | ||
text = text:gsub("[#<>%[%]_`{|}\239]\191?\189?", unsupported_characters) | text = text:gsub("[#<>%[%]_`{|}\239]\191?\189?", unsupported_characters) | ||
:gsub("%f[^%z/]%.%.?%f[%z/]", function(m) | :gsub("%f[^%z/]%.%.?%f[%z/]", function(m) | ||
| Line 1,446: | Line 1,550: | ||
end | end | ||
sc = checkScript(text, self, sc) | sc = checkScript(text, self, sc) | ||
return require("Module:" .. self._data.generate_forms).generateForms(text, self | return require("Module:" .. self._data.generate_forms).generateForms(text, self, sc) | ||
end | end | ||
| Line 1,454: | Line 1,558: | ||
if (not text) or text == "" then | if (not text) or text == "" then | ||
return text, nil, {} | return text, nil, {} | ||
end | |||
if match(text, "<[^<>]+>") then | |||
track("track HTML tag") | |||
end | end | ||
-- Remove directional characters, soft hyphens, strip markers and HTML tags. | -- Remove directional characters, soft hyphens, strip markers and HTML tags. | ||
| Line 1,484: | Line 1,591: | ||
local sort_key = self._data.sort_key | local sort_key = self._data.sort_key | ||
text, fail, cats = iterateSectionSubstitutions(text, nil, nil | text, fail, cats = iterateSectionSubstitutions(self, text, sc, nil, nil, sort_key, "makeSortKey") | ||
if not sc:sortByScraping() then | if not sc:sortByScraping() then | ||
| Line 1,513: | Line 1,620: | ||
local fail, cats | local fail, cats | ||
text = normalize(text, sc) | text = normalize(text, sc) | ||
text, fail, cats, subbedChars = iterateSectionSubstitutions(text, subbedChars, keepCarets | text, fail, cats, subbedChars = iterateSectionSubstitutions(self, text, sc, subbedChars, keepCarets, self._data.display_text, "makeDisplayText") | ||
text = removeCarets(text, sc) | text = removeCarets(text, sc) | ||
| Line 1,528: | Line 1,635: | ||
return gsub(m1, "\244[\128-\191]*", "") | return gsub(m1, "\244[\128-\191]*", "") | ||
end) | end) | ||
if not prefix or prefix == text | -- Check if the prefix is an interwiki, though ignore capitalised Wiktionary:, which is a namespace. | ||
if not prefix or prefix == text or prefix == "Wiktionary" | |||
or not (load_data("Module:data/interwikis")[ulower(prefix)] or prefix == "") then | |||
break | break | ||
end | end | ||
| Line 1,546: | Line 1,651: | ||
:gsub("\4", ":") | :gsub("\4", ":") | ||
end | end | ||
return text, fail, cats, subbedChars | return text, fail, cats, subbedChars | ||
end | end | ||
| Line 1,590: | Line 1,689: | ||
if not (sc:isTransliterated() or module_override) then | if not (sc:isTransliterated() or module_override) then | ||
-- temporary tracking to see if/when this gets triggered | -- temporary tracking to see if/when this gets triggered | ||
track("non-transliterable") | |||
track("non-transliterable/" .. self._code) | |||
track("non-transliterable/" .. sc:getCode()) | |||
track("non-transliterable/" .. sc:getCode() .. "/" .. self._code) | |||
return nil, true, {} | return nil, true, {} | ||
end | end | ||
| Line 1,595: | Line 1,698: | ||
-- Remove any strip markers. | -- Remove any strip markers. | ||
text = unstrip(text) | text = unstrip(text) | ||
-- Do not process the formatting into PUA characters for certain languages. | |||
local processed = load_data(languages_data_module).substitution[self._code] ~= "none" | |||
-- Get the display text with the keepCarets flag set. | -- Get the display text with the keepCarets flag set. | ||
local fail, cats, subbedChars | local fail, cats, subbedChars | ||
text, fail, cats, subbedChars = processDisplayText(text, self, sc, true) | if processed then | ||
text, fail, cats, subbedChars = processDisplayText(text, self, sc, true) | |||
end | |||
-- Transliterate (using the module override if applicable). | -- Transliterate (using the module override if applicable). | ||
text, fail, cats, subbedChars = iterateSectionSubstitutions(text, subbedChars, true | text, fail, cats, subbedChars = iterateSectionSubstitutions(self, text, sc, subbedChars, true, module_override or self._data.translit, "tr") | ||
if not text then | if not text then | ||
| Line 1,618: | Line 1,726: | ||
end | end | ||
text = escape_risky_characters(text) | if processed then | ||
text = escape_risky_characters(text) | |||
text = undoTempSubstitutions(text, subbedChars) | |||
end | |||
-- If the script does not use capitalization, then capitalize any letters of the transliteration which are immediately preceded by a caret (and remove the caret). | -- If the script does not use capitalization, then capitalize any letters of the transliteration which are immediately preceded by a caret (and remove the caret). | ||
| Line 1,626: | Line 1,736: | ||
return m1 .. uupper(m2) | return m1 .. uupper(m2) | ||
end) | end) | ||
end | |||
-- Track module overrides. | |||
if module_override ~= nil then | |||
track("module_override") | |||
end | end | ||
| Line 1,706: | Line 1,821: | ||
function export.getDataModuleName(code) | function export.getDataModuleName(code) | ||
local letter = match(code, "^(%l)%l%l?$") | local letter = match(code, "^(%l)%l%l?$") | ||
return letter == nil and "languages/data/exceptional" or | return "Module:" .. ( | ||
letter == nil and "languages/data/exceptional" or | |||
#code == 2 and "languages/data/2" or | #code == 2 and "languages/data/2" or | ||
"languages/data/3/" .. letter | "languages/data/3/" .. letter | ||
) | |||
end | end | ||
get_data_module_name = export.getDataModuleName | get_data_module_name = export.getDataModuleName | ||
| Line 1,726: | Line 1,843: | ||
varieties = "unique", | varieties = "unique", | ||
wikipedia_article = "unique", | wikipedia_article = "unique", | ||
wikimedia_codes = "unique" | |||
} | } | ||
local function __index(self, k) | local function __index(self, k) | ||
local stack, key_type = getmetatable(self), key_types[k] | local stack, key_type = getmetatable(self), key_types[k] | ||
| Line 1,766: | Line 1,884: | ||
end | end | ||
end | end | ||
local function __newindex() | local function __newindex() | ||
error("table is read-only") | error("table is read-only") | ||
end | end | ||
local function __pairs(self) | local function __pairs(self) | ||
-- Iterate down the stack, caching keys to avoid duplicate returns. | -- Iterate down the stack, caching keys to avoid duplicate returns. | ||
| Line 1,802: | Line 1,920: | ||
end | end | ||
end | end | ||
local __ipairs = require(table_module).indexIpairs | local __ipairs = require(table_module).indexIpairs | ||
function make_stack(data) | function make_stack(data) | ||
local stack = { | local stack = { | ||
| Line 1,817: | Line 1,935: | ||
return setmetatable({}, stack), stack | return setmetatable({}, stack), stack | ||
end | end | ||
return make_stack(data) | return make_stack(data) | ||
end | end | ||
local function get_stack(data) | local function get_stack(data) | ||
local stack = getmetatable(data) | local stack = getmetatable(data) | ||
return stack and type(stack) == "table" and stack[make_stack] and stack or nil | return stack and type(stack) == "table" and stack[make_stack] and stack or nil | ||
end | end | ||
--[==[ | --[==[ | ||
<span style="color: #BA0000">This function is not for use in entries or other content pages.</span> | <span style="color: #BA0000">This function is not for use in entries or other content pages.</span> | ||
| Line 1,860: | Line 1,978: | ||
return data | return data | ||
end | end | ||
function Language:loadInExtraData() | function Language:loadInExtraData() | ||
-- Only full languages have extra data. | -- Only full languages have extra data. | ||
| Line 1,880: | Line 1,998: | ||
local modulename = get_extra_data_module_name(code) | local modulename = get_extra_data_module_name(code) | ||
-- No data cached as false. | -- No data cached as false. | ||
stack[0] = modulename and load_data( | stack[0] = modulename and load_data(modulename)[code] or false | ||
end | end | ||
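--[==[Returns the name of the module containing the language's data: [[Module:etymology languages/data]] for etymology-only languages, otherwise the module named by {export.getDataModuleName} for the language's main code (or its own code if it has no main code).]==] | |||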
function Language:getDataModuleName() | |||
local name = self._dataModuleName | |||
if name == nil then | |||
name = self:hasType("etymology-only") and etymology_languages_data_module or | |||
get_data_module_name(self._mainCode or self._code) | |||
self._dataModuleName = name | |||
end | |||
return name | |||
end | |||
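--[==[Returns the name of the module containing the language's extra data, or {nil} if there is none. Etymology-only languages never have an extra data module; for other languages the name is derived from the main code (or the language's own code if it has no main code).]==] | |||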
function Language:getExtraDataModuleName() | |||
local name = self._extraDataModuleName | |||
if name == nil then | |||
name = not self:hasType("etymology-only") and get_extra_data_module_name(self._mainCode or self._code) or false | |||
self._extraDataModuleName = name | |||
end | |||
return name or nil | |||
end | |||
function export.makeObject(code, data, dontCanonicalizeAliases) | function export.makeObject(code, data, dontCanonicalizeAliases) | ||
local data_type = type(data) | local data_type = type(data) | ||
| Line 1,907: | Line 2,046: | ||
lang._mainCode = code | lang._mainCode = code | ||
end | end | ||
local parent_data = parent._data | local parent_data = parent._data | ||
if parent_data == nil then | if parent_data == nil then | ||
| Line 1,924: | Line 2,063: | ||
end | end | ||
lang._data = data | lang._data = data | ||
return setmetatable(lang, parent) | return setmetatable(lang, parent) | ||
end | end | ||
| Line 1,933: | Line 2,072: | ||
function export.getByCode(code, paramForError, allowEtymLang, allowFamily) | function export.getByCode(code, paramForError, allowEtymLang, allowFamily) | ||
-- Track uses of paramForError, ultimately so it can be removed, as error-handling should be done by [[Module:parameters]], not here. | -- Track uses of paramForError, ultimately so it can be removed, as error-handling should be done by [[Module:parameters]], not here. | ||
if paramForError ~= nil then | |||
track("paramForError") | |||
end | |||
if type(code) ~= "string" then | if type(code) ~= "string" then | ||
local typ | local typ | ||
| Line 1,948: | Line 2,089: | ||
end | end | ||
local m_data = load_data( | local m_data = load_data(languages_data_module) | ||
if m_data.aliases[code] or m_data.track[code] then | |||
track(code) | |||
end | |||
local norm_code = normalize_code(code) | local norm_code = normalize_code(code) | ||
-- Get the data, checking for etymology-only languages if allowEtymLang is set. | -- Get the data, checking for etymology-only languages if allowEtymLang is set. | ||
local data = load_data( | local data = load_data(get_data_module_name(norm_code))[norm_code] or | ||
allowEtymLang and load_data( | allowEtymLang and load_data(etymology_languages_data_module)[norm_code] | ||
-- If no data was found and allowFamily is set, check the family data. If the main family data was found, make the object with [[Module:families]] instead, as family objects have different methods. However, if it's an etymology-only family, use make_object in this module (which handles object inheritance), and the family-specific methods will be inherited from the parent object. | -- If no data was found and allowFamily is set, check the family data. If the main family data was found, make the object with [[Module:families]] instead, as family objects have different methods. However, if it's an etymology-only family, use make_object in this module (which handles object inheritance), and the family-specific methods will be inherited from the parent object. | ||
if data == nil and allowFamily then | if data == nil and allowFamily then | ||
data = load_data("Module:families/data")[norm_code] | data = load_data("Module:families/data")[norm_code] | ||
if data ~= nil then | if data ~= nil then | ||
return make_family_object(norm_code, data) | if data.parent == nil then | ||
return make_family_object(norm_code, data) | |||
elseif not allowEtymLang then | |||
data = nil | |||
end | |||
end | end | ||
end | end | ||
| Line 2,013: | Line 2,154: | ||
end | end | ||
--[==[Used by [[Module:languages/data/2]] (et al.) and [[Module:etymology languages/data]], [[Module:families/data | --[==[Used by [[Module:languages/data/2]] (et al.) and [[Module:etymology languages/data]], [[Module:families/data]], [[Module:scripts/data]] and [[Module:writing systems/data]] to finalize the data into the format that is actually returned.]==] | ||
function export.finalizeData(data, main_type, variety) | function export.finalizeData(data, main_type, variety) | ||
local fields = {"type"} | local fields = {"type"} | ||
| Line 2,031: | Line 2,172: | ||
entity.parent, entity[3], entity.family = entity[3], entity.family | entity.parent, entity[3], entity.family = entity[3], entity.family | ||
-- Give the type "regular" iff not a variety and no other types are assigned. | -- Give the type "regular" iff not a variety and no other types are assigned. | ||
elseif not entity.type then | elseif not (entity.type or entity.parent) then | ||
entity.type = "regular" | entity.type = "regular" | ||
end | end | ||