|
|
Line 1: |
Line 1: |
| local export = {} | | local export = {} |
|
| |
|
| local checkObject = require("Module:utilities").check_object
| | --[=[ |
| | Throw an error for an invalid language code or script code. |
|
| |
|
| local function make_language(code, data, useRequire)
| | `lang_code` (required) is the bad code and can be nil or a non-string. |
| local function conditionalRequire(modulename) | | |
| if useRequire then
| | `param` (required) is the name of the parameter in which the code was contained. It can be a string, a number |
| return require(modulename)
| | (for a numeric param, in which case the param will show up in the error message as an ordinal such as |
| else
| | "first" or "second"), or `true` if no parameter can be clearly identified. |
| return mw.loadData(modulename)
| | |
| end | | `code_desc` (optional) is text describing what the code is; by default, "language code". |
| end | | |
| | `template_tag` (optional) is a string specifying the template that generated the error, or a function |
| | to generate this string. If given, it will be displayed in the error message. |
| | |
| | `not_real_lang` (optional), if given, indicates that the code is not in the form of a language code |
| | (e.g. it's a script code). Normally, this function checks for things that could plausibly be a language code: |
| | two or three lowercase letters, two or three groups of three lowercase letters with hyphens between them. |
| | If such a pattern is found, a different error message is displayed (indicating an invalid code) than otherwise |
| | (indicating a missing code). If `not_real_lang` is given, this check is suppressed. |
| | ]=] |
| | |
| | function export.err(lang_code, param, code_desc, template_tag, not_real_lang) |
| | local ordinals = { |
| | "first", "second", "third", "fourth", "fifth", "sixth", |
| | "seventh", "eighth", "ninth", "tenth", "eleventh", "twelfth", |
| | "thirteenth", "fourteenth", "fifteenth", "sixteenth", "seventeenth", |
| | "eighteenth", "nineteenth", "twentieth" |
| | } |
| | |
| | code_desc = code_desc or "language code" |
| | | |
| -- Temporarily convert various formatting characters to PUA to prevent them from being disrupted by the substitution process. | | if not template_tag then |
| local function doTempSubstitutions(text, subbedChars, keepCarets, noTrim)
| | template_tag = "" |
| -- Clone so that we don't insert any extra patterns into the table in package.loaded.
| | else |
| local patterns = require("Module:table").shallowcopy(require("Module:languages/data/patterns")) | | if type(template_tag) ~= "string" then |
| if keepCarets then | | template_tag = template_tag() |
| table.insert(patterns, "((\\+)%^)")
| |
| table.insert(patterns, "((%^))") | |
| end | | end |
| -- Ensure any whitespace at the beginning and end is temp substituted, to prevent it from being accidentally trimmed. We only want to trim any final spaces added during the substitution process (e.g. by a module), which means we only do this during the first round of temp substitutions. | | template_tag = " (Original template: " .. template_tag .. ")" |
| if not noTrim then
| |
| table.insert(patterns, "^([\128-\191\244]*(%s+))")
| |
| table.insert(patterns, "((%s+)[\128-\191\244]*)$")
| |
| end
| |
| -- Pre-substitution, of "[[" and "]]", which makes pattern matching more accurate.
| |
| text = text
| |
| :gsub("%f[%[]%[%[", "\1")
| |
| :gsub("%f[%]]%]%]", "\2")
| |
| local i, pe = #subbedChars, require("Module:utilities").pattern_escape
| |
| for j, pattern in ipairs(patterns) do
| |
| -- Patterns ending in \0 are for things like "[[" or "]]", so the inserted PUA are treated as breaks between terms by modules that scrape info from pages.
| |
| local term_divider
| |
| pattern = pattern:gsub("%z$", function(divider)
| |
| term_divider = divider == "\0"
| |
| return ""
| |
| end)
| |
| text = text:gsub(pattern, function(...)
| |
| local m = {...}
| |
| local m1New = m[1]
| |
| for k = 2, #m do
| |
| local n = i + k - 1
| |
| subbedChars[n] = m[k]
| |
| local byte2 = math.floor(n / 4096) % 64 + (term_divider and 128 or 136)
| |
| local byte3 = math.floor(n / 64) % 64 + 128
| |
| local byte4 = n % 64 + 128
| |
| m1New = m1New:gsub(pe(m[k]), "\244" .. string.char(byte2) .. string.char(byte3) .. string.char(byte4), 1)
| |
| end
| |
| i = i + #m - 1
| |
| return m1New
| |
| end)
| |
| end
| |
| text = text
| |
| :gsub("\1", "%[%[")
| |
| :gsub("\2", "%]%]")
| |
| return text, subbedChars
| |
| end | | end |
|
| | local function err(msg) |
| -- Reinsert any formatting that was temporarily substituted.
| | error(msg .. template_tag, 3) |
| local function undoTempSubstitutions(text, subbedChars) | |
| local pe = require("Module:utilities").pattern_escape | |
| for i = 1, #subbedChars do
| |
| local byte2 = math.floor(i / 4096) % 64 + 128
| |
| local byte3 = math.floor(i / 64) % 64 + 128
| |
| local byte4 = i % 64 + 128
| |
| text = text:gsub("\244[" .. string.char(byte2) .. string.char(byte2+8) .. "]" .. string.char(byte3) .. string.char(byte4), pe(subbedChars[i]))
| |
| end
| |
| text = text
| |
| :gsub("\1", "%[%[")
| |
| :gsub("\2", "%]%]")
| |
| return text
| |
| end | | end |
| | | local param_type = type(param) |
| -- Convert any HTML entities. | | local in_the_param |
| local function noEntities(text) | | if param == true then |
| if text:match("&[^;]+;") then | | -- handled specially below |
| return require("Module:utilities").get_entities(text) | | in_the_param = "" |
| | else |
| | if param_type == "number" then |
| | param = ordinals[param] .. " parameter" |
| | elseif param_type == "string" then |
| | param = 'parameter "' .. param .. '"' |
| else | | else |
| return text | | err("The parameter name is " |
| | .. (param_type == "table" and "a table" or tostring(param)) |
| | .. ", but it should be a number or a string.") |
| end | | end |
| | in_the_param = " in the " .. param |
| end | | end |
| | | |
| -- Check if the raw text is an unsupported title, and if so return that. Otherwise, remove HTML entities. We do the pre-conversion to avoid loading the unsupported title list unnecessarily. | | if not lang_code or lang_code == "" then |
| local function checkNoEntities(text)
| | if param == true then |
| local textNoEnc = noEntities(text)
| | err("The " .. code_desc .. " is missing.") |
| if textNoEnc ~= text and conditionalRequire("Module:links/data").unsupported_titles[text] then | |
| return text
| |
| else | | else |
| return textNoEnc | | err("The " .. param .. " (" .. code_desc .. ") is missing.") |
| end | | end |
| | elseif type(lang_code) ~= "string" then |
| | err("The " .. code_desc .. in_the_param .. " is supposed to be a string but is a " .. type(lang_code) .. ".") |
| | -- Can use string.find because language codes only contain ASCII. |
| | elseif not_real_lang or lang_code:find("^%l%l%l?$") |
| | or lang_code:find("^%l%l%l%-%l%l%l$") |
| | or lang_code:find("^%l%l%l%-%l%l%l%-%l%l%l$") then |
| | err("The " .. code_desc .. " \"" .. lang_code .. "\"" .. in_the_param .. " is not valid.") |
| | else |
| | err("Please specify a " .. code_desc .. in_the_param .. ". The value \"" .. lang_code .. "\" is not valid.") |
| end | | end |
|
| | end |
| -- If no script object is provided (or if it's invalid or None), get one.
| | |
| local function checkScript(text, self, sc)
| | local function do_entry_name_or_sort_key_replacements(text, replacements) |
| if not checkObject("script", true, sc) or sc:getCode() == "None" then | | if replacements.from then |
| return self:findBestScript(text) | | for i, from in ipairs(replacements.from) do |
| else
| | local to = replacements.to[i] or "" |
| return sc
| | text = mw.ustring.gsub(text, from, to) |
| end | | end |
| end | | end |
| | | |
| local function normalize(text, sc) | | if replacements.remove_diacritics then |
| text = sc:fixDiscouragedSequences(text) | | text = mw.ustring.toNFD(text) |
| return sc:toFixedNFD(text) | | text = mw.ustring.gsub(text, |
| | '[' .. replacements.remove_diacritics .. ']', |
| | '') |
| | text = mw.ustring.toNFC(text) |
| end | | end |
| | | |
| -- Split the text into sections, based on the presence of temporarily substituted formatting characters, then iterate over each one to apply substitutions. This avoids putting PUA characters through language-specific modules, which may be unequipped for them. | | return text |
| local function iterateSectionSubstitutions(text, subbedChars, keepCarets, self, sc, substitution_data, function_name)
| | end |
| local pe = require("Module:utilities").pattern_escape
| | |
| local fail, cats, sections = nil, {}
| | local Language = {} |
| -- See [[Module:languages/data]].
| | |
| if not text:match("\244") or conditionalRequire("Module:languages/data").contiguous_substitution[self:getCode()] then
| | function Language:getCode() |
| sections = {text}
| | return self._code |
| else
| | end |
| sections = mw.text.split(text, "[-]")
| | |
| end
| | |
| for i, section in ipairs(sections) do
| | function Language:getCanonicalName() |
| -- Don't bother processing empty strings or whitespace (which may also not be handled well by dedicated modules).
| | return self._rawData[1] or self._rawData.canonicalName |
| if section:gsub("%s", "") ~= "" then
| | end |
| local sub, sub_fail, sub_cats = require("Module:languages/doSubstitutions")(section, self, sc, substitution_data, function_name)
| | |
| -- Second round of temporary substitutions, in case any formatting was added by the main substitution process. However, don't do this if the section contains formatting already (as it would have had to have been escaped to reach this stage, and therefore should be given as raw text).
| | |
| if sub and subbedChars then
| | function Language:getDisplayForm() |
| local noSub
| | return self:getCanonicalName() |
| for _, pattern in ipairs(require("Module:languages/data/patterns")) do
| | end |
| if section:match(pattern .. "%z?") then
| | |
| noSub = true
| | |
| end
| | function Language:getOtherNames(onlyOtherNames) |
| end
| | self:loadInExtraData() |
| if not noSub then
| | return require("Module:language-like").getOtherNames(self, onlyOtherNames) |
| sub, subbedChars = doTempSubstitutions(sub, subbedChars, keepCarets, true)
| | end |
| end
| | |
| end
| | |
| if (not sub) or sub_fail then
| | function Language:getAliases() |
| text = sub
| | self:loadInExtraData() |
| fail = sub_fail
| | return self._extraData.aliases or {} |
| cats = sub_cats or {}
| | end |
| break
| | |
| end
| | |
| text = sub and text:gsub(pe(section), pe(sub), 1) or text
| | function Language:getVarieties(flatten) |
| if type(sub_cats) == "table" then
| | self:loadInExtraData() |
| for _, cat in ipairs(sub_cats) do
| | return require("Module:language-like").getVarieties(self, flatten) |
| table.insert(cats, cat)
| | end |
| end
| | |
| end
| | |
| end
| | function Language:getType() |
| end
| | return self._rawData.type or "regular" |
|
| | end |
| -- Trim, unless there are only spacing characters, while ignoring any final formatting characters.
| | |
| text = text and text
| | |
| :gsub("^([\128-\191\244]*)%s+(%S)", "%1%2")
| | function Language:getWikimediaLanguages() |
| :gsub("(%S)%s+([\128-\191\244]*)$", "%1%2")
| | if not self._wikimediaLanguageObjects then |
| | local m_wikimedia_languages = require("Module:wikimedia languages") |
| | self._wikimediaLanguageObjects = {} |
| | local wikimedia_codes = self._rawData.wikimedia_codes or { self._code } |
| | | |
| -- Remove duplicate categories. | | for _, wlangcode in ipairs(wikimedia_codes) do |
| if #cats > 1 then
| | table.insert(self._wikimediaLanguageObjects, m_wikimedia_languages.getByCode(wlangcode)) |
| cats = require("Module:table").removeDuplicates(cats) | |
| end | | end |
|
| |
| return text, fail, cats, subbedChars
| |
| end | | end |
| | | |
| -- Process carets (and any escapes). Default to simple removal, if no pattern/replacement is given. | | return self._wikimediaLanguageObjects |
| local function processCarets(text, pattern, repl)
| | end |
| local rep
| | |
| repeat
| | function Language:getWikipediaArticle() |
| text, rep = text:gsub("\\\\(\\*^)", "\3%1")
| | if self._rawData.wikipedia_article then |
| until rep == 0
| | return self._rawData.wikipedia_article |
| return text
| | elseif self._wikipedia_article then |
| :gsub("\\^", "\4")
| |
| :gsub(pattern or "%^", repl or "")
| |
| :gsub("\3", "\\")
| |
| :gsub("\4", "^")
| |
| end
| |
|
| |
| -- Remove carets if they are used to capitalize parts of transliterations (unless they have been escaped).
| |
local function removeCarets(text, sc)
	-- Carets are only stripped when the script has no capitalization, is
	-- transliterated, and the text actually contains a caret. The checks run
	-- in that order and bail out on the first one that fails.
	if sc:hasCapitalization() then
		return text
	end
	if not sc:isTransliterated() then
		return text
	end
	if not text:match("%^") then
		return text
	end
	-- No pattern/replacement is passed, so processCarets uses its defaults.
	return processCarets(text)
end
| |
|
| |
| local Language = {}
| |
|
| |
| --[==[Returns the language code of the language. Example: {{code|lua|"fr"}} for French.]==]
| |
function Language:getCode()
	-- The code is read straight from the object's `_code` field.
	local code = self._code
	return code
end
| |
|
| |
| --[==[Returns the canonical name of the language. This is the name used to represent that language on Wiktionary, and is guaranteed to be unique to that language alone. Example: {{code|lua|"French"}} for French.]==]
| |
function Language:getCanonicalName()
	-- The canonical name is the first positional entry of the raw data table.
	local rawData = self._rawData
	return rawData[1]
end
| |
|
| |
| --[==[Returns the display form of the language. The display form of a language, family or script is the form it takes when appearing as the ''SOURCE'' in categories such as <code>English terms derived from ''SOURCE''</code> or <code>English given names from ''SOURCE''</code>, and is also the displayed text in <code>:makeCategoryLink</code> links. For regular and etymology languages, this is the same as the canonical name, but for families, it reads "NAME languages" (e.g. {{code|lua|"Indo-Iranian languages"}}), and for scripts, it reads "NAME script" (e.g. {{code|lua|"Arabic script"}}).]==]
| |
function Language:getDisplayForm()
	-- Serve the memoized value when one has already been computed.
	local cached = self._displayForm
	if cached then
		return cached
	end

	local name = self:getCanonicalName()
	-- Languages whose family code is "qfa-sub" (substrates) get a leading
	-- article and a trailing " substrate" when the name lacks them.
	if self:getFamilyCode() == "qfa-sub" then
		local hasArticle = name:find("^[Tt]he ") or name:find("^[Aa] ")
		if not hasArticle then
			name = "a " .. name
		end
		if not name:find("[Ss]ubstrate") then
			name = name .. " substrate"
		end
	end

	self._displayForm = name
	return name
end
| |
|
| |
| --[==[Returns a table of the "other names" that the language is known by, excluding the canonical name. The names are not guaranteed to be unique, in that sometimes more than one language is known by the same name. Example: {{code|lua|{"Manx Gaelic", "Northern Manx", "Southern Manx"} }} for [[:Category:Manx language|Manx]]. If <code>onlyOtherNames</code> is given and is non-{{code|lua|nil}}, only names explicitly listed in the <code>otherNames</code> field are returned; otherwise, names listed under <code>otherNames</code>, <code>aliases</code> and <code>varieties</code> are combined together and returned. For example, for Manx, Manx Gaelic is listed as an alias, while Northern Manx and Southern Manx are listed as varieties. It should be noted that the <code>otherNames</code> field itself is deprecated, and entries listed there should eventually be moved to either <code>aliases</code> or <code>varieties</code>.]==]
| |
function Language:getOtherNames(onlyOtherNames)
	-- Extra data is pulled in only when the object's stack holds exactly one entry.
	local stackDepth = #self._stack
	if stackDepth == 1 then
		self:loadInExtraData()
	end
	-- The actual name collection is delegated to the shared language-like module.
	local languageLike = require("Module:language-like")
	return languageLike.getOtherNames(self, onlyOtherNames)
end
| |
|
| |
| --[==[Returns a table of the aliases that the language is known by, excluding the canonical name. Aliases are synonyms for the language in question. The names are not guaranteed to be unique, in that sometimes more than one language is known by the same name. Example: {{code|lua|{"High German", "New High German", "Deutsch"} }} for [[:Category:German language|German]].]==]
| |
function Language:getAliases()
	-- Extra data is pulled in only when the object's stack holds exactly one entry.
	local stackDepth = #self._stack
	if stackDepth == 1 then
		self:loadInExtraData()
	end

	-- Aliases in the raw data take precedence over those in the extra data.
	local aliases = self._rawData.aliases
	if aliases then
		return aliases
	end
	local extraData = self._extraData
	aliases = extraData and extraData.aliases
	if aliases then
		return aliases
	end
	-- Nothing recorded anywhere: hand back a fresh empty table.
	return {}
end
| |
|
| |
| --[==[Returns a table of the known subvarieties of a given language, excluding subvarieties that have been given explicit etymology language codes. The names are not guaranteed to be unique, in that sometimes a given name refers to a subvariety of more than one language. Example: {{code|lua|{"Southern Aymara", "Central Aymara"} }} for [[:Category:Aymara language|Aymara]]. Note that the returned value can have nested tables in it, when a subvariety goes by more than one name. Example: {{code|lua|{"North Azerbaijani", "South Azerbaijani", {"Afshar", "Afshari", "Afshar Azerbaijani", "Afchar"}, {"Qashqa'i", "Qashqai", "Kashkay"}, "Sonqor"} }} for [[:Category:Azerbaijani language|Azerbaijani]]. Here, for example, Afshar, Afshari, Afshar Azerbaijani and Afchar all refer to the same subvariety, whose preferred name is Afshar (the one listed first). To avoid a return value with nested tables in it, specify a non-{{code|lua|nil}} value for the <code>flatten</code> parameter; in that case, the return value would be {{code|lua|{"North Azerbaijani", "South Azerbaijani", "Afshar", "Afshari", "Afshar Azerbaijani", "Afchar", "Qashqa'i", "Qashqai", "Kashkay", "Sonqor"} }}.]==]
| |
function Language:getVarieties(flatten)
	-- Extra data is pulled in only when the object's stack holds exactly one entry.
	local stackDepth = #self._stack
	if stackDepth == 1 then
		self:loadInExtraData()
	end
	-- The actual variety collection is delegated to the shared language-like module.
	local languageLike = require("Module:language-like")
	return languageLike.getVarieties(self, flatten)
end
| |
|
| |
| --[==[Given a list of types as strings, returns true if the language has all of them. Possible types are explained in [[Module:languages/data/2]] and [[Module:etymology languages/data]].]==]
| |
function Language:hasType(...)
	-- Returns true only if the language has every type named in the arguments.
	-- The set of types is computed once and memoized in `self._type`.
	local types = self._type
	if not types then
		-- Build the set in a local and publish it only once complete, so an
		-- error part-way through cannot leave a half-populated cache behind.
		types = {language = true}
		if self:getNonEtymologicalCode() == self:getCode() then
			types.full = true
		else
			types["etymology-only"] = true
		end
		-- The raw `type` field is a comma-separated list. Guard against it
		-- being absent rather than passing nil to mw.text.split.
		-- NOTE(review): a missing field is treated as "no additional types";
		-- confirm whether data modules always set it.
		local rawTypes = self._rawData.type
		if rawTypes then
			-- `typeName` rather than `type`, to avoid shadowing the builtin.
			for _, typeName in ipairs(mw.text.split(rawTypes, "%s*,%s*")) do
				types[typeName] = true
			end
		end
		self._type = types
	end

	for _, wanted in ipairs{...} do
		if not types[wanted] then
			return false
		end
	end
	return true
end
| |
|
| |
| --[==[Returns a table containing <code>WikimediaLanguage</code> objects (see [[Module:wikimedia languages]]), which represent languages and their codes as they are used in Wikimedia projects for interwiki linking and such. More than one object may be returned, as a single Wiktionary language may correspond to multiple Wikimedia languages. For example, Wiktionary's single code <code>sh</code> (Serbo-Croatian) maps to four Wikimedia codes: <code>sh</code> (Serbo-Croatian), <code>bs</code> (Bosnian), <code>hr</code> (Croatian) and <code>sr</code> (Serbian).
| |
| The code for the Wikimedia language is retrieved from the <code>wikimedia_codes</code> property in the data modules. If that property is not present, the code of the current language is used. If none of the available codes is actually a valid Wikimedia code, an empty table is returned.]==]
| |
function Language:getWikimediaLanguages()
	-- Serve the memoized list when it has already been built.
	local objects = self._wikimediaLanguageObjects
	if objects then
		return objects
	end

	local m_wikimedia_languages = require("Module:wikimedia languages")
	objects = {}
	self._wikimediaLanguageObjects = objects
	-- Resolve each Wikimedia code to its object. table.insert is kept as-is so
	-- that a nil result from getByCode behaves exactly as before.
	for _, wikimediaCode in ipairs(self:getWikimediaLanguageCodes()) do
		table.insert(objects, m_wikimedia_languages.getByCode(wikimediaCode))
	end
	return objects
end
| |
|
| |
function Language:getWikimediaLanguageCodes()
	-- Returns the list of Wikimedia codes for this language, memoized in
	-- `self._wikimediaLanguageCodes`.
	local codes = self._wikimediaLanguageCodes
	if not codes then
		-- Fall back to the language's own code when the data lists none.
		codes = self._rawData.wikimedia_codes or {self:getCode()}
		self._wikimediaLanguageCodes = codes
	end
	return codes
end
| |
|
| |
| --[==[Returns the name of the Wikipedia article for the language. If the property <code>wikipedia_article</code> is present in the data module it will be used first, otherwise a sitelink will be generated from <code>:getWikidataItem</code> (if set). Otherwise <code>:getCategoryName</code> is used as fallback.]==]
| |
| function Language:getWikipediaArticle() | |
| if not self._wikipedia_article then
| |
| if self._rawData.wikipedia_article then
| |
| self._wikipedia_article = self._rawData.wikipedia_article
| |
| elseif self:getWikidataItem() and mw.wikibase then
| |
| self._wikipedia_article = mw.wikibase.sitelink(self:getWikidataItem(), 'enwiki')
| |
| end
| |
| if not self._wikipedia_article then
| |
| self._wikipedia_article = self:getCategoryName():gsub("Creole language", "Creole")
| |
| end
| |
| end
| |
| return self._wikipedia_article | | return self._wikipedia_article |
| | elseif self:getWikidataItem() and mw.wikibase then |
| | self._wikipedia_article = mw.wikibase.sitelink(self:getWikidataItem(), 'enwiki') |
| end | | end |
| | | if not self._wikipedia_article then |
| function Language:makeWikipediaLink()
| | self._wikipedia_article = mw.ustring.gsub(self:getCategoryName(), "Creole language", "Creole") |
| return "[[w:" .. self:getWikipediaArticle() .. "|" .. self:getCanonicalName() .. "]]" | |
| end
| |
|
| |
| --[==[Returns the Wikidata item id for the language or <code>nil</code>. This corresponds to the second field in the data modules.]==]
| |
| function Language:getWikidataItem()
| |
| if not self._WikidataItem then
| |
| local item = self._rawData[2]
| |
| if type(item) == "number" then
| |
| self._WikidataItem = "Q" .. item
| |
| else
| |
| self._WikidataItem = item
| |
| end
| |
| end
| |
| return self._WikidataItem
| |
| end | | end |
| | return self._wikipedia_article |
| | end |
| | |
function Language:makeWikipediaLink()
	-- Builds a piped wikilink to the Wikipedia article, labelled with the
	-- canonical name. The article is fetched first, then the label.
	local article = self:getWikipediaArticle()
	local label = self:getCanonicalName()
	return "[[w:" .. article .. "|" .. label .. "]]"
end
| | |
| | function Language:getWikidataItem() |
| | local item = self._rawData[2] |
| | | |
| --[==[Returns a table of <code>Script</code> objects for all scripts that the language is written in. See [[Module:scripts]].]==] | | if type(item) == "number" then |
| function Language:getScripts()
| | return "Q" .. item |
| if not self._scriptObjects then
| | else |
| self._scriptObjects = {}
| | return item |
| if self:getScriptCodes()[1] == "All" then
| |
| self._scriptObjects = conditionalRequire("Module:scripts/data")
| |
| else
| |
| for _, sc in ipairs(self:getScriptCodes()) do
| |
| table.insert(self._scriptObjects, require("Module:scripts").getByCode(sc, nil, nil, useRequire))
| |
| end
| |
| end
| |
| end
| |
| return self._scriptObjects | |
| end | | end |
|
| | end |
| --[==[Returns the table of script codes in the language's data file.]==]
| | |
| function Language:getScriptCodes()
| | function Language:getScripts() |
| if not self._scriptCodes then
| | if not self._scriptObjects then |
| self._scriptCodes = self._rawData[4] or {"None"}
| | local m_scripts = require("Module:scripts") |
| end
| | self._scriptObjects = {} |
| return self._scriptCodes | |
| end
| |
|
| |
| --[==[Given some text, this function iterates through the scripts of a given language and tries to find the script that best matches the text. It returns a {{code|lua|Script}} object representing the script. If no match is found at all, it returns the {{code|lua|None}} script object.]==]
| |
| function Language:findBestScript(text, forceDetect)
| |
| if (not text) or text == "" or text == "-" then
| |
| return require("Module:scripts").getByCode("None", nil, nil, useRequire)
| |
| end | |
|
| |
| if table.concat(self:getScriptCodes()) == "All" then
| |
| return require("Module:scripts").findBestScriptWithoutLang(text)
| |
| end
| |
| | | |
| local scripts = self:getScripts() | | for _, sc in ipairs(self:getScriptCodes()) do |
|
| | table.insert(self._scriptObjects, m_scripts.getByCode(sc)) |
| if not scripts[2] and not forceDetect then
| |
| -- Necessary, because Hani covers the entire Han range (while the Hant & Hans lists don't list shared characters). | |
| if scripts[1]:getCode():match("^Han") and require("Module:scripts").getByCode("Hani", nil, nil, useRequire):countCharacters(text) > 0 then
| |
| return scripts[1]
| |
| elseif scripts[1]:countCharacters(text) > 0 then
| |
| return scripts[1]
| |
| else
| |
| return require("Module:scripts").getByCode("None", nil, nil, useRequire)
| |
| end
| |
| end | | end |
|
| |
| return require("Module:languages/findBestScript")(export, self, text, scripts, forceDetect, useRequire)
| |
| end | | end |
| | | |
| --[==[Returns a <code>Family</code> object for the language family that the language belongs to. See [[Module:families]].]==] | | return self._scriptObjects |
| function Language:getFamily()
| | end |
| if self._familyObject == nil then
| | |
| local familyCode = self:getFamilyCode()
| | function Language:getScriptCodes() |
| if familyCode then
| | return self._rawData.scripts or self._rawData[4] or { "None" } |
| self._familyObject = require("Module:families").getByCode(familyCode, useRequire)
| | end |
| -- Still memoize a nil result.
| | |
| else
| | function Language:getFamily() |
| self._familyObject = false
| | if self._familyObject then |
| end
| | return self._familyObject |
| end
| |
| return self._familyObject or nil | |
| end | | end |
|
| | |
| --[==[Returns the family code in the language's data file.]==] | | local family = self._rawData[3] or self._rawData.family |
| function Language:getFamilyCode()
| | if family then |
| if not self._familyCode then
| | self._familyObject = require("Module:families").getByCode(family) |
| self._familyCode = self._rawData[3]
| |
| end
| |
| return self._familyCode | |
| end | | end |
| | | |
| function Language:getFamilyName() | | return self._familyObject |
| if self._familyName == nil then
| | end |
| local family = self:getFamily()
| | |
| if family then
| | |
| self._familyName = family:getCanonicalName()
| | function Language:getAncestors() |
| else
| | if not self._ancestorObjects then |
| self._familyName = false | | self._ancestorObjects = {} |
| | |
| | if self._rawData.ancestors then |
| | for _, ancestor in ipairs(self._rawData.ancestors) do |
| | table.insert(self._ancestorObjects, export.getByCode(ancestor) or require("Module:etymology languages").getByCode(ancestor)) |
| end | | end |
| end | | else |
| return self._familyName or nil
| | local fam = self:getFamily() |
| end
| | local protoLang = fam and fam:getProtoLanguage() or nil |
|
| | |
| --[==[Check whether the language belongs to `family` (which can be a family code or object). A list of objects can be given in place of `family`; in that case, return true if the language belongs to any of the specified families. Note that some languages (in particular, certain creoles) can have multiple immediate ancestors potentially belonging to different families; in that case, return true if the language belongs to any of the specified families.]==]
| | -- For the case where the current language is the proto-language |
| function Language:inFamily(...)
| | -- of its family, we need to step up a level higher right from the start. |
| --checkObject("family", nil, ...)
| | if protoLang and protoLang:getCode() == self:getCode() then |
| for _, family in ipairs{...} do
| | fam = fam:getFamily() |
| if type(family) == "table" then | | protoLang = fam and fam:getProtoLanguage() or nil |
| family = family:getCode() | |
| end | | end |
| if not self:getFamilyCode() then | | |
| return false
| | while not protoLang and not (not fam or fam:getCode() == "qfa-not") do |
| elseif self:getFamilyCode() == family or self:getFamily():inFamily(family) then
| | fam = fam:getFamily() |
| return true | | protoLang = fam and fam:getProtoLanguage() or nil |
| else
| |
| local ancestors = self:getAncestors()
| |
| for _, ancestor in ipairs(ancestors) do | |
| if ancestor:inFamily(family) then
| |
| return true
| |
| end
| |
| end
| |
| end | | end |
| | |
| | table.insert(self._ancestorObjects, protoLang) |
| end | | end |
| return false
| |
| end | | end |
| | | |
| function Language:getParent() | | return self._ancestorObjects |
| if self._parentObject == nil then | | end |
| local parentCode = self:getParentCode() | | |
| if parentCode then
| | local function iterateOverAncestorTree(node, func) |
| self._parentObject = export.getByCode(parentCode, nil, true, true, useRequire)
| | for _, ancestor in ipairs(node:getAncestors()) do |
| else | | if ancestor then |
| self._parentObject = false | | local ret = func(ancestor) or iterateOverAncestorTree(ancestor, func) |
| | if ret then |
| | return ret |
| end | | end |
| end | | end |
| return self._parentObject or nil
| |
| end | | end |
|
| | end |
| function Language:getParentCode()
| | |
| if not self._parentCode then
| | function Language:getAncestorChain() |
| self._parentCode = self._rawData[5]
| | if not self._ancestorChain then |
| | self._ancestorChain = {} |
| | local step = #self:getAncestors() == 1 and self:getAncestors()[1] or nil |
| | |
| | while step do |
| | table.insert(self._ancestorChain, 1, step) |
| | step = #step:getAncestors() == 1 and step:getAncestors()[1] or nil |
| end | | end |
| return self._parentCode
| |
| end | | end |
| | | |
| function Language:getParentName() | | return self._ancestorChain |
| if self._parentName == nil then | | end |
| local parent = self:getParent()
| | |
| if parent then
| | |
| self._parentName = parent:getCanonicalName()
| | function Language:hasAncestor(otherlang) |
| else
| | local function compare(ancestor) |
| self._parentName = false
| | return ancestor:getCode() == otherlang:getCode() |
| end
| |
| end
| |
| return self._parentName or nil
| |
| end | | end |
| | | |
| function Language:getParentChain() | | return iterateOverAncestorTree(self, compare) or false |
| if not self._parentChain then
| | end |
| self._parentChain = {}
| | |
| local parent = self:getParent()
| | |
| while parent do
| | function Language:getCategoryName(nocap) |
| table.insert(self._parentChain, parent)
| | local name = self:getCanonicalName() |
| parent = parent:getParent()
| |
| end
| |
| end
| |
| return self._parentChain
| |
| end
| |
| | | |
| function Language:hasParent(...) | | -- If the name already has "language" in it, don't add it. |
| --checkObject("language", nil, ...)
| | if not name:find("[Ll]anguage$") then |
| for _, otherlang in ipairs{...} do
| | name = name .. " language" |
| for _, parent in ipairs(self:getParentChain()) do
| |
| if type(otherlang) == "string" then
| |
| if otherlang == parent:getCode() then return true end
| |
| else
| |
| if otherlang:getCode() == parent:getCode() then return true end
| |
| end
| |
| end
| |
| end
| |
| return false
| |
| end | | end |
| | | if not nocap then |
| --[==[If the language is an etymology language, this iterates through parents until a regular language or family is found, and the corresponding object is returned. If the language is a regular language, then it simply returns the language.]==]
| | name = mw.getContentLanguage():ucfirst(name) |
| function Language:getNonEtymological()
| |
| if not self._nonEtymologicalObject then
| |
| local nonEtymologicalCode = self:getNonEtymologicalCode()
| |
| if nonEtymologicalCode ~= self:getCode() then
| |
| self._nonEtymologicalObject = export.getByCode(nonEtymologicalCode, nil, nil, nil, useRequire)
| |
| else
| |
| self._nonEtymologicalObject = self
| |
| end
| |
| end
| |
| return self._nonEtymologicalObject
| |
| end | | end |
| | return name |
| | end |
| | |
| | |
function Language:makeCategoryLink()
	-- Builds a piped link to the language's category, labelled with the
	-- display form. The category name is fetched first, then the label.
	local category = self:getCategoryName()
	local label = self:getDisplayForm()
	return "[[:Category:" .. category .. "|" .. label .. "]]"
end
| | |
| | |
function Language:getStandardCharacters()
	-- Reads the `standardChars` field of the raw data; nil when it is unset.
	local rawData = self._rawData
	return rawData.standardChars
end
| | |
| | |
| | function Language:makeEntryName(text) |
| | text = mw.ustring.match(text, "^[¿¡]?(.-[^%s%p].-)%s*[؟?!;՛՜ ՞ ՟?!︖︕।॥။၊་།]?$") or text |
| | | |
| function Language:getNonEtymologicalCode() | | if self:getCode() == "ar" then |
| return self._nonEtymologicalCode or self:getCode() | | local U = mw.ustring.char |
| end
| | local taTwiil = U(0x640) |
|
| | local waSla = U(0x671) |
| function Language:getNonEtymologicalName()
| | -- diacritics ordinarily removed by entry_name replacements |
| if self._nonEtymologicalName == nil then | | local Arabic_diacritics = U(0x64B, 0x64C, 0x64D, 0x64E, 0x64F, 0x650, 0x651, 0x652, 0x670) |
| local nonEtymological = self:getNonEtymological()
| | |
| if nonEtymological then
| | if text == waSla or mw.ustring.find(text, "^" .. taTwiil .. "?[" .. Arabic_diacritics .. "]" .. "$") then |
| self._nonEtymologicalName = nonEtymological:getCanonicalName()
| | return text |
| else | |
| self._nonEtymologicalName = false
| |
| end
| |
| end | | end |
| return self._nonEtymologicalName or nil
| |
| end | | end |
| | | |
| --[==[Returns a table of <code class="nf">Language</code> objects for all languages that this language is directly descended from. Generally this is only a single language, but creoles, pidgins and mixed languages can have multiple ancestors.]==] | | if type(self._rawData.entry_name) == "table" then |
| function Language:getAncestors()
| | text = do_entry_name_or_sort_key_replacements(text, self._rawData.entry_name) |
| if not self._ancestorObjects then
| |
| self._ancestorObjects = {}
| |
| local ancestors = require("Module:table").shallowcopy(self:getAncestorCodes())
| |
| if #ancestors > 0 then
| |
| for _, ancestor in ipairs(ancestors) do
| |
| table.insert(self._ancestorObjects, export.getByCode(ancestor, nil, true, nil, useRequire))
| |
| end
| |
| else
| |
| local fam = self:getFamily()
| |
| local protoLang = fam and fam:getProtoLanguage() or nil
| |
| -- For the cases where the current language is the proto-language
| |
| -- of its family, or an etymology language that is ancestral to that
| |
| -- proto-language, we need to step up a level higher right from the
| |
| -- start.
| |
| if protoLang and (
| |
| protoLang:getCode() == self:getCode() or
| |
| (self:hasType("etymology-only") and protoLang:hasAncestor(self))
| |
| ) then
| |
| fam = fam:getFamily()
| |
| protoLang = fam and fam:getProtoLanguage() or nil
| |
| end
| |
| while not protoLang and not (not fam or fam:getCode() == "qfa-not") do
| |
| fam = fam:getFamily()
| |
| protoLang = fam and fam:getProtoLanguage() or nil
| |
| end
| |
| table.insert(self._ancestorObjects, protoLang)
| |
| end
| |
| end
| |
| return self._ancestorObjects
| |
| end | | end |
| | | |
| function Language:getAncestorCodes() | | return text |
| if not self._ancestorCodes then
| | end |
| local function get_codes(lang)
| | |
| return lang._rawData.ancestors or {}
| | |
| end
| | -- Return true if the language has display processing enabled, i.e. lang:makeDisplayText() |
| local codes = get_codes(self)
| | -- does non-trivial processing. |
| -- Avoid a language being its own ancestor via class inheritance. We only need to check for this if the language has inherited an ancestor table from its parent, because we never want to drop ancestors that have been explicitly set in the data.
| | function Language:hasDisplayProcessing() |
| -- Recursively iterate over ancestors until we find a loop/run out. If a loop is found that involves the language, drop that ancestor.
| | return not not self._rawData.display |
| if #codes > 0 and #self._stack > 1 and not self._stack[#self._stack].ancestors then
| | end |
| local function check_ancestor(i, code, seen)
| | |
| if seen[code] then
| | |
| if code == self:getCode() then
| | -- Apply display-text replacements to `text`, if any. |
| table.remove(codes, i)
| | function Language:makeDisplayText(text) |
| end
| | if type(self._rawData.display) == "table" then |
| else
| | text = do_entry_name_or_sort_key_replacements(text, self._rawData.display) |
| seen[code] = true
| |
| local ancestor = export.getByCode(code, nil, true, nil, useRequire)
| |
| for _, ancestorCode in ipairs(get_codes(ancestor)) do
| |
| check_ancestor(i, ancestorCode, seen)
| |
| end
| |
| end
| |
| end
| |
| for i, ancestorCode in ipairs(codes) do
| |
| local seen = {[self:getCode()] = true}
| |
| check_ancestor(i, ancestorCode, seen)
| |
| end
| |
| end
| |
| self._ancestorCodes = codes
| |
| end
| |
| return self._ancestorCodes
| |
| end | | end |
| | | |
| --[==[Given a list of language objects or codes, returns true if at least one of them is an ancestor. This includes any etymology-only children of that ancestor. If the language's ancestor(s) are etymology-only languages, it will also return true for those language parent(s) (e.g. if Vulgar Latin is the ancestor, it will also return true for its parent, Latin). However, a parent is excluded from this if the ancestor is also ancestral to that parent (e.g. if Classical Persian is the ancestor, Persian would return false, because Classical Persian is also ancestral to Persian).]==] | | return text |
| function Language:hasAncestor(...) | | end |
| --checkObject("language", nil, ...)
| | |
|
| | |
| local function iterateOverAncestorTree(node, func, parent_check)
| | -- Add to data tables? |
| local ancestors = node:getAncestors()
| | local has_dotted_undotted_i = { |
| local ancestorsParents = {}
| | ["az"] = true, |
| for _, ancestor in ipairs(ancestors) do
| | ["crh"] = true, |
| local ret = func(ancestor) or iterateOverAncestorTree(ancestor, func, parent_check)
| | ["gag"] = true, |
| if ret then return ret end
| | ["kaa"] = true, |
| end
| | ["tt"] = true, |
| -- Check the parents of any ancestors. We don't do this if checking the parents of the other language, so that we exclude any etymology-only children of those parents that are not directly related (e.g. if the ancestor is Vulgar Latin and we are checking New Latin, we want it to return false because they are on different ancestral branches. As such, if we're already checking the parent of New Latin (Latin) we don't want to compare it to the parent of the ancestor (Latin), as this would be a false positive; it should be one or the other).
| | ["tr"] = true, |
| if not parent_check then
| | ["zza"] = true, |
| return nil
| | } |
| end
| | |
| for _, ancestor in ipairs(ancestors) do
| | function Language:makeSortKey(name, sc) |
| local ancestorParents = ancestor:getParentChain()
| | if has_dotted_undotted_i[self:getCode()] then |
| for _, ancestorParent in ipairs(ancestorParents) do
| | name = name:gsub("I", "ı") |
| if ancestorParent:getCode() == self:getCode() or ancestorParent:hasAncestor(ancestor) then
| |
| break
| |
| else
| |
| table.insert(ancestorsParents, ancestorParent)
| |
| end
| |
| end
| |
| end
| |
| for _, ancestorParent in ipairs(ancestorsParents) do
| |
| local ret = func(ancestorParent)
| |
| if ret then return ret end
| |
| end
| |
| end | |
|
| |
| local parent_check = true
| |
| for _, otherlang in ipairs{...} do
| |
| repeat
| |
| if iterateOverAncestorTree(
| |
| self,
| |
| function(ancestor)
| |
| if type(otherlang) == "string" then
| |
| return ancestor:getCode() == otherlang
| |
| else
| |
| return ancestor:getCode() == otherlang:getCode()
| |
| end
| |
| end,
| |
| parent_check
| |
| ) then
| |
| return true
| |
| elseif type(otherlang) == "string" then
| |
| otherlang = export.getByCode(otherlang, nil, true, nil, useRequire)
| |
| end
| |
| otherlang = otherlang:getParent()
| |
| parent_check = false
| |
| until not otherlang
| |
| end
| |
| return false
| |
| end | | end |
| | | |
| function Language:getAncestorChain() | | name = mw.ustring.lower(name) |
| if not self._ancestorChain then
| |
| self._ancestorChain = {}
| |
| local step = self
| |
| while true do
| |
| local ancestors = step:getAncestors()
| |
| step = #ancestors == 1 and ancestors[1] or nil
| |
| if not step then break end
| |
| table.insert(self._ancestorChain, 1, step)
| |
| end
| |
| end
| |
| return self._ancestorChain
| |
| end
| |
| | | |
| local function fetch_descendants(self, format) | | -- Remove initial hyphens and * |
| local languages = require("Module:languages/code to canonical name")
| | local hyphens_regex = "^[-־ـ*]+(.)" |
| local etymology_languages = require("Module:etymology languages/code to canonical name")
| | name = mw.ustring.gsub(name, hyphens_regex, "%1") |
| local families = require("Module:families/code to canonical name")
| |
| local descendants = {}
| |
| local family = self:getFamily()
| |
| -- Iterate over all three datasets.
| |
| for _, data in ipairs{languages, etymology_languages, families} do
| |
| for code in pairs(data) do
| |
| local lang = export.getByCode(code, nil, true, true, useRequire)
| |
| -- Test for a descendant. Earlier tests weed out most candidates, while the more intensive tests are only used sparingly.
| |
| if (
| |
| ( -- Not an alias code.
| |
| (not lang._rawData.main_code) or
| |
| lang._rawData.main_code == code
| |
| ) and
| |
| code ~= self:getCode() and -- Not self.
| |
| lang:inFamily(family) and -- In the same family.
| |
| (
| |
| family:getProtoLanguageCode() == self:getCode() or -- Self is the protolanguage.
| |
| self:hasDescendant(lang) or -- Full hasDescendant check.
| |
| (lang:getNonEtymologicalCode() == self:getCode() and not self:hasAncestor(lang)) -- Etymology-only child which isn't an ancestor.
| |
| )
| |
| ) then
| |
| if format == "object" then
| |
| table.insert(descendants, lang)
| |
| elseif format == "code" then
| |
| table.insert(descendants, code)
| |
| elseif format == "name" then
| |
| table.insert(descendants, lang:getCanonicalName())
| |
| end
| |
| end
| |
| end
| |
| end
| |
| return descendants
| |
| end
| |
| | | |
| function Language:getDescendants() | | -- If there are language-specific rules to generate the key, use those |
| if not self._descendantObjects then | | if type(self._rawData.sort_key) == "table" then |
| self._descendantObjects = fetch_descendants(self, "object")
| | name = do_entry_name_or_sort_key_replacements(name, self._rawData.sort_key) |
| end | | elseif type(self._rawData.sort_key) == "string" then |
| return self._descendantObjects
| | name = require("Module:" .. self._rawData.sort_key).makeSortKey(name, self:getCode(), sc and sc:getCode()) |
| end | | end |
| | | |
| function Language:getDescendantCodes() | | -- Remove parentheses, as long as they are either preceded or followed by something |
| if not self._descendantCodes then
| | name = mw.ustring.gsub(name, "(.)[()]+", "%1") |
| self._descendantCodes = fetch_descendants(self, "code")
| | name = mw.ustring.gsub(name, "[()]+(.)", "%1") |
| end
| |
| return self._descendantCodes
| |
| end
| |
| | | |
| function Language:getDescendantNames() | | if has_dotted_undotted_i[self:getCode()] then |
| if not self._descendantNames then | | name = name:gsub("i", "İ") |
| self._descendantNames = fetch_descendants(self, "name")
| |
| end
| |
| return self._descendantNames
| |
| end | | end |
| | | |
| function Language:hasDescendant(...) | | return mw.ustring.upper(name) |
| for _, lang in ipairs{...} do
| | end |
| if type(lang) == "string" then
| | |
| lang = export.getByCode(lang, nil, true, nil, useRequire)
| | function Language:overrideManualTranslit() |
| end
| | if self._rawData.override_translit then |
| if lang:hasAncestor(self) then
| | return true |
| return true
| | else |
| end
| |
| end
| |
| return false | | return false |
| end | | end |
|
| | end |
| --[==[Returns the name of the main category of that language. Example: {{code|lua|"French language"}} for French, whose category is at [[:Category:French language]]. Unless optional argument <code>nocap</code> is given, the language name at the beginning of the returned value will be capitalized. This capitalization is correct for category names, but not if the language name is lowercase and the returned value of this function is used in the middle of a sentence.]==]
| | |
| function Language:getCategoryName(nocap)
| | |
| if not self._categoryName then
| | function Language:transliterate(text, sc, module_override) |
| local name = self:getCanonicalName()
| | local m = self._rawData.translit_module |
| -- Only add " language" if a regular language.
| | if not ((module_override or m) and text) then |
| if #self._stack == 1 then
| | return nil |
| -- If the name already has "language" in it, don't add it.
| |
| if not name:match("[Ll]anguage$") then
| |
| name = name .. " language"
| |
| end
| |
| end
| |
| self._categoryName = name
| |
| end | |
| if nocap then
| |
| return self._categoryName
| |
| else
| |
| return mw.getContentLanguage():ucfirst(self._categoryName)
| |
| end
| |
| end | | end |
| | | |
| --[==[Creates a link to the category; the link text is the canonical name.]==] | | return require("Module:" .. (module_override or m)).tr(text, self:getCode(), sc and sc:getCode() or nil) |
| function Language:makeCategoryLink()
| | end |
| return "[[:Category:" .. self:getCategoryName() .. "|" .. self:getDisplayForm() .. "]]"
| | |
| end
| | function Language:hasTranslit() |
|
| | return self._rawData.translit_module and true or false |
| function Language:getStandardCharacters(sc)
| | end |
| if type(self._rawData.standardChars) ~= "table" then
| | |
| return self._rawData.standardChars
| | |
| else
| | function Language:link_tr() |
| if sc and type(sc) ~= "string" then
| | return self._rawData.link_tr and true or false |
| checkObject("script", nil, sc)
| | end |
| sc = sc:getCode()
| | |
| end
| | |
| if (not sc) or sc == "None" then
| | function Language:toJSON() |
| local scripts = {}
| | local entryNamePatterns = nil |
| for _, script in pairs(self._rawData.standardChars) do
| | local entryNameRemoveDiacritics = nil |
| table.insert(scripts, script)
| |
| end
| |
| return table.concat(scripts)
| |
| end
| |
| if self._rawData.standardChars[sc] then
| |
| return self._rawData.standardChars[sc] .. (self._rawData.standardChars[1] or "")
| |
| end
| |
| end
| |
| end
| |
|
| |
| --[==[Make the entry name (i.e. the correct page name).]==]
| |
| function Language:makeEntryName(text, sc)
| |
| if (not text) or text == "" then
| |
| return text, nil, {}
| |
| end
| |
|
| |
| -- Set `unsupported` as true if certain conditions are met.
| |
| local unsupported
| |
| -- If there's an underscore.
| |
| if text:find("_") then
| |
| unsupported = true
| |
| -- If it looks like an interwiki link.
| |
| elseif text:find(":") and text ~= ":" then
| |
| local m_utildata = conditionalRequire("Module:utilities/data")
| |
| local prefix = text:gsub("^:*(.-):.*", mw.ustring.lower)
| |
| if m_utildata.interwikis[prefix] or m_utildata.namespaces[prefix] then
| |
| unsupported = true
| |
| end
| |
| end
| |
|
| |
| -- Check if the text is a listed unsupported title.
| |
| local unsupportedTitles = conditionalRequire("Module:links/data").unsupported_titles
| |
| if unsupportedTitles[text] then
| |
| return "Unsupported titles/" .. unsupportedTitles[text], nil, {}
| |
| end
| |
|
| |
| sc = checkScript(text, self, sc)
| |
|
| |
| local fail, cats
| |
| text = normalize(text, sc)
| |
| text, fail, cats = iterateSectionSubstitutions(text, nil, nil, self, sc, self._rawData.entry_name, "makeEntryName")
| |
|
| |
| text = mw.ustring.gsub(text, "^[¿¡]?([^%s%p]+)%s*[؟?!;՛՜ ՞ ՟?!︖︕।॥။၊་།]?$", "%1") or text
| |
| text = unsupported and "Unsupported titles/" .. text or text
| |
|
| |
| return text, fail, cats
| |
| end
| |
|
| |
| --[==[Generates alternative forms using a specified method, and returns them as a table. If no method is specified, returns a table containing only the input term.]==]
| |
| function Language:generateForms(text, sc)
| |
| if self._rawData.generate_forms then
| |
| sc = checkScript(text, self, sc)
| |
| return require("Module:" .. self._rawData.generate_forms).generateForms(text, self:getCode(), sc:getCode())
| |
| else
| |
| return {text}
| |
| end
| |
| end
| |
|
| |
| --[==[Creates a sort key for the given entry name, following the rules appropriate for the language. This removes diacritical marks from the entry name if they are not considered significant for sorting, and may perform some other changes. Any initial hyphen is also removed, and any parentheses are removed as well.
| |
| The <code>sort_key</code> setting for each language in the data modules defines the replacements made by this function, or it gives the name of the module that takes the entry name and returns a sortkey.]==]
| |
| function Language:makeSortKey(text, sc)
| |
| if (not text) or text == "" then
| |
| return text, nil, {}
| |
| end
| |
| -- Remove soft hyphens, strip markers and HTML tags.
| |
| text = text:gsub("", "")
| |
| text = mw.text.unstrip(text)
| |
| :gsub("<[^<>]+>", "")
| |
|
| |
| text = mw.uri.decode(text, "PATH")
| |
| text = checkNoEntities(text)
| |
|
| |
| -- Remove initial hyphens and *.
| |
| text = mw.ustring.gsub(text, "^([-]*)[-־ـ᠊*]+([-]*)(.)", "%1%2%3")
| |
|
| |
| sc = checkScript(text, self, sc)
| |
|
| |
| text = normalize(text, sc)
| |
| text = removeCarets(text, sc)
| |
|
| |
| -- For languages with dotted dotless i, ensure that "İ" is sorted as "i", and "I" is sorted as "ı".
| |
| if self._rawData.dotted_dotless_i then
| |
| text = text
| |
| :gsub(mw.ustring.toNFD("İ"), "i")
| |
| :gsub("I", "ı")
| |
| text = sc:toFixedNFD(text)
| |
| end
| |
| -- Convert to lowercase, make the sortkey, then convert to uppercase. Where the language has dotted dotless i, it is usually not necessary to convert "i" to "İ" and "ı" to "I" first, because "I" will always be interpreted as conventional "I" (not dotless "İ") by any sorting algorithms, which will have been taken into account by the sortkey substitutions themselves. However, if no sortkey substitutions have been specified, then conversion is necessary so as to prevent "i" and "ı" both being sorted as "I".
| |
| -- An exception is made for scripts that (sometimes) sort by scraping page content, as that means they are sensitive to changes in capitalization (as it changes the target page).
| |
| local fail, cats
| |
| if not sc:sortByScraping() then
| |
| text = mw.ustring.lower(text)
| |
| end
| |
|
| |
| text, fail, cats = iterateSectionSubstitutions(text, nil, nil, self, sc, self._rawData.sort_key, "makeSortKey")
| |
|
| |
| if not sc:sortByScraping() then
| |
| if self._rawData.dotted_dotless_i and not self._rawData.sort_key then
| |
| text = text
| |
| :gsub("ı", "I")
| |
| :gsub("i", "İ")
| |
| text = sc:toFixedNFC(text)
| |
| end
| |
| text = mw.ustring.upper(text)
| |
| end
| |
|
| |
| -- Remove parentheses, as long as they are either preceded or followed by something.
| |
| text = text
| |
| :gsub("(.)[()]+", "%1")
| |
| :gsub("[()]+(.)", "%1")
| |
|
| |
| text = require("Module:string utilities").escape_risky_characters(text)
| |
| return text, fail, cats
| |
| end
| |
| | | |
| --[==[Create the form used as a basis for display text and transliteration.]==] | | if self._rawData.entry_name then |
| local function processDisplayText(text, self, sc, keepCarets, keepPrefixes)
| | entryNameRemoveDiacritics = self._rawData.entry_name.remove_diacritics |
| local subbedChars = {}
| | if self._rawData.entry_name.from then |
| text, subbedChars = doTempSubstitutions(text, subbedChars, keepCarets)
| | entryNamePatterns = {} |
|
| | for i, from in ipairs(self._rawData.entry_name.from) do |
| text = mw.uri.decode(text, "PATH")
| | local to = self._rawData.entry_name.to[i] or "" |
| text = checkNoEntities(text) | | table.insert(entryNamePatterns, { from = from, to = to }) |
|
| |
| sc = checkScript(text, self, sc)
| |
| local fail, cats | |
| text = normalize(text, sc)
| |
| text, fail, cats, subbedChars = iterateSectionSubstitutions(text, subbedChars, keepCarets, self, sc, self._rawData.display_text, "makeDisplayText")
| |
|
| |
| text = removeCarets(text, sc)
| |
|
| |
| -- Remove any interwiki link prefixes (unless they have been escaped or this has been disabled).
| |
| if text:match(":") and not keepPrefixes then
| |
| local m_utildata, rep = conditionalRequire("Module:utilities/data") | |
| repeat | |
| text, rep = text:gsub("\\\\(\\*:)", "\3%1")
| |
| until rep == 0
| |
| text = text
| |
| :gsub("\\:", "\4")
| |
| while true do
| |
| local prefix = text:gsub("^(.-):.+", function(m1)
| |
| return m1:gsub("\244[\128-\191]*", "")
| |
| end)
| |
| if not prefix or prefix == text then
| |
| break
| |
| end
| |
| local lower_prefix = mw.ustring.lower(prefix) | |
| if not (m_utildata.interwikis[lower_prefix] or prefix == "") then
| |
| break
| |
| end | |
| text = text:gsub("^(.-):(.*)", function(m1, m2)
| |
| local ret = {}
| |
| for subbedChar in m1:gmatch("\244[\128-\191]*") do
| |
| table.insert(ret, subbedChar)
| |
| end
| |
| return table.concat(ret) .. m2
| |
| end)
| |
| end | | end |
| text = text
| |
| :gsub("\3", "\\")
| |
| :gsub("\4", ":")
| |
| end | | end |
|
| |
| return text, fail, cats, subbedChars
| |
| end | | end |
| | | |
| --[==[Make the display text (i.e. what is displayed on the page).]==] | | local ret = { |
| function Language:makeDisplayText(text, sc, keepPrefixes)
| | ancestors = self._rawData.ancestors, |
| if (not text) or text == "" then | | canonicalName = self:getCanonicalName(), |
| return text, nil, {}
| | categoryName = self:getCategoryName("nocap"), |
| end | | code = self._code, |
| | | entryNamePatterns = entryNamePatterns, |
| local fail, cats, subbedChars | | entryNameRemoveDiacritics = entryNameRemoveDiacritics, |
| text, fail, cats, subbedChars = processDisplayText(text, self, sc, nil, keepPrefixes) | | family = self._rawData[3] or self._rawData.family, |
| | | otherNames = self:getOtherNames(true), |
| text = require("Module:string utilities").escape_risky_characters(text) | | aliases = self:getAliases(), |
| return undoTempSubstitutions(text, subbedChars), fail, cats | | varieties = self:getVarieties(), |
| end | | scripts = self._rawData.scripts or self._rawData[4], |
| | type = self:getType(), |
| | wikimediaLanguages = self._rawData.wikimedia_codes, |
| | wikidataItem = self:getWikidataItem(), |
| | } |
| | | |
| --[==[Transliterates the text from the given script into the Latin script (see [[Wiktionary:Transliteration and romanization]]). The language must have the <code>translit_module</code> property for this to work; if it is not present, {{code|lua|nil}} is returned. | | return require("Module:JSON").toJSON(ret) |
| The <code>sc</code> parameter is handled by the transliteration module, and how it is handled is specific to that module. Some transliteration modules may tolerate {{code|lua|nil}} as the script, others require it to be one of the possible scripts that the module can transliterate, and will show an error if it's not one of them. For this reason, the <code>sc</code> parameter should always be provided when writing non-language-specific code.
| | end |
| The <code>module_override</code> parameter is used to override the default module that is used to provide the transliteration. This is useful in cases where you need to demonstrate a particular module in use, but there is no default module yet, or you want to demonstrate an alternative version of a transliteration module before making it official. It should not be used in real modules or templates, only for testing. All uses of this parameter are tracked by [[Template:tracking/module_override]].
| | |
| '''Known bugs''':
| | |
| * This function assumes {tr(s1) .. tr(s2) == tr(s1 .. s2)}. When this assertion fails, wikitext markups like <nowiki>'''</nowiki> can cause wrong transliterations.
| | -- Do NOT use these methods! |
| * HTML entities like <code>&apos;</code>, often used to escape wikitext markups, do not work.]==]
| | -- All uses should be pre-approved on the talk page! |
| function Language:transliterate(text, sc, module_override)
| | function Language:getRawData() |
| -- If the language doesn't have transliteration data and there's no override, return nil. Otherwise, if there is no text (or the text is "-"), return the text unchanged.
| | return self._rawData |
| if not (self._rawData.translit or module_override) then
| | end |
| return nil, true, {}
| |
| elseif (not text) or text == "" or text == "-" then
| |
| return text, nil, {}
| |
| end
| |
| -- If the script is not transliteratable (and no override is given), return nil.
| |
| sc = checkScript(text, self, sc)
| |
| if not (sc:isTransliterated() or module_override) then
| |
| return nil, true, {}
| |
| end
| |
|
| |
| -- Remove any strip markers.
| |
| text = mw.text.unstrip(text)
| |
|
| |
| -- Get the display text with the keepCarets flag set.
| |
| local fail, cats, subbedChars
| |
| text, fail, cats, subbedChars = processDisplayText(text, self, sc, true)
| |
|
| |
| -- Transliterate (using the module override if applicable).
| |
| text, fail, cats, subbedChars = iterateSectionSubstitutions(text, subbedChars, true, self, sc, module_override or self._rawData.translit, "tr")
| |
|
| |
| -- Incomplete transliterations return nil.
| |
| if text then
| |
| local best_sc = require("Module:scripts").findBestScriptWithoutLang(text):getCode()
| |
| if best_sc ~= "Latn" and best_sc ~= "Latinx" and best_sc ~= "None" then
| |
| return nil, true, cats
| |
| end
| |
| else
| |
| return nil, true, cats
| |
| end
| |
|
| |
| text = require("Module:string utilities").escape_risky_characters(text)
| |
| text = undoTempSubstitutions(text, subbedChars)
| |
|
| |
| -- If the script does not use capitalization, then capitalize any letters of the transliteration which are immediately preceded by a caret (and remove the caret).
| |
| if text and not sc:hasCapitalization() and text:match("%^") then
| |
| text = processCarets(text, "%^([\128-\191\244]*%*?)([^\128-\191\244][\128-\191]*)", function(m1, m2)
| |
| return m1 .. mw.ustring.upper(m2)
| |
| end)
| |
| end
| |
|
| |
| return text, fail, cats
| |
| end
| |
|
| |
| function Language:overrideManualTranslit()
| |
| return not not self._rawData.override_translit
| |
| end
| |
|
| |
| --[==[Returns {{code|lua|true}} if the language has a transliteration module, {{code|lua|false}} if it doesn't.]==]
| |
| function Language:hasTranslit()
| |
| return not not self._rawData.translit
| |
| end
| |
|
| |
| function Language:link_tr()
| |
| return not not self._rawData.link_tr
| |
| end
| |
|
| |
| function Language:toJSON(returnTable)
| |
| local entryNamePatterns = nil
| |
| local entryNameRemoveDiacritics = nil
| |
|
| |
| if self._rawData.entry_name then
| |
| entryNameRemoveDiacritics = self._rawData.entry_name.remove_diacritics
| |
| if self._rawData.entry_name.from then
| |
| entryNamePatterns = {}
| |
| for i, from in ipairs(self._rawData.entry_name.from) do
| |
| table.insert(entryNamePatterns, {from = from, to = self._rawData.entry_name.to[i] or ""})
| |
| end
| |
| end
| |
| end
| |
|
| |
| if not self._type then
| |
| self:hasType()
| |
| end
| |
| local types = {}
| |
| for type in pairs(self._type) do
| |
| table.insert(types, type)
| |
| end
| |
|
| |
| local ret = {
| |
| ancestors = self:getAncestorCodes(),
| |
| canonicalName = self:getCanonicalName(),
| |
| categoryName = self:getCategoryName("nocap"),
| |
| code = self:getCode(),
| |
| entryNamePatterns = entryNamePatterns,
| |
| entryNameRemoveDiacritics = entryNameRemoveDiacritics,
| |
| family = self:getFamilyCode(),
| |
| otherNames = self:getOtherNames(true),
| |
| aliases = self:getAliases(),
| |
| varieties = self:getVarieties(),
| |
| scripts = self:getScriptCodes(),
| |
| parent = self._parentCode or nil,
| |
| nonEtymological = self._nonEtymologicalCode or nil,
| |
| type = types,
| |
| wikimediaLanguages = self:getWikimediaLanguageCodes(),
| |
| wikidataItem = self:getWikidataItem(),
| |
| }
| |
|
| |
| ret = require("Module:table").deepcopy(ret)
| |
|
| |
| if returnTable then
| |
| return ret
| |
| else
| |
| return require("Module:JSON").toJSON(ret)
| |
| end
| |
| end
| |
|
| |
| --[==[
| |
| <span style="color: #BA0000">This function is not for use in entries or other content pages.</span>
| |
| Returns a blob of data about the language. The format of this blob is undocumented, and perhaps unstable; it's intended for things like the module's own unit-tests, which are "close friends" with the module and will be kept up-to-date as the format changes.
| |
| -- Do NOT use these methods!
| |
| -- All uses should be pre-approved on the talk page!
| |
| ]==]
| |
| function Language:getRawData()
| |
| local rawData = {}
| |
| for _, element in ipairs(self._stack) do
| |
| for k, v in pairs(element) do
| |
| rawData[k] = v
| |
| end
| |
| end
| |
| return rawData
| |
| end
| |
|
| |
| --[==[<span style="color: #BA0000">This function is not for use in entries or other content pages.</span>
| |
| Returns a blob of data about the language that contains the "extra data". Much like with getRawData, the format of this blob is undocumented, and perhaps unstable; it's intended for things like the module's own unit-tests, which are "close friends" with the module and will be kept up-to-date as the format changes.]==] | |
| function Language:getRawExtraData()
| |
| if #self._stack == 1 then
| |
| self:loadInExtraData()
| |
| end
| |
| return self._extraData
| |
| end
| |
|
| |
| local function getRawExtraLanguageData(code)
| |
| local modulename = export.getExtraDataModuleName(code)
| |
| return modulename and conditionalRequire("Module:" .. modulename)[code] or nil
| |
| end
| |
|
| |
|
| function Language:loadInExtraData()
| | function Language:getRawExtraData() |
| if not self._extraData then
| | self:loadInExtraData() |
| -- load extra data from module and assign to _extraData field
| | return self._extraData |
| -- use empty table as a fallback if extra data is nil
| |
| self._extraData = getRawExtraLanguageData(self:getCode()) or {}
| |
| end
| |
| end
| |
|
| |
| return Language | |
| end | | end |
|
| |
|
| local function make_stack(code, data, parent, useRequire)
| | Language.__index = Language |
| parent.__index = parent
| | |
|
| | |
| local lang = {_code = code}
| | function export.getDataModuleName(code) |
|
| | if code:find("^%l%l$") then |
| -- Full language.
| | return "languages/data2" |
| if not parent._stack then
| | elseif code:find("^%l%l%l$") then |
| -- Create stack, accessed with rawData metamethod.
| | local prefix = code:sub(1, 1) |
| lang._stack = parent._rawData and {parent._rawData, data} or {data}
| | return "languages/data3/" .. prefix |
| lang._rawData = setmetatable({}, {
| | elseif code:find("^[%l-]+$") then |
| __index = function(t, k)
| | return "languages/datax" |
| -- Data that isn't inherited from the parent.
| |
| local function no_inherit(lang, t, k)
| |
| if (
| |
| k == "aliases" or
| |
| k == "varieties" or
| |
| k == "otherNames"
| |
| ) then
| |
| return lang._stack[#lang._stack][k]
| |
| end
| |
| end
| |
| -- Data that is appended by each generation.
| |
| local function append_data(lang, t, k)
| |
| if k == "type" then
| |
| local parts = {}
| |
| for i = 1, #lang._stack do
| |
| table.insert(parts, lang._stack[i][k])
| |
| end
| |
| if type(parts[1]) == "string" then
| |
| return table.concat(parts, ", ")
| |
| end
| |
| end
| |
| end
| |
| -- Otherwise, iterate down the stack, looking for a match.
| |
| local function inherit_data(lang, t, k)
| |
| local i = #lang._stack
| |
| while not lang._stack[i][k] and i > 1 do
| |
| i = i - 1
| |
| end
| |
| return lang._stack[i][k]
| |
| end
| |
| local ret = no_inherit(lang, t, k) or
| |
| append_data(lang, t, k) or
| |
| inherit_data(lang, t, k)
| |
| if (
| |
| k == 4 or
| |
| k == "ancestors" or
| |
| k == "wikimedia_codes"
| |
| ) then
| |
| if type(ret) == "table" then
| |
| return ret
| |
| elseif type(ret) == "string" then
| |
| return mw.text.split(ret, "%s*,%s*")
| |
| end
| |
| else
| |
| return ret
| |
| end
| |
| end,
| |
| -- Retain immutability (as writing to rawData will break functionality).
| |
| __newindex = function()
| |
| error("not allowed to edit rawData")
| |
| end
| |
| })
| |
| -- Non-etymological code is the parent code.
| |
| lang._nonEtymologicalCode = parent._code or code
| |
| -- Etymology-only.
| |
| else | | else |
| -- Copy over rawData and stack to the new object, and add new layer to stack. | | return nil |
| lang._rawData = parent._rawData
| |
| lang._stack = parent._stack
| |
| table.insert(lang._stack, data)
| |
| -- Copy non-etymological code.
| |
| lang._nonEtymologicalCode = parent._nonEtymologicalCode
| |
| end | | end |
|
| |
| return setmetatable(lang, parent)
| |
| end | | end |
|
| |
|
| function export.getDataModuleName(code) | | |
| if code:match("^%l%l$") then | | function export.getExtraDataModuleName(code) |
| return "languages/data/2" | | if code:find("^%l%l$") then |
| elseif code:match("^%l%l%l$") then | | return "languages/extradata2" |
| | elseif code:find("^%l%l%l$") then |
| local prefix = code:sub(1, 1) | | local prefix = code:sub(1, 1) |
| return "languages/data/3/" .. prefix | | return "languages/extradata3/" .. prefix |
| elseif code:match("^[%l-]+$") then | | elseif code:find("^[%l-]+$") then |
| return "languages/data/exceptional" | | return "languages/extradatax" |
| else | | else |
| return nil | | return nil |
Line 1,168: |
Line 486: |
| end | | end |
|
| |
|
| function export.getExtraDataModuleName(code) | | |
| local dataModule = export.getDataModuleName(code) | | local function getRawLanguageData(code) |
| return dataModule and dataModule .. "/extra" or nil | | local modulename = export.getDataModuleName(code) |
| | return modulename and mw.loadData("Module:" .. modulename)[code] or nil |
| | end |
| | |
| | |
| | local function getRawExtraLanguageData(code) |
| | local modulename = export.getExtraDataModuleName(code) |
| | return modulename and mw.loadData("Module:" .. modulename)[code] or nil |
| end | | end |
|
| |
|
| function export.makeObject(code, data, useRequire) | | |
| if not data then | | function Language:loadInExtraData() |
| return nil | | if not self._extraData then |
| end
| | -- load extra data from module and assign to meta table |
|
| | -- use empty table as a fallback if extra data is nil |
| code = data.main_code or code
| | local meta = getmetatable(self) |
|
| | meta._extraData = getRawExtraLanguageData(self._code) or {} |
| if data.type:find("family") and not data[5] then
| | setmetatable(self, meta) |
| return require("Module:families").makeObject(code, data, useRequire) | |
| else
| |
| local parent | |
| if data[5] then
| |
| parent = export.getByCode(data[5], nil, true, true, useRequire)
| |
| else | |
| parent = make_language(code, data, useRequire)
| |
| end | |
| return make_stack(code, data, parent, useRequire)
| |
| end | | end |
| end | | end |
|
| |
|
| --[==[Finds the language whose code matches the one provided. If it exists, it returns a <code class="nf">Language</code> object representing the language. Otherwise, it returns {{code|lua|nil}}, unless <code class="n">paramForError</code> is given, in which case an error is generated. If <code class="n">paramForError</code> is {{code|lua|true}}, a generic error message mentioning the bad code is generated; otherwise <code class="n">paramForError</code> should be a string or number specifying the parameter that the code came from, and this parameter will be mentioned in the error message along with the bad code. If <code class="n">allowEtymLang</code> is specified, etymology language codes are allowed and looked up along with normal language codes. If <code class="n">allowFamily</code> is specified, language family codes are allowed and looked up along with normal language codes.]==]
| | |
| function export.getByCode(code, paramForError, allowEtymLang, allowFamily, useRequire) | | function export.makeObject(code, data) |
| | return data and setmetatable({ _rawData = data, _code = code }, Language) or nil |
| | end |
| | |
| | |
| | function export.getByCode(code, paramForError, allowEtymLang, allowFamily) |
| if type(code) ~= "string" then | | if type(code) ~= "string" then |
| local typ
| | error("The function getByCode expects a string as its first argument, but received " .. (code == nil and "nil" or "a " .. type(code)) .. ".") |
| if not code then
| |
| typ = "nil"
| |
| elseif checkObject("language", true, code) then
| |
| typ = "a language object"
| |
| elseif checkObject("family", true, code) then
| |
| typ = "a family object"
| |
| else
| |
| typ = "a " .. type(code)
| |
| end
| |
| error("The function getByCode expects a string as its first argument, but received " .. typ .. ".") | |
| end | | end |
| | | |
| local function conditionalRequire(modulename) | | local retval = export.makeObject(code, getRawLanguageData(code)) |
| if useRequire then | | if not retval and allowEtymLang then |
| return require(modulename)
| | retval = require("Module:etymology languages").getByCode(code) |
| | end |
| | if not retval and allowFamily then |
| | retval = require("Module:families").getByCode(code) |
| | end |
| | if not retval and paramForError then |
| | local codetext = nil |
| | if allowEtymLang and allowFamily then |
| | codetext = "language, etymology language or family code" |
| | elseif allowEtymLang then |
| | codetext = "language or etymology language code" |
| | elseif allowFamily then |
| | codetext = "language or family code" |
| else | | else |
| return mw.loadData(modulename) | | codetext = "language code" |
| end | | end |
| | export.err(code, paramForError, codetext) |
| end | | end |
|
| |
| local modulename = export.getDataModuleName(code)
| |
| local data = modulename and
| |
| conditionalRequire("Module:" .. modulename)[code] or
| |
| (allowEtymLang and conditionalRequire("Module:etymology languages/data")[code]) or
| |
| (allowFamily and conditionalRequire("Module:families/data")[code]) or
| |
| (allowEtymLang and allowFamily and conditionalRequire("Module:families/data/etymology")[code])
| |
|
| |
| local retval = code and data and export.makeObject(code, data, useRequire)
| |
|
| |
| if not retval and paramForError then
| |
| require("Module:languages/errorGetBy").code(code, paramForError, allowEtymLang, allowFamily)
| |
| end
| |
|
| |
| return retval | | return retval |
| end | | end |
|
| |
|
| --[==[Like {{code|lua|getByCanonicalName()}}, except it also looks at the <code class="n">otherNames</code> listed in the non-etymology language data modules, and does not (currently) have options to look up etymology languages and families.]==]
| | |
| function export.getByName(name, errorIfInvalid) | | function export.getByName(name, errorIfInvalid) |
| local byName = mw.loadData("Module:languages/by name") | | local byName = mw.loadData("Module:languages/by name") |
Line 1,240: |
Line 550: |
| if not code then | | if not code then |
| if errorIfInvalid then | | if errorIfInvalid then |
| error("The language name \"" .. name .. "\" is not valid. See [[Wiktionary:List of languages]].") | | error("The language name \"" .. name .. "\" is not valid.") |
| else | | else |
| return nil | | return nil |
Line 1,246: |
Line 556: |
| end | | end |
| | | |
| return export.getByCode(code) | | return export.makeObject(code, getRawLanguageData(code)) |
| end | | end |
|
| |
|
| --[==[Finds the language whose canonical name (the name used to represent that language on Wiktionary) or other name matches the one provided. If it exists, it returns a <code class="nf">Language</code> object representing the language. Otherwise, it returns {{code|lua|nil}}, unless <code class="n">paramForError</code> is given, in which case an error is generated. If <code class="n">allowEtymLang</code> is specified, etymology language codes are allowed and looked up along with normal language codes. If <code class="n">allowFamily</code> is specified, language family codes are allowed and looked up along with normal language codes.
| | function export.getByCanonicalName(name, errorIfInvalid, allowEtymLang, allowFamily) |
| The canonical name of languages should always be unique (it is an error for two languages on Wiktionary to share the same canonical name), so this is guaranteed to give at most one result.
| | local byName = mw.loadData("Module:languages/canonical names") |
| This function is powered by [[Module:languages/canonical names]], which contains a pre-generated mapping of non-etymology-language canonical names to codes. It is generated by going through the [[:Category:Language data modules]] for non-etymology languages. When <code class="n">allowEtymLang</code> is specified for the above function, [[Module:etymology languages/canonical names]] may also be used, and when <code class="n">allowFamily</code> is specified for the above function, [[Module:families/canonical names]] may also be used.]==]
| |
| function export.getByCanonicalName(name, errorIfInvalid, allowEtymLang, allowFamily, useRequire) | |
| local function conditionalRequire(modulename) | |
| if useRequire then
| |
| return require(modulename)
| |
| else
| |
| return mw.loadData(modulename)
| |
| end
| |
| end
| |
|
| |
| local byName = conditionalRequire("Module:languages/canonical names")
| |
| local code = byName and byName[name] | | local code = byName and byName[name] |
| | | |
| if not code and allowEtymLang then | | local retval = code and export.makeObject(code, getRawLanguageData(code)) or nil |
| byName = conditionalRequire("Module:etymology languages/canonical names") | | if not retval and allowEtymLang then |
| code = byName and byName[name] or
| | retval = require("Module:etymology languages").getByCanonicalName(name) |
| byName[name:gsub(" [Ss]ubstrate$", "")] or
| |
| byName[name:gsub("^a ", "")] or
| |
| byName[name:gsub("^a ", ""):gsub(" [Ss]ubstrate$", "")] or
| |
| -- For etymology families like "ira-pro".
| |
| -- FIXME: This is not ideal, as it allows " languages" to be appended to any etymology-only language, too.
| |
| byName[name:match("^(.*) languages$")]
| |
| end | | end |
|
| | if not retval and allowFamily then |
| if not code and allowFamily then | | local famname = name:match("^(.*) languages$") |
| byName = conditionalRequire("Module:families/canonical names") | | famname = famname or name |
| code = byName and byName[name] or
| | retval = require("Module:families").getByCanonicalName(famname) |
| byName[name:match("^(.*) languages$")]
| |
| end | | end |
|
| |
| local retval = code and export.getByCode(code, errorIfInvalid, allowEtymLang, allowFamily, useRequire)
| |
|
| |
| if not retval and errorIfInvalid then | | if not retval and errorIfInvalid then |
| require("Module:languages/errorGetBy").canonicalName(name, allowEtymLang, allowFamily) | | local text |
| | if allowEtymLang and allowFamily then |
| | text = "language, etymology language or family name" |
| | elseif allowEtymLang then |
| | text = "language or etymology language name" |
| | elseif allowFamily then |
| | text = "language or family name" |
| | else |
| | text = "language name" |
| | end |
| | error("The " .. text .. " \"" .. name .. "\" is not valid.") |
| end | | end |
|
| |
| return retval | | return retval |
| end | | end |
|
| |
|
| --[==[Used by [[Module:languages/data/2]] (et al.) to add default types to the entities returned.]==]
| | function export.iterateAll() |
| function export.addDefaultTypes(data, regular, ...)
| | mw.incrementExpensiveFunctionCount() |
| for _, entity in pairs(data) do | | local m_data = mw.loadData("Module:languages/alldata") |
| -- "regular" encompasses everything that doesn't have another type already assigned.
| | local func, t, var = pairs(m_data) |
| if regular then
| | |
| entity.type = entity.type or "regular"
| | return function() |
| end
| | local code, data = func(t, var) |
| local types = table.concat({...}, ", ") | | return export.makeObject(code, data) |
| if #types > 0 then | |
| entity.type = types .. (entity.type and (", " .. entity.type) or "")
| |
| end
| |
| end | | end |
| return data
| |
| end | | end |
|
| |
|
| --[==[Used by [[Module:etymology languages/data]] and [[Module:families/data/etymology]] to finalize the data into the format that is actually returned.]==] | | --[[ If language is an etymology language, iterates through parent languages |
| function export.finalizeEtymologyData(data) | | until it finds a non-etymology language. ]] |
| local aliases = {} | | function export.getNonEtymological(lang) |
| for code, entity in pairs(data) do
| | while lang:getType() == "etymology language" do |
| -- Move parent to 5 and family to 3. | | local parentCode = lang:getParentCode() |
| data[code][5] = data[code][3]
| | lang = export.getByCode(parentCode) |
| data[code][3] = data[code].family | | or require("Module:etymology languages").getByCode(parentCode) |
| data[code].family = nil
| | or require("Module:families").getByCode(parentCode) |
| -- Assign any alias codes listed in alias_codes. The main_code field is used to make sure objects always use that to identify themselves, which means all aliases are fungible with their counterparts.
| |
| if entity.alias_codes then
| |
| entity.main_code = code | |
| for _, alias in ipairs(entity.alias_codes) do | |
| aliases[alias] = entity
| |
| end
| |
| entity.alias_codes = nil
| |
| end
| |
| end | | end |
| for code, alias in pairs(aliases) do | | |
| data[code] = alias
| | return lang |
| end
| |
| return data
| |
| end
| |
| | |
| --[==[For backwards compatibility only; modules should require the /error themselves.]==]
| |
| function export.err(lang_code, param, code_desc, template_tag, not_real_lang)
| |
| return require("Module:languages/error")(lang_code, param, code_desc, template_tag, not_real_lang) | |
| end | | end |
|
| |
|
| return export | | return export |