45,646
edits
No edit summary |
No edit summary |
||
Line 60: | Line 60: | ||
(indicating a missing code). If `not_real_lang` is given, this check is suppressed. | (indicating a missing code). If `not_real_lang` is given, this check is suppressed. | ||
]=] | ]=] | ||
--- Pick the script object to use for `text`.
-- Returns `sc` unchanged when `check_object("script", true, sc)` accepts it and
-- its code is not "None"; otherwise asks the language object for its best guess.
-- NOTE(review): the `true` argument to `check_object` presumably suppresses the
-- error on an invalid object so that a falsy value is returned instead - confirm.
-- @param text  text handed to `self:findBestScript` when a fallback is needed
-- @param self  language object providing `findBestScript`
-- @param sc    candidate script object (may be nil or invalid)
-- @return      `sc`, or the result of `self:findBestScript(text)`
local function checkScript(text, self, sc)
	-- `and` short-circuits, so `getCode` is only called on an object that passed the check.
	if check_object("script", true, sc) and sc:getCode() ~= "None" then
		return sc
	end
	return self:findBestScript(text)
end
--- Normalize `text` with the help of a script object.
-- Applies `sc:fixDiscouragedSequences` first and feeds its result to
-- `sc:toFixedNFD`; whatever that second call returns is returned as-is.
-- @param text  string to normalize
-- @param sc    script object providing both methods
-- @return      the result of `sc:toFixedNFD` on the fixed text
local function normalize(text, sc)
	text = sc:fixDiscouragedSequences(text)
	return sc:toFixedNFD(text)
end
function export.err(lang_code, param, code_desc, template_tag, not_real_lang) | function export.err(lang_code, param, code_desc, template_tag, not_real_lang) | ||
Line 667: | Line 681: | ||
--- Transliterate `text` for this language.
-- @param text             text to transliterate; a falsy value, "" or "-" is returned unchanged
-- @param sc               script object; resolved through `checkScript` when missing, invalid or "None"
-- @param module_override  optional transliteration module used in place of `self._rawData.translit`
-- @return text  the transliteration, or nil when none could be produced
-- @return fail  boolean; true on the paths where the script is not transliterated, the
--               substitution step yields no text, or the result still contains
--               characters of the source script
-- @return cats  third value passed through from `iterateSectionSubstitutions`
--               (presumably categories - confirm), or an empty table on the early returns
function Language:transliterate(text, sc, module_override)
	-- Without transliteration data and without an override there is nothing to do.
	-- Empty text (or the "-" placeholder) is handed back unchanged.
	if not (self._rawData.translit or module_override) then
		return nil, false, {}
	elseif (not text) or text == "" or text == "-" then
		return text, false, {}
	end

	-- If the script is not transliteratable (and no override is given), return nil.
	sc = checkScript(text, self, sc)
	if not (sc:isTransliterated() or module_override) then
		return nil, true, {}
	end

	-- Remove any strip markers.
	text = mw.text.unstrip(text)

	-- Get the display text with the keepCarets flag set.
	local fail, cats, subbedChars
	text, fail, cats, subbedChars = processDisplayText(text, self, sc, true)

	-- Transliterate (using the module override if applicable).
	text, fail, cats, subbedChars = iterateSectionSubstitutions(text, subbedChars, true, self, sc, module_override or self._rawData.translit, "tr")
	if not text then
		return nil, true, cats
	end

	-- Incomplete transliterations return nil.
	local charset = sc.characters
	if charset and umatch(text, "[" .. charset .. "]") then
		local scripts = require("Module:scripts")
		-- Remove any characters in Latin, which includes Latin characters also included in other scripts (as these are false positives), as well as any PUA substitutions. Anything remaining should only be script code "None" (e.g. numerals).
		local check_text = ugsub(text, "[" .. scripts.getByCode("Latn").characters .. "-]+", "")
		-- Set none_is_last_resort_only flag, so that any non-None chars will cause a script other than "None" to be returned.
		if scripts.findBestScriptWithoutLang(check_text, true):getCode() ~= "None" then
			return nil, true, cats
		end
	end

	text = escape_risky_characters(text)
	text = undoTempSubstitutions(text, subbedChars)

	-- If the script does not use capitalization, then capitalize any letters of the transliteration which are immediately preceded by a caret (and remove the caret).
	if text and not sc:hasCapitalization() and text:find("^", 1, true) then
		text = processCarets(text, "%^([\128-\191\244]*%*?)([^\128-\191\244][\128-\191]*)", function(m1, m2)
			return m1 .. uupper(m2)
		end)
	end

	-- Track module overrides.
	if module_override ~= nil then
		track("module_override")
	end

	-- Only report failure when no text survived; `not not` coerces to a real boolean.
	fail = (text == nil) and (not not fail) or false
	return text, fail, cats
end