Module:headword: Difference between revisions

m 1 revision imported
No edit summary
 
(2 intermediate revisions by the same user not shown)
Line 2: Line 2:


-- Named constants for all modules used, to make it easier to swap out sandbox versions.
-- Named constants for all modules used, to make it easier to swap out sandbox versions.
local debug_track_module = "Module:debug/track"
local en_utilities_module = "Module:en-utilities"
local en_utilities_module = "Module:en-utilities"
local gender_and_number_module = "Module:getn"
local gender_and_number_module = "Module:gender and number"
local headword_data_module = "Module:headword/data"
local headword_data_module = "Module:headword/data"
local headword_page_module = "Module:headword/page"
local headword_page_module = "Module:headword/page"
Line 37: Line 38:
--[==[
--[==[
Loaders for functions in other modules, which overwrite themselves with the target function when called. This ensures modules are only loaded when needed, retains the speed/convenience of locally-declared pre-loaded functions, and has no overhead after the first call, since the target functions are called directly in any subsequent calls.]==]
Loaders for functions in other modules, which overwrite themselves with the target function when called. This ensures modules are only loaded when needed, retains the speed/convenience of locally-declared pre-loaded functions, and has no overhead after the first call, since the target functions are called directly in any subsequent calls.]==]
-- Lazy loader for the tracking function. The first call loads the module named by `debug_track_module`, rebinds the
-- local `debug_track` to the loaded value so that later calls go straight to it, and forwards the current arguments.
local function debug_track(...)
	local loaded = require(debug_track_module)
	debug_track = loaded
	return loaded(...)
end
local function encode_entities(...)
local function encode_entities(...)
encode_entities = require(string_utilities_module).encode_entities
encode_entities = require(string_utilities_module).encode_entities
Line 174: Line 180:
-- If set to true, categories always appear, even in non-mainspace pages
-- If set to true, categories always appear, even in non-mainspace pages
local test_force_categories = false
local test_force_categories = false
-- Record that the current entry has some (typically undesirable) property. `track_id` names the property and becomes
-- part of the tracking page name: the page "headword/TRACK_ID" is passed to `debug_track`, which (per the existing
-- description of this function) links the page text to [[Wiktionary:Tracking/headword/TRACK_ID]]. All entries with the
-- property can then be listed at [[Special:WhatLinksHere/Wiktionary:Tracking/headword/TRACK_ID]].
--
-- If `lang` (a language object) is supplied, the per-language page "headword/TRACK_ID/CODE" is tracked as well, where
-- CODE is `lang:getCode()`; see [[Special:WhatLinksHere/Wiktionary:Tracking/headword/TRACK_ID/CODE]]. This allows
-- narrowing a tracked property down to a single language. If `lang` is an etymology-only language, a third page using
-- `lang:getFullCode()` (the code of its full parent) is tracked too.
--
-- Always returns true, so a call can be embedded in an `and`/`or` expression.
local function track(track_id, lang)
	local base_page = "headword/" .. track_id
	if not lang then
		-- No language: only the generic page is tracked, passed as a plain string.
		debug_track(base_page)
		return true
	end
	local is_etym_only = lang:hasType("etymology-only")
	local pages = {base_page, base_page .. "/" .. lang:getCode()}
	if is_etym_only then
		pages[3] = base_page .. "/" .. lang:getFullCode()
	end
	debug_track(pages)
	return true
end




Line 363: Line 392:
else
else
head_parts = concat(head_parts)
head_parts = concat(head_parts)
end
if has_manual_translits then
-- [[Special:WhatLinksHere/Wiktionary:Tracking/headword/manual-tr]]
-- [[Special:WhatLinksHere/Wiktionary:Tracking/headword/manual-tr/LANGCODE]]
track("manual-tr", data.lang)
end
end


Line 401: Line 436:
if not saw_translit_page and data.lang:hasType("etymology-only") then
if not saw_translit_page and data.lang:hasType("etymology-only") then
langname = data.lang:getFullName()
langname = data.lang:getFullName()
transliteration_page = new_title(langname .. " transliteration")
transliteration_page = new_title(langname .. " transliteration", "Wiktionary")


if transliteration_page and transliteration_page:getContent() then
if transliteration_page and transliteration_page:getContent() then
translits_formatted = " [[" .. langname .. " transliteration|•]]" .. translits_formatted
translits_formatted = " [[Wiktionary:" .. langname .. " transliteration|•]]" .. translits_formatted
end
end
end
end
Line 468: Line 503:
-- right into the 'data' table to disable inflection links of the entire headword
-- right into the 'data' table to disable inflection links of the entire headword
-- when inflected forms aren't entry-worthy, e.g.: in Vulgar Latin
-- when inflected forms aren't entry-worthy, e.g.: in Vulgar Latin
local nolinkinfl = part.face == "hypothetical" or part.nolinkinfl or data.nolinkinfl
local nolinkinfl = part.face == "hypothetical" or (part.nolink and track("nolink") or part.nolinkinfl) or (
data.nolink and track("nolink") or data.nolinkinfl)


local formatted
local formatted
Line 791: Line 827:
if get_current_L2() ~= canonical then
if get_current_L2() ~= canonical then
insert(lang_cats, canonical .. " entries with incorrect language header")
insert(lang_cats, canonical .. " entries with incorrect language header")
-- [[Special:WhatLinksHere/Wiktionary:Tracking/headword/incorrect language header]]
-- [[Special:WhatLinksHere/Wiktionary:Tracking/headword/incorrect language header/LANGCODE]]
track("incorrect language header", lang)
end
end
end
end
Line 888: Line 927:
local escaped_langname = pattern_escape(full_langname)
local escaped_langname = pattern_escape(full_langname)
local matches_lang_pattern = "^" .. escaped_langname .. " "
local matches_lang_pattern = "^" .. escaped_langname .. " "
for _, cat in ipairs(data.categories) do
-- Does the category begin with the language name? If not, tag it with a tracking category.
if not cat:find(matches_lang_pattern) then
-- [[Special:WhatLinksHere/Wiktionary:Tracking/headword/no lang category]]
-- [[Special:WhatLinksHere/Wiktionary:Tracking/headword/no lang category/LANGCODE]]
track("no lang category", data.lang)
end
end


-- If `pos_category` not given, try to infer it from the first specified category. If this doesn't work, we
-- If `pos_category` not given, try to infer it from the first specified category. If this doesn't work, we
Line 918: Line 965:
-- add an appropriate category.
-- add an appropriate category.
local postype = export.pos_lemma_or_nonlemma(data.pos_category)
local postype = export.pos_lemma_or_nonlemma(data.pos_category)
local main_cat = data.lang:getMainCategoryName()
if not postype then
if not postype then
-- We don't know what this category is, so tag it with a tracking category.
-- [[Special:WhatLinksHere/Wiktionary:Tracking/headword/unrecognized pos]]
-- [[Special:WhatLinksHere/Wiktionary:Tracking/headword/unrecognized pos/LANGCODE]]
track("unrecognized pos", data.lang)
-- [[Special:WhatLinksHere/Wiktionary:Tracking/headword/unrecognized pos/pos/POS]]
-- [[Special:WhatLinksHere/Wiktionary:Tracking/headword/unrecognized pos/pos/POS/LANGCODE]]
track("unrecognized pos/pos/" .. data.pos_category, data.lang)
elseif not data.noposcat then
elseif not data.noposcat then
if postype:match("^lemma") and main_cat ~= "lemma" then
postype = main_cat
end
insert(data.categories, 1, full_langname .. " " .. postype .. "s")
insert(data.categories, 1, full_langname .. " " .. postype .. "s")
end
end
Line 1,028: Line 1,078:
-- Track uses of sc parameter.
-- Track uses of sc parameter.
if head.sc:getCode() == auto_sc:getCode() then
if head.sc:getCode() == auto_sc:getCode() then
track("redundant script code", data.lang)
if not data.no_script_code_cat then
if not data.no_script_code_cat then
insert(data.categories, full_langname .. " terms with redundant script codes")
insert(data.categories, full_langname .. " terms with redundant script codes")
end
end
else
else
track("non-redundant manual script code", data.lang)
if not data.no_script_code_cat then
if not data.no_script_code_cat then
insert(data.categories, full_langname .. " terms with non-redundant manual script codes")
insert(data.categories, full_langname .. " terms with non-redundant manual script codes")
Line 1,167: Line 1,219:


------------ 9. Insert additional categories. ------------
------------ 9. Insert additional categories. ------------
if data.force_cat_output then
-- [[Special:WhatLinksHere/Wiktionary:Tracking/headword/force cat output]]
track("force cat output")
end


if has_redundant_head_param then
if has_redundant_head_param then
Line 1,430: Line 1,487:
and is_palindrome(page.pagename, data.lang, data.heads[1].sc) then
and is_palindrome(page.pagename, data.lang, data.heads[1].sc) then
insert(data.categories, full_langname .. " palindromes")
insert(data.categories, full_langname .. " palindromes")
end
if namespace == "" and not lang_reconstructed then
for _, head in ipairs(data.heads) do
if page.full_raw_pagename ~= get_link_page(remove_links(head.term), data.lang, head.sc) then
-- [[Special:WhatLinksHere/Wiktionary:Tracking/headword/pagename spelling mismatch]]
-- [[Special:WhatLinksHere/Wiktionary:Tracking/headword/pagename spelling mismatch/LANGCODE]]
track("pagename spelling mismatch", data.lang)
break
end
end
end
end


Line 1,440: Line 1,508:
-- Add to various maintenance categories.
-- Add to various maintenance categories.
export.maintenance_cats(page, data.lang, data.categories, data.whole_page_categories)
export.maintenance_cats(page, data.lang, data.categories, data.whole_page_categories)
if data.affix then
for _, aff in ipairs(data.affix) do
if mw.ustring.match(aff, "^%-[^-]*%-$") then
table.insert(data.categories, data.lang:getCanonicalName() .. " terms interfixed with " .. aff)
elseif mw.ustring.match(aff, "%-%s%-") then
table.insert(data.categories, data.lang:getCanonicalName() .. " terms circumfixed with " .. aff)
elseif mw.ustring.match(aff, "%-$") then
table.insert(data.categories, data.lang:getCanonicalName() .. " terms prefixed with " .. aff)
elseif mw.ustring.match(aff, "^%-") then
table.insert(data.categories, data.lang:getCanonicalName() .. " terms suffixed with " .. aff)
end
end
end


------------ 10. Format and return headwords, genders, inflections and categories. ------------
------------ 10. Format and return headwords, genders, inflections and categories. ------------