Module:headword: Difference between revisions

No edit summary
No edit summary
 
(5 intermediate revisions by the same user not shown)
Line 620: Line 620:
local title = mw.title.new(stripped_physical_term)
local title = mw.title.new(stripped_physical_term)
if title and not title:getContent() then
if title and not title:getContent() then
insert(data.categories, data.lang:getFullName() .. " " .. plpos .. " with red links in their headword lines")
return true
return true
end
end
Line 789: Line 788:
-- that.
-- that.
if tbl == true then
if tbl == true then
if page.raw_defaultsort ~= sortkey then
insert(lang_cats, lang:getFullName() .. " terms with non-redundant non-automated sortkeys")
end
return
return
end
end
Line 801: Line 797:
different = true
different = true
end
end
end
if redundant then
insert(lang_cats, lang:getFullName() .. " terms with redundant sortkeys")
end
if different then
insert(lang_cats, lang:getFullName() .. " terms with non-redundant non-automated sortkeys")
end
end
return sortkey
return sortkey
Line 818: Line 808:
if tbl then
if tbl then
sortkey = handle_raw_sortkeys(tbl, sortkey, page, lang, lang_cats)
sortkey = handle_raw_sortkeys(tbl, sortkey, page, lang, lang_cats)
insert(lang_cats, canonical .. " entries with topic categories using raw markup")
end
end
tbl = page.wikitext_langname_cat[canonical]
tbl = page.wikitext_langname_cat[canonical]
if tbl then
if tbl then
handle_raw_sortkeys(tbl, sortkey, page, lang, lang_cats)
handle_raw_sortkeys(tbl, sortkey, page, lang, lang_cats)
insert(lang_cats, canonical .. " entries with language name categories using raw markup")
end
if get_current_L2() ~= canonical then
insert(lang_cats, canonical .. " entries with incorrect language header")
-- [[Special:WhatLinksHere/Wiktionary:Tracking/headword/incorrect language header]]
-- [[Special:WhatLinksHere/Wiktionary:Tracking/headword/incorrect language header/LANGCODE]]
track("incorrect language header", lang)
end
end
end
end
Line 965: Line 947:
-- add an appropriate category.
-- add an appropriate category.
local postype = export.pos_lemma_or_nonlemma(data.pos_category)
local postype = export.pos_lemma_or_nonlemma(data.pos_category)
if not postype then
    local main_cat = data.lang:getMainCategoryName()
-- We don't know what this category is, so tag it with a tracking category.
    if not postype then
-- [[Special:WhatLinksHere/Wiktionary:Tracking/headword/unrecognized pos]]
    elseif not data.noposcat then
-- [[Special:WhatLinksHere/Wiktionary:Tracking/headword/unrecognized pos/LANGCODE]]
        if postype:match("^lemma") and main_cat ~= "lemma" then
track("unrecognized pos", data.lang)
            postype = main_cat
-- [[Special:WhatLinksHere/Wiktionary:Tracking/headword/unrecognized pos/POS]]
        end
-- [[Special:WhatLinksHere/Wiktionary:Tracking/headword/unrecognized pos/POS/LANGCODE]]
        insert(data.categories, 1, full_langname .. " " .. postype .. "s")
track("unrecognized pos/pos/" .. data.pos_category, data.lang)
    end
elseif not data.noposcat then
   
insert(data.categories, 1, full_langname .. " " .. postype .. "s")
    insert(data.categories, 1, "Contionary")
end


-- EXPERIMENTAL: see [[Wiktionary:Beer parlour/2024/June#Decluttering the altform mess]]
-- EXPERIMENTAL: see [[Wiktionary:Beer parlour/2024/June#Decluttering the altform mess]]
Line 1,011: Line 992:
end
end


if is_reconstructed then
if is_reconstructed and not data.lang:hasType("conlang") then
default_head = "*" .. default_head
default_head = "*" .. default_head
end
end
Line 1,064: Line 1,045:


local auto_sc = data.lang:findBestScript(head.term)
local auto_sc = data.lang:findBestScript(head.term)
if (
auto_sc:getCode() == "None" and
find_best_script_without_lang(head.term):getCode() ~= "None"
) then
insert(data.categories, full_langname .. " terms in nonstandard scripts")
end
if not (head.sc or data.sc) then -- No script code given, so use autodetected script.
if not (head.sc or data.sc) then -- No script code given, so use autodetected script.
head.sc = auto_sc
head.sc = auto_sc
Line 1,075: Line 1,050:
if not head.sc then -- Overall script code given.
if not head.sc then -- Overall script code given.
head.sc = data.sc
head.sc = data.sc
end
-- Track uses of sc parameter.
if head.sc:getCode() == auto_sc:getCode() then
track("redundant script code", data.lang)
if not data.no_script_code_cat then
insert(data.categories, full_langname .. " terms with redundant script codes")
end
else
track("non-redundant manual script code", data.lang)
if not data.no_script_code_cat then
insert(data.categories, full_langname .. " terms with non-redundant manual script codes")
end
end
end
end
end
Line 1,124: Line 1,087:
if automated_tr then
if automated_tr then
local manual_tr = head.tr
local manual_tr = head.tr
if manual_tr then
if remove_links(manual_tr) == remove_links(automated_tr) then
insert(data.categories, full_langname .. " terms with redundant transliterations")
else
insert(data.categories, full_langname .. " terms with non-redundant manual transliterations")
end
end


if not manual_tr then
if not manual_tr then
Line 1,470: Line 1,425:
insert(data.categories, full_langname .. " terms spelled with " .. character)
insert(data.categories, full_langname .. " terms spelled with " .. character)
end
end
end
end
if data.heads[1].sc:isSystem("alphabet") then
local pagename, i = page.pagename:ulower(), 2
while umatch(pagename, "(%a)" .. ("%1"):rep(i)) do
i = i + 1
insert(data.categories, full_langname .. " terms with " .. i .. " consecutive instances of the same letter")
end
end
end
end
Line 1,508: Line 1,455:
-- Add to various maintenance categories.
-- Add to various maintenance categories.
export.maintenance_cats(page, data.lang, data.categories, data.whole_page_categories)
export.maintenance_cats(page, data.lang, data.categories, data.whole_page_categories)
if data.affix then
for _, aff in ipairs(data.affix) do
if mw.ustring.match(aff, "^%-[^-]*%-$") then
table.insert(data.categories, data.lang:getCanonicalName() .. " terms interfixed with " .. aff)
elseif mw.ustring.match(aff, "%-%s%-") then
table.insert(data.categories, data.lang:getCanonicalName() .. " terms circumfixed with " .. aff)
elseif mw.ustring.match(aff, "%-$") then
table.insert(data.categories, data.lang:getCanonicalName() .. " terms prefixed with " .. aff)
elseif mw.ustring.match(aff, "^%-") then
table.insert(data.categories, data.lang:getCanonicalName() .. " terms suffixed with " .. aff)
end
end
end


------------ 10. Format and return headwords, genders, inflections and categories. ------------
------------ 10. Format and return headwords, genders, inflections and categories. ------------