45,660
edits
No edit summary |
No edit summary |
||
Line 1: | Line 1: | ||
local export = {} | local export = {} | ||
-- Lua pattern (not a regex) matching one or more consecutive whitespace or
-- punctuation characters.
local spacingPunctuation = "[%s%p]+"
--[[ List of punctuation or spacing characters that are found inside of words.
	 Used to exclude characters from the pattern above. ]]
local wordPunc = "-־׳״'.·*’་"
-- Lua pattern matching one or more characters that are NOT in wordPunc. The
-- leading "-" of wordPunc sits directly after "^" in the set, so it is taken
-- literally rather than as a range operator.
local notWordPunc = "[^" .. wordPunc .. "]+"
-- Plural part-of-speech names that are treated as lemma categories.
-- This is a plain list (keys 1..n, values are the names); it has no string
-- keys, so membership must be tested by scanning, not by `isLemma[name]`.
-- NOTE(review): "equative adjectives" is also listed in isNonLemma — confirm
-- which list it belongs to.
local isLemma = {
	"abbreviations",
	"acronyms",
	"adjectives",
	"adnominals",
	"adpositions",
	"adverbs",
	"affixes",
	"ambipositions",
	"articles",
	"circumfixes",
	"circumpositions",
	"classifiers",
	"cmavo",
	"cmavo clusters",
	"cmene",
	"combining forms",
	"conjunctions",
	"counters",
	"determiners",
	"diacritical marks",
	"equative adjectives",
	"fu'ivla",
	"gismu",
	"Han characters",
	"Han tu",
	"hanzi",
	"hanja",
	"ideophones",
	"idioms",
	"infixes",
	"interfixes",
	"initialisms",
	"interjections",
	"kanji",
	"letters",
	"ligatures",
	"lujvo",
	"morphemes",
	"non-constituents",
	"nouns",
	"numbers",
	"numeral symbols",
	"numerals",
	"particles",
	"phrases",
	"postpositions",
	"postpositional phrases",
	"predicatives",
	"prefixes",
	"prepositions",
	"prepositional phrases",
	"preverbs",
	"pronominal adverbs",
	"pronouns",
	"proverbs",
	"proper nouns",
	"punctuation marks",
	"relatives",
	"roots",
	"stems",
	"suffixes",
	"syllables",
	"symbols",
	"verbs",
}
-- Plural part-of-speech names that are treated as non-lemma-form categories.
-- This is a plain list (keys 1..n, values are the names); it has no string
-- keys, so membership must be tested by scanning, not by `isNonLemma[name]`.
-- NOTE(review): "equative adjectives" is also listed in isLemma — confirm
-- which list it belongs to.
local isNonLemma = {
	"active participles",
	"adjectival participles",
	"adjective forms",
	"adjective feminine forms",
	"adjective plural forms",
	"adverb forms",
	"adverbial participles",
	"agent participles",
	"article forms",
	"circumfix forms",
	"combined forms",
	"comparative adjective forms",
	"comparative adjectives",
	"comparative adverb forms",
	"comparative adverbs",
	"contractions",
	"converbs",
	"determiner comparative forms",
	"determiner forms",
	"determiner superlative forms",
	"diminutive nouns",
	"equative adjective forms",
	"equative adjectives",
	"future participles",
	"gerunds",
	"infinitive forms",
	"infinitives",
	"interjection forms",
	"jyutping",
	"kanji readings",
	"misspellings",
	"negative participles",
	"nominal participles",
	"noun case forms",
	"noun dual forms",
	"noun forms",
	"noun plural forms",
	"noun possessive forms",
	"noun singulative forms",
	"numeral forms",
	"participles",
	"participle forms",
	"particle forms",
	"passive participles",
	"past active participles",
	"past participles",
	"past participle forms",
	"past passive participles",
	"perfect active participles",
	"perfect participles",
	"perfect passive participles",
	"pinyin",
	"plurals",
	"postposition forms",
	"prefix forms",
	"preposition contractions",
	"preposition forms",
	"prepositional pronouns",
	"present active participles",
	"present participles",
	"present passive participles",
	"pronoun forms",
	"pronoun possessive forms",
	"proper noun forms",
	"proper noun plural forms",
	"rafsi",
	"romanizations",
	"root forms",
	"singulatives",
	"suffix forms",
	"superlative adjective forms",
	"superlative adjectives",
	"superlative adverb forms",
	"superlative adverbs",
	"verb forms",
	"verbal nouns",
}
-- The main entry point. | -- The main entry point. | ||
Line 16: | Line 169: | ||
local postype = args["type"]; if postype == "" then postype = nil end | local postype = args["type"]; if postype == "" then postype = nil end | ||
local data = { | local data = {pos_category = (postype and postype .. " " or "") .. poscat, categories = {}, heads = {head}, genders = {}, inflections = {}} | ||
if poscat == "adjectives" then | if poscat == "adjectives" then | ||
Line 114: | Line 267: | ||
end | end | ||
function | |||
-- Join the headword forms in `data.heads` into one display string.
--
-- @param data  table with a `heads` list of strings (wikitext; may already
--              contain [[...]] links).
-- @return the heads joined with " <i>or</i> ".
--
-- The previous version replaced every head containing "[[" with a table
-- `{term = head, lang = data.lang}` and then handed the list to table.concat,
-- which raises an error on non-string/non-number elements. Heads are now left
-- as the strings they already are, so already-linked heads pass through
-- unchanged.
local function format_headword(data)
	return table.concat(data.heads, " <i>or</i> ")
end
local | |||
-- Add links to a multiword head.
--
-- Wraps `head` in a "[[Contionary:...]]" link and splits that link at every
-- run of spacing/punctuation, keeping characters listed in wordPunc inside
-- the linked word.
--
-- @param head  string, the unlinked headword text.
-- @return the head with each word individually linked.
function export.add_multiword_links(head)
	local link_open = "[[Contionary:"
	local link_close = "]]"

	-- Within one spacing/punctuation run, only the characters that are not
	-- word-internal punctuation close the current link and open the next.
	local function workaround_to_exclude_chars(s)
		-- Parentheses drop gsub's second return value (the match count).
		return (mw.ustring.gsub(s, notWordPunc, link_close .. "%1" .. link_open))
	end

	head = link_open
		.. mw.ustring.gsub(head, spacingPunctuation, workaround_to_exclude_chars)
		.. link_close

	-- Leading or trailing punctuation leaves an empty link behind. Because
	-- every link carries the "Contionary:" prefix, the empty form is
	-- "[[Contionary:]]"; stripping only "[[]]" (as before) never matched it.
	head = mw.ustring.gsub(head, "%[%[Contionary:%]%]", "")
	head = mw.ustring.gsub(head, "%[%[%]%]", "")
	return head
end
-- Report whether `head` counts as multiword under the algorithm used in
-- full_headword(): it does when at least one run of spacing/punctuation in it
-- contains a character that is not word-internal punctuation.
--
-- @param head  string to inspect.
-- @return true if such a run exists, false otherwise.
function export.head_is_multiword(head)
	for separator in mw.ustring.gmatch(head, spacingPunctuation) do
		local breaks_word = mw.ustring.find(separator, notWordPunc)
		if breaks_word then
			return true
		end
	end
	return false
end
-- Normalize `data.heads` in place.
--
-- A non-table `heads` value is wrapped in a list, an empty list becomes
-- {""}, and every empty-string head is replaced by the current page title
-- (with per-word links added when the title is multiword).
--
-- @param data     headword data table; `data.heads` is read and rewritten.
-- @param postype  accepted for the caller's benefit; not used here.
local function preprocess(data, postype)
	local heads = data.heads
	if type(heads) ~= "table" then
		heads = { heads }
	end
	if #heads == 0 then
		heads = { "" }
	end
	data.heads = heads

	-- The default head is the page title, linked word by word if multiword.
	local page_name = mw.title.getCurrentTitle().text
	local fallback = page_name
	if export.head_is_multiword(page_name) then
		fallback = export.add_multiword_links(page_name)
	end

	-- An empty head means "use the default".
	for idx, value in ipairs(heads) do
		if value == "" then
			heads[idx] = fallback
		end
	end
end
-- Return "lemma" if the given POS is a lemma, "non-lemma form" if a non-lemma form, or nil
-- if unknown. The POS passed in must be in its plural form ("nouns", "prefixes", etc.).
-- If you have a POS in its singular form, call pluralize() in [[Module:string utilities]] to
-- pluralize it in a smart fashion that knows when to add '-s' and when to add '-es'.
--
-- A leading "reconstructed " is ignored when checking the lemma list.
--
-- If `best_guess` is given and the POS is in neither the lemma nor non-lemma list, guess
-- based on whether it ends in " forms"; otherwise, return nil.
function pos_lemma_or_nonlemma(plpos, best_guess)
	-- isLemma and isNonLemma are written as plain lists, so indexing them by
	-- name (`isLemma[plpos]`) is always nil and no POS was ever recognized.
	-- Accept a set-style key if one exists, otherwise scan the list entries.
	local function contains(list, pos)
		if list[pos] then
			return true
		end
		for _, entry in ipairs(list) do
			if entry == pos then
				return true
			end
		end
		return false
	end

	-- Parentheses drop gsub's second return value (the substitution count).
	local unreconstructed = (plpos:gsub("^reconstructed ", ""))

	-- Is it a lemma category?
	if contains(isLemma, plpos) or contains(isLemma, unreconstructed) then
		return "lemma"
	-- Is it a nonlemma category?
	elseif contains(isNonLemma, plpos) then
		return "non-lemma form"
	elseif best_guess then
		return plpos:find(" forms$") and "non-lemma form" or "lemma"
	else
		return nil
	end
end
-- Build the full headword line for `data`: adds the lemma / non-lemma
-- category, normalizes the heads, and concatenates the formatted headword,
-- genders, inflections and categories.
--
-- @param data  headword data table; reads pos_category, noposcat, categories,
--              lang, sort_key, force_cat_output and sc, and modifies
--              categories and heads.
-- @return the concatenated wikitext string.
local function show_headword_line(data)
	-- Is it a lemma category?
	local postype = pos_lemma_or_nonlemma(data.pos_category)

	-- pos_lemma_or_nonlemma returns nil for an unrecognized POS; concatenating
	-- that nil used to raise an error, so the category is now simply skipped.
	if postype and not data.noposcat then
		-- NOTE(review): "[sS]iwa " reads like a Lua pattern pasted where a plain
		-- language name was meant; the category text is kept as-is — confirm.
		table.insert(data.categories, 1, "[sS]iwa " .. postype .. "s")
	end

	-- Preprocess
	preprocess(data, postype)

	-- Format and return all the gathered information
	-- NOTE(review): tracking_categories and test_force_categories are not
	-- declared anywhere in this function — confirm they exist at file level.
	return
		format_headword(data) ..
		format_genders(data) ..
		format_inflections(data) ..
		require("Module:utilities").format_categories(
			tracking_categories, data.lang, data.sort_key, nil,
			data.force_cat_output or test_force_categories, data.sc
		)
end
function full_headword(data) | |||
local tracking_categories = {} | |||
-- Were any categories specified? | -- Were any categories specified? | ||
if data.categories and #data.categories > 0 then | if data.categories and #data.categories > 0 then | ||
if not data.pos_category | if not data.pos_category | ||
and mw.ustring.find(data.categories[1], "^" | and mw.ustring.find(data.categories[1], "^[sS]iwa") | ||
then | then | ||
data.pos_category = mw.ustring.gsub(data.categories[1], "^ | data.pos_category = mw.ustring.gsub(data.categories[1], "^[sS]iwa ", "") | ||
table.remove(data.categories, 1) | table.remove(data.categories, 1) | ||
end | end | ||
Line 225: | Line 412: | ||
end | end | ||
-- This may add more categories (e.g. gender categories), so make sure it gets | -- This may add more categories (e.g. gender categories), so make sure it gets | ||
-- evaluated first. | -- evaluated first. | ||
Line 257: | Line 418: | ||
text .. | text .. | ||
require("Module:utilities").format_categories( | require("Module:utilities").format_categories( | ||
data.categories | data.categories, nil, | ||
data.force_cat_output | data.force_cat_output | ||
) .. | ) .. | ||
require("Module:utilities").format_categories( | require("Module:utilities").format_categories( | ||
tracking_categories | tracking_categories, nil, | ||
data.force_cat_output | data.force_cat_output | ||
) | ) | ||
end | end | ||
return export | return export |