|
|
Line 6: |
Line 6: |
| local wordPunc = "-־׳״'.·*’་" | | local wordPunc = "-־׳״'.·*’་" |
| local notWordPunc = "[^" .. wordPunc .. "]+" | | local notWordPunc = "[^" .. wordPunc .. "]+" |
|
| |
-- Plural part-of-speech names whose entries are categorized as lemmas.
--
-- The names are listed as a sequence (so the list stays easy to read and to
-- iterate in order), and every name is additionally registered below as a
-- string key mapped to true so that membership tests of the form
-- isLemma[name] work.
--
-- NOTE(review): "equative adjectives" is also listed in isNonLemma; confirm
-- which classification is intended.
local isLemma = {
	"abbreviations",
	"acronyms",
	"adjectives",
	"adnominals",
	"adpositions",
	"adverbs",
	"affixes",
	"ambipositions",
	"articles",
	"circumfixes",
	"circumpositions",
	"classifiers",
	"cmavo",
	"cmavo clusters",
	"cmene",
	"combining forms",
	"conjunctions",
	"counters",
	"determiners",
	"diacritical marks",
	"equative adjectives",
	"fu'ivla",
	"gismu",
	"Han characters",
	"Han tu",
	"hanzi",
	"hanja",
	"ideophones",
	"idioms",
	"infixes",
	"interfixes",
	"initialisms",
	"interjections",
	"kanji",
	"letters",
	"ligatures",
	"lujvo",
	"morphemes",
	"non-constituents",
	"nouns",
	"numbers",
	"numeral symbols",
	"numerals",
	"particles",
	"phrases",
	"postpositions",
	"postpositional phrases",
	"predicatives",
	"prefixes",
	"prepositions",
	"prepositional phrases",
	"preverbs",
	"pronominal adverbs",
	"pronouns",
	"proverbs",
	"proper nouns",
	"punctuation marks",
	"relatives",
	"roots",
	"stems",
	"suffixes",
	"syllables",
	"symbols",
	"verbs",
}

-- A bare list only has the integer keys 1..n, so isLemma["nouns"] would be
-- nil. Index every name by itself as well; the sequence part is left intact,
-- and running this more than once is harmless.
for i = 1, #isLemma do
	isLemma[isLemma[i]] = true
end
| |
|
| |
-- Plural part-of-speech names whose entries are categorized as non-lemma
-- forms.
--
-- The names are listed as a sequence, and every name is additionally
-- registered below as a string key mapped to true so that membership tests of
-- the form isNonLemma[name] work.
--
-- NOTE(review): "equative adjectives" is also listed in isLemma; confirm
-- which classification is intended.
local isNonLemma = {
	"active participles",
	"adjectival participles",
	"adjective forms",
	"adjective feminine forms",
	"adjective plural forms",
	"adverb forms",
	"adverbial participles",
	"agent participles",
	"article forms",
	"circumfix forms",
	"combined forms",
	"comparative adjective forms",
	"comparative adjectives",
	"comparative adverb forms",
	"comparative adverbs",
	"contractions",
	"converbs",
	"determiner comparative forms",
	"determiner forms",
	"determiner superlative forms",
	"diminutive nouns",
	"equative adjective forms",
	"equative adjectives",
	"future participles",
	"gerunds",
	"infinitive forms",
	"infinitives",
	"interjection forms",
	"jyutping",
	"kanji readings",
	"misspellings",
	"negative participles",
	"nominal participles",
	"noun case forms",
	"noun dual forms",
	"noun forms",
	"noun plural forms",
	"noun possessive forms",
	"noun singulative forms",
	"numeral forms",
	"participles",
	"participle forms",
	"particle forms",
	"passive participles",
	"past active participles",
	"past participles",
	"past participle forms",
	"past passive participles",
	"perfect active participles",
	"perfect participles",
	"perfect passive participles",
	"pinyin",
	"plurals",
	"postposition forms",
	"prefix forms",
	"preposition contractions",
	"preposition forms",
	"prepositional pronouns",
	"present active participles",
	"present participles",
	"present passive participles",
	"pronoun forms",
	"pronoun possessive forms",
	"proper noun forms",
	"proper noun plural forms",
	"rafsi",
	"romanizations",
	"root forms",
	"singulatives",
	"suffix forms",
	"superlative adjective forms",
	"superlative adjectives",
	"superlative adverb forms",
	"superlative adverbs",
	"verb forms",
	"verbal nouns",
}

-- A bare list only has the integer keys 1..n, so isNonLemma["plurals"] would
-- be nil. Index every name by itself as well; the sequence part is left
-- intact, and running this more than once is harmless.
for i = 1, #isNonLemma do
	isNonLemma[isNonLemma[i]] = true
end
| |
|
| |
|
|
| |
|
Line 337: |
Line 191: |
| end | | end |
| data.heads[i] = head | | data.heads[i] = head |
| end
| |
| end
| |
|
| |
-- Classify a part-of-speech category name.
--
-- plpos is looked up first in isLemma and then in isNonLemma, so a name
-- present in both tables is reported as a lemma. The name must be given in
-- its plural form ("nouns", "prefixes", etc.); a singular name can be
-- pluralized beforehand with pluralize() from [[Module:string utilities]],
-- which knows when to add '-s' and when to add '-es'.
--
-- Returns "lemma", "non-lemma form", or nil when the name is in neither table.
--
-- NOTE(review): this function is declared as a global, not as a local or as
-- a field of an export table -- confirm that this is intentional.
function pos_lemma_or_nonlemma(plpos)
	-- Lemma categories take precedence.
	if isLemma[plpos] then
		return "lemma"
	end

	-- Otherwise it may be a known non-lemma category.
	if isNonLemma[plpos] then
		return "non-lemma form"
	end

	-- Unknown part of speech.
	return nil
end
Line 364: |
Line 201: |
| end | | end |
| | | |
| -- Is it a lemma category?
| |
| local postype = pos_lemma_or_nonlemma(data.pos_category)
| |
| if not data.noposcat then
| |
| table.insert(data.categories, 1, "[sS]iwa " .. postype .. "s")
| |
| end
| |
|
| |
| -- Preprocess | | -- Preprocess |
| preprocess(data, postype) | | preprocess(data, postype) |