Module:headword/data: Difference between revisions

← Older edit

@@ Line 23: / Line 23: @@
 	"pinyin",
 	"rafsi",
-	"romaji",
+}, function(_, item)
-}, function(item)
 	return item
 end)
 -- Irregular non-zero plurals AND any regular plurals where the singular ends in "s",
--- because the module assumes that inputs ending in "s" are plurals.
+-- because the module assumes that inputs ending in "s" are plurals. The singular and
-for k, v in next, {
+-- plural both need to be added, as the module will generate a default plural if
+-- the input doesn't match a key in this table.
+for sg, pl in next, {
 	mora = "morae"
 } do
-	irregular_plurals[k] = v
+	irregular_plurals[sg], irregular_plurals[pl] = pl, pl
-	irregular_plurals[v] = v -- Ensures singular and plural inputs work as expected.
 end
-data.invariable = irregular_plurals -- To be removed.
 data.irregular_plurals = irregular_plurals
@@ Line 150: / Line 149: @@
 	"nominal participles",
 	"noun case forms",
+	"noun construct forms",
 	"noun dual forms",
 	"noun forms",
@@ Line 162: / Line 162: @@
 	"passive participles",
 	"past active participles",
+	"past adverbial participles",
 	"past participles",
 	"past participle forms",
@@ Line 176: / Line 177: @@
 	"prepositional pronouns",
 	"present active participles",
+	"present adverbial participles",
 	"present participles",
 	"present passive participles",
+	"preverb forms",
 	"pronoun forms",
 	"pronoun possessive forms",
@@ Line 413: / Line 416: @@
 	"fi", -- Finnish; hyphen used to separate components in compound words if the final and initial vowels match, respectively
 	"hil", -- Hiligaynon; hyphens for mid-word glottal stops
+	"hnn", -- Hanunoo; too many false positives
 	"ilo", -- Ilocano; hyphens for mid-word glottal stops
 	"kne", -- Kankanaey; hyphens for mid-word glottal stops
@@ Line 437: / Line 441: @@
 	"bbc",
 	"bug",
+	"cdo",
 	"cia",
 	"cjm",
+	"cjy",
 	"cmn",
+	"cnp",
 	"cpi",
+	"cpx",
+	"csp",
+	"czh",
+	"czo",
+	"gan",
 	"hak",
+	"hnm",
+	"hsn",
 	"ja",
 	"kzg",
 	"lad",
+	"ltc",
+	"luh",
 	"lzh",
+	"mnp",
 	"ms",
 	"mul",
 	"mvi",
 	"nan",
+	"nan-dat",
 	"nan-hbl",
-	"nan-hnm",
+	"nan-hlh",
-	"nan-luh",
+	"nan-lnx",
 	"nan-tws",
+	"nan-zhe",
+	"nan-zsh",
+	"och",
 	"oj",
 	"okn",
@@ Line 460: / Line 481: @@
 	"ryu",
 	"sh",
+	"sjc",
 	"tgt",
 	"th",
@@ Line 467: / Line 489: @@
 	"und",
 	"vi",
+	"wuu",
 	"xug",
 	"yoi",
@@ Line 473: / Line 496: @@
 	"za",
 	"zh",
+	"zhx-sic",
+	"zhx-tai",
 }
@@ Line 656: / Line 681: @@
 	adv = "adverb",
 	art = "article",
+	aug = "augmentative",
 	det = "determiner",
-	cnum = "cardinal number",
+	dim = "diminutive",
+	compadj = "comparative adjective",
+	compadv = "comparative adverb",
 	conj = "conjunction",
+	contr = "contraction",
 	conv = "converb",
 	int = "interjection",
@@ Line 664: / Line 693: @@
 	intj = "interjection",
 	n = "noun",
+	-- the next two support Algonquian languages; see also vii/vai/vti/vta below
+	na = "animate noun",
+	ni = "inanimate noun",
 	num = "numeral",
 	part = "participle",
@@ Line 670: / Line 702: @@
 	pn = "proper noun",
 	postp = "postposition",
-	pre = "preposition",
+	pref = "prefix",
 	prep = "preposition",
-	pro = "pronoun",
+	prepphr = "prepositional phrase",
 	pron = "pronoun",
 	prop = "proper noun",
 	proper = "proper noun",
-	onum = "ordinal number",
+	propn = "proper noun",
+	rom = "romanization",
+	suf = "suffix",
+	supadj = "superlative adjective",
+	supadv = "superlative adverb",
+	sym = "symbol",
 	v = "verb",
 	vb = "verb",
 	vi = "intransitive verb",
 	vt = "transitive verb",
-	vti = "transitive and intransitive verb",
+	-- the next four support Algonquian languages
+	vii = "inanimate intransitive verb",
+	vai = "animate intransitive verb",
+	vti = "transitive inanimate verb",
+	vta = "transitive animate verb",
 }
@@ Line 692: / Line 733: @@
 	-- We include verbs because impf and pf are valid "genders".
 	["verbs"] = "verbs",
+}
+-- Lower limit for a "long" word in a particular language.
+-- Used to categorize terms into e.g. [[:Category:Long English words]] automatically.
+-- Languages with no mapping here do not get categorized.
+data.long_word_thresholds = {
+    ["af"] = 20,
+    ["bg"] = 20,
+    ["cy"] = 25,
+    ["de"] = 20,
+    ["en"] = 25,
+    ["es"] = 20,
+    ["fr"] = 20,
+    ["ka"] = 20,
+    ["sv"] = 20,
+    ["tl"] = 25,
 }