Module:gender and number: Difference between revisions

Created page
 
No edit summary
 
(27 intermediate revisions by 3 users not shown)
Line 1: Line 1:
--[=[
local export = {}
This module creates standardised displays for gender and number.
 
It converts a gender specification into Wiki/HTML format.
local debug_track_module = "Module:debug/track"
local load_module = "Module:load"
A gender specification is a list of one of the elements listed below,
local pron_qualifier_module = "Module:pron qualifier"
separated by hyphens. Examples are: "c", "n", "f-p", "m-an-p"
local parameters_module = "Module:parameters"
]=]--
local string_utilities_module = "Module:string utilities"
local table_module = "Module:table"
local utilities_module = "Module:utilities"
 
local concat = table.concat
local insert = table.insert
 
-- Lazy loader for the module named by `debug_track_module`.
-- The first call requires that module, rebinds the local `debug_track` to
-- whatever the module returns, and forwards all arguments to it; later calls
-- therefore go straight to the loaded value without passing through this stub.
local function debug_track(...)
	local track = require(debug_track_module)
	debug_track = track
	return track(...)
end
 
-- Lazy loader for the `deepCopy` function exported by the module named by
-- `table_module`. On first use the stub replaces itself with that function
-- (by rebinding the local `deep_copy`) and then forwards the call unchanged.
local function deep_copy(...)
	local impl = require(table_module).deepCopy
	deep_copy = impl
	return impl(...)
end


local export = {}
-- Lazy loader for `format_categories` from the module named by
-- `utilities_module`. The module is only required when this is first called;
-- the local is then rebound so that subsequent calls bypass this stub.
local function format_categories(...)
	local impl = require(utilities_module).format_categories
	format_categories = impl
	return impl(...)
end


local codes = {}
-- Lazy loader for `format_qualifiers` from the module named by
-- `pron_qualifier_module` (note the local name differs from the exported
-- field name). The first call requires the module, rebinds this local to the
-- real function, and passes every argument through.
local function format_pron_qualifiers(...)
	local impl = require(pron_qualifier_module).format_qualifiers
	format_pron_qualifiers = impl
	return impl(...)
end


-- A list of all possible "parts" that a specification can be made out of.
-- Lazy loader for `load_data` from the module named by `load_module`.
-- Defers the `require` until the first call, then rebinds the local
-- `load_data` to the loaded function and forwards the arguments to it.
local function load_data(...)
	local impl = require(load_module).load_data
	load_data = impl
	return impl(...)
end


codes["?"] = {type = "other", display = '<abbr title="gender incomplete">?</abbr>'}
-- Lazy loader for `process` from the module named by `parameters_module`.
-- The stub requires the module on first use, swaps itself out by rebinding
-- the local `process_params`, and returns whatever the real function returns.
local function process_params(...)
	local impl = require(parameters_module).process
	process_params = impl
	return impl(...)
end


-- Genders
local function split(...)
codes["m"] = {type = "gender", display = '<abbr title="masculine gender">m</abbr>'}
split = require(string_utilities_module).split
codes["f"] = {type = "gender", display = '<abbr title="feminine gender">f</abbr>'}
return split(...)
codes["n"] = {type = "gender", display = '<abbr title="neuter gender">n</abbr>'}
end
codes["c"] = {type = "gender", display = '<abbr title="common gender">c</abbr>'}


-- Animacy
local gender_and_number_data
codes["an"] = {type = "animacy", display = '<abbr title="animate">anim</abbr>'}
local function get_gender_and_number_data()
codes["in"] = {type = "animacy", display = '<abbr title="inanimate">inan</abbr>'}
gender_and_number_data, get_gender_and_number_data = load_data("Module:gender and number/data"), nil
return gender_and_number_data
end


-- Personal
codes["pr"] = {type = "personal", display = '<abbr title="personal">pers</abbr>'}
codes["np"] = {type = "personal", display = '<abbr title="non-personal">npers</abbr>'}


-- Numbers
--[==[ intro:
codes["s"] = {type = "number", display = '<abbr title="singular number">sg</abbr>'}
This module creates standardised displays for gender and number. It converts a gender specification into Wiki/HTML format.
codes["d"] = {type = "number", display = '<abbr title="dual number">du</abbr>'}
codes["p"] = {type = "number", display = '<abbr title="plural number">pl</abbr>'}


-- Verb qualifiers
A gender/number specification consists of one or more gender/number elements, separated by hyphens. Examples are:
codes["impf"] = {type = "perfectivity", display = '<abbr title="imperfective aspect">impf</abbr>'}
{"n"} (neuter gender), {"f-p"} (feminine plural), {"m-an-p"} (masculine animate plural),
codes["pf"] = {type = "perfectivity", display = '<abbr title="perfective aspect">pf</abbr>'}
{"pf"} (perfective aspect). Each gender/number element has the following properties:
# A code, as used in the spec, e.g. {"f"} for feminine, {"p"} for plural.
# A type, e.g. `gender`, `number` or `animacy`. Each element in a given spec must be of a different type.
# A display form, which in turn consists of a display code and a tooltip gloss. The display code
  may not be the same as the spec code, e.g. the spec code {"an"} has display code {"anim"} and tooltip
  gloss ''animate''.
# A category into which lemmas of the right part of speech are placed if they have a gender/number
  spec containing the given element. For example, a noun with gender/number spec {"m-an-p"} is placed
  into the categories `<var>lang</var> masculine nouns`, `<var>lang</var> animate nouns` and `<var>lang</var> pluralia tantum`.
]==]


-- Version of format_list that can be invoked from a template.
--[==[
Version of format_list that can be invoked from a template.
]==]
function export.show_list(frame)
function export.show_list(frame)
local args = frame.args
local params = {
local lang = args["lang"]; if lang == "" then lang = nil end
[1] = {list = true},
local list = {}
["lang"] = {type = "language"},
local i = 1
}
local iargs = process_params(frame.args, params)
while args[i] and args[i] ~= "" do
return export.format_list(iargs[1], iargs.lang)
table.insert(list, args[i])
end
i = i + 1
 
 
--[==[
Older entry point; equivalent to format_genders() except that it formats the
categories and returns them appended to the formatted gender text rather than
returning the formatted text and categories separately.
]==]
function export.format_list(specs, lang, pos_for_cat, sort_key)
debug_track("gender and number/old-format-list")
local text, cats = export.format_genders(specs, lang, pos_for_cat)
if not cats then
return text
end
end
return text .. format_categories(cats, lang, sort_key)
return export.format_list(list, lang)
end
end


-- Format one or more gender specifications, in the form of a table of specifications.
 
function export.format_list(list, lang)
local function autoadd_abbr(display)
local is_nounclass = nil
if not display then
error("Internal error: '.display' for gender/number code is missing")
-- Iterate over each specification and format it
end
for key, spec in ipairs(list) do
if display:find("<abbr", nil, true) then
local nc
return display
list[key], nc = export.format_specification(spec, lang)
end
return ('%s'):format(display, display)
-- Ensure that the specifications are either all noun classes, or none are.
end
if is_nounclass == nil then
 
is_nounclass = nc
 
elseif is_nounclass ~= nc then
-- Add qualifiers, labels and references to a formatted gender/number spec. `spec` is the object describing the
error("Noun classes and genders cannot be mixed. Please use either one or the other.")
-- gender/number spec, which should optionally contain:
-- * left qualifiers in `q` or (for compatibility) `qualifiers`, an array of strings;
-- * right qualifiers in `qq`, an array of strings;
-- * left labels in `l`, an array of strings;
-- * right labels in `ll`, an array of strings;
-- * references in `refs`, an array either of strings (formatted reference text) or objects containing fields `text`
--  (formatted reference text) and optionally `name` and/or `group`;
-- `formatted` is the formatted version of the term itself, and `lang` is the optional language object passed into
-- format_genders().
local function add_qualifiers_and_refs(formatted, spec, lang)
local function field_non_empty(field)
local list = spec[field]
if not list then
return nil
end
if type(list) ~= "table" then
error(("Internal error: Wrong type for `spec.%s`=%s, should be \"table\""):format(
field, mw.dumpObject(list)))
end
end
return list[1]
end
end
 
if is_nounclass then
if field_non_empty("q") or field_non_empty("qq") or field_non_empty("l") or field_non_empty("ll") or
-- Add the processed codes together with slashes
field_non_empty("qualifiers") or field_non_empty("refs") then
return "<span class=\"gender\">class " .. table.concat(list, "/") .. "</span>"
formatted = format_pron_qualifiers{
else
lang = lang,
-- Add the processed codes together with commas
text = formatted,
return "<span class=\"gender\">" .. table.concat(list, " or ") .. "</span>"
q = spec.q,
qq = spec.qq,
qualifiers = spec.qualifiers,
l = spec.l,
ll = spec.ll,
refs = spec.refs,
}
end
end
return formatted
end
end


-- Format the sub-parts of a single gender specification.
 
function export.format_specification(spec, lang)
--[==[
local categories = ""
Format one or more gender/number specifications. Each spec is either a string, e.g. {"f-p"}, or a table of the form
local ret = ""
{ {spec = "SPEC", qualifiers = {"QUALIFIER", "QUALIFIER", ...}}} where `.spec` is a gender/number spec such as {"f-p"}
local is_nounclass = false
and `.qualifiers` is a list of qualifiers to display before the formatted gender/number spec. `.spec` must be present
but `.qualifiers` may be omitted.
-- If the specification starts with cX, then it is a noun class specification.
 
if spec:find("^[1-9]") or spec:find("^c[^-]") then
The function returns two values:
is_nounclass = true
# the formatted text;
code = spec:gsub("^c", "")
# a list of the categories to add.
 
if code == "?" then
If `lang` (which should be a language object) and `pos_for_cat` (which should be a plural part of speech) are given,
ret = "<abbr class=\"noun-class\" title=\"noun class missing\">?</abbr>"
gender categories such as `German masculine nouns` or `Russian imperfective verbs` are added to the categories, and
else
request categories such as `Requests for gender in <var>lang</var> entries` or
ret = "<abbr class=\"noun-class\" title=\"noun class " .. code .. "\">" .. code .. "</abbr>"
`Requests for animacy in <var>lang</var> entries` may also be added. Otherwise, if only `lang` is given, only request
end
categories may be returned. If both are omitted, the returned list is empty.
else
]==]
function export.format_genders(specs, lang, pos_for_cat)
local formatted_specs, categories, seen_types = {}
local all_is_nounclass = nil
local full_langname = lang and lang:getFullName() or nil
 
local function do_gender_spec(spec, parts)
local types = {}
local types = {}
local codes = (gender_and_number_data or get_gender_and_number_data()).codes
-- Split the parts and iterate over each part, converting it into its display form
 
local parts = mw.text.split(spec, "-")
for key, code in ipairs(parts) do
for key, code in ipairs(parts) do
-- Is this code valid?
-- Is this code valid?
if not codes[code] then
if not codes[code] then
error("The gender specification \"" .. spec .. "\" is not valid.")
error('The tag "' .. code .. '" in the gender specification "' .. spec.spec .. '" is not valid. See [[Module:gender and number]] for a list of valid tags.')
end
end
if codes[code].type ~= "other" and types[codes[code].type] then
-- Check for multiple genders/numbers/animacies in a single spec.
--require("Module:debug").track("gender and number/multiple")
local typ = codes[code].type
--require("Module:debug").track("gender and number/multiple/" .. spec)
if typ ~= "other" and types[typ] then
error("The gender specification \"" .. spec .. "\" contains multiple tags of type \"" .. codes[code].type .. "\".")
error('The gender specification "' .. spec.spec .. '" contains multiple tags of type "' .. typ .. '".')
end
end
types[typ] = true
parts[key] = codes[code].display
parts[key] = autoadd_abbr(codes[code].display)
types[codes[code].type] = true
-- Generate categories if called for.
if lang and pos_for_cat then
local cat = codes[code].cat
if cat then
if not categories then
categories = {}
end
insert(categories, full_langname .. " " .. cat)
end
if not seen_types then
seen_types = {}
elseif seen_types[typ] and seen_types[typ] ~= code then
cat = (gender_and_number_data or get_gender_and_number_data()).multicode_cats[typ]
if cat then
if not categories then
categories = {}
end
insert(categories, full_langname .. " " .. cat)
end
end
seen_types[typ] = code
end
if lang and codes[code].req then
local type_for_req = typ
if code == "?" then
-- Keep in mind `pos_for_cat` may be nil here.
type_for_req = pos_for_cat == "verbs" and "aspect" or "gender"
end
if not categories then
categories = {}
end
insert(categories, "Requests for " .. type_for_req .. " in " .. full_langname .. " entries")
end
end
end
 
-- Add the processed codes together with non-breaking spaces
-- Add the processed codes together with non-breaking spaces
ret = table.concat(parts, "&nbsp;")
if not parts[2] and parts[1] then
return parts[1]
else
return concat(parts, "&nbsp;")
end
end
 
for _, spec in ipairs(specs) do
if type(spec) ~= "table" then
spec = {spec = spec}
end
local spec_spec, is_nounclass = spec.spec
-- If the specification starts with cX, then it is a noun class specification.
if spec_spec:match("^%d") or spec_spec:match("^c[^-]") then
is_nounclass = true
local code = spec_spec:gsub("^c", "")
local text
if code == "?" then
text = '<abbr class="noun-class" title="noun class missing">?</abbr>'
if lang then
if not categories then
categories = {}
end
insert(categories, "Requests for noun class in " .. full_langname .. " entries")
end
else
text = '<abbr class="noun-class" title="noun class ' .. code .. '">' .. code .. "</abbr>"
if lang and pos_for_cat then
if not categories then
categories = {}
end
insert(categories, full_langname .. " class " .. code .. " POS")
end
end
local text_with_qual = add_qualifiers_and_refs(text, spec, lang)
insert(formatted_specs, text_with_qual)
else
-- Split the parts and iterate over each part, converting it into its display form
local parts = split(spec.spec, "-", true, true)
local combined_codes = (gender_and_number_data or get_gender_and_number_data()).combinations
 
if lang then
-- Check if the specification is valid
--elseif langinfo.genders then
-- local valid_genders = {}
-- for _, g in ipairs(langinfo.genders) do valid_genders[g] = true end
--
-- if not valid_genders[spec.spec] then
-- local valid_string = {}
-- for i, g in ipairs(langinfo.genders) do valid_string[i] = g end
-- error('The gender specification "' .. spec.spec .. '" is not valid for ' .. langinfo.names[1] .. ". Valid are: " .. concat(valid_string, ", "))
-- end
--end
end
 
local has_combined = false
for _, code in ipairs(parts) do
if combined_codes[code] then
has_combined = true
break
end
end
 
if not has_combined then
if formatted_specs[1] then
insert(formatted_specs, "or")
end
insert(formatted_specs, add_qualifiers_and_refs(do_gender_spec(spec, parts), spec, lang))
else
-- This logic is to handle combined gender specs like 'mf' and 'mfbysense'.
local all_parts = {{}}
local extra_displays
local this_formatted_specs = {}
 
for _, code in ipairs(parts) do
if combined_codes[code] then
local new_all_parts = {}
for _, one_parts in ipairs(all_parts) do
for _, one_code in ipairs(combined_codes[code].codes) do
local new_combined_parts = deep_copy(one_parts)
insert(new_combined_parts, one_code)
insert(new_all_parts, new_combined_parts)
end
end
all_parts = new_all_parts
if lang and pos_for_cat then
local extra_cat = combined_codes[code].cat
if extra_cat then
if not categories then
categories = {}
end
insert(categories, full_langname .. " " .. extra_cat)
end
end
local extra_display = combined_codes[code].display
if extra_display then
if not extra_displays then
extra_displays = {}
end
insert(extra_displays, autoadd_abbr(extra_display))
end
else
for _, one_parts in ipairs(all_parts) do
insert(one_parts, code)
end
end
end
 
for _, parts in ipairs(all_parts) do
if this_formatted_specs[1] then
insert(this_formatted_specs, "or")
end
insert(this_formatted_specs, do_gender_spec(spec, parts))
end
 
if extra_displays then
for _, display in ipairs(extra_displays) do
insert(this_formatted_specs, display)
end
end
 
insert(formatted_specs, add_qualifiers_and_refs(
concat(this_formatted_specs, " "), spec, lang))
end
 
is_nounclass = false
end
 
-- Ensure that the specifications are either all noun classes, or none are.
if all_is_nounclass == nil then
all_is_nounclass = is_nounclass
elseif all_is_nounclass ~= is_nounclass then
error("Noun classes and genders cannot be mixed. Please use either one or the other.")
end
end
end
 
-- Do some additional checks if a language was given
if categories and lang and pos_for_cat then
if lang then
for i, cat in ipairs(categories) do
-- Is this an incomplete gender?
categories[i] = cat:gsub("POS", pos_for_cat)
if spec:find("?") then
local m_utilities = require("Module:utilities")
categories = m_utilities.format_categories({"Requests for gender in " .. lang:getCanonicalName() .. " entries"}, nil)
end
end
-- Check if the specification is valid
--elseif langinfo.genders then
-- local valid_genders = {}
-- for _, g in ipairs(langinfo.genders) do valid_genders[g] = true end
--
-- if not valid_genders[spec] then
-- local valid_string = {}
-- for i, g in ipairs(langinfo.genders) do valid_string[i] = g end
-- error("The gender specification \"" .. spec .. "\" is not valid for " .. langinfo.names[1] .. ". Valid are: " .. table.concat(valid_string, ", "))
-- end
--end
end
end
 
return ret .. categories, is_nounclass
if all_is_nounclass then
-- Add the processed codes together with slashes
return '<span class="gender">class ' .. concat(formatted_specs, "/") .. "</span>", categories
else
-- Add the processed codes together with spaces
return '<span class="gender">' .. concat(formatted_specs, " ") .. "</span>", categories
end
end
end


return export
return export