Module:number list

From Linguifex
Revision as of 22:00, 29 June 2023 by Sware (talk | contribs)
Jump to navigation Jump to search


local export = {}

local m_links = require("Module:links")

--[=[

Terminology:

Number = a bare number; a mathematical entity which has different form types (e.g. cardinal, ordinal)
Form type = a category of the forms that represent a number; examples are cardinal, ordinal, distributive, fractional
Form = a word or expression that represents a number in a given language
Tag = an identifier attached to a form that allows different logical subtypes of forms from the same form type to be
      identified; e.g. 'vuitanta-vuit<tag:Central>' vs. 'huitanta-huit<tag:Valencian>' to identify variants of
	  Catalan cardinal number 88 for different dialectal standards; there can be multiple tags per form, e.g.
	  'tair ar ddeg<tag:vigesimal><tag:feminine>' for the Welsh number 13 where there are both decimal/vigesimal and
	  masculine/feminine variants of this number
Tag list = a list of tags in the order they are specified in the data, e.g. {"vigesimal", "feminine"} for the example
           above
Combined tag = the string representation of a tag list, using ||| to separate individual tags
]=]

local form_types = {
	{key = "cardinal", display = "[[cardinal number|Cardinal]]"},
	{key = "ordinal", display = "[[ordinal number|Ordinal]]"},
	{key = "ordinal_abbr", display = "[[ordinal number|Ordinal]] [[abbreviation]]"},
	{key = "adverbial", display = "[[adverbial number|Adverbial]]"},
	{key = "multiplier", display = "[[multiplier|Multiplier]]"},
	{key = "distributive", display = "[[distributive number|Distributive]]"},
	{key = "collective", display = "[[collective number|Collective]]"},
	{key = "fractional", display = "[[fractional|Fractional]]"},
}

-- Keys in a `numbers` entry that aren't form types.
local non_form_types = {
	numeral = true,
	wplink = true,
	next = true,
	prev = true,
	next_outer = true,
	prev_outer = true,
	upper = true,
	lower = true,
}

--[=[
--
-- General set intersection
local function set_intersection(sets)
	local intersection = {}
	for key, _ in pairs(sets[1]) do
		intersection[key] = true
	end
	for i = 2, #sets do
		local this_set = sets[i]
		for key, _ in pairs(intersection) do
			if not this_set[key] then
				-- See https://stackoverflow.com/questions/6167555/how-can-i-safely-iterate-a-lua-table-while-keys-are-being-removed
				-- It is safe to modify or remove a key while iterating over the table.
				intersection[key] = nil
			end
		end
	end
	return intersection
end
]=]

local function set_intersection(set1, set2)
	local intersection = {}
	for key, _ in pairs(set1) do
		intersection[key] = true
	end
	for key, _ in pairs(intersection) do
		if not set2[key] then
			-- See https://stackoverflow.com/questions/6167555/how-can-i-safely-iterate-a-lua-table-while-keys-are-being-removed
			-- It is safe to modify or remove a key while iterating over the table.
			intersection[key] = nil
		end
	end
	return intersection
end

local function list_to_set(list)
	local set = {}
	for _, item in ipairs(list) do
		set[item] = true
	end
	return set
end

function export.get_data_module_name(langcode, must_exist)
	local module_name = "Module:number list/data/" .. langcode
	if must_exist and not mw.title.new(module_name).exists then
		error(("Data module [[%s]] for language code '%s' does not exist"):format(module_name, langcode))
	end
	return module_name
end

local function power_of(n)
	return "1" .. string.rep("0", n)
end

-- Format a number (either a Lua number or a string) in fixed point without any decimal point or scientific notation.
-- `tostring()` doesn't work because it converts large numbers such as 1000000000000000 to "1e+15".
function export.format_fixed(number)
	if type(number) == "string" then
		return number
	else
		return ("%.0f"):format(number)
	end
end

-- Parse a form with modifiers such as 'vuitanta-vuit<tag:Central>' or 'سیزده<tr:sizdah>'
-- or 'سیزده<tr:sizdah><tag:Iranian>' into its component parts. Return a form object, i.e. an object with fields
-- `form` for the form, and `tr`, `tag`, `q`, `qq` or `link` for the modifiers. The `tag` field is a tag list
-- (see above).
function export.parse_form_and_modifiers(form_with_modifiers)
	local retval = {}
	local form
	form = form_with_modifiers
	while true do
		local new_form, angle_bracketed = form:match("^(.-)(%b<>)$")
		if not new_form then
			break
		end
		local prefix, content = angle_bracketed:match "^<(%w+):(.+)>$"
		if not prefix then
			break
		end
		if prefix == "tag" then
			if retval.tag then
				table.insert(retval.tag, content)
			else
				retval.tag = {content}
			end
		elseif prefix == "q" or prefix == "qq" or prefix == "tr" or prefix == "link" then
			if retval[prefix] then
				error(("Duplicate modifier '%s' in data module form, already saw value '%s': %s"):format(prefix,
					retval[prefix], form_with_modifiers))
			else
				retval[prefix] = content
			end
		else
			error(("Unrecognized modifier '%s' in data module form: %s"):format(prefix, form_with_modifiers))
		end
		form = new_form
	end
	retval.form = form
	return retval
end

-- Find the `numbers` object for a given number (which should be in string representation).
function export.lookup_data(m_data, numstr)
	-- Don't try to convert very large numbers to Lua numbers because they may overflow.
	-- Powers of 10 >= 10^22 cannot be represented exactly as a Lua number.
	return m_data.numbers[numstr] or #numstr < 22 and m_data.numbers[tonumber(numstr)] or nil
end

-- Return true if a < b, where either may be a Lua number or the string representation of a number.
function export.numbers_less_than(a, b)
	a, b = export.format_fixed(a), export.format_fixed(b)
	local alen = #a
	local blen = #b
	if alen < blen then
		return true
	end
	if alen > blen then
		return false
	end
	return a < b
end

-- Return true if a > b, where either may be a Lua number or the string representation of a number.
function export.numbers_greater_than(a, b)
	return export.numbers_less_than(b, a)
end

-- Given a number form, convert it to its independent (un-affixed) form. This only makes sense for certain languages
-- where there is a difference between independent and affixed forms of numerals. Currently the only such language
-- is Swahili, where e.g. the cardinal number form for 3 is affixed [[-tatu]], independent [[tatu]], and the ordinal
-- number form is [[-a tatu]], independent [[tatu]]. We rely on a set of Lua pattern substitutions to convert from
-- affixed to independent form.
--
-- FIXME: This needs major rethinking in a way that isn't specific to Swahili.
local function maybe_unaffix(m_data, form)
	if not m_data.unaffix then
		return form
	end
	for _, entry in ipairs(m_data.unaffix) do
		local from, to = unpack(entry)
		form = mw.ustring.gsub(form, from, to)
	end
	return form
end

-- Convert the given number form (taken from the data for `lang`, after parsing the form for modifiers and stripping
-- the modifiers) to an entry name. The form may have links and/or accent/length marks that need to be stripped.
local function form_to_entry_name(form, lang)
	return (lang:makeEntryName(m_links.remove_links(form)))
end

-- Return true if the given number form object (taken from the data for `lang`, after parsing the form for modifiers)
-- matches `pagename`. If there is a <link:...> modifier, we check against it. Otherwise, we check against the form
-- itself. In this case, the form may have links and/or accent/length marks that need to be stripped, and we may need
-- to convert the form to its independent (un-affixed) form, if there is a difference between independent and affixed
-- forms (as in Swahili).
local function form_equals_pagename(formobj, pagename, m_data, lang)
	if formobj.link == pagename then
		return true
	end
	local entry_name = form_to_entry_name(formobj.form, lang)
	return entry_name == pagename or maybe_unaffix(m_data, entry_name) == pagename
end

-- Given the data for a language and a number (which should be in string representation), find the next and previous
-- numbers to display (in string representation).
local function get_next_and_prev_keys(m_data, numstr)
	local numdata = export.lookup_data(m_data, numstr)
	if not numdata then
		return nil, nil
	end
	local nextnum = numdata.next
	local prevnum = numdata.prev
	if not nextnum or not prevnum then
		-- Find the next/previous numbers by sorting all the keys and locating the number in question among them.
		local sorted_list = {}
		local index = 1
		for key, _ in pairs(m_data.numbers) do
			sorted_list[index] = key
			index = index + 1
		end

		table.sort(sorted_list, export.numbers_less_than)

		-- We could binary search to save time, but given that we already sort, which is supra-linear, it won't
		-- matter to search linearly.
		for i, key in ipairs(sorted_list) do
			if export.format_fixed(key) == numstr then
				nextnum = nextnum or sorted_list[i + 1]
				prevnum = prevnum or sorted_list[i - 1]
				break
			end
		end
	end

	if nextnum then
		nextnum = export.format_fixed(nextnum)
	end
	if prevnum then
		prevnum = export.format_fixed(prevnum)
	end

	return nextnum, prevnum
end

-- Find the "description objects" (a two-element list {NUMBER, TYPE}, where NUMBER is either a Lua number or a string,
-- depending on how it appears in the underlying data) that matches `pagename` and (if given) `matching_type`.
-- Return a list of such objects.
local function lookup_number_by_form(lang, m_data, pagename, matching_type)
	local retval = {}
	local function check_form(form, num, typ)
		local formobj = export.parse_form_and_modifiers(form)
		if form_equals_pagename(formobj, pagename, m_data, lang) and (not matching_type or typ == matching_type) then
			-- It's possible the same pagename occurs multiply for a given type and number, e.g. with different length
			-- or accent marks. The calling code is OK with multiple entries for a given number (which can also occur
			-- with different types, e.g. the ordinal and fractional forms for a given number are the same), but will
			-- throw an error if different numbers are seen.
			table.insert(retval, {num, typ})
		end
	end

	for num, numdata in pairs(m_data.numbers) do
		for numtype, forms in pairs(numdata) do
			if non_form_types[numtype] then
				-- do nothing
			elseif type(forms) == "table" then
				for _, form in ipairs(forms) do
					check_form(form, num, numtype)
				end
			else
				check_form(forms, num, numtype)
			end
		end
	end

	return retval
end

local function index_of_number_type(t, type)
	for i, subtable in ipairs(t) do
		if subtable.key == type then
			return i
		end
	end
end

-- additional_types is an array of tables like form_types,
-- but each table can contain the keys "before" or "after", which specify
-- the numeral type that the form should appear before or after.
-- The transformations are applied in order.
local function add_form_types(additional_types)
	local types = require("Module:table").deepcopy(form_types)
	for _, type in ipairs(additional_types) do
		type = require("Module:table").shallowcopy(type)
		local i
		if type.before or type.after then
			i = index_of_number_type(types, type.before or type.after)
		end
		-- For now, simply log an error message
		-- if the "before" or "after" number type was not found,
		-- and insert the number type at the end.
		if i then
			if type.before then
				table.insert(types, i - 1, type)
			else
				table.insert(types, i + 1, type)
			end
		else
			table.insert(types, type)
			if type.before or type.after then
				mw.log("Number type "
					.. (type.before or type.after)
					.. " was not found.")
			end
		end
		type.before, type.after = nil, nil
	end
	return types
end

-- Return all form types for the language in question, in order.
function export.get_number_types(m_data)
	local final_form_types = form_types
	if m_data.additional_number_types then
		final_form_types = add_form_types(m_data.additional_number_types)
	end
	return final_form_types
end

-- Convert a number type object (an object with `display` and `key` fields) to its displayed form.
function export.display_number_type(number_type)
	if number_type.display then
		return number_type.display
	else
		return (number_type.key:gsub("^.", string.upper):gsub("_", " "))
	end
end

-- Group digits with a separator, such as a comma or a period. See [[w:Digit grouping]].
local function add_separator(numstr, separator, group, start)
	start = start or group
	if start >= #numstr then
		return numstr
	end

	local parts = { numstr:sub(-start) }
	for i = start + 1, #numstr, group do
		table.insert(parts, 1, numstr:sub(-(i + group - 1), -i))
	end

	return table.concat(parts, separator)
end

function export.add_thousands_separator(numstr, separator)
	if #numstr < 4 then -- < 1000
		return numstr
	end
	return add_separator(numstr, separator or ",", 3)
end

local function add_Indic_separator(numstr, separator)
	return add_separator(numstr, separator, 2, 3)
end

-- Convert a number (represented as a string) to non-Arabic form based on the specs in `numeral_config`.
-- This is used, for example, to display the Hindu, Eastern Arabic or Roman form of a number along with the standard
-- Arabic form. Most of the code below assumes that the non-Arabic numerals are decimal, and the digits map one-to-one
-- with Arabic numerals. If this is not the case (e.g. for Roman numerals), a special module function is called to do
-- the conversion.
function export.generate_non_arabic_numeral(numeral_config, numstr)
	-- `numstr` is a number represented as a string. See comment near top of show_box().
	if numeral_config.module and numeral_config.func then
		return require("Module:" .. numeral_config.module)[numeral_config.func](numstr)
	end

	local thousands_separator, Indic_separator, zero_codepoint =
		numeral_config.thousands_separator,
		numeral_config.Indic_separator,
		numeral_config.zero_codepoint

	if not zero_codepoint then
		return nil
	end

	if thousands_separator then
		numstr = export.add_thousands_separator(numstr, thousands_separator)
	elseif Indic_separator then
		numstr = add_Indic_separator(numstr, Indic_separator)
	end

	return numstr:gsub("[0-9]", function (digit)
		return mw.ustring.char(zero_codepoint + tonumber(digit))
	end)
end


-- Format a number (either a Lua number or a string) for display. Sufficiently small numbers are displayed in fixed
-- point with thousands separators. Larger numbers are displayed in both fixed point and scientific notation using
-- superscripts, and sufficiently large numbers are displayed only in scientific notation.
function export.format_number_for_display(number)
	local MAX_NUM_DIGITS_FOR_FIXED_ONLY = 6
	local MIN_NUM_DIGITS_FOR_SCIENTIFIC_ONLY = 13
	local numstr = export.format_fixed(number)
	local fixed = export.add_thousands_separator(numstr)
	if #numstr <= MAX_NUM_DIGITS_FOR_FIXED_ONLY then
		return fixed
	end
	local kstr = numstr:match("^([0-9]*[1-9])0*$")
	if not kstr then
		error("Internal error: Unable to match number '" .. numstr .. "'")
	end
	local exponent = ("10<sup>%s</sup>"):format(#numstr - 1)
	local mantissa
	if kstr == "1" then
		mantissa = ""
	elseif #kstr == 1 then
		mantissa = kstr .. " x "
	else
		mantissa = kstr:gsub("^([0-9])", "%1.") .. " x "
	end
	local scientific = mantissa .. exponent
	if #numstr >= MIN_NUM_DIGITS_FOR_SCIENTIFIC_ONLY then
		return scientific
	else
		return fixed .. " (" .. scientific .. ")"
	end
end

-- Map a list of tags to a single string that is equivalent. We need to do this because we can't easily put lists in the
-- keys of tables.
local function tag_list_to_combined_tag(tag_list)
	return table.concat(tag_list, "|||")
end

-- Given a list of forms with attached inline modifiers (e.g. 'huitanta-huit<tag:Valencian>' or
-- 'tair ar ddeg<tag:vigesimal><tag:feminine>'), parse the forms into form objects (the return value of
-- parse_form_and_modifiers()) and group by the tag. Three values are returned:
-- `seen_forms`, `forms_by_tag`, `seen_tags` where:
-- (1) `seen_forms` is the list of parsed form objects;
-- (2) `forms_by_tag` is a table grouping the form objects by combined tag, where the key is the tag and the value is
--      a list of the form objects seen with that tag (forms without tag are grouped under the empty-string tag);
-- (3) `seen_tags` is a list of the combined tags encountered, in the order they were encountered;
-- (4) `combined_tags_to_tag_lists` is a map from combined tags to the corresponding tag lists.
function export.group_numeral_forms_by_tag(forms)
	local seen_forms = {}
	local forms_by_tag = {}
	local seen_tags = {}
	local combined_tags_to_tag_lists = {}

	for _, form in ipairs(forms) do
		local formobj = export.parse_form_and_modifiers(form)
		table.insert(seen_forms, formobj)
		local combined_tag = formobj.tag and tag_list_to_combined_tag(formobj.tag) or ""
		if not forms_by_tag[combined_tag] then
			table.insert(seen_tags, combined_tag)
			forms_by_tag[combined_tag] = {}
			combined_tags_to_tag_lists[combined_tag] = formobj.tag or {}
		end
		table.insert(forms_by_tag[combined_tag], formobj)
	end

	return seen_forms, forms_by_tag, seen_tags, combined_tags_to_tag_lists
end

-- Given a form object (as returned by parse_form_and_modifiers()), format as appropriate for the current language.
function export.format_formobj(formobj, m_data, lang)
	local left_q = formobj.q and require("Module:qualifier").format_qualifier(formobj.q) .. " " or ""
	local right_q = formobj.qq and " " .. require("Module:qualifier").format_qualifier(formobj.qq) or ""
	return left_q .. m_links.full_link({
		lang = lang, term = maybe_unaffix(m_data, formobj.form), alt = formobj.form, tr = formobj.tr,
	}) .. right_q
end

-- Implementation of {{number box}}.
function export.show_box(frame)
	local full_link = m_links.full_link

	local params = {
		[1] = {required = true},
		[2] = {},
		["pagename"] = {},
		["type"] = {},
	}

	local parent_args = frame:getParent().args
	local args = require("Module:parameters").process(parent_args, params, nil, "number list", "show_box")

	local langcode = args[1] or "und"
	local lang = require("Module:languages").getByCode(langcode, "1")

	-- Get the data from the data module. Some modules (e.g. currently [[Module:number list/data/ka]]) have to be
	-- loaded with require() because the exported numbers table has a metatable.
	local module_name = export.get_data_module_name(langcode, "must exist")
	local m_data = require(module_name)

	local pagename = args.pagename or (mw.title.getCurrentTitle().nsText == "Reconstruction" and "*" or "") .. mw.title.getCurrentTitle().subpageText
	-- Resolve any risky characters which makeEntryName will escape, so that any matches involving them work correctly.
	pagename = (lang:makeEntryName(pagename))
	local cur_type = args.type

	-- We represent all numbers as strings in this function to deal with the limited precision inherent in Lua numbers.
	-- These large numbers do occur, such as 100 trillion ([[རབ་བཀྲམ་ཆེན་པོ]]), 1 sextillion, etc. Lua represents all
	-- numbers as 64-bit floats, meaning that some numbers above 2^53 cannot be represented exactly. The first power of
	-- 10 that cannot be represented exactly is 10^22 (ten sextillion in short scale, ten thousand trillion in long
	-- scale), but the first power of ten whose neighboring numbers cannot be represented exactly is 10^16 (ten
	-- quadrillion or ten thousand billion). Ideally we would use a big integer library of some kind, but unfortunately
	-- Wiktionary does not seem to have any such library installed. MediaWiki docs make mention of bcmath, but
	-- mw.bcmath.new() throws an error.
	--
	-- In module data, we allow numbers to be indexed as Lua numbers or as strings. See lookup_data() above.
	local cur_num = args[2] or langcode == "und" and mw.title.getCurrentTitle().nsText == "Template" and "2" or nil

	-- If a current number wasn't specified, find it by looking through the data for the current language and matching
	-- forms against the pagename.
	if not cur_num then
		local nums_and_types = lookup_number_by_form(lang, m_data, pagename, cur_type)
		if #nums_and_types == 0 then
			error("The current page name '" .. pagename .. "' does not match the spelling of any known number in [[" ..
				module_name .. "]]. Check the data module or the spelling of the page.")
		end
		for _, num_and_type in ipairs(nums_and_types) do
			local num, typ = unpack(num_and_type)
			num = export.format_fixed(num)
			if cur_num and num ~= cur_num then
				local errparts = {}
				for _, num_and_type in ipairs(nums_and_types) do
					local num, typ = unpack(num_and_type)
					table.insert(errparts, ("%s (%s)"):format(num, typ))
				end
				error("The current page name '" .. pagename .. "' matches the spelling of multiple numbers in [[" ..
					module_name .. "]]: " .. table.concat(errparts, ",") .. ". Please specify the number explicitly.")
			else
				cur_num = num
			end
		end
	end

	cur_num = cur_num:gsub(",", "") -- remove thousands separators
	if not cur_num:find("^%d+$") then
		error("Extraneous characters in parameter 2: should be decimal number (integer): '" .. cur_num .. "'")
	end

	-- Wrapper around `export.lookup_data` that may throw an error if the number can't be found (specifically if
	-- param_for_error is given).
	local function lookup_data(numstr, param_for_error)
		local retval = export.lookup_data(m_data, numstr)
		if not retval and param_for_error then
			error(('The %s number "%s" specified in the "numbers" table entry for "%s" cannot be found in '
				.. "[[%s]]; please fix the module."):format(param_for_error, numstr, cur_num, module_name))
		end
		return retval
	end

	local cur_data = lookup_data(cur_num)
	if not cur_data then
		error('The number "' .. cur_num .. '" is not found in the "numbers" table in [[' .. module_name .. "]].")
	end

	local formatted_forms = {}

	if cur_type and not cur_data[cur_type] then
		error("The numeral type " .. cur_type .. " for " .. cur_num .. " is not found in [[" .. module_name .. "]].")
	end

	-- See above for the definition of "combined tag" and "tag list". The combined tag is just the concatenation of the
	-- tag list with ||| between the tags.
	local cur_tag_list, cur_combined_tag

	local form_types = export.get_number_types(m_data)

	-- LONG COMMENT EXPLAINING TAG HANDLING:
	--
	-- For each form type (see `form_types` at top of file), group the entries for that form type by tag and figure out
	-- what the current form type and tag is, i.e. the form type and tag for the form matching the pagename. Tags are
	-- e.g. as in 'vuitanta-vuit<tag:Central>' or 'huitanta-huit<tag:Valencian>' for Catalan and allow different
	-- logical sets of numbers for the same form type to be identified. There can potentially be multiple tags per
	-- form, e.g. 'tair ar ddeg<tag:vigesimal><tag:feminine>' for the Welsh number 13 where there are both decimal/
	-- vigesimal and masculine/feminine variants of this number.
	--
	-- We need to do two passes over all form types. In the first pass, for each form type we parse all the forms,
	-- group them by tag, and store the results in a per-form-type table. In the second pass, we then format all forms
	-- for all form types. The reason for doing two passes is because we need to know the current tag in order to
	-- display a form type correctly (because we display the forms for the current tag before the forms for any other
	-- tags), but we won't know the current tag until we have done a pass over all form types and forms of those form
	-- types in order to determine which one matches the pagename.
	--
	-- We use the current tag in two ways:
	-- 1. When displaying all the forms for a given number, we group both by form type and tag, and display the forms
	--    for a given form type/tag combination on a single line. For a given form type, we display the forms for each
	--    tag in the order the tags were specified in the data, except that the forms for the current tag are placed
	--    before all others (so e.g. for Catalan, if the current tag is "Valencian", we list the Valencian form(s)
	--    first even if the Central form(s) are listed first in the data file).
	-- 2. When displaying links to adjacent numbers in display_adjacent_number_links(), if there aren't form(s) for the
	--    current type, we don't display any links; but if there are mutiple tagged forms for the current type, we only
	--    display links for the forms for the current tag if there are any such forms, otherwise we display links for
	--    all forms of all tags.
	--
	-- In the presence of multiple tags, things get a bit more complicated:
	-- 1. When displaying links to adjacent numbers, say the current tag is vigesimal+feminine, we want to prefer an
	--    adjacent-number form that's both vigesimal and feminine, but otherwise we prefer one that's vigesimal or
	--    feminine over one that's neither. Say the current tag is just vigesimal; we of course prefer an
	--    adjacent-number form that's just vigesimal, but otherwise we prefer a tag that's vigesimal + either masculine
	--    or feminine to a tag that's not vigesimal. So it seems we want the form(s) that have the maximum intersection
	--    of tags, and if there are two different tag lists with the same number of intersecting tags (e.g. the current
	--    tag is vigesimal+feminine and we have a choice of decimal+feminine or just vigesimal), we should prefer the
	--    form that has fewer non-matching tags, hence we prefer the just-vigesimal form.
	-- 2. By the same logic, when displaying all the forms for a given number, we should order by the size of the
	--    intersection of the tag list in question with the current tag list, then inversely by the size of the tag list
	--    (so we prefer tag lists with fewer non-matching tags), then by the order of the tag lists in the data file.

	local forms_by_tag_per_form_type = {}
	local seen_tags_per_form_type = {}
	local combined_tags_to_tag_lists_per_form_type = {}

	for _, form_type in ipairs(form_types) do
		local numeral = cur_data[form_type.key]
		if numeral then
			local numerals
			if type(numeral) == "string" then
				numerals = {numeral}
			elseif type(numeral) == "table" then
				numerals = numeral
			end

			local seen_forms, forms_by_tag, seen_tags, combined_tags_to_tag_lists = export.group_numeral_forms_by_tag(numerals)
			forms_by_tag_per_form_type[form_type] = forms_by_tag
			seen_tags_per_form_type[form_type] = seen_tags
			combined_tags_to_tag_lists_per_form_type[form_type] = combined_tags_to_tag_lists
			for _, formobj in ipairs(seen_forms) do
				if not cur_tag_list and form_equals_pagename(formobj, pagename, m_data, lang) then
					cur_tag_list = formobj.tag or {}
					cur_combined_tag = tag_list_to_combined_tag(cur_tag_list)
					cur_type = cur_type or form_type.key
				end
			end
		end
	end

	-- Error if we couldn't locate the pagename among the forms for the current number. This only happens if the
	-- number if given explicitly in 2=.

	if not cur_type and mw.title.getCurrentTitle().nsText ~= "Template" then
		error("The current page name '" .. pagename .. "' does not match any of the numbers listed in [[" ..
			module_name .. "]] for " .. cur_num .. ". Check the data module or the spelling of the page.")
	end

	-- Now, format all the forms for all form types for the current number.

	local function sort_combined_tags(combined_tags, seen_tags, combined_tags_to_tag_lists)
		-- cur_tag_list should normally never be nil, but can be so in template space
		local cur_tag_set = list_to_set(cur_tag_list or {})
		local tags_to_order = {}
		for i, tag in ipairs(seen_tags) do
			tags_to_order[tag] = i
		end
		local function compare_tags(tag1, tag2)
			-- See long comment above.
			-- First compare by number of tags in common with the current tag list.
			local tag_list1 = combined_tags_to_tag_lists[tag1]
			local tag_list2 = combined_tags_to_tag_lists[tag2]
			local common1 = set_intersection(cur_tag_set, list_to_set(tag_list1))
			local common2 = set_intersection(cur_tag_set, list_to_set(tag_list2))
			if #common1 ~= #common2 then
				return #common1 < #common2
			end
			-- Then compare inversely by number of tags not in common with the current tag list (which is equivalent to
			-- comparing by total number of tags, since tags should be distinct).
			if #tag_list1 ~= #tag_list2 then
				return #tag_list1 > #tag_list2
			end
			-- Finally, compare by the original ordering in the number data, but if a tag is the same as the current
			-- tag, put it first, and if somehow we encounter a tag that's not in the original ordering, put it last.
			local index1 = tag1 == cur_combined_tag and 0 or tags_to_order[tag1] or #seen_tags + 1
			local index2 = tag2 == cur_combined_tag and 0 or tags_to_order[tag2] or #seen_tags + 1
			return index1 < index2
		end
		table.sort(combined_tags, compare_tags)
	end

	for _, form_type in ipairs(form_types) do
		local forms_by_tag = forms_by_tag_per_form_type[form_type]
		local seen_tags = seen_tags_per_form_type[form_type]
		local combined_tags_to_tag_lists = combined_tags_to_tag_lists_per_form_type[form_type]
		if forms_by_tag then
			local function insert_forms_by_tag(tag)
				local formatted_tag_forms = {}

				local pagename_among_forms = false
				for _, formobj in ipairs(forms_by_tag[tag]) do
					table.insert(formatted_tag_forms, export.format_formobj(formobj, m_data, lang))
					if form_equals_pagename(formobj, pagename, m_data, lang) then
						pagename_among_forms = true
					end
				end

				if tag ~= "" then
					local tag_list = combined_tags_to_tag_lists[tag]
					tag = table.concat(tag_list, " / ")
				end
				local displayed_number_type = export.display_number_type(form_type) .. (tag == "" and "" or (" (%s)"):format(tag))
				if pagename_among_forms then
					displayed_number_type = "'''" .. displayed_number_type .. "'''"
				end

				table.insert(formatted_forms, " &nbsp;&nbsp;&nbsp; ''" .. displayed_number_type .. "'': " ..
					table.concat(formatted_tag_forms, ", "))
			end

			sort_combined_tags(seen_tags, seen_tags, combined_tags_to_tag_lists)
			for _, tag in ipairs(seen_tags) do
				insert_forms_by_tag(tag)
			end
		end
	end

	-- Current number in header
	local cur_display = export.format_number_for_display(cur_num)

	local numeral
	if m_data.numeral_config then
		numeral = export.generate_non_arabic_numeral(m_data.numeral_config, cur_num)
	elseif cur_data["numeral"] then
		numeral = export.format_fixed(cur_data["numeral"])
	end

	if numeral then
		cur_display = full_link({lang = lang, alt = numeral, tr = "-"}) .. "<br/><span style=\"font-size: smaller;\">" .. cur_display .. "</span>"
	end

	--------------------- Determine next/prev, next/prev outer, and upper/lower numbers. ----------------------

	-- We have three series of numbers to determine:
	--
	-- 1. The next/previous numbers, which are always those in the sorted series of available numbers unless overridden
	--    by `next`/`prev` specs in an individual number.
	-- 2. The next/previous outer numbers, which are displayed to the outside of the next/previous numbers. These can
	--    be overridden for an individual number using `next_outer`/`prev_outer`. Otherwise, we try according to an
	--    algorithm described below in the code for computing the outer numbers.
	-- 3. The upper/lower numbers, which are displayed above or below the central number box. These can be overridden
	--    for an individual number using `upper`/`lower`. These are always 10x greater or less than the number in
	--    question, number not considering a number if it's the same as the next/previous number.

	local next_num, prev_num = get_next_and_prev_keys(m_data, cur_num)
	local next_data = next_num and lookup_data(next_num, "next")
	local prev_data = prev_num and lookup_data(prev_num, "previous")

	--------- Decompose number into mantissa (k) and exponent (m). ----------

	local k, m
	if cur_num == "0" then
		k = 0
		m = 1
	else
		local kstr, mstr = cur_num:match("^([0-9]*[1-9])(0*)$")
		if not kstr then
			error("Internal error: Unable to match number '" .. cur_num .. "'")
		elseif #kstr > 15 then
			-- This is because some numbers with 16 or more digits can't be represented exactly.
			error("Can't handle number with more than 15 digits before the trailing zeros: '" .. cur_num .. "'")
		end
		k = tonumber(kstr)
		m = #mstr
	end

	-- Find the next greater power of 10 for cur_num, up to 10^6. `try` should look up the data for a power of 10
	-- and return it if it's available and the number passes any checks, otherwise nil.
	local function make_greater_power_of_ten(power)
		return cur_num .. string.rep("0", power)
	end

	-- Find the next lesser power of 10 for cur_num, up to 10^6. `try` should look up the data for a power of 10
	-- and return it if it's available and the number passes any checks, otherwise nil.
	local function make_lesser_power_of_ten(power)
		local desired_zeros = m - power
		if desired_zeros < 0 then
			return nil
		end
		return k .. string.rep("0", desired_zeros)
	end


	local next_outer_data, prev_outer_data
	local next_outer_num, prev_outer_num = cur_data.next_outer, cur_data.prev_outer

	-- When trying to find then next/previous outer numbers, first, if the base-10 mantissa is not 1 or 0, we add 1 to
	-- or subtract 1 from the mantissa, keeping the same number of zeros. Hence, for 300, we try 400 for the next outer,
	-- 200 for the previous outer. For 900, we try 1000 for the next outer and 800 for the previous outer. If the
	-- mantissa is 1, the next outer is computed the same but for the previous outer we use 9 followed by one fewer
	-- zero. Hence, for 100 we try 200 for the next outer but 90 for the previous outer. If the mantissa is 0 (i.e. the
	-- entire number is 0), we try 10 for the next outer, and have no previous outer.
	--
	-- Next, if the number is an even power of 10, we try 10x, 1000x greater, 100x greater and 1,000,000x greater, in
	-- that sequence. Essentially, first we try the next power of 10; then we try the next short-scale number (billion,
	-- trillion, etc. where large numbers follow a 10^3 sequence); then we try the next long-scale number (where large
	-- numbers follow a 10^6 sequence); then we try the next Indic-scale number (where large numbers follow a 10^2
	-- sequence: lakh, crore, arab, ...). We don't just try powers of 10 in order because then if e.g. we have entries
	-- for one million, ten million, one hundred million and one billion, and the current number is one million, the
	-- next number will be ten million and the next outer number one hundred million, when it would be cleaner to have
	-- one billion as the outer number (and in many cases, there is no Wiktionary entry for one hundred million).
	--
	-- For the previous outer number, we do an analogous algorithm but make sure we don't try numbers less than 1.
	local power_of_10_sequence = { 1, 3, 2, 6 }

	--------- Determine next outer number. ----------
	if next_outer_num then
		next_outer_data = lookup_data(next_outer_num, "next outer")
	else
		local function try(num)
			local data = (not next_num or export.numbers_greater_than(num, next_num)) and lookup_data(num) or nil
			if data then
				next_outer_num = num
				next_outer_data = data
			end
			return data
		end
		if not try((k + 1) .. string.rep("0", m)) and k == 1 then
			-- Try looking up a greater power of ten instead.
			for _, power_of_10 in ipairs(power_of_10_sequence) do
				if try(make_greater_power_of_ten(power_of_10)) then
					break
				end
			end
		end
	end

	--------- Determine previous outer number. ----------
	if prev_outer_num then
		prev_outer_data = lookup_data(prev_outer_num, "previous outer")
	else
		local function try(num)
			local data = (not prev_num or export.numbers_less_than(num, prev_num)) and lookup_data(num) or nil
			if data then
				prev_outer_num = num
				prev_outer_data = data
			end
			return data
		end
		if k == 0 or m == 0 then
			-- less than 10; no previous outer num
		else
			local num_to_try
			if k == 1 then
				num_to_try = "9" .. string.rep("0", m - 1)
			else
				num_to_try = (k - 1) .. string.rep("0", m)
			end
			if not try(num_to_try) and k == 1 then
				-- Try looking up a smaller power of ten instead.
				for _, power_of_10 in ipairs(power_of_10_sequence) do
					local num_to_try = make_lesser_power_of_ten(power_of_10)
					if num_to_try and try(num_to_try) then
						break
					end
				end
			end
		end
	end

	local upper_data, lower_data
	local upper_num, lower_num = cur_data.upper, cur_data.lower

	--------- Determine upper number. ----------
	if upper_num then
		upper_data = lookup_data(upper_num, "upper")
	else
		-- Try looking up the next power of ten.
		upper_num = make_greater_power_of_ten(1)
		if upper_num == next_num then
			upper_num = nil
		else
			upper_data = lookup_data(upper_num)
		end
	end

	--------- Determine lower number. ----------
	if lower_num then
		lower_data = lookup_data(lower_num, "lower")
	elseif k == 0 or m == 0 then
		-- less than 10; no lower num
	else
		-- Try looking up the previous power or 10.
		lower_num = make_lesser_power_of_ten(1)
		if lower_num == prev_num then
			lower_num = nil
		else
			lower_data = lookup_data(lower_num)
		end
	end

	-- For a number `num` (an "adjacent" number to the current number, i.e. either next, previous, next/previous outer,
	-- or upper/lower) with corresponding entry data `num_data`, display link(s) to the form(s) for this number that
	-- are associated with the current type and tag. If there is a single form to be linked to, the form is linked
	-- using the number itself as the display text; otherwise, the multiple forms are linked with superscripted [a],
	-- [b], etc. and the number it displayed adjacent to the links. In either case, beside the number there may be an
	-- arrow. If `arrow` == "rarrow", the format is like this:
	--		if multiple entries:
	--			<numeral> → <sup>[a], [b], ...</sup>
	--		else
	--			<numeral> →
	-- If `arrow` == "larrow", the format is like this:
	--		if multiple entries:
	--			<sup>[a], [b], ...</sup> ← <numeral>
	--		else
	--			← <numeral>
	-- Otherwise, the format is like this:
	--		if multiple entries:
	--			<numeral><sup>[a], [b], ...</sup>
	--		else
	--			<numeral>
	--
	-- Returns nil if `num_data` is nil or there is no entry in `num_data` for the current number type.
	--
	-- For the handling of tags in this function, see the "LONG COMMENT EXPLAINING TAG HANDLING" above.
	local function display_adjacent_number_links(num, num_data, arrow)
		if not num_data then
			return nil
		end
		local num_type_data = num_data[cur_type]
		if not num_type_data then
			return nil
		end
		local forms = num_type_data
		if type(forms) ~= "table" then
			forms = {forms}
		end

		local seen_forms, forms_by_tag = export.group_numeral_forms_by_tag(forms)

		local forms_to_display
		if cur_tag and forms_by_tag[cur_tag] then
			forms_to_display = forms_by_tag[cur_tag]
		else
			forms_to_display = seen_forms
		end

		for i, form_to_display in ipairs(forms_to_display) do
			forms_to_display[i] = form_to_display.link or maybe_unaffix(m_data,
				form_to_entry_name(form_to_display.form, lang))
		end

		local seen_pagenames = {}
		local pagenames_to_display = {}
		for _, form in ipairs(forms_to_display) do
			if not seen_pagenames[form] then
				table.insert(pagenames_to_display, form)
				seen_pagenames[form] = true
			end
		end

		num = export.format_number_for_display(num)
		local num_arrow =
			arrow == "rarrow" and num .. "&nbsp;&nbsp;→&nbsp;" or
			arrow == "larrow" and "&nbsp;←&nbsp;&nbsp;" .. num or
			num
		if #pagenames_to_display > 1 then
			local a = ("a"):byte()
			local links = {}
			for i, term in ipairs(pagenames_to_display) do
				links[i] = m_links.language_link{lang = lang, term = term, alt = "[" .. string.char(a + i - 1) .. "]"}
			end
			links = "<sup>" .. table.concat(links, ", ") .. "</sup>"
			return arrow == "larrow" and links .. num_arrow or num_arrow .. links
		else
			return m_links.language_link {
				lang = lang,
				term = pagenames_to_display[1],
				alt = num_arrow,
			}
		end
	end

	-- Display links to previous/next numbers
	local prev_display = display_adjacent_number_links(prev_num, prev_data, "larrow") or ""
	local next_display = display_adjacent_number_links(next_num, next_data, "rarrow") or ""

	-- Display links to previous/next outer numbers
	local prev_outer_display = display_adjacent_number_links(prev_outer_num, prev_outer_data, "larrow")
	local next_outer_display = display_adjacent_number_links(next_outer_num, next_outer_data, "rarrow")

	-- Display links to upper/lower numbers
	local upper_display = display_adjacent_number_links(upper_num, upper_data)
	local lower_display = display_adjacent_number_links(lower_num, lower_data)

	local canonical_name = lang:getCanonicalName()

	local function format_cell(contents, font_size, background, colspan, bold)
		font_size = font_size and (" font-size:%s;"):format(font_size) or ""
		background = background and (" background:%s;"):format(background) or ""
		colspan = colspan and ('colspan="%s" '):format(colspan) or ""
		bold = bold and "!" or "|"
		return ('%s %sstyle="min-width: 6em;%s%s | %s\n'):format(bold, colspan, font_size, background, contents)
	end

	local has_outer_display = not not (prev_outer_display or next_outer_display)
	local function format_upper_lower_display_row(display)
		local blank_cell
		if has_outer_display then
			blank_cell = '| colspan="2" |\n'
		else
			blank_cell = "|\n"
		end
		local parts = {'|- style="text-align: center; background:#dddddd;"\n'}
		table.insert(parts, blank_cell)
		table.insert(parts, format_cell(display, "smaller"))
		table.insert(parts, blank_cell)
		return table.concat(parts)
	end

	upper_display = upper_display and format_upper_lower_display_row(upper_display) or ""
	lower_display = lower_display and format_upper_lower_display_row(lower_display) or ""

	local function format_display_cell(display)
		return format_cell(display, "smaller", "#dddddd")
	end

	prev_display = format_display_cell(prev_display)
	next_display = format_display_cell(next_display)
	prev_outer_display = has_outer_display and format_display_cell(prev_outer_display or "") or ""
	next_outer_display = has_outer_display and format_display_cell(next_outer_display or "") or ""
	cur_display = format_cell(cur_display, "larger", nil, nil, "bold")

	local forms_display = ('| colspan="%s" style="text-align: center;" | %s\n'):format(
		has_outer_display and 5 or 3, table.concat(formatted_forms, "<br/>"))

	local footer_display
	if cur_data.wplink then
		local footer =
			"[[w:" .. lang:getCode() .. ":Main Page|" .. lang:getCanonicalName() .. " Wikipedia]] article on " ..
			m_links.full_link({lang = lang, term = "w:" .. lang:getCode() .. ":" .. cur_data.wplink,
			alt = export.format_number_for_display(cur_num)})
		footer_display = '|- style="text-align: center;"\n' .. format_cell(footer, nil, "#dddddd", has_outer_display and 5 or 3)
	else
		footer_display = ""
	end

	local edit_link = ' <sup>(<span class="plainlinks">[' ..
		tostring(mw.uri.fullUrl(module_name, { action = "edit" })) ..
		" edit]</span>)</sup>"

	return [=[{| class="floatright" cellpadding="5" cellspacing="0" style="background: #ffffff; border: 1px #aaa solid; border-collapse: collapse; margin-top: .5em;" rules="all"
|+ ''']=] .. title .. edit_link .. "'''\n" ..
	upper_display .. '|- style="text-align: center;"\n' ..
	prev_outer_display .. prev_display .. cur_display .. next_display .. next_outer_display .. "|-\n" ..
	lower_display .. "|-\n" ..
	forms_display .. footer_display .. "|}"
end


-- Assumes string or nil (or false), the types that can be found in an args table.
local function if_not_empty(val)
	if val and mw.text.trim(val) == "" then
		return nil
	else
		return val
	end
end


function export.show_box_manual(frame)
	local m_links = require("Module:links")
	local num_type = frame.args["type"]

	local args = {}
	--cloning parent's args while also assigning nil to empty strings
	for pname, param in pairs(frame:getParent().args) do
		args[pname] = if_not_empty(param)
	end

	local lang = args[1] or (mw.title.getCurrentTitle().nsText == "Template" and "und") or error("Language code has not been specified. Please pass parameter 1 to the template.")
	local sc = args["sc"];
	local headlink = args["headlink"]
	local wplink = args["wplink"]
	local alt = args["alt"]
	local tr = args["tr"]

	local prev_symbol = if_not_empty(args[2])
	local cur_symbol = if_not_empty(args[3]);
	local next_symbol = if_not_empty(args[4])

	local prev_term = if_not_empty(args[5])
	local next_term = if_not_empty(args[6])

	local cardinal_term = args["card"]; local cardinal_alt = args["cardalt"]; local cardinal_tr = args["cardtr"]

	local ordinal_term = args["ord"]; local ordinal_alt = args["ordalt"]; local ordinal_tr = args["ordtr"]

	local adverbial_term = args["adv"]; local adverbial_alt = args["advalt"]; local adverbial_tr = args["advtr"]

	local multiplier_term = args["mult"]; local multiplier_alt = args["multalt"]; local multiplier_tr = args["multtr"]

	local distributive_term = args["dis"]; local distributive_alt = args["disalt"]; local distributive_tr = args["distr"]

	local collective_term = args["coll"]; local collective_alt = args["collalt"]; local collective_tr = args["colltr"]

	local fractional_term = args["frac"]; local fractional_alt = args["fracalt"]; local fractional_tr = args["fractr"]

	local optional1_title = args["opt"]
	local optional1_term = args["optx"]; local optional1_alt = args["optxalt"]; local optional1_tr = args["optxtr"]

	local optional2_title = args["opt2"]
	local optional2_term = args["opt2x"]; local optional2_alt = args["opt2xalt"]; local optional2_tr = args["opt2xtr"]


	lang = require("Module:languages").getByCode(lang) or error("The language code \"" .. lang .. "\" is not valid.")
	sc = (sc and (require("Module:scripts").getByCode(sc) or error("The script code \"" .. sc .. "\" is not valid.")) or nil)

	local subpage = mw.title.getCurrentTitle().subpageText
	local is_reconstructed = lang:hasType("reconstructed") or mw.title.getCurrentTitle().nsText == "Reconstruction"
	alt = alt or (is_reconstructed and "*" or "") .. subpage

	if num_type == "cardinal" then
		cardinal_term = (is_reconstructed and "*" or "") .. subpage
		cardinal_alt = alt
		cardinal_tr = tr
	elseif num_type == "ordinal" then
		ordinal_term = (is_reconstructed and "*" or "") .. subpage
		ordinal_alt = alt
		ordinal_tr = tr
	end

	local header = lang:getCanonicalName() .. " " .. num_type .. " numbers"

	if headlink then
		header = "[[" .. headlink .. "|" .. header .. "]]"
	end

	local previous = ""

	if prev_term or prev_symbol then
		previous = m_links.full_link({lang = lang, sc = sc, term = prev_term, alt = "&nbsp;&lt;&nbsp;&nbsp;" .. prev_symbol, tr = "-"})
	end

	local current = m_links.full_link({lang = lang, sc = sc, alt = cur_symbol, tr = "-"})

	local next = ""

	if next_term or next_symbol then
		next = m_links.full_link({lang = lang, sc = sc, term = next_term, alt = next_symbol .. "&nbsp;&nbsp;&gt;&nbsp;", tr = "-"})
	end

	local forms = {}

	if cardinal_term then
		table.insert(forms, " &nbsp;&nbsp;&nbsp; ''[[cardinal number|Cardinal]]'' : " .. m_links.full_link({lang = lang, sc = sc, term = cardinal_term, alt = cardinal_alt, tr = cardinal_tr}))
	end

	if ordinal_term then
		table.insert(forms, " &nbsp;&nbsp;&nbsp; ''[[ordinal number|Ordinal]]'' : " .. m_links.full_link({lang = lang, sc = sc, term = ordinal_term, alt = ordinal_alt, tr = ordinal_tr}))
	end

	if adverbial_term then
		table.insert(forms, " &nbsp;&nbsp;&nbsp; ''[[adverbial number|Adverbial]]'' : " .. m_links.full_link({lang = lang, sc = sc, term = adverbial_term, alt = adverbial_alt, tr = adverbial_tr}))
	end

	if multiplier_term then
		table.insert(forms, " &nbsp;&nbsp;&nbsp; ''[[multiplier|Multiplier]]'' : " .. m_links.full_link({lang = lang, sc = sc, term = multiplier_term, alt = multiplier_alt, tr = multiplier_tr}))
	end

	if distributive_term then
		table.insert(forms, " &nbsp;&nbsp;&nbsp; ''[[distributive number|Distributive]]'' : " .. m_links.full_link({lang = lang, sc = sc, term = distributive_term, alt = distributive_alt, tr = distributive_tr}))
	end

	if collective_term then
		table.insert(forms, " &nbsp;&nbsp;&nbsp; ''[[collective number|Collective]]'' : " .. m_links.full_link({lang = lang, sc = sc, term = collective_term, alt = collective_alt, tr = collective_tr}))
	end

	if fractional_term then
		table.insert(forms, " &nbsp;&nbsp;&nbsp; ''[[fractional|Fractional]]'' : " .. m_links.full_link({lang = lang, sc = sc, term = fractional_term, alt = fractional_alt, tr = fractional_tr}))
	end

	if optional1_title then
		table.insert(forms, " &nbsp;&nbsp;&nbsp; ''" .. optional1_title .. "'' : " .. m_links.full_link({lang = lang, sc = sc, term = optional1_term, alt = optional1_alt, tr = optional1_tr}))
	end

	if optional2_title then
		table.insert(forms, " &nbsp;&nbsp;&nbsp; ''" .. optional2_title .. "'' : " .. m_links.full_link({lang = lang, sc = sc, term = optional2_term, alt = optional2_alt, tr = optional2_tr}))
	end

	local footer = ""

	if wplink then
		footer =
			"[[w:" .. lang:getCode() .. ":Main Page|" .. lang:getCanonicalName() .. " Wikipedia]] article on " ..
			m_links.full_link({lang = lang, sc = sc, term = "w:" .. lang:getCode() .. ":" .. wplink, alt = alt, tr = tr})
	end

	return [=[{| class="floatright" cellpadding="5" cellspacing="0" style="background: #ffffff; border: 1px #aaa solid; border-collapse: collapse; margin-top: .5em;" rules="all"
|+ ''']=] .. header .. [=['''
|-
| style="width: 64px; background:#dddddd; text-align: center; font-size:smaller;" | ]=] .. previous .. [=[

! style="width: 98px; text-align: center; font-size:larger;" | ]=] .. current .. [=[

| style="width: 64px; text-align: center; background:#dddddd; font-size:smaller;" | ]=] .. next .. [=[

|-
| colspan="3" style="text-align: center;" | ]=] .. table.concat(forms, "<br/>") .. [=[

|-
| colspan="3" style="text-align: center; background: #dddddd;" | ]=] .. footer .. [=[

|}]=]
end

return export