Module:form of: Difference between revisions

Jump to navigation Jump to search
no edit summary
(Created page with "local m_links = require("Module:links") local m_table = require("Module:table") local m_pos = mw.loadData("Module:form of/pos") local m_functions = require("Module:form of/fun...")
 
No edit summary
Line 120: Line 120:
end
end


-- Compatibility wrapper: forwards `text` to ucfirst() in [[Module:string utilities]]
-- and returns whatever that function returns.
-- FIXME! Callers should be changed to call [[Module:string utilities]] directly.
function export.ucfirst(text)
	local m_string_utilities = require("Module:string utilities")
	return m_string_utilities.ucfirst(text)
end


function export.format_form_of(data)
if type(data) ~= "table" then
error("First argument must now be a table of arguments")
end


function export.format_form_of(text, terminfo, posttext)
local text_classes = data.text_classes or "form-of-definition use-with-mention"
local terminfo_classes = data.text_classes or "form-of-definition-link"
local parts = {}
local parts = {}
table.insert(parts, "<span class='form-of-definition use-with-mention'>")
table.insert(parts, "<span class='" .. text_classes .. "'>")
table.insert(parts, text)
table.insert(parts, data.text)
if text ~= "" and terminfo then
if data.text ~= "" and data.terminfo then
table.insert(parts, " ")
table.insert(parts, " ")
end
end
if terminfo then
if data.terminfo then
table.insert(parts, "<span class='form-of-definition-link'>")
table.insert(parts, "<span class='" .. terminfo_classes .. "'>")
if type(terminfo) == "string" then
if type(data.terminfo) == "string" then
table.insert(parts, terminfo)
table.insert(parts, data.terminfo)
else
else
table.insert(parts, m_links.full_link(terminfo, "term", false))
table.insert(parts, m_links.full_link(data.terminfo, data.terminfo_face, false))
end
end
table.insert(parts, "</span>")
table.insert(parts, "</span>")
end
end
if posttext then
if data.posttext then
table.insert(parts, posttext)
table.insert(parts, data.posttext)
end
end
table.insert(parts, "</span>")
table.insert(parts, "</span>")
Line 409: Line 409:




local function normalize_pos(pos)
-- Expand a tag set that contains two-level multipart tags into a list of tag sets, none of
-- which contains a two-level multipart tag. Single-level multipart tags are kept as-is.
--
-- Representation assumed here (as produced by normalize_tags()): a multipart tag is a list,
-- and a two-level multipart tag is a list in which at least one element is itself a list.
-- Imbalanced two-level tags such as <code>def:s//p</code> (or <code>s//def:p</code>) mix
-- list and string alternants; both kinds of alternant are handled below.
--
-- Returns a list of tag sets. If no two-level multipart tag is present, the result is a
-- one-element list holding the original `tag_set` (the same table, not a copy).
function export.split_two_level_multipart_tag_set(tag_set)
	-- Append every element of list `src` to the end of list `dest`.
	local function extend(dest, src)
		for _, item in ipairs(src) do
			table.insert(dest, item)
		end
	end

	for pos, tag in ipairs(tag_set) do
		-- A tag is two-level when it is a list having at least one list among its parts.
		local is_two_level = false
		if type(tag) == "table" then
			for _, part in ipairs(tag) do
				if type(part) == "table" then
					is_two_level = true
					break
				end
			end
		end

		if is_two_level then
			-- Collect the tags on either side of the two-level tag.
			local before, after = {}, {}
			for j = 1, pos - 1 do
				table.insert(before, tag_set[j])
			end
			for j = pos + 1, #tag_set do
				table.insert(after, tag_set[j])
			end

			-- For each alternant build before + alternant + after, then recurse so that any
			-- further two-level tags (in `after` or inside the alternant) are split as well.
			local expanded_sets = {}
			for _, alternant in ipairs(tag) do
				local candidate = {}
				extend(candidate, before)
				if type(alternant) == "table" then
					-- List alternant: splice its tags in individually.
					extend(candidate, alternant)
				else
					-- String alternant: a single tag.
					table.insert(candidate, alternant)
				end
				extend(candidate, after)
				extend(expanded_sets, export.split_two_level_multipart_tag_set(candidate))
			end
			-- Only the first two-level tag is handled at this level; the recursion above
			-- has already dealt with any later ones.
			return expanded_sets
		end
	end

	return {tag_set}
end
 
 
-- Partition a flat list of tags into tag sets, treating every ";" entry as a separator.
-- Tag sets that would be empty (e.g. from leading, trailing or consecutive semicolons)
-- are omitted from the result. Returns the list of tag sets.
function export.split_tags_into_tag_sets(tags)
	local groups = {}
	local pending = {}

	-- Store the pending tag set if it has any tags, then start a fresh one.
	local function flush()
		if #pending > 0 then
			table.insert(groups, pending)
		end
		pending = {}
	end

	for _, tag in ipairs(tags) do
		if tag ~= ";" then
			table.insert(pending, tag)
		else
			flush()
		end
	end
	-- The final tag set has no terminating semicolon.
	flush()

	return groups
end
 
 
-- Convert a flat list of tags into the final list of tag sets:
-- (1) split on semicolons via split_tags_into_tag_sets();
-- (2) pass each resulting tag set through split_two_level_multipart_tag_set(), which may
--     turn one tag set into several when it contains two-level multipart tags.
-- The expanded tag sets are returned as a single flat list, in order.
function export.split_tags_into_tag_sets_and_expand_two_level_multipart_tags(tags)
	local expanded = {}
	for _, tag_set in ipairs(export.split_tags_into_tag_sets(tags)) do
		local pieces = export.split_two_level_multipart_tag_set(tag_set)
		for _, piece in ipairs(pieces) do
			table.insert(expanded, piece)
		end
	end
	return expanded
end
 
 
function export.normalize_pos(pos)
return m_pos[pos] or pos
return m_pos[pos] or pos
end
end
Line 482: Line 584:




-- Set-inclusion test on two lists: returns true when every element of `tags1` also occurs
-- in `tags2`, ignoring order and duplicates, and false otherwise. Both lists are converted
-- to sets with m_table.listToSet() before comparing.
local function is_subset(tags1, tags2)
	local wanted = m_table.listToSet(tags1)
	local available = m_table.listToSet(tags2)
	for member in pairs(wanted) do
		if not available[member] then
			return false
		end
	end
	return true
end
-- Compute and return the appropriate categories for the tags in `tags` (user-specified tags,
-- which may consist of multiple tag sets separated by semicolons) and the language in `lang`.
-- This checks both language-specific and language-agnostic category specs in [[Module:form of/cats]].
-- `POS` is the user-specified part of speech, if any, and `terminfo` is currently unused.
function export.fetch_lang_categories(lang, tags, terminfo, POS)
function export.fetch_lang_categories(lang, tags, terminfo, POS)
local m_cats = mw.loadData("Module:form of/cats")
local m_cats = mw.loadData("Module:form of/cats")
Line 487: Line 607:
local categories = {}
local categories = {}


local normalized_tags = export.normalize_tags(tags, "recombine multitags")
local normalized_tags = export.normalize_tags(tags)
POS = normalize_pos(POS)
local split_tag_sets = export.split_tags_into_tag_sets_and_expand_two_level_multipart_tags(normalized_tags)
POS = export.normalize_pos(POS)


local function make_function_table()
-- Loop over each tag set and compute categories for each one.
return {
for _, tag_set in ipairs(split_tag_sets) do
lang=lang,
local function make_function_table()
tags=normalized_tags,
return {
term=term,
lang=lang,
p=POS
tags=normalized_tags,
}
term=term,
end
p=POS
}
end


local function check_condition(spec)
-- Given a tag from the current tag set (which may be a list in case of a multipart tag),
if type(spec) == "boolean" then
-- and a tag from a categorization spec, check that the two match.
return spec
-- (1) If both are strings, we just check for equality.
elseif type(spec) ~= "table" then
-- (2) If the spec tag is a string and the tag set tag is a list (i.e. it originates from a
error("Wrong type of condition " .. spec .. ": " .. type(spec))
-- multipart tag), we check that the spec tag is in the list. This is because we want to treat
end
-- multipart tags in user-specified tag sets as if the user had specified multiple tag sets.
local predicate = spec[1]
-- For example, if the user said "1//3|s|pres|ind" and the categorization spec says {"has", "1"},
if predicate == "has" then
-- we want this to match, because "1//3|s|pres|ind" should be treated equivalently to two tag
return m_table.contains(normalized_tags, normalize_tag(spec[2])), 3
-- sets "1|s|pres|ind" and "3|s|pres|ind", and the former matches the categorization spec.
elseif predicate == "hasall" then
-- (3) If the spec tag is a list (i.e. it originates from a multipart tag), we check that the
for _, tag in ipairs(spec[2]) do
-- tag set tag is also a list and is a superset of the spec tag. For example, if the categorization
if not m_table.contains(normalized_tags, normalize_tag(tag)) then
-- spec says {"has", "1//3"}, then the tag set tag must be a multipart tag that has both "1" and "3"
return false, 3
-- in it. "1//3" works, as does "1//2//3".
local function tag_set_tag_matches_spec_tag(tag_set_tag, spec_tag)
if type(spec_tag) == "table" then
if type(tag_set_tag) == "table" and is_subset(spec_tag, tag_set_tag) then
return true
end
end
end
elseif type(tag_set_tag) == "table" then
return true, 3
if m_table.contains(tag_set_tag, spec_tag) then
elseif predicate == "hasany" then
return true
for _, tag in ipairs(spec[2]) do
if m_table.contains(normalized_tags, normalize_tag(tag)) then
return true, 3
end
end
elseif tag_set_tag == spec_tag then
return true
end
end
return false, 3
return false
elseif predicate == "tags=" then
end
local normalized_spec_tags = export.normalize_tags(spec[2],
 
"recombine multitags")
-- Check that the current tag set matches the given spec tag. This means that any of the tags
return m_table.deepEqualsList(normalized_tags, normalized_spec_tags), 3
-- in the current tag set match, according to tag_set_tag_matches_spec_tag(); see above. If the
elseif predicate == "p=" then
-- current tag set contains only string tags (i.e. no multipart tags), and the spec tag is a
return POS == normalize_pos(spec[2]), 3
-- string (i.e. not a multipart tag), this boils down to list containment, but it gets more
elseif predicate == "pany" then
-- complex when multipart tags are present.
for _, specpos in ipairs(spec[2]) do
local function tag_set_matches_spec_tag(spec_tag)
if POS == normalize_pos(specpos) then
spec_tag = normalize_tag(spec_tag)
return true, 3
for _, tag_set_tag in ipairs(tag_set) do
if tag_set_tag_matches_spec_tag(tag_set_tag, spec_tag) then
return true
end
end
end
end
return false, 3
return false
elseif predicate == "pexists" then
end
return POS ~= nil, 2
 
elseif predicate == "not" then
-- Check whether the given spec matches the current tag set. Two values are returned:
local condval = check_condition(spec[2])
-- (1) whether the spec matches the tag set; (2) the index of the category to add if
return not condval, 3
-- the spec matches.
elseif predicate == "and" then
local function check_condition(spec)
local condval = check_condition(spec[2])
if type(spec) == "boolean" then
if condval then
return spec
condval = check_condition(spec[3])
elseif type(spec) ~= "table" then
error("Wrong type of condition " .. spec .. ": " .. type(spec))
end
end
return condval, 4
local predicate = spec[1]
elseif predicate == "or" then
if predicate == "has" then
local condval = check_condition(spec[2])
return tag_set_matches_spec_tag(spec[2]), 3
if not condval then
elseif predicate == "hasall" then
condval = check_condition(spec[3])
for _, tag in ipairs(spec[2]) do
end
if not tag_set_matches_spec_tag(tag) then
return condval, 4
return false, 3
elseif predication == "call" then
end
local fn = m_functions.cat_functions[spec[2]]
end
if not fn then
return true, 3
error("No condition function named '" .. spec[2] .. "'")
elseif predicate == "hasany" then
for _, tag in ipairs(spec[2]) do
if tag_set_matches_spec_tag(tag) then
return true, 3
end
end
return false, 3
elseif predicate == "tags=" then
local normalized_spec_tags = export.normalize_tags(spec[2])
-- Allow tags to be in different orders, and multipart tags to
-- be in different orders. To handle this, we first check that
-- both tag set tags and spec tags have the same length. If so,
-- we sort the multipart tags in the tag set tags and spec tags,
-- and then check that all tags in the spec tags are in the
-- tag set tags.
if #tag_set ~= #normalized_spec_tags then
return false, 3
end
local tag_set_tags = m_table.deepcopy(tag_set)
for i=1,#tag_set_tags do
if type(tag_set_tags[i]) == "table" then
table.sort(tag_set_tags[i])
end
if type(normalized_spec_tags[i]) == "table" then
table.sort(normalized_spec_tags[i])
end
end
for i=1,#tag_set_tags do
if not m_table.contains(tag_set_tags, normalized_spec_tags[i], "deepCompare") then
return false, 3
end
end
return true, 3
elseif predicate == "p=" then
return POS == export.normalize_pos(spec[2]), 3
elseif predicate == "pany" then
for _, specpos in ipairs(spec[2]) do
if POS == export.normalize_pos(specpos) then
return true, 3
end
end
return false, 3
elseif predicate == "pexists" then
return POS ~= nil, 2
elseif predicate == "not" then
local condval = check_condition(spec[2])
return not condval, 3
elseif predicate == "and" then
local condval = check_condition(spec[2])
if condval then
condval = check_condition(spec[3])
end
return condval, 4
elseif predicate == "or" then
local condval = check_condition(spec[2])
if not condval then
condval = check_condition(spec[3])
end
return condval, 4
elseif predication == "call" then
local fn = m_functions.cat_functions[spec[2]]
if not fn then
error("No condition function named '" .. spec[2] .. "'")
end
return fn(make_function_table()), 3
else
error("Unrecognized predicate: " .. predicate)
end
end
return fn(make_function_table()), 3
else
error("Unrecognized predicate: " .. predicate)
end
end
end


local function process_spec(spec)
-- Process a given spec. This checks any conditions in the spec against the
if not spec then
-- tag set, and insert any resulting categories into `categories`. Return value
return false
-- is true if the outermost condition evaluated to true and a category was inserted
elseif type(spec) == "string" then
-- (this is used in {"cond" ...} conditions, which stop when a subcondition evaluates
-- Substitute POS request with user-specified part of speech
-- to true).
-- or default
local function process_spec(spec)
spec = rsub(spec, "<<p=(.-)>>", function(default)
if not spec then
return POS or normalize_pos(default)
return false
end)
elseif type(spec) == "string" then
table.insert(categories, lang:getCanonicalName() .. " " .. spec)
-- Substitute POS request with user-specified part of speech
return true
-- or default
elseif type(spec) ~= "table" then
spec = rsub(spec, "<<p=(.-)>>", function(default)
error("Wrong type of specification " .. spec .. ": " .. type(spec))
return POS or export.normalize_pos(default)
end
end)
local predicate = spec[1]
table.insert(categories, lang:getCanonicalName() .. " " .. spec)
if predicate == "multi" then
return true
-- WARNING! #spec doesn't work for objects loaded from loadData()
elseif type(spec) ~= "table" then
for i, sp in ipairs(spec) do
error("Wrong type of specification " .. spec .. ": " .. type(spec))
if i > 1 then
process_spec(sp)
end
end
end
return true
local predicate = spec[1]
elseif predicate == "cond" then
if predicate == "multi" then
-- WARNING! #spec doesn't work for objects loaded from loadData()
-- WARNING! #spec doesn't work for objects loaded from loadData()
for i, sp in ipairs(spec) do
for i, sp in ipairs(spec) do
if i > 1 and process_spec(sp) then
if i > 1 then
return true
process_spec(sp)
end
end
end
end
return false
elseif predicate == "call" then
local fn = m_functions.cat_functions[spec[2]]
if not fn then
error("No spec function named '" .. spec[2] .. "'")
end
return process_spec(fn(make_function_table()))
else
local condval, ifspec = check_condition(spec)
if condval then
process_spec(spec[ifspec])
return true
return true
elseif predicate == "cond" then
-- WARNING! #spec doesn't work for objects loaded from loadData()
for i, sp in ipairs(spec) do
if i > 1 and process_spec(sp) then
return true
end
end
return false
elseif predicate == "call" then
local fn = m_functions.cat_functions[spec[2]]
if not fn then
error("No spec function named '" .. spec[2] .. "'")
end
return process_spec(fn(make_function_table()))
else
else
process_spec(spec[ifspec + 1])
local condval, ifspec = check_condition(spec)
return false
if condval then
process_spec(spec[ifspec])
return true
else
process_spec(spec[ifspec + 1])
-- FIXME: Are we sure this is correct?
return false
end
end
end
end
end
end


local langspecs = m_cats[lang:getCode()]
local langspecs = m_cats[lang:getCode()]
if langspecs then
for _, spec in ipairs(langspecs) do
process_spec(spec)
end
end
if lang:getCode() ~= "und" then
local langspecs = m_cats["und"]
if langspecs then
if langspecs then
for _, spec in ipairs(langspecs) do
for _, spec in ipairs(langspecs) do
process_spec(spec)
process_spec(spec)
end
end
if lang:getCode() ~= "und" then
local langspecs = m_cats["und"]
if langspecs then
for _, spec in ipairs(langspecs) do
process_spec(spec)
end
end
end
end
end
end
end
return categories
return categories
end
end




function export.tagged_inflections(tags, terminfo, notext, capfirst, posttext, joiner)
function export.tagged_inflections(data, terminfo, notext, capfirst, posttext, joiner)
if not data.tags then
error("First argument must now be a table of arguments")
end
local cur_infl = {}
local cur_infl = {}
local inflections = {}
local inflections = {}


local ntags = export.normalize_tags(tags, nil, "do-track")
local ntags = export.normalize_tags(data.tags, nil, "do-track")


for i, tagspec in ipairs(ntags) do
for i, tagspec in ipairs(ntags) do
Line 644: Line 846:
cur_infl = {}
cur_infl = {}
else
else
local to_insert = export.get_tag_display_form(tagspec, joiner)
local to_insert = export.get_tag_display_form(tagspec, data.joiner)
-- Maybe insert a space before inserting the display form
-- Maybe insert a space before inserting the display form
-- of the tag. We insert a space if
-- of the tag. We insert a space if
Line 688: Line 890:
table.insert(inflections, table.concat(cur_infl))
table.insert(inflections, table.concat(cur_infl))
end
end
local format_data = require("Module:table").shallowcopy(data)


if #inflections == 1 then
if #inflections == 1 then
return export.format_form_of(
format_data.text =
notext and "" or ((capfirst and export.ucfirst(inflections[1]) or inflections[1]) ..
data.notext and "" or ((data.capfirst and require("Module:string utilities").ucfirst(inflections[1]) or inflections[1]) ..
(terminfo and " of" or "")),
(data.terminfo and " of" or ""))
terminfo, posttext
return export.format_form_of(format_data)
)
else
else
local link = export.format_form_of(
format_data.text = data.notext and "" or ((data.capfirst and "Inflection" or "inflection") ..
notext and "" or ((capfirst and "Inflection" or "inflection") ..
(data.terminfo and " of" or ""))
(terminfo and " of" or "")),
format_data.posttext = (data.posttext or "") .. ":"
terminfo, (posttext or "") .. ":"
local link = export.format_form_of(format_data)
)
local text_classes = data.text_classes or "form-of-definition use-with-mention"
return link .."\n## <span class='form-of-definition use-with-mention'>" .. table.concat(inflections, "</span>\n## <span class='form-of-definition use-with-mention'>") .. "</span>"
return link .."\n## <span class='" .. text_classes .. "'>" ..
table.concat(inflections, "</span>\n## <span class='" .. text_classes .. "'>") .. "</span>"
end
end
end
end
Line 745: Line 949:


return ret
return ret
end
-- Return the contents of [[Module:form of/data]] and [[Module:form of/data2]] serialized by
-- toJSON() from [[Module:JSON]], under the keys "data" and "data2" respectively.
-- `frame` is accepted but not used.
function export.dump_form_of_data(frame)
	local dump = {}
	dump.data = require("Module:form of/data")
	dump.data2 = require("Module:form of/data2")
	local m_json = require("Module:JSON")
	return m_json.toJSON(dump)
end
end




return export
return export
-- For Vim, so we get 4-space tabs
-- vim: set ts=4 sw=4 noet:

Navigation menu