Module:family tree/nested data: Difference between revisions
Jump to navigation
Jump to search
No edit summary |
No edit summary |
||
(One intermediate revision by the same user not shown) | |||
Line 166: | Line 166: | ||
if code and not val then | if code and not val then | ||
-- Error in data table! | -- Error in data table! | ||
mw.log("no such family code: " | mw.log("no such family code: ") | ||
end | end | ||
return nil | return nil |
Latest revision as of 23:13, 27 July 2022
Documentation for this module may be created at Module:family tree/nested data/doc
--[=[
Authors: [[wikt:User:kc_kennylau]], [[wikt:User:JohnC5]], [[wikt:User:Erutuon]], [[wikt:User:Suzukaze-c]]
--]=]
local regular_languages = require("Module:languages/alldata")
local families = require("Module:families/data")
-- Version of [[Module:etymology languages/data]] that chooses the language-
-- codiest code of several codes that have the same data. For instance,
-- it chooses "de-AT" over "Austrian German".
local etymology_languages = require("Module:family tree/etymology languages")
local supplementary_language_and_family_data = require("Module:family tree/data")
local memoize = require("Module:fun").memoize
local make_auto_subtabler = require("Module:auto-subtable")
local function get_data(code)
return regular_languages[code] or etymology_languages[code] or families[code]
or error("language code " .. tostring(code) .. " not recognized")
end
local get_sort_value = memoize(function (code)
local data = get_data(code)
return (data[1] or data.canonicalName):gsub("Proto%-", "")
end)
-- used in deep_sort
local function compare(code1, code2)
return get_sort_value(code1) < get_sort_value(code2)
end
local function deep_sort(current)
local result = {}
local is_table = {}
for key, val in pairs(current) do
if type(key) == "number" then
table.insert(result, val)
else
is_table[key] = true
table.insert(result, key)
end
end
table.sort(result, compare)
for i = 1, #result do
if is_table[result[i]] then
local name = result[i]
result[i] = deep_sort(current[result[i]])
result[i].name = name
else
result[i] = { name = result[i] }
end
end
return result
end
-- Reliably get family for etymology language.
local function get_family(code)
while code do
local data = get_data(code)
local family = data[3] or data.family
if family then -- This is a regular language or family code.
return family
end
-- This is an etymology language code. Go up in the chain of parenthood.
code = data.parent
end
end
local function all_in_family(family_code, codes)
for _, code in ipairs(codes) do
if get_family(code) ~= family_code then
return false
end
end
return true
end
local protolanguage_of = memoize(function(fam_code)
local data = get_data(fam_code)
if data then
local proto = data.protoLanguage
if proto then
return proto
end
end
local proto = fam_code .. "-pro"
if regular_languages[proto] or etymology_languages[proto] then
return proto
end
end)
-- Key: old parent code
-- Value: new parent code (move all regular languages or families here)
local new_parent_code = {}
-- Key: ancestor of proto-language
-- Value: table, containing
-- Key: proto-language
-- Value: family of proto-language
local protolanguage_to_put_under_family = make_auto_subtabler()
local function find_ancestors(code, val)
-- First look for the special family tree–only ancestor.
if supplementary_language_and_family_data[code] and supplementary_language_and_family_data[code].parent then
return { supplementary_language_and_family_data[code].parent }
-- Handle etymology languages.
elseif etymology_languages[code] then
local parent = val.parent
if parent then
return { parent }
end
-- Handle regular languages
-- Return ancestors value if the ancestors belong to the same language
-- family.
elseif val.ancestors and (#val.ancestors == 1
or all_in_family(get_family(code), val.ancestors)) then
return val.ancestors
-- If the proto-language of a family does not belong to the family (i.e.,
-- it belongs to the same family as the family does), nest the family
-- under the proto-language instead of the other way around.
elseif families[code] then
local proto = protolanguage_of(code)
if proto then
if get_family(proto) == get_family(code) then
new_parent_code[proto] = code
return { proto }
end
-- Normally a family is the child of a proto-language or a family,
-- but if the family's proto-language has an ancestors field, use
-- that as parent instead.
local proto_data = get_data(proto)
if proto_data.ancestors then
for _, ancestor in ipairs(proto_data.ancestors) do
protolanguage_to_put_under_family[ancestor][proto] = code
end
return proto_data.ancestors
end
end
end
-- Handle regular languages and language families.
-- Return a proto-language if possible, else a language family.
if get_family(code) then
-- Go up through the language families that the language or family
-- belongs to.
local origin = code
while true do
code = get_family(code)
val = families[code]
-- no family, "not a family", undetermined family
if not (code and val and code ~= "qfa-not" and code ~= "qfa-und") then
if code and not val then
-- Error in data table!
mw.log("no such family code: ")
end
return nil
end
local proto = protolanguage_of(code)
if proto and proto ~= origin then
return { proto }
else
return { code }
end
end
end
end
local function make_parent_to_children_map()
local children = make_auto_subtabler{}
for _, data_module in ipairs { families, regular_languages, etymology_languages } do
for code, data in pairs(data_module) do
local ancestors = find_ancestors(code, data)
if ancestors then
for _, ancestor in ipairs(ancestors) do
if ancestor ~= code then
table.insert(children[ancestor], code)
end
end
end
end
end
-- No more auto subtabling needed.
return children:un_auto_subtable()
end
-- For instance, Latin is the proto-language of the Romance languages, but
-- belongs to the Italic family. All the children of Latin that are languages
-- or families were initially placed under Latin, but must be placed under the
-- Romance languages.
local function move_children(parent_to_children_map)
make_auto_subtabler(parent_to_children_map)
for old_code, new_code in pairs(new_parent_code) do
local old_children = parent_to_children_map[old_code]
local new_children = parent_to_children_map[new_code]
local i = 1
while old_children[i] do
local child = old_children[i]
if child ~= new_code and (regular_languages[child] or families[child]) then
table.insert(new_children, table.remove(old_children, i))
else
i = i + 1
end
end
if #old_children == 0 then
parent_to_children_map[old_code] = nil
end
end
for old_parent, child_to_new_parent in pairs(protolanguage_to_put_under_family) do
local old_children = parent_to_children_map[old_parent]
if type(old_children) == "table" then
local i = 1
while old_children[i] do
local child = old_children[i]
local new_parent = child_to_new_parent[child]
if new_parent then
table.insert(parent_to_children_map[new_parent], table.remove(old_children, i))
else
i = i + 1
end
end
end
end
parent_to_children_map:un_auto_subtable()
end
local function reverse_comp(a, b)
return a > b
end
local function make_nested(data, children)
local make_nil = {}
for key, val in pairs(data) do
if type(key) == "number" then
if children[val] then
data[val] = make_nested(children[val], children)
table.insert(make_nil, key)
end
else
data[key] = make_nested(val, children)
end
end
if make_nil[2] then -- Make sure larger keys are removed first.
table.sort(make_nil, reverse_comp)
end
for _, key in ipairs(make_nil) do
table.remove(data, key)
end
return data
end
local function main()
local parent_to_children_map = make_parent_to_children_map()
move_children(parent_to_children_map)
local nested = make_nested(parent_to_children_map, parent_to_children_map)
nested = deep_sort(nested)
return { nested = nested, protolanguage_of = protolanguage_of }
end
return main()