Module:siwa-pron/data: Difference between revisions

From Linguifex
Jump to navigation Jump to search
No edit summary
No edit summary
Line 2: Line 2:


local m_table = require("Module:table")
local m_table = require("Module:table")
local sub = mw.ustring.sub
local find = mw.ustring.find
local gmatch = mw.ustring.gmatch
local gsub = mw.ustring.gsub
local u = mw.ustring.char
local split = mw.text.split
local UNRELEASED = u(0x031A) -- COMBINING LEFT ANGLE ABOVE. ̚
local NASALIZED = u(0x0303) -- COMBINING TILDE. ̃
--obsolete ligatures and L with stroke used to remove two-character hassle. will replace later
local consonant = "[mnɲŋpbtdcɟkɡʔvðsɕxɣhʨʥrlɬłʣjwʦʔƀꝺđɣꬶɉ]" .. UNRELEASED .. "?"
local front_vowel = "iɪyeøɛœæa"
local back_vowel = "uɔ" .. NASALIZED .. "?ɑʊ"
local vowel = "[" .. front_vowel .. back_vowel .. "ɨ]"
local spat1 = "(·?ˈ[mnɲŋpbtdcɟkɡʔvðsɕxɣhʨʥrlɬłʣjwʦ⁽ʰ⁾ʔːƀꝺđɣꬶɉ]*"..UNRELEASED.."?"..vowel..")(h)([^ː])"
local spat2 = "(·?ˈ[mnɲŋpbtdcɟkɡʔvðsɕxɣhʨʥrlɬłʣjwʦ⁽ʰ⁾ʔːƀꝺđɣꬶɉ]*"..UNRELEASED.."?"..vowel..")(ꬶ)([^ː])"
local spat3 = "(·?ˈ[mnɲŋpbtdcɟkɡʔvðsɕxɣhʨʥrlɬłʣjwʦ⁽ʰ⁾ʔːƀꝺđɣꬶɉ]*"..UNRELEASED.."?"..vowel..")(ƀ)([^ː])"
local spat4 = "(·?ˈ[mnɲŋpbtdcɟkɡʔvðsɕxɣhʨʥrlɬłʣjwʦ⁽ʰ⁾ʔːƀꝺđɣꬶɉ]*"..UNRELEASED.."?"..vowel..")(đ)([^ː])"
local spat5 = "(·?ˈ[mnɲŋpbtdcɟkɡʔvðsɕxɣhʨʥrlɬłʣjwʦ⁽ʰ⁾ʔːƀꝺđɣꬶɉ]*"..UNRELEASED.."?"..vowel..")(ꝺ)([^ː])"
local function open_to_closed(v)
local otc = {}
local switch = {["ɑ"] = "a", ["e"] = "ɛ", ["i"] = "ɪ", ["ɔ"] = "ɔ", ["u"] = "ʊ", ["y"] = "œ", ["ø"] = "œ",}
for vc in gmatch(v, ".") do
vc = gsub(vc, vc, switch[vc])
table.insert(otc, vc)
end
return table.concat(otc)
end
local rules = {
{ --ligatures and ł
["t[ṡɕ]"] = "ʨ", ["ṡ"] = "ɕ", ["į"] = "j", ["dj"] = "ʥ",
["ḍ"] = "ð", ["dl"] = "ł", ["kj"] = "c", ["ḥ"] = "ʔ",
["nj"] = "ɲ", ["ġ"] = "x", ["ts"] = "ʦ", ["g"] = "ɡ", -- IPA g
},
{ --long consonants
["mm"] = "mː", ["bb"] = "pː", ["vv"] = "wː", ["nn"] = "nː",
["dʥ"] = "ʥː", ["dd"] = "tː", ["ðð"] = "ðː", ["ss"] = "sː",
["ɕɕ"] = "ɕː", ["rr"] = "rː", ["ll"] = "lː", ["ɡɡ"] = "kː",
["xx"] = "xː", ["nɡ"] = "ŋː", ["hh"] = "hː", ["ʔʔ"] = "ʔː",
["nɲ"] = "ɲː", ["hl"] = "ɬː",
},
{ --[[default all consonants to unstressed. [] with stroke and ꬶ (U+AB36)
to tell apart natural and stress-borne]]
["p"] = "ƀ", ["d"] = "ꝺ", ["t"] = "đ",
["ɡ"] = "ɣ", ["k"] = "ꬶ",
},
{ --default all vowels as open (open-closed distinctions are computed later)
["a"] = "ɑ", ["ả"] = "æː",
["ę"] = "æ",
["ẻ"] = "eː",
["ỉ"] = "iː",
["o"] = "ɔ", ["ỏ"] = "ʊː",
["ủ"] = "uː",
["ỷ"] = "yː",
["ů"] = "ø", ["ẻu"] = "øː",
["õ"] = "ɔ̃", ["õu"] = "ɔ̃ː̃",
},
{
["^(ˈ)ꬶ([" .. front_vowel .. "])"] = "%1c%2", --word-initial [k] palatalizes before front-vowels
["^(ˈ[ƀđꬶc])"] = "%1ʰ", --voiceless stops word-initially become aspirated
["^(.*·ˈ[ƀđꬶc])"] = "%1⁽ʰ⁾",
["^(ˈ)ɣj([" .. front_vowel .. "])"] = "%1ʣ%2", --<gį> word-initially and before front vowels is pronounced [d͡z]
["^(ˈ)ɣj([" .. back_vowel .. "])"] = "%1ɟ%2",
["^(ˈ)ɣ([" .. front_vowel .. "])"] = "%1ɟ%2",
},
{
["ˈƀ"] = "ˈp", ["ˈđ"] = "ˈt", ["ˈꬶ"] = "ˈk", ["ˈꝺ"] = "ˈd",
["đi"] = "ʨi", ["ꝺi"] = "ʥi", ["ɣi"] = "ɉi", ["ɣj"] = "jː", ["ɣjː"] = "ɟː",
},
{
[spat1] = "%1ʔ%3", [spat2] = "%1k%3", [spat3] = "%1p%3", [spat4] = "%1t%3", [spat5] = "%1ð%3"
},
{
["ƀƀ"] = "ʔp", ["pƀ"] = "ʔp",
["đđ"] = "ʔt", ["tđ"] = "ʔt",
["ꬶꬶ"] = "ʔk", ["kꬶ"] = "ʔk",
["bm"] = "ʔp̚m", ["ꝺn"] = "ʔt̚n", ["ꬶn"] = "ʔk̚ŋ",
["mn"] = "mnː", ["mʔk"] = "mkː",
["(p[msɕ])"] = "%1ː", ["pr"] = "px",
["b([sɕ])"] = "p%1",
["nꬶ"] = "ŋk", ["([ðđʦłɕꬶ])v"] = "%1wː",
["đn"] = "tnː", ["đr"] = "tx", ["đꬶv"] = "tkwː",
["(ʦ[đlmn])"] = "%1ː", ["ʦꬶv"] = "ʦkwː",
["ʦxv"] = "ʦxw", ["đx"] = "tːx",
["(ð[mꬶ])"] = "%1ː",
["ꝺx"] = "ðx",
["(sk[l])"] = "%1ː", ["sʔk"] = "skː", ["sxv"] = "sxwː",
["([lr])ʔ([ptk])"] = "%1%2ː", ["rv"] = "rwː",
["lʦ(x?)v"] = "ɬʦ%1w", ["lʦx"] = "ɬʦx",
["(ł[mnꬶ])"] = "%1ː",
["(ꬶ[msɕl])"] = "%1ː",
["ꬶsꬶ"] = "kskː", ["ꬶsl"] = "ksł",
["ɣ([mn])"] = "ŋ%1ː", ["ɣ([vsl])"] = "kv",
},
{
["(" .. vowel .. "*)(" .. consonant .. consonant .. ")"] = function(s1, s2) return open_to_closed(s1) .. s2 end,
["(" .. vowel .. "*)(" .. consonant .. ")$"] = function(s1, s2) return open_to_closed(s1) .. s2 end,
["ɑ$"] = "a",
},
{
["į"] = "j", ["l(ʦx)v"] = "ɬ%1w",
["(" .. vowel .. ")đ$"] = "%1ʔ%1", -- -Vt becomes -VʔV (or -Vht, not considered)
},
{ --undo ligatures
["ʨ"] = "t͡ɕ", ["ʥ"] = "d͡ʑ", ["ł"] = "tɬ", ["ʣ"] = "d͡z", ["ʦ"] = "t͡s",
["ƀ"] = "p", ["ꝺ"] = "d", ["đ"] = "t", ["ꬶ"] = "ɡ", ["ɉ"] = "ɟ"
},
{
["·"] = "", --remove morpheme separator
},
}


export.prefix = m_table.listToSet({
export.prefix = m_table.listToSet({

Revision as of 18:13, 7 January 2021



local export = {}

local m_table = require("Module:table")

local sub = mw.ustring.sub
local find = mw.ustring.find
local gmatch = mw.ustring.gmatch
local gsub = mw.ustring.gsub
local u = mw.ustring.char
local split = mw.text.split

-- Combining marks that get spliced into the patterns below.
local UNRELEASED = u(0x031A) -- COMBINING LEFT ANGLE ABOVE. ̚
local NASALIZED = u(0x0303) -- COMBINING TILDE. ̃

-- Obsolete ligatures and L with stroke stand in for two-character sounds so that
-- each sound is a single code point while patterns run; the "undo ligatures"
-- group of `rules` expands them again.
-- Matches one consonant symbol, optionally followed by the unreleased mark.
local consonant = "[mnɲŋpbtdcɟkɡʔvðsɕxɣhʨʥrlɬłʣjwʦʔƀꝺđɣꬶɉ]" .. UNRELEASED .. "?"

-- front_vowel and back_vowel are *members of a character class*: every use in
-- this file wraps them in "[" .. "]".
local front_vowel = "iɪyeøɛœæa"
-- The nasalisation mark is listed as a class member of its own. A "?" used to
-- follow it here, but inside "[...]" a "?" is a literal character rather than
-- a quantifier, so it only made the vowel classes match a question mark.
local back_vowel = "uɔ" .. NASALIZED .. "ɑʊ"
-- Matches exactly one vowel code point (or the bare nasalisation mark).
local vowel = "[" .. front_vowel .. back_vowel .. "ɨ]"

-- Shared first capture of the spatN patterns: an optional morpheme separator,
-- the stress mark, any run of onset symbols, an optional unreleased mark and
-- one vowel.
local stressed_onset = "(·?ˈ[mnɲŋpbtdcɟkɡʔvðsɕxɣhʨʥrlɬłʣjwʦ⁽ʰ⁾ʔːƀꝺđɣꬶɉ]*" .. UNRELEASED .. "?" .. vowel .. ")"
-- Second capture of every spatN: one character that is not the length mark,
-- following the target symbol.
local not_long = "([^ː])"

-- Each spatN captures (1) the stressed onset plus vowel, (2) one target symbol
-- and (3) the character after it.
local spat1 = stressed_onset .. "(h)" .. not_long
local spat2 = stressed_onset .. "(ꬶ)" .. not_long
local spat3 = stressed_onset .. "(ƀ)" .. not_long
local spat4 = stressed_onset .. "(đ)" .. not_long
local spat5 = stressed_onset .. "(ꝺ)" .. not_long

--- Replace every open vowel in a vowel sequence with its closed counterpart.
-- Characters that have no entry in the mapping (already-closed vowels such as
-- ɪ/ɛ/œ, æ, ɨ, or a combining nasalisation mark) are copied through unchanged.
-- @param v string made of vowel symbols; may be empty
-- @return string `v` with each mapped character substituted
local function open_to_closed(v)
	local switch = {["ɑ"] = "a", ["e"] = "ɛ", ["i"] = "ɪ", ["ɔ"] = "ɔ", ["u"] = "ʊ", ["y"] = "œ", ["ø"] = "œ",}
	local otc = {}

	-- gmatch is the mw.ustring variant, so "." walks code points, not bytes.
	for vc in gmatch(v, ".") do
		-- The earlier gsub(vc, vc, switch[vc]) passed nil as the replacement
		-- for any unmapped character, which raises; a plain lookup with a
		-- fallback gives the same result for mapped characters and keeps the
		-- others as they are.
		otc[#otc + 1] = switch[vc] or vc
	end
	return table.concat(otc)
end

-- Rewrite rules, as an ordered list of groups. Every group maps a pattern
-- (mw.ustring pattern syntax) to its replacement: either a string that may use
-- %1..%3 back-references, or a function receiving the captures.
-- NOTE(review): each group is a hash table, so the order of rules *within* a
-- group is not defined if the consumer walks it with pairs(); several groups
-- hold overlapping keys (e.g. "ɣj" and "ɣjː") -- confirm how they are applied.
-- NOTE(review): `rules` is local and never attached to `export` in this file,
-- so nothing outside the module can read it -- confirm whether that is intended.
local rules = {
	{ --ligatures and ł
		["t[ṡɕ]"] = "ʨ", ["ṡ"] = "ɕ", ["į"] = "j", ["dj"] = "ʥ",
		["ḍ"] = "ð", ["dl"] = "ł", ["kj"] = "c", ["ḥ"] = "ʔ",
		["nj"] = "ɲ", ["ġ"] = "x", ["ts"] = "ʦ", ["g"] = "ɡ", -- IPA g
	},
	{	--long consonants
		["mm"] = "mː", ["bb"] = "pː", ["vv"] = "wː", ["nn"] = "nː",
		["dʥ"] = "ʥː", ["dd"] = "tː", ["ðð"] = "ðː", ["ss"] = "sː",
		["ɕɕ"] = "ɕː", ["rr"] = "rː", ["ll"] = "lː", ["ɡɡ"] = "kː",
		["xx"] = "xː", ["nɡ"] = "ŋː", ["hh"] = "hː", ["ʔʔ"] = "ʔː",
		["nɲ"] = "ɲː", ["hl"] = "ɬː",
	},
	{	--[[default all consonants to unstressed. [] with stroke and ꬶ (U+AB36)
			to tell apart natural and stress-borne]]
		["p"] = "ƀ", ["d"] = "ꝺ", ["t"] = "đ",
		["ɡ"] = "ɣ", ["k"] = "ꬶ",
	},
	{	--default all vowels as open (open-closed distinctions are computed later)
		["a"] = "ɑ", ["ả"] = "æː",
		["ę"] = "æ",
		["ẻ"] = "eː",
		["ỉ"] = "iː",
		["o"] = "ɔ", ["ỏ"] = "ʊː",
		["ủ"] = "uː",
		["ỷ"] = "yː",
		-- NOTE(review): "ẻ" already has its own rule above; "ẻu" may have been
		-- meant as a different spelling for long ø -- confirm.
		["ů"] = "ø", ["ẻu"] = "øː",
		["õ"] = "ɔ̃", ["õu"] = "ɔ̃ː̃",
	},
	{	--word-initial adjustments (every pattern is anchored with ^)
		["^(ˈ)ꬶ([" .. front_vowel .. "])"] = "%1c%2", --word-initial [k] palatalizes before front-vowels
		["^(ˈ[ƀđꬶc])"] = "%1ʰ", --voiceless stops word-initially become aspirated
		["^(.*·ˈ[ƀđꬶc])"] = "%1⁽ʰ⁾", --same stops after a morpheme separator get the parenthesised aspiration mark
		["^(ˈ)ɣj([" .. front_vowel .. "])"] = "%1ʣ%2", --<gį> word-initially and before front vowels is pronounced [d͡z]
		["^(ˈ)ɣj([" .. back_vowel .. "])"] = "%1ɟ%2",
		["^(ˈ)ɣ([" .. front_vowel .. "])"] = "%1ɟ%2",
	},
	{	--placeholder stops directly after the stress mark become plain stops; palatalization before i
		["ˈƀ"] = "ˈp", ["ˈđ"] = "ˈt", ["ˈꬶ"] = "ˈk", ["ˈꝺ"] = "ˈd",
		["đi"] = "ʨi", ["ꝺi"] = "ʥi", ["ɣi"] = "ɉi", ["ɣj"] = "jː", ["ɣjː"] = "ɟː",
	},
	{	--the symbol right after a stressed onset + vowel (see spat1..spat5) is replaced; %1 and %3 are kept
		[spat1] = "%1ʔ%3", [spat2] = "%1k%3", [spat3] = "%1p%3", [spat4] = "%1t%3", [spat5] = "%1ð%3"
	},
	{	--consonant clusters
		["ƀƀ"] = "ʔp", ["pƀ"] = "ʔp",
		["đđ"] = "ʔt", ["tđ"] = "ʔt",
		["ꬶꬶ"] = "ʔk", ["kꬶ"] = "ʔk",
		["bm"] = "ʔp̚m", ["ꝺn"] = "ʔt̚n", ["ꬶn"] = "ʔk̚ŋ",
		["mn"] = "mnː", ["mʔk"] = "mkː",
		["(p[msɕ])"] = "%1ː", ["pr"] = "px",
		["b([sɕ])"] = "p%1",
		["nꬶ"] = "ŋk", ["([ðđʦłɕꬶ])v"] = "%1wː",
		["đn"] = "tnː", ["đr"] = "tx", ["đꬶv"] = "tkwː",
		["(ʦ[đlmn])"] = "%1ː", ["ʦꬶv"] = "ʦkwː",
		["ʦxv"] = "ʦxw", ["đx"] = "tːx",
		["(ð[mꬶ])"] = "%1ː",
		["ꝺx"] = "ðx",
		["(sk[l])"] = "%1ː", ["sʔk"] = "skː", ["sxv"] = "sxwː",
		["([lr])ʔ([ptk])"] = "%1%2ː", ["rv"] = "rwː",
		["lʦ(x?)v"] = "ɬʦ%1w", ["lʦx"] = "ɬʦx",
		["(ł[mnꬶ])"] = "%1ː",
		["(ꬶ[msɕl])"] = "%1ː",
		["ꬶsꬶ"] = "kskː", ["ꬶsl"] = "ksł",
		-- The second replacement used to be the literal "kv", which threw the
		-- captured consonant away and turned ɣs / ɣl into "kv" as well; %1
		-- keeps whichever of v, s, l was matched (ɣv still yields "kv").
		["ɣ([mn])"] = "ŋ%1ː", ["ɣ([vsl])"] = "k%1",
	},
	{	--close the vowels standing before two consonants or before a word-final consonant
		["(" .. vowel .. "*)(" .. consonant .. consonant .. ")"] = function(s1, s2) return open_to_closed(s1) .. s2 end,
		["(" .. vowel .. "*)(" .. consonant .. ")$"] = function(s1, s2) return open_to_closed(s1) .. s2 end,
		["ɑ$"] = "a",
	},
	{
		["į"] = "j", ["l(ʦx)v"] = "ɬ%1w",
		["(" .. vowel .. ")đ$"] = "%1ʔ%1", -- -Vt becomes -VʔV (or -Vht, not considered)
	},
	{	--undo ligatures
		["ʨ"] = "t͡ɕ", ["ʥ"] = "d͡ʑ", ["ł"] = "tɬ", ["ʣ"] = "d͡z", ["ʦ"] = "t͡s",
		["ƀ"] = "p", ["ꝺ"] = "d", ["đ"] = "t", ["ꬶ"] = "ɡ", ["ɉ"] = "ɟ"
	},
	{
		["·"] = "", --remove morpheme separator
	},
}

-- Spellings listed as prefixes, converted from a list by m_table.listToSet.
export.prefix = m_table.listToSet({
	"o", "i", "u", "a",
	"ta", "t", "ma", "sa",
	"tama", "tata", "kata",
	"keu", "ga", "hę", "tů",
	"uįo", "į",
})

-- Spellings listed as suffixes, converted from a list by m_table.listToSet.
export.suffix = m_table.listToSet({
	"mi", "si", "ki",
	"i",
	"ta", "la",
	"m",
})

-- Consonant clusters listed as initial clusters (orthographic spelling),
-- converted from a list by m_table.listToSet.
export.initial_clusters = m_table.listToSet({
	-- s-
	"sv", "sġ", "sġv",
	-- t-
	"ts", "tv", "tsv", "tsġ", "tsġv",
	-- d-
	"dl",
	-- k-
	"kv", "km", "kn",
})

-- Consonant clusters listed as internal clusters (orthographic spelling),
-- grouped below by length and converted to a set by m_table.listToSet.
do
	local clusters = {
		-- two letters
		"mp", "mn", "mt", "mk", "mġ", "ms", "mṡ", "mr", "mh",
		"pr", "pm", "ps", "pṡ",
		"bm", "bs", "bṡ",
		"nt", "nd", "ns", "nṡ", "nr", "nk", "nh",
		"tr", "tk", "tġ", "tv", "tn", "ts",
		"st", "sl", "sk", "sġ", "sm", "sp", "sv",
		"ṡm", "ṡp", "ṡv", "ṡk",
		"rġ", "rh", "rm", "rp", "rv", "rt", "rk",
		"lm", "lp", "lb", "lv", "ln", "lk", "lg", "lġ", "lh", "lt", "ld", "lṡ", "lr",
		"dn", "dl", "dġ",
		"ḍm", "ḍk", "ḍv",
		"gn", "gs", "gl", "gm", "gv",
		"ġm", "ġv", "ġn", "ġl",

		-- three letters
		"mkk", "mst",
		"nst", "ndl", "ndr",
		"tsn", "tsk", "tsġ", "tkv", "tsv", "tst", "tsl", "tsm",
		"skl", "skv", "skk", "sġv",
		"ṡgv", "ṡkv",
		"rkk", "rpp", "rtt",
		"lpp", "lkk", "ltt", "lts", "ltṡ", "ldį",
		"dlv", "dlm", "dln", "dlk", "dlġ",

		-- four letters
		"ntsġ", "tskv", "tsġv", "ltsv", "ltsġ",

		-- five letters
		"ltsġv",
	}
	export.internal_clusters = m_table.listToSet(clusters)
end

-- Clusters listed under "anaptyctic" (orthographic spelling), converted to a
-- set by m_table.listToSet. How the set is consulted is not visible in this
-- module.
do
	local clusters = {
		-- m-
		"mn", "mkk",
		-- t-
		"tsn", "tsk", "tskv", "tsġ", "tk", "tkv", "tġ", "tv", "tn", "tsv", "tst", "tsm",
		-- s- and ṡ-
		"skv", "skk",
		"ṡkv",
		-- r- and l-
		"rkk", "rpp", "rv", "rtt",
		"lpp", "lkk", "ltt",
		-- d- and ḍ-
		"dlv", "dlm", "dln", "dlġ", "dġ",
		"ḍk", "ḍv",
		-- g- and ġ-
		"gn", "gm",
		"ġm", "ġv", "ġn", "ġl",
		-- p- and k-
		"pm", "ps", "pṡ",
		"km", "kv", "ks", "ksk", "kṡ", "kl",
	}
	export.anaptyctic = m_table.listToSet(clusters)
end

-- Clusters listed under "ejective", converted to a set by m_table.listToSet.
-- The list previously carried 28 placeholder "" entries after these two; they
-- made the empty string a member of the set, so they are dropped here. Add
-- further clusters as real entries.
export.ejective = m_table.listToSet({
	"tr", "pr",
})

-- Two-vowel sequences listed as diphthongs (orthographic spelling),
-- converted from a list by m_table.listToSet.
export.diphthongs = m_table.listToSet({
	"ia", "oa", "ua", "ůa", "ai",
	"ei", "ie", "oi", "ue", "ui",
	"au", "io", "uo", "ay", "ey",
	"iů", "eu", "iu", "ou", "ůi",
})

-- Three-vowel sequences listed as triphthongs (orthographic spelling),
-- converted from a list by m_table.listToSet.
export.triphthongs = m_table.listToSet({
	"iau", "iai", "iei", "ieu", "iue",
	"ioi", "oai", "eui", "uoi", "uei",
})

return export