Module:siwa-pron: Difference between revisions

From Linguifex
Jump to navigation Jump to search
No edit summary
No edit summary
 
(350 intermediate revisions by the same user not shown)
Line 1: Line 1:
local export = {}
local m_IPA = require("Module:IPA")
local m_su = require("Module:string utilities")
local m_table = require("Module:table")
local m_sm = mw.loadData("Module:siwa-pron/data")
local m_sm = mw.loadData("Module:siwa-pron/data")


Line 12: Line 7:
local u = mw.ustring.char
local u = mw.ustring.char
local split = mw.text.split
local split = mw.text.split
local gsplit = mw.text.gsplit


local UNRELEASED = u(0x031A) -- COMBINING LEFT ANGLE ABOVE. ̚
local export = {}
local NASALIZED = u(0x0303) -- COMBINING TILDE. ̃
 
local UNR = u(0x031A) -- COMBINING LEFT ANGLE ABOVE. ̚


--obsolete ligatures and L with stroke used to remove two-character hassle. will replace later
--obsolete ligatures and L with stroke used to remove two-character hassle. will replace later
local consonant = "[mnɲŋpbtdcɟkɡʔvðsɕxɣhʨʥrlɬłʣjwʦʔƀꝺđɣꬶɉ]" .. UNRELEASED .. "?"
local consonant = "[mnɲŋpbtdcɟkɡʔvðsɕxɣhħʨʥrlɬⱡłʣʦʔƀꝺđꬶɉʼⱦṁṅꬼɨ]" .. UNR .. "?"
local front_vowel = "iɪyeøɛœæa"
local front_vowel = "iɪyeøɛœæ"
local back_vowel = "uɔ" .. NASALIZED .. "?ɑʊ"
local back_vowel = "uɔõɑʊɤɯ"
local vowel = "[" .. front_vowel .. back_vowel .. "ɨ]"
local vowel = "[" .. front_vowel .. back_vowel .. "a]"
 
local unrelaxed = {
["ạ"] = "a", ["ẹ"] = "e", ["ị"] = "i", ["ọ"] = "o", ["ụ"] = "u", ["ỵ"] = "y",
}


local spat1 = "(·?ˈ[mnɲŋpbtdcɟkɡʔvðsɕxɣhʨʥrlɬłʣjwʦ⁽ʰ⁾ʔːƀꝺđɣꬶɉ]*"..UNRELEASED.."?"..vowel..")(h)([^ː])"
function spat(c)
local spat2 = "(·?ˈ[mnɲŋpbtdcɟkɡʔvðsɕxɣhʨʥrlɬłʣjwʦ⁽ʰ⁾ʔːƀꝺđɣꬶɉ]*"..UNRELEASED.."?"..vowel..")(ꬶ)([^ː])"
return "(·?ˈ[mnɲŋpbtdcɟkɡvðsɕxɣhʨʥrlɬłⱡʣjwʦ⁽ʰ⁾ʼʔːƀꝺđꬶɉṁṅꬼ]*" .. UNR .. "?" .. vowel .. ")" .. c .. "([^ː])"
local spat3 = "(·?ˈ[mnɲŋpbtdcɟkɡʔvðsɕxɣhʨʥrlɬłʣjwʦ⁽ʰ⁾ʔːƀꝺđɣꬶɉ]*"..UNRELEASED.."?"..vowel..")(ƀ)([])"
end
local spat4 = "(·?ˈ[mnɲŋpbtdcɟkɡʔvðsɕxɣhʨʥrlɬłʣjwʦ⁽ʰ⁾ʔːƀꝺđɣꬶɉ]*"..UNRELEASED.."?"..vowel..")(đ)([^ː])"
 
local spat5 = "(·?ˈ[mnɲŋpbtdcɟkɡʔvðsɕxɣhʨʥrlɬłʣjwʦ⁽ʰ⁾ʔːƀꝺđɣꬶɉ]*"..UNRELEASED.."?"..vowel..")(ꝺ)([^ː])"
local function ncategories(categories)
local out_categories = {}
for key, cat in ipairs(categories) do
out_categories[key] = "[[Category:" .. cat .. "]]"
end
 
return table.concat(out_categories, "")
end


local function open_to_closed(v)
local function open_to_closed(v)
local otc = {}
local otc = {}
local switch = {["ɑ"] = "a", ["e"] = "ɛ", ["i"] = "ɪ", ["ɔ"] = "ɔ", ["u"] = "ʊ", ["y"] = "œ", ["ø"] = "œ",}
local switch = {["ɑ"] = "a", ["æ"] = "æ", ["e"] = "ɛ", ["i"] = "ɪ",
["ɔ"] = "ɔ", ["õ"] = "õ", ["u"] = "ʊ", ["y"] = "œ", ["ɯ"] = "ɯ",
["ø"] = "ü", -- dialectal variation, will be changed later
["a"] = "a", ["ɛ"] = "ɛ", ["ɪ"] = "ɪ",
["ʊ"] = "ʊ", ["ü"] = "ü", ["œ"] = "œ",}
for vc in gmatch(v, ".") do
for vc in gmatch(v, ".") do
vc = gsub(vc, vc, switch[vc])
vc = gsub(vc, vc, switch[vc])
Line 38: Line 50:
return table.concat(otc)
return table.concat(otc)
end
end
local rules = {
{ --ligatures and ł
["t[ṡɕ]"] = "ʨ", ["ṡ"] = "ɕ", ["į"] = "j", ["dj"] = "ʥ",
["ḍ"] = "ð", ["dl"] = "ł", ["kj"] = "c", ["ḥ"] = "ʔ",
["nj"] = "ɲ", ["ġ"] = "x", ["ts"] = "ʦ", ["g"] = "ɡ", -- IPA g
},
{ --long consonants
["mm"] = "mː", ["bb"] = "pː", ["vv"] = "wː", ["nn"] = "nː",
["dʥ"] = "ʥː", ["dd"] = "tː", ["ðð"] = "ðː", ["ss"] = "sː",
["ɕɕ"] = "ɕː", ["rr"] = "rː", ["ll"] = "lː", ["ɡɡ"] = "kː",
["xx"] = "xː", ["nɡ"] = "ŋː", ["hh"] = "hː", ["ʔʔ"] = "ʔː",
["nɲ"] = "ɲː", ["hl"] = "ɬː",
},
{ --[[default all consonants to unstressed. [] with stroke and ꬶ (U+AB36)
to tell apart natural and stress-borne]]
["p"] = "ƀ", ["d"] = "ꝺ", ["t"] = "đ",
["ɡ"] = "ɣ", ["k"] = "ꬶ",
},
{ --default all vowels as open (open-closed distinctions are computed later)
["a"] = "ɑ", ["ả"] = "æː",
["ę"] = "æ",
["ẻ"] = "eː",
["ỉ"] = "iː",
["o"] = "ɔ", ["ỏ"] = "ʊː",
["ủ"] = "uː",
["ỷ"] = "yː",
["ů"] = "ø", ["ẻu"] = "øː",
["õ"] = "ɔ̃", ["õu"] = "ɔ̃ː̃",
},
{
["^(ˈ)ꬶ([" .. front_vowel .. "])"] = "%1c%2", --word-initial [k] palatalizes before front-vowels
["^(ˈ[ƀđꬶc])"] = "%1ʰ", --voiceless stops word-initially become aspirated
["^(.*·ˈ[ƀđꬶc])"] = "%1⁽ʰ⁾",
["^(ˈ)ɣj([" .. front_vowel .. "])"] = "%1ʣ%2", --<gį> word-initially and before front vowels is pronounced [d͡z]
["^(ˈ)ɣj([" .. back_vowel .. "])"] = "%1ɟ%2",
["^(ˈ)ɣ([" .. front_vowel .. "])"] = "%1ɟ%2",
},
{
["ˈƀ"] = "ˈp", ["ˈđ"] = "ˈt", ["ˈꬶ"] = "ˈk", ["ˈꝺ"] = "ˈd",
["đi"] = "ʨi", ["ꝺi"] = "ʥi", ["ɣi"] = "ɉi", ["ɣj"] = "jː", ["ɣjː"] = "ɟː",
},
{
[spat1] = "%1ʔ%3", [spat2] = "%1k%3", [spat3] = "%1p%3", [spat4] = "%1t%3", [spat5] = "%1ð%3"
},
{
["ƀƀ"] = "ʔp", ["pƀ"] = "ʔp",
["đđ"] = "ʔt", ["tđ"] = "ʔt",
["ꬶꬶ"] = "ʔk", ["kꬶ"] = "ʔk",
["bm"] = "ʔp̚m", ["ꝺn"] = "ʔt̚n", ["ꬶn"] = "ʔk̚ŋ",
["mn"] = "mnː", ["mʔk"] = "mkː",
["(p[msɕ])"] = "%1ː", ["pr"] = "px",
["b([sɕ])"] = "p%1",
["nꬶ"] = "ŋk", ["([ðđʦłɕꬶ])v"] = "%1wː",
["đn"] = "tnː", ["đr"] = "tx", ["đꬶv"] = "tkwː",
["(ʦ[đlmn])"] = "%1ː", ["ʦꬶv"] = "ʦkwː",
["ʦxv"] = "ʦxw", ["đx"] = "tːx",
["(ð[mꬶ])"] = "%1ː",
["ꝺx"] = "ðx",
["(sk[l])"] = "%1ː", ["sʔk"] = "skː", ["sxv"] = "sxwː",
["([lr])ʔ([ptk])"] = "%1%2ː", ["rv"] = "rwː",
["lʦ(x?)v"] = "ɬʦ%1w", ["lʦx"] = "ɬʦx",
["(ł[mnꬶ])"] = "%1ː",
["(ꬶ[msɕl])"] = "%1ː",
["ꬶsꬶ"] = "kskː", ["ꬶsl"] = "ksł",
["ɣ([mn])"] = "ŋ%1ː", ["ɣ([vsl])"] = "kv",
},
{
["(" .. vowel .. "*)(" .. consonant .. consonant .. ")"] = function(s1, s2) return open_to_closed(s1) .. s2 end,
["(" .. vowel .. "*)(" .. consonant .. ")$"] = function(s1, s2) return open_to_closed(s1) .. s2 end,
["ɑ$"] = "a",
},
{
["į"] = "j", ["l(ʦx)v"] = "ɬ%1w",
["(" .. vowel .. ")đ$"] = "%1ʔ%1", -- -Vt becomes -VʔV (or -Vht, not considered)
},
{ --undo ligatures
["ʨ"] = "t͡ɕ", ["ʥ"] = "d͡ʑ", ["ł"] = "tɬ", ["ʣ"] = "d͡z", ["ʦ"] = "t͡s",
["ƀ"] = "p", ["ꝺ"] = "d", ["đ"] = "t", ["ꬶ"] = "ɡ", ["ɉ"] = "ɟ"
},
{
["·"] = "", --remove morpheme separator
},
}


function export.morphemes(word)
function export.morphemes(word)
Line 139: Line 67:
end
end
return table.concat(pss,"·")
local _, n = gsub(table.concat(pss,"·"), "ˈ", "")
return n>=2 and gsub(table.concat(pss,"·"), "ˈ", "", n-1) or table.concat(pss, "·")
end
end


function export.crux(term)
local function detect_dialect(term)
local IPA = {}
if find(term, "[ṁṅłƛ]") then
return "w"
elseif find(term, "^t[mk]") or find(term, "^sm") or find(term, "^mġ") or find(term,"^ų") or find(term, "̊") then
return "e"
end
end
 
local anaptyctic = {
{"mn", "ːmɨnː"}, {"mʔk", "ːmɨʔkː"},
{"([^ˈ])ʦ([nxm])", "%1ːʣɨ%2ː"}, {"([^ˈ])ʦꬶv", "%1ːʣɨkwː"}, {"ʦꬶ", "ːʣɨʔkː"},
{"([^ˈ])ʦv", "%1ːʣɨwː"}, {"ʦđ", "ːʣɨʔtː"},
{"đꬶv", "ːdɨkwː"}, {"đꬶ", "ːdɨʔkː"}, {"đ([xn])", "ːdɨ%1ː"},
{"([^ˈ])đv", "%1ːdɨwː"},
{"sꬶv", "skɨwː"}, {"sʔk", "ːsɨʔkː"}, {"ɕꬶv", "ɕkɨwː"},
{"([lr])(ʔ[kpt])", "ː%1ɨ%2ː"}, {"ⱡ([mnx])", "ːⱡɨ%1ː"}, {"ꝺx", "ːðɨɣː"},
{"ðꬶ", "ːðɨʔkː"}, {"ɣ([nm])", "ːŋɨ%1ː"}, {"([ⱡrð])v", "ː%1ɨwː"},
{"x([mnl])", "ːɣɨ%1ː"}, {"^(ˈ?)xv", "%1ɣɨwː"}, {"([^s])xv", "%1ːɣɨwː"}, {"ƀ([msɕ])", "ːbɨ%1ː"},
{"([^ˈ])ꬶ([msɕl])", "%1ːɡɨ%2ː"}, {"(.)ꬶv", "%1ːɡɨwː"}, {"ꬶsꬶ", "ːksɨʔkː"},
}
 
local rules1 = {
{".", {
["ḍ"] = "ð", ["ṡ"] = "ɕ",
["ḥ"] = "ʔ", ["į"] = "j",
["ġ"] = "x", ["g"] = "ɡ", -- IPA g
["ų"] = "w", ["ł"] = "ɬ",
}},
{"tɕ", "ʨ"}, {"dj", "ʥ"}, {"dl", "ⱡ"}, {"kj", "c"}, {"nj", "ɲ"},  {"ts", "ʦ"}, {"o̊", "ɯ"},
{"([ạẹịọụỵ])", function(v) return unrelaxed[v] .. "ⱦ" end},
-- default all consonants to unstressed. [] with stroke and ꬶ (U+AB36) to tell apart natural and stress-borne
{"p", "ƀ"}, {"d", "ꝺ"}, {"t", "đ"}, {"ɡ", "ɣ"}, {"k", "ꬶ"},
-- long consonants
{"mm", "mː"}, {"bb", "pː"}, {"vv", "wː"}, {"nn", "nː"},
{"ꝺʥ", "ʥː"}, {"ꝺꝺ", "tː"}, {"ðð", "ðː"}, {"ss", "sː"},
{"ɕɕ", "ɕː"}, {"rr", "rː"}, {"ll", "lː"}, {"ɣɣ", "kː"},
{"xx", "xː"}, {"nɣ", "ŋː"}, {"hh", "hː"}, {"ʔʔ", "ʔː"},
{"nɲ", "ɲː"}, {"hl", "ɬː"}, {"ƛƛ", "ɬː"}, {"đʨ", "ʨː"},
-- default all vowels to open (open-closed distinctions are computed later); long vowels don't have closed counterparts
{"a", "ɑ"}, {"ę", "æ"}, {"o", "ɔ"}, {"ů", "ø"}, -- õ is conserved to avoid two characters
{"ả", "æː"}, {"ẻu", "øː"}, {"ẻ", "eː"}, {"ỉ", "iː"}, {"ỏ", "ʊː"}, {"ủ", "uː"}, {"ỷ", "yː"}, {"õu", "õː"},
{"ey", "eø"}, {"ɑy", "æø"}, -- y-final diphthongs
{"^(ˈ?)ꬶ([" .. front_vowel .. "])", "%1c%2"},
{"^(ˈ?)ɣ([" .. front_vowel .. "])", "%1ɟ%2"}, -- word-initial [k] and [g] palatalize before front vowels
{"^(ˈ?[ƀđꬶc])([^mn])", "%1ʰ%2"}, -- voiceless stops word-initially become aspirated
{"^(ˈ?)đʰꬶ", "%1tk"}, {"^(ˈ?)đʰm", "%1tm"}, -- Far Eastern initial consonant clusters
{"(.*·ˈ[ƀđꬶc])", "%1⁽ʰ⁾"}, -- although after a prefix they may or may not
{"^(ˈ?)ɣj([" .. front_vowel .. "])", "%1ʣ%2"}, -- <gį> word-initially and before front vowels is pronounced [d͡z]
{"^(ˈ?)ɣj([" .. back_vowel .. "])", "%1ɟ%2"}, -- and [ɟ] before back vowels
-- preaspirated consonants
{"[hʔ](ʦ[ꬶx])", "ħ%1"}, {"[hʔ](đ[vx])", "ħ%1"},
{"h(ː[wj])", "ħ%1"}, {"[hʔ]ꬶ", "ħk"}, {"[hʔ]đ", "ħt"}, {"[hʔ]ƀ", "ħp"}, {"[hʔ]ꝺ", "ħd"}, {"[hʔ]ɣ", "ħɡ"},
{"[hʔ]([pbtdkmnlsrʦ][^" .. UNR .. "])", "ħ%1"},
{"ƀƀ", "ʔp"}, {"đđ", "ʔt"}, {"ꬶꬶ", "ʔk"}, {"ꝺ$", "ʥ"},
{"bm", "ʔp" .. UNR .. "m"}, {"ꝺn", "ʔt" .. UNR .. "n"}, {"ꬶn", "ʔk" .. UNR .. "ŋ"},
{"ƀ⁽ʰ⁾", "p⁽ʰ⁾"}, {"đ⁽ʰ⁾", "t⁽ʰ⁾"}, {"ꬶ⁽ʰ⁾", "k⁽ʰ⁾"}, 
-- other stem- and/or word-initial configurations
{"([^ˈ])đi", "%1ʨi"}, {"([^ˈ])ꝺi", "%1ʥi"}, {"([^ˈ])ɣi", "%1ɟi"}, {"([^ˈ])ɣj", "%1jː"}, {"([^ˈ])ɣjː", "%1ɟː"},
{"^([ˈˌ]?)ƀ", "%1p"}, {"^([ˈˌ]?)đ", "%1t"}, {"^([ˈˌ]?)ꬶ", "%1k"}, {"^([ˈˌ]?)ꝺ", "%1d"}, {"([ˈˌ])ɣ", "%1ɡ"}
}
local rules2 = {
-- stressed consonants (coda of stressed syllables without long vowels, diphthongs, or triphthongs)
{spat("h"), "%1ʔ%2"}, {spat("ꬶ"), "%1k%2"}, {spat("ƀ"), "%1p%2"},
{spat("đ"), "%1t%2"}, {spat("ꝺ"), "%1d%2"}, {spat("ɣ"), "%1ɡ%2"},
-- internal consonant clusters
{"[ƀp][ƀp]", "ʔp"},
{"[đt][đt]", "ʔt"},
{"[ꬶk][ꬶkc]", "ʔk"},-- {"kc", "ʔc"},
{"mn", "mnː"}, {"mʔk", "mkː"}, {"m[ƀp]", "mp"}, {"m(s?)[đt]", "m%1t"}, {"m[ꬶk]", "mk"}, {"m[ʔh]", "mh"},
{"[ƀp]([msɕ])", "p%1ː"}, {"[ƀp]r", "px"},
{"b([sɕ])", "p%1"},
{"n[ꝺd]", "nd"}, {"n[đt]", "nt"}, {"ŋʔk", "ŋkː"}, {"n[ꬶk]([^ː])", "ꬼk%1"}, {"([ðđʦⱡɕꬶrkt])v", "%1wː"},
{"[đt]n", "tnː"}, {"[đt][ꬶk]v", "tkwː"},
{"ʦ[đt]", "ʦtː"}, {"ʦ[ꬶk]", "ʦkː"}, {"(ʦ[lmn])", "%1ː"}, {"ʦ[ꬶk]v", "ʦkwː"},
{"ʦxv", "ʦxw"}, {"[đt]x", "tːx"}, {"[đt][ꬶk]", "tk"}, {"[đt]r", "tx"},
{"ðm", "ðmː"}, {"ð[ꬶk]", "ðkː"}, {"ð[ɣɡ]", "ðɡ"},
{"[ꝺd]x", "ðx"},
{"(s[ꬶk]l)", "%1ː"}, {"sʔk", "skː"}, {"sxv", "sxw"},
{"([srl])[đt]", "%1t"}, {"([sɕrl])[ꬶk]", "%1k"}, {"([sɕrl])[ƀp]", "%1p"},
{"([lr])ʔ([ptk])", "%1%2ː"}, {"l[ɣɡ]", "lɡ"},
{"mʔk", "mkː"},
{"lʦ(x?)v", "ɬʦ%1w"}, {"lʦx", "ɬʦx"},
{"(ⱡ[mnꬶk])", "%1ː"},
{"[ꬶk]([msɕ])", "k%1ː"}, {"[ꬶk]s[ꬶk]", "kskː"}, {"[ꬶk]sl", "ksⱡ"},
{"[ɣɡ]([mn])", "ŋ%1ː"}, {"[ɣɡ]([vsl])", "k%1"},
-- closed vowels
{"(" .. vowel .. "*)(·?" .. consonant .. "·?ˈ?" .. consonant .. ")", function(s1, s2) return open_to_closed(s1) .. s2 end},
{"(" .. vowel .. "*)(" .. consonant .. ")$", function(s1, s2) return open_to_closed(s1) .. s2 end},
{"(" .. vowel .. "*)(ː?" .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end},
{"(" .. vowel .. "*)(ː?" .. consonant .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end},
{"(" .. vowel .. "*)(" .. consonant .. "ː" .. consonant .. ")", function(s1, s2) return open_to_closed(s1) .. s2 end},
{"ɑ$", "a"},
}
local final = {
{"k(["..front_vowel.."])", "c%1"}, {"[ɡꬶɣ](["..front_vowel.."])", "ɟ%1"},
{"([iɪ])[đt]$", "%1ʨ"}, {"(" .. vowel .. ")[đt]$", "%1ʔ%1̆"}, {"(" .. vowel .. "ː)[đt]$", "%1ʔ"}, -- -Vt becomes -VʔV̆ or -V̄ʔ
--undo ligatures
{"ʨ", "t͡ɕ"}, {"ʥ", "d͡ʑ"}, {"ⱡ", "tɬ"}, {"ʣ", "d͡z"}, {"ʦ", "t͡s"},
{"ƀ", "b"}, {"ꝺ", "ð"}, {"đ", "d"}, {"ꬶ", "ɡ"}, {"õ", "ɔ̃"},
{"ṁ", "m̥"}, {"ṅ", "n̥"}, {"ħ", "h"}, {"ɡ⁽", "k⁽"}, {"ꬼ", "ŋ̊"}, {"(t⁽ʰ⁾)ɟ", "%1c"},{"(tʼ)ɟ", "%1c"},
{"k(ː?["..front_vowel.."])", "c%1"}, {"ɡ(ː[" ..front_vowel.."])", "c%1"}, {"dɟ", "tc"}, {"ƛ", "tɬʼ"},
-- remove morpheme separator, possible double long vowel markers, and repeated secondary stress markers
{"[·ⱦ]", ""}, {"ːː", "ː"}, {"(ˈ[^ˌ]*)ˌ", "%1"}, {"([^ˈ])-", "%1‿"},
}
function export.crux(term, a, e, w)
term=mw.ustring.lower(term)
term=mw.ustring.lower(term)
term=export.morphemes(term)
term=export.morphemes(term)
for _, rule in ipairs(rules) do
for _, rule in ipairs(rules1) do
for regex, replacement in pairs(rule) do
term = gsub(term, rule[1], rule[2])
term = gsub(term, regex, replacement)
end
 
if w then
term = gsub(term, "ꬶl", "ʔⱡ")
for _, anap in ipairs(anaptyctic) do
term = gsub(term, anap[1], anap[2])
end
end
elseif e then
term = gsub(term, "(.)⁽ʰ⁾", "%1ʼ")
term = gsub(term, "ꬶʼ", "kʼ"); term = gsub(term, "ꬶʼ", "kʼ"); term = gsub(term, "ƀʼ", "pʼ"); term = gsub(term, "đʼ", "tʼ")
term = gsub(term, "ƀr", "pʼqʼ")
term = gsub(term, "đr", "tʼqʼ")
term = gsub(term, "ʦx", "ʦʼqʼ")
term = gsub(term, "ꬶl", "klː")
else
term = gsub(term, "ꬶl", "klː")
end
for _, rrule in ipairs(rules2) do
term = gsub(term, rrule[1], rrule[2])
end
if w then
term = gsub(term, "ü", "ɤ")
else
term = gsub(term, "ü", "œ")
end
for _, f in ipairs(final) do
term = gsub(term, f[1], f[2])
end
return term
end
function format_IPA(items)
return "[[w:IPA chart|IPA]]<sup>([[IPA for Siwa|key]])</sup>:&#32;" .. IPA_span(items)
end
function IPA_span(items)
local bits = {}
for _, item in ipairs(items) do
local bit = "<span style=\"font-size:110%;font-family:Gentium,'DejaVu Sans','Segoe UI',sans-serif>" .. item.pron .. "</span>"
table.insert(bits, bit)
end
return table.concat(bits)
end
function line_format(pronunciation, dialect)
local full_pronunciations = {}
local IPA_args = {{pron = '[' .. pronunciation .. ']'}}
table.insert(full_pronunciations, format_IPA(IPA_args))
return "(''" .. table.concat(dialect, ", ") .. "'')" .. ' ' .. table.concat(full_pronunciations, ' or ')
end
function separate_word(term, a, e, w)
local result = {}
for word in gsplit(term, " ") do
table.insert(result, export.crux(word, a, e, w))
end
end
table.insert(IPA, term)
return table.concat(result, " ")
return table.concat(IPA)
end
end


function export.show(frame)
function export.show(frame)
local parent_args = frame:getParent().args
local params = {
local params = {
[1] = { default = mw.title.getCurrentTitle().nsText == 'Template' and "uįo·sauṡṡi" or mw.title.getCurrentTitle().text },
[1] = { default = mw.title.getCurrentTitle().nsText == 'Template' and "uįo·sauṡṡi" or mw.title.getCurrentTitle().text },
["a"] = {type = 'boolean', default = true},
["e"] = {type = 'boolean', default = true},
["w"] = {type = 'boolean', default = true},
["dia"] = {},
["rs"] = {type = 'boolean'},
}
}
local args = require("Module:parameters").process(parent_args, params)
local args = require("Module:parameters").process(frame:getParent().args, params)
local term = args[1]
local term = args[1]
local categories = {}
local ipa = export.crux(term)
local ipa = "* "
local IPA_key = "IPA for Siwa"
if detect_dialect(term) == "w" then
local key_link = "[[".. IPA_key .."|key]]"
args.e = false; args.a = false
 
ipa = ipa .. line_format(separate_word(term, false, false, true), {args.dia or 'Western'})
local prefix = "[[w:IPA chart|IPA]]<sup>(" .. key_link .. ")</sup>:&#32;"
if mw.title.getCurrentTitle().nsText ~= 'Template' then
local accent="(\''Aingo\'') "
table.insert(categories, "Western Siwa terms")
table.insert(categories, "Siwa terms with Western IPA pronunciation")
end
elseif detect_dialect(term) == "e" then
args.w = false; args.a = false
ipa = ipa .. line_format(separate_word(term, false, true, false), {args.dia or 'Eastern'})
if mw.title.getCurrentTitle().nsText ~= 'Template' then
table.insert(categories, "Eastern Siwa terms")
table.insert(categories, "Siwa terms with Eastern IPA pronunciation")
end
elseif args.a then
ipa = ipa .. line_format(separate_word(term, true, false, false), {args.dia or 'Aingo'})
end
if args.e and separate_word(term, true, false, false) ~= separate_word(term, false, true, false) then
if args.a  then
ipa = ipa .. "\n* "
end
ipa = ipa .. line_format(separate_word(term, true, true, false), {args.dia or 'Eastern'})
if mw.title.getCurrentTitle().nsText ~= 'Template' then
table.insert(categories, "Siwa terms with Eastern IPA pronunciation")
end
end
if args.w and separate_word(term, true, false, false) ~= separate_word(term, false, false, true) then
if args.a or args.e then
ipa = ipa .. "\n* "
end
ipa = ipa .. line_format(separate_word(term, true, false, true), {args.dia or 'Western'})
if find(ipa, "ɤ") then
ipa = ipa .. "\n** "
ipa = ipa .. line_format(gsub(separate_word(term, false, false, true),"ɤ","ɵ"), {args.dia or 'Regna'})
end
if mw.title.getCurrentTitle().nsText ~= 'Template' then
table.insert(categories, "Siwa terms with Western IPA pronunciation")
end
end
ipa = "<span style=\"font-size:110%;font-family:Gentium,'DejaVu Sans','Segoe UI',sans-serif>[" .. ipa .. "]</span>"
if args.rs then ipa = gsub(ipa, "[ˌˈ]", "") end
ipa = accent..prefix..ipa
return ipa
return ipa .. ncategories(categories)
end
end


return export
return export

Latest revision as of 13:31, 10 August 2022



local m_sm = mw.loadData("Module:siwa-pron/data")

local sub = mw.ustring.sub
local find = mw.ustring.find
local gmatch = mw.ustring.gmatch
local gsub = mw.ustring.gsub
local u = mw.ustring.char
local split = mw.text.split
local gsplit = mw.text.gsplit

local export = {}

-- Combining mark that the rule tables attach to a stop (e.g. "ʔp" .. UNR .. "m").
local UNR = u(0x031A) -- COMBINING LEFT ANGLE ABOVE. ̚

-- Single-character stand-ins (obsolete ligatures, letters with stroke, etc.) are
-- used for sounds that need several characters in IPA, so that every sound is
-- exactly one character inside the pattern classes below. The `final` rule list
-- expands the stand-ins again at the end of the conversion.
--obsolete ligatures and L with stroke used to remove two-character hassle. will replace later
-- Pattern fragment: one consonant symbol, optionally followed by UNR.
local consonant = "[mnɲŋpbtdcɟkɡʔvðsɕxɣhħʨʥrlɬⱡłʣʦʔƀꝺđꬶɉʼⱦṁṅꬼɨ]" .. UNR .. "?"
-- Bare character lists (no brackets) so they can be embedded in larger classes.
local front_vowel = "iɪyeøɛœæ"
local back_vowel = "uɔõɑʊɤɯ"
-- Pattern class matching exactly one vowel: front vowels, back vowels and "a".
local vowel = "[" .. front_vowel .. back_vowel .. "a]"

-- Maps the dot-below vowel letters to their plain counterparts; rules1 uses it
-- and appends the marker "ⱦ" to the result.
local unrelaxed = {
	["ạ"] = "a", ["ẹ"] = "e", ["ị"] = "i", ["ọ"] = "o",	["ụ"] = "u", ["ỵ"] = "y",
}

--- Builds the pattern used by rules2 to find the coda consonant of a stressed syllable.
-- Declared local: it is only used while building rules2 further down this
-- file and must not leak into the shared global environment.
-- @param c pattern fragment for the consonant to look for (inserted verbatim,
--          so it may itself be a pattern).
-- @return a pattern with two captures:
--         %1 = optional "·", the stress mark "ˈ", any run of onset symbols,
--              an optional UNR mark and exactly one vowel;
--         %2 = the single character following `c`, which must not be "ː".
local function spat(c)
	local onset = "[mnɲŋpbtdcɟkɡvðsɕxɣhʨʥrlɬłⱡʣjwʦ⁽ʰ⁾ʼʔːƀꝺđꬶɉṁṅꬼ]*"
	return "(·?ˈ" .. onset .. UNR .. "?" .. vowel .. ")" .. c .. "([^ː])"
end

-- Renders a list of category names as wikitext category links.
-- categories: sequence of category names, without the "Category:" prefix.
-- Returns all links concatenated with no separator ("" for an empty list).
local function ncategories(categories)
	local links = {}
	for _, name in ipairs(categories) do
		table.insert(links, "[[Category:" .. name .. "]]")
	end
	return table.concat(links)
end

-- Open vowel -> closed-syllable counterpart. Vowels that are already in their
-- closed form map to themselves. Built once instead of on every call.
local OPEN_TO_CLOSED = {
	["ɑ"] = "a", ["æ"] = "æ", ["e"] = "ɛ", ["i"] = "ɪ",
	["ɔ"] = "ɔ", ["õ"] = "õ", ["u"] = "ʊ", ["y"] = "œ", ["ɯ"] = "ɯ",
	["ø"] = "ü", -- dialectal variation, will be changed later
	["a"] = "a", ["ɛ"] = "ɛ", ["ɪ"] = "ɪ",
	["ʊ"] = "ʊ", ["ü"] = "ü", ["œ"] = "œ",
}

--- Converts every vowel of `v` to its closed-syllable form.
-- @param v string of vowel characters (the rule tables pass a run matched by
--          the `vowel` class; it may be empty).
-- @return the converted string. A character without an entry in the map is
--         kept unchanged; previously such a character (e.g. "ɤ", which is in
--         the vowel class but not in the map) raised an error because a nil
--         replacement was handed to gsub.
local function open_to_closed(v)
	local otc = {}
	for vc in gmatch(v, ".") do
		-- Direct lookup: no need to run a substitution on a single character.
		otc[#otc + 1] = OPEN_TO_CLOSED[vc] or vc
	end
	return table.concat(otc)
end

--- Splits a word into morphemes at "·" and adds stress marks.
-- Each morpheme listed in the data module's `suffix` table is left unmarked,
-- each one listed in `prefix` gets secondary stress "ˌ", and every other
-- morpheme gets primary stress "ˈ". Only the last primary stress mark is kept.
-- @param word the term, with morphemes optionally separated by "·".
-- @return the morphemes joined by "·" again, with stress marks added.
function export.morphemes(word)
	-- split() returns the whole word as a single morpheme when there is no "·".
	local pss = split(word, "·")

	for i, m in ipairs(pss) do
		-- NOTE(review): the original tested `gmatch(word, "·")` before splitting
		-- and `gmatch(table.concat(pss), "[ˈˌ]")` next to the suffix check.
		-- gmatch returns an iterator function, which is always truthy, so
		-- neither test ever had an effect. The effective behaviour is kept
		-- here. If the intent was "leave a suffix unmarked only when a stress
		-- mark is already present", this needs find() instead - confirm
		-- before changing, since it alters the output for suffix-only words.
		if m_sm.suffix[m] then
			-- suffixes carry no stress mark
		elseif m_sm.prefix[m] then
			pss[i] = "ˌ" .. m
		else
			pss[i] = "ˈ" .. m
		end
	end

	local joined = table.concat(pss, "·")
	local _, n = gsub(joined, "ˈ", "")

	if n >= 2 then
		-- Drop the first n-1 primary stress marks; parentheses discard the
		-- replacement count that gsub returns as a second value.
		return (gsub(joined, "ˈ", "", n - 1))
	end
	return joined
end

-- Guesses the dialect of a term from its spelling.
-- Returns "w" when the term contains one of ṁ ṅ ł ƛ, "e" when it matches one
-- of the patterns listed below, and nothing otherwise.
local function detect_dialect(term)
	if find(term, "[ṁṅłƛ]") then
		return "w"
	end

	local eastern_patterns = {"^t[mk]", "^sm", "^mġ", "^ų", "̊"}
	for _, pattern in ipairs(eastern_patterns) do
		if find(term, pattern) then
			return "e"
		end
	end
end

-- Ordered list of {pattern, replacement} pairs. export.crux applies them one
-- after another with gsub, in this order, and only when its `w` argument is
-- set. Every replacement inserts the vowel "ɨ" into the matched cluster.
-- "%1"/"%2" in a replacement refer to the captures of its pattern.
local anaptyctic = {
	{"mn", "ːmɨnː"}, {"mʔk", "ːmɨʔkː"},
	
	-- clusters starting with ʦ
	{"([^ˈ])ʦ([nxm])", "%1ːʣɨ%2ː"}, {"([^ˈ])ʦꬶv", "%1ːʣɨkwː"}, {"ʦꬶ", "ːʣɨʔkː"}, 
	{"([^ˈ])ʦv", "%1ːʣɨwː"}, {"ʦđ", "ːʣɨʔtː"},
	
	-- clusters starting with đ
	{"đꬶv", "ːdɨkwː"}, {"đꬶ", "ːdɨʔkː"}, {"đ([xn])", "ːdɨ%1ː"},
	{"([^ˈ])đv", "%1ːdɨwː"},
	
	-- clusters starting with s or ɕ
	{"sꬶv", "skɨwː"}, {"sʔk", "ːsɨʔkː"}, {"ɕꬶv", "ɕkɨwː"},
	
	{"([lr])(ʔ[kpt])", "ː%1ɨ%2ː"}, {"ⱡ([mnx])", "ːⱡɨ%1ː"}, {"ꝺx", "ːðɨɣː"},
	
	{"ðꬶ", "ːðɨʔkː"}, {"ɣ([nm])", "ːŋɨ%1ː"}, {"([ⱡrð])v", "ː%1ɨwː"},
	
	{"x([mnl])", "ːɣɨ%1ː"}, {"^(ˈ?)xv", "%1ɣɨwː"}, {"([^s])xv", "%1ːɣɨwː"}, {"ƀ([msɕ])", "ːbɨ%1ː"},
	
	{"([^ˈ])ꬶ([msɕl])", "%1ːɡɨ%2ː"}, {"(.)ꬶv", "%1ːɡɨwː"}, {"ꬶsꬶ", "ːksɨʔkː"},
}

-- First rule set: ordered list of {pattern, replacement} pairs that export.crux
-- applies with gsub, in this order, to every term regardless of dialect.
-- At this point the term has already been lower-cased and given stress marks
-- by export.morphemes. Because each rule sees the output of the previous one,
-- the order of the entries matters.
local rules1 = {
	-- Single-character respellings: the replacement is a table, so every
	-- character is looked up in it and left unchanged when it has no entry.
	{".", {
		["ḍ"] = "ð", ["ṡ"] = "ɕ",
		["ḥ"] = "ʔ", ["į"] = "j",
		["ġ"] = "x", ["g"] = "ɡ", -- IPA g
		["ų"] = "w", ["ł"] = "ɬ",
	}},
	-- digraphs -> single-character stand-ins
	{"tɕ", "ʨ"}, {"dj", "ʥ"}, {"dl", "ⱡ"}, {"kj", "c"}, {"nj", "ɲ"},  {"ts", "ʦ"}, {"o̊", "ɯ"},
	
	-- dot-below vowels become the plain vowel followed by the marker "ⱦ"
	-- (the marker is removed again by the `final` rules)
	{"([ạẹịọụỵ])", function(v) return unrelaxed[v] .. "ⱦ" end},
	
	-- default all consonants to unstressed. [] with stroke and ꬶ (U+AB36) to tell apart natural and stress-borne
	{"p", "ƀ"}, {"d", "ꝺ"}, {"t", "đ"}, {"ɡ", "ɣ"}, {"k", "ꬶ"},
	
	-- long consonants
	{"mm", "mː"}, {"bb", "pː"}, {"vv", "wː"}, {"nn", "nː"},
	{"ꝺʥ", "ʥː"}, {"ꝺꝺ", "tː"}, {"ðð", "ðː"}, {"ss", "sː"},
	{"ɕɕ", "ɕː"}, {"rr", "rː"}, {"ll", "lː"}, {"ɣɣ", "kː"},
	{"xx", "xː"}, {"nɣ", "ŋː"}, {"hh", "hː"}, {"ʔʔ", "ʔː"},
	{"nɲ", "ɲː"}, {"hl", "ɬː"}, {"ƛƛ", "ɬː"}, {"đʨ", "ʨː"},
	
	-- default all vowels to open (open-closed distinctions are computed later); long vowels don't have closed counterparts
	{"a", "ɑ"}, {"ę", "æ"}, {"o", "ɔ"}, {"ů", "ø"}, -- õ is conserved to avoid two characters
	{"ả", "æː"}, {"ẻu", "øː"}, {"ẻ", "eː"}, {"ỉ", "iː"}, {"ỏ", "ʊː"}, {"ủ", "uː"}, {"ỷ", "yː"}, {"õu", "õː"},
	{"ey", "eø"}, {"ɑy", "æø"}, -- y-final diphthongs
	
	{"^(ˈ?)ꬶ([" .. front_vowel .. "])", "%1c%2"}, 
	{"^(ˈ?)ɣ([" .. front_vowel .. "])", "%1ɟ%2"}, -- word-initial [k] and [g] palatalize before front vowels 
	{"^(ˈ?[ƀđꬶc])([^mn])", "%1ʰ%2"}, -- voiceless stops word-initially become aspirated
	{"^(ˈ?)đʰꬶ", "%1tk"}, {"^(ˈ?)đʰm", "%1tm"}, -- Far Eastern initial consonant clusters
	{"(.*·ˈ[ƀđꬶc])", "%1⁽ʰ⁾"}, -- although after a prefix they may or may not
	{"^(ˈ?)ɣj([" .. front_vowel .. "])", "%1ʣ%2"}, -- <gį> word-initially and before front vowels is pronounced [d͡z]
	{"^(ˈ?)ɣj([" .. back_vowel .. "])", "%1ɟ%2"}, -- and [ɟ] before back vowels
	
	-- preaspirated consonants
	{"[hʔ](ʦ[ꬶx])", "ħ%1"}, {"[hʔ](đ[vx])", "ħ%1"},
	{"h(ː[wj])", "ħ%1"}, {"[hʔ]ꬶ", "ħk"}, {"[hʔ]đ", "ħt"}, {"[hʔ]ƀ", "ħp"}, {"[hʔ]ꝺ", "ħd"}, {"[hʔ]ɣ", "ħɡ"},
	{"[hʔ]([pbtdkmnlsrʦ][^" .. UNR .. "])", "ħ%1"},
	
	{"ƀƀ", "ʔp"}, {"đđ", "ʔt"}, {"ꬶꬶ", "ʔk"}, {"ꝺ$", "ʥ"},
	{"bm", "ʔp" .. UNR .. "m"}, {"ꝺn", "ʔt" .. UNR .. "n"}, {"ꬶn", "ʔk" .. UNR .. "ŋ"},
	{"ƀ⁽ʰ⁾", "p⁽ʰ⁾"}, {"đ⁽ʰ⁾", "t⁽ʰ⁾"}, {"ꬶ⁽ʰ⁾", "k⁽ʰ⁾"},  
	
	-- other stem- and/or word-initial configurations
	-- NOTE(review): the "ɣjː" rule below runs after the "ɣj" rule, which has
	-- already rewritten most "ɣj" sequences - confirm the intended order.
	{"([^ˈ])đi", "%1ʨi"}, {"([^ˈ])ꝺi", "%1ʥi"}, {"([^ˈ])ɣi", "%1ɟi"}, {"([^ˈ])ɣj", "%1jː"}, {"([^ˈ])ɣjː", "%1ɟː"},
	{"^([ˈˌ]?)ƀ", "%1p"}, {"^([ˈˌ]?)đ", "%1t"}, {"^([ˈˌ]?)ꬶ", "%1k"}, {"^([ˈˌ]?)ꝺ", "%1d"}, {"([ˈˌ])ɣ", "%1ɡ"}
}

-- Second rule set: ordered list of {pattern, replacement} pairs that
-- export.crux applies with gsub after rules1 and after the dialect-specific
-- substitutions. Each rule sees the output of the previous one.
-- NOTE(review): because of that, a later pattern can only match what earlier
-- rules left untouched; several "...v" cluster rules come after the general
-- "([ðđʦⱡɕꬶrkt])v" rule and may never fire - confirm the intended order.
local rules2 = {
	-- stressed consonants (coda of stressed syllables without long vowels, diphthongs, or triphthongs)
	-- spat(c) captures the stressed syllable up to its vowel as %1 and the
	-- character after the consonant as %2.
	{spat("h"), "%1ʔ%2"}, {spat("ꬶ"), "%1k%2"}, {spat("ƀ"), "%1p%2"},
	{spat("đ"), "%1t%2"}, {spat("ꝺ"), "%1d%2"}, {spat("ɣ"), "%1ɡ%2"},
	
	-- internal consonant clusters
	{"[ƀp][ƀp]", "ʔp"},
	{"[đt][đt]", "ʔt"},
	{"[ꬶk][ꬶkc]", "ʔk"},-- {"kc", "ʔc"},
	{"mn", "mnː"}, {"mʔk", "mkː"}, {"m[ƀp]", "mp"}, {"m(s?)[đt]", "m%1t"}, {"m[ꬶk]", "mk"}, {"m[ʔh]", "mh"},
	{"[ƀp]([msɕ])", "p%1ː"}, {"[ƀp]r", "px"},
	{"b([sɕ])", "p%1"},
	{"n[ꝺd]", "nd"}, {"n[đt]", "nt"}, {"ŋʔk", "ŋkː"}, {"n[ꬶk]([^ː])", "ꬼk%1"}, {"([ðđʦⱡɕꬶrkt])v", "%1wː"},
	{"[đt]n", "tnː"}, {"[đt][ꬶk]v", "tkwː"},
	{"ʦ[đt]", "ʦtː"}, {"ʦ[ꬶk]", "ʦkː"}, {"(ʦ[lmn])", "%1ː"}, {"ʦ[ꬶk]v", "ʦkwː"},
	{"ʦxv", "ʦxw"}, {"[đt]x", "tːx"}, {"[đt][ꬶk]", "tk"}, {"[đt]r", "tx"},
	{"ðm", "ðmː"}, {"ð[ꬶk]", "ðkː"}, {"ð[ɣɡ]", "ðɡ"},
	{"[ꝺd]x", "ðx"},
	{"(s[ꬶk]l)", "%1ː"}, {"sʔk", "skː"}, {"sxv", "sxw"},
	{"([srl])[đt]", "%1t"}, {"([sɕrl])[ꬶk]", "%1k"}, {"([sɕrl])[ƀp]", "%1p"}, 
	{"([lr])ʔ([ptk])", "%1%2ː"}, {"l[ɣɡ]", "lɡ"},
	-- NOTE(review): same rule as the "mʔk" entry further up; looks redundant - confirm.
	{"mʔk", "mkː"},
	{"lʦ(x?)v", "ɬʦ%1w"}, {"lʦx", "ɬʦx"},
	{"(ⱡ[mnꬶk])", "%1ː"},
	{"[ꬶk]([msɕ])", "k%1ː"}, {"[ꬶk]s[ꬶk]", "kskː"}, {"[ꬶk]sl", "ksⱡ"},
	{"[ɣɡ]([mn])", "ŋ%1ː"}, {"[ɣɡ]([vsl])", "k%1"},

	-- closed vowels
	-- Each rule captures a (possibly empty) run of vowels as s1 and the
	-- following consonant context as s2, and converts s1 with open_to_closed.
	{"(" .. vowel .. "*)(·?" .. consonant .. "·?ˈ?" .. consonant .. ")", function(s1, s2) return open_to_closed(s1) .. s2 end},
	{"(" .. vowel .. "*)(" .. consonant .. ")$", function(s1, s2) return open_to_closed(s1) .. s2 end},
	{"(" .. vowel .. "*)(ː?" .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end},
	{"(" .. vowel .. "*)(ː?" .. consonant .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end},
	{"(" .. vowel .. "*)(" .. consonant .. "ː" .. consonant .. ")", function(s1, s2) return open_to_closed(s1) .. s2 end},
	{"ɑ$", "a"},
}

-- Last rule set: ordered list of {pattern, replacement} pairs that export.crux
-- applies with gsub at the very end. It expands the single-character
-- stand-ins to their multi-character IPA spellings and removes the helper
-- markers inserted earlier.
local final = {
	-- k and the g-like symbols before a front vowel
	{"k(["..front_vowel.."])", "c%1"}, {"[ɡꬶɣ](["..front_vowel.."])", "ɟ%1"},
	{"([iɪ])[đt]$", "%1ʨ"}, {"(" .. vowel .. ")[đt]$", "%1ʔ%1̆"}, {"(" .. vowel .. "ː)[đt]$", "%1ʔ"}, -- -Vt becomes -VʔV̆ or -V̄ʔ

	--undo ligatures
	{"ʨ", "t͡ɕ"}, {"ʥ", "d͡ʑ"}, {"ⱡ", "tɬ"}, {"ʣ", "d͡z"}, {"ʦ", "t͡s"},
	{"ƀ", "b"}, {"ꝺ", "ð"}, {"đ", "d"}, {"ꬶ", "ɡ"}, {"õ", "ɔ̃"},
	{"ṁ", "m̥"}, {"ṅ", "n̥"}, {"ħ", "h"}, {"ɡ⁽", "k⁽"}, {"ꬼ", "ŋ̊"}, {"(t⁽ʰ⁾)ɟ", "%1c"},{"(tʼ)ɟ", "%1c"},
	 
	{"k(ː?["..front_vowel.."])", "c%1"}, {"ɡ(ː[" ..front_vowel.."])", "c%1"}, {"dɟ", "tc"}, {"ƛ", "tɬʼ"},
	-- remove morpheme separator, possible double long vowel markers, and repeated secondary stress markers
	-- ("ⱦ" is the marker rules1 puts after dot-below vowels; the last rule
	-- turns a hyphen that does not follow "ˈ" into the tie "‿")
	{"[·ⱦ]", ""}, {"ːː", "ː"}, {"(ˈ[^ˌ]*)ˌ", "%1"}, {"([^ˈ])-", "%1‿"},
}

-- Applies an ordered list of {pattern, replacement} pairs to `text`,
-- one gsub per pair, each working on the result of the previous one.
local function apply_rules(text, ruleset)
	for _, rule in ipairs(ruleset) do
		-- plain assignment keeps only gsub's first result (the string)
		text = gsub(text, rule[1], rule[2])
	end
	return text
end

--- Converts a single word to its IPA transcription.
-- Pipeline: lower-case -> export.morphemes (stress marks) -> rules1 ->
-- dialect-specific substitutions -> rules2 -> dialect-specific rendering of
-- the placeholder "ü" -> final.
-- @param term the word to convert; morphemes may be separated by "·".
-- @param a    not read by this function; kept so existing callers that pass
--             (term, a, e, w) continue to work.
-- @param e    truthy selects the Eastern substitutions (ignored when `w` is set).
-- @param w    truthy selects the Western substitutions, including the
--             `anaptyctic` rules; takes precedence over `e`.
-- @return the transcription, without surrounding brackets.
function export.crux(term, a, e, w)
	term = export.morphemes(mw.ustring.lower(term))
	term = apply_rules(term, rules1)

	if w then
		term = gsub(term, "ꬶl", "ʔⱡ")
		term = apply_rules(term, anaptyctic)
	else
		if e then
			-- the optional-aspiration mark becomes "ʼ" after any character
			term = gsub(term, "(.)⁽ʰ⁾", "%1ʼ")
			-- (the original ran the "ꬶʼ" substitution twice in a row; the
			-- second run could not match anything)
			term = gsub(term, "ꬶʼ", "kʼ")
			term = gsub(term, "ƀʼ", "pʼ")
			term = gsub(term, "đʼ", "tʼ")
			term = gsub(term, "ƀr", "pʼqʼ")
			term = gsub(term, "đr", "tʼqʼ")
			term = gsub(term, "ʦx", "ʦʼqʼ")
		end
		-- shared by the Eastern and the default conversion
		term = gsub(term, "ꬶl", "klː")
	end

	term = apply_rules(term, rules2)

	-- "ü" is the placeholder open_to_closed produces for "ø"
	term = gsub(term, "ü", w and "ɤ" or "œ")

	return apply_rules(term, final)
end

--- Wraps each pronunciation in a <span> that selects IPA-capable fonts.
-- Declared local (it was an accidental global) and defined before format_IPA,
-- which calls it.
-- @param items sequence of tables; the `pron` field of each is the text to wrap.
-- @return the spans concatenated with no separator ("" for an empty list).
local function IPA_span(items)
	local bits = {}
	for _, item in ipairs(items) do
		-- The style attribute is now closed with a double quote before ">";
		-- the original omitted it and emitted a malformed tag.
		bits[#bits + 1] = "<span style=\"font-size:110%;font-family:Gentium,'DejaVu Sans','Segoe UI',sans-serif\">" .. item.pron .. "</span>"
	end
	return table.concat(bits)
end

--- Prefixes the formatted pronunciations with the "IPA(key): " label.
-- @param items same shape as for IPA_span.
-- @return wikitext: links to the IPA chart and the Siwa key, then the spans.
local function format_IPA(items)
	return "[[w:IPA chart|IPA]]<sup>([[IPA for Siwa|key]])</sup>:&#32;" .. IPA_span(items)
end

--- Formats one output line: italic dialect label followed by the IPA.
-- Declared local (it was an accidental global); only export.show uses it.
-- @param pronunciation transcription without brackets; "[" and "]" are added here.
-- @param dialect       sequence of dialect names, joined with ", " in the label.
-- @return wikitext of the form "(''Dialect'') IPA(key): [pronunciation]".
local function line_format(pronunciation, dialect)
	local label = "(''" .. table.concat(dialect, ", ") .. "'')"
	-- There is always exactly one pronunciation per line, so the former
	-- one-element list joined with ' or ' is not needed.
	return label .. ' ' .. format_IPA({{pron = '[' .. pronunciation .. ']'}})
end

--- Converts a phrase word by word.
-- Declared local (it was an accidental global); only export.show uses it.
-- @param term    one or more words separated by single spaces.
-- @param a, e, w dialect switches, passed unchanged to export.crux.
-- @return the converted words joined by single spaces again.
local function separate_word(term, a, e, w)
	local result = {}

	for word in gsplit(term, " ") do
		result[#result + 1] = export.crux(word, a, e, w)
	end

	return table.concat(result, " ")
end

--- Template entry point: renders the pronunciation section for a term.
-- Template parameters (read from the parent frame):
--   1    the term; defaults to the page title ("uįo·sauṡṡi" in the Template namespace)
--   a    show the Aingo line (default true)
--   e    show the Eastern line when it differs from Aingo (default true)
--   w    show the Western line when it differs from Aingo (default true)
--   dia  replaces the dialect label on every line
--   rs   remove the stress marks from the output
-- A term that detect_dialect recognises as Western or Eastern gets only that
-- dialect's line. A Western line whose transcription contains "ɤ" is followed
-- by a "Regna" sub-line with "ɵ" instead.
-- Fixes over the previous version: a detected Western/Eastern term no longer
-- gets its line appended a second time to the same bullet (nor its category
-- added twice), and no empty "* " bullet is produced when the first enabled
-- dialect is skipped as identical to Aingo.
-- @param frame the Scribunto frame of the #invoke call.
-- @return wikitext: a bulleted list of pronunciation lines followed by the
--         category links (categories are omitted in the Template namespace).
function export.show(frame)
	local title = mw.title.getCurrentTitle()
	local in_template_ns = title.nsText == 'Template'

	local params = {
		[1] = { default = in_template_ns and "uįo·sauṡṡi" or title.text },
		["a"] = {type = 'boolean', default = true},
		["e"] = {type = 'boolean', default = true},
		["w"] = {type = 'boolean', default = true},
		["dia"] = {},
		["rs"] = {type = 'boolean'},
	}

	local args = require("Module:parameters").process(frame:getParent().args, params)
	local term = args[1]
	local categories = {}
	local lines = {}

	-- Appends one list item; `bullet` is "* " or "** ".
	local function add_line(bullet, pronunciation, label)
		lines[#lines + 1] = bullet .. line_format(pronunciation, {args.dia or label})
	end

	local function add_category(name)
		if not in_template_ns then
			categories[#categories + 1] = name
		end
	end

	local dialect = detect_dialect(term)
	local aingo = separate_word(term, true, false, false)

	if dialect == "w" then
		add_line("* ", separate_word(term, false, false, true), 'Western')
		add_category("Western Siwa terms")
		add_category("Siwa terms with Western IPA pronunciation")
	elseif dialect == "e" then
		add_line("* ", separate_word(term, false, true, false), 'Eastern')
		add_category("Eastern Siwa terms")
		add_category("Siwa terms with Eastern IPA pronunciation")
	elseif args.a then
		add_line("* ", aingo, 'Aingo')
	end

	-- Eastern variant of a term that is not tied to one dialect.
	if not dialect and args.e then
		local eastern = separate_word(term, false, true, false)
		if eastern ~= aingo then
			add_line("* ", eastern, 'Eastern')
			add_category("Siwa terms with Eastern IPA pronunciation")
		end
	end

	-- Western variant. For a detected Western term the line itself is already
	-- present, so only the Regna sub-line can still be added here.
	if dialect ~= "e" and args.w then
		local western = separate_word(term, false, false, true)
		if western ~= aingo then
			if dialect ~= "w" then
				add_line("* ", western, 'Western')
				add_category("Siwa terms with Western IPA pronunciation")
			end
			if find(western, "ɤ") then
				-- parentheses drop gsub's second return value (the count)
				add_line("** ", (gsub(western, "ɤ", "ɵ")), 'Regna')
			end
		end
	end

	-- With nothing to show the previous version returned a lone bullet; keep that.
	local ipa = #lines > 0 and table.concat(lines, "\n") or "* "

	if args.rs then ipa = gsub(ipa, "[ˌˈ]", "") end

	return ipa .. ncategories(categories)
end

return export