Module:siwa-pron: Difference between revisions

From Linguifex
Jump to navigation Jump to search
No edit summary
No edit summary
 
(164 intermediate revisions by the same user not shown)
Line 11: Line 11:
local export = {}
local export = {}


local UNRELEASED = u(0x031A) -- COMBINING LEFT ANGLE ABOVE. ̚
local UNR = u(0x031A) -- COMBINING LEFT ANGLE ABOVE. ̚
local NASALIZED = u(0x0303) -- COMBINING TILDE. ̃


--obsolete ligatures and L with stroke used to remove two-character hassle. will replace later
--obsolete ligatures and L with stroke used to remove two-character hassle. will replace later
local consonant = "[mnɲŋpbtdcɟkɡʔvðsɕxɣhʨʥrlɬłʣʦʔƀꝺđɣꬶɉ]" .. UNRELEASED .. "?"
local consonant = "[mnɲŋpbtdcɟkɡʔvðsɕxɣhħʨʥrlɬⱡłʣʦʔƀꝺđꬶɉʼⱦṁṅꬼɨ]" .. UNR .. "?"
local front_vowel = "iɪyeøɛœæ"
local front_vowel = "iɪyeøɛœæ"
local back_vowel = "uɔõɑʊɤɯ"
local back_vowel = "uɔõɑʊɤɯ"
local vowel = "[" .. front_vowel .. back_vowel .. "a]"
local vowel = "[" .. front_vowel .. back_vowel .. "a]"
local unrelaxed = {
["ạ"] = "a", ["ẹ"] = "e", ["ị"] = "i", ["ọ"] = "o", ["ụ"] = "u", ["ỵ"] = "y",
}


function spat(c)
function spat(c)
return "(·?ˈ[mnɲŋpbtdcɟkɡʔvðsɕxɣhʨʥrlɬłʣjwʦ⁽ʰ⁾ʔːƀꝺđɣꬶɉ]*"..UNRELEASED.."?"..vowel..")" .. c .. "([^ː])"
return "(·?ˈ[mnɲŋpbtdcɟkɡvðsɕxɣhʨʥrlɬłⱡʣjwʦ⁽ʰ⁾ʼʔːƀꝺđꬶɉṁṅꬼ]*" .. UNR .. "?" .. vowel .. ")" .. c .. "([^ː])"
end
end


Line 64: Line 67:
end
end
return table.concat(pss,"·")
local _, n = gsub(table.concat(pss,"·"), "ˈ", "")
return n>=2 and gsub(table.concat(pss,"·"), "ˈ", "", n-1) or table.concat(pss, "·")
end
end


local function detect_dialect(term)
local function detect_dialect(term)
if find(term, "̊") or find(term, "ṡ$") or find(term, "rg") or find(term, "") then
if find(term, "[ṁṅłƛ]") then
return "w"
elseif find(term, "^t[mk]") or find(term, "^sm") or find(term, "^mġ") or find(term,"") or find(term, "̊") then
return "e"
return "e"
elseif find(term, "[ṁṅłƛ]") then
return "w"
end
end
end
end


local anaptyctic = {
local anaptyctic = {
{"mn", "ːmɨnː"}, {"mʔk", "ːmɨʔk"},
{"mn", "ːmɨnː"}, {"mʔk", "ːmɨʔkː"},
{"([^ˈ])ʦ([nxm])", "%1ːʣɨ%2ː"}, {"ʦꬶ", "ːʣɨʔk"}, {"([^ˈ])ʦꬶv", "%1ːʣɨkwː"},
{"([^ˈ])ʦ([nxm])", "%1ːʣɨ%2ː"}, {"([^ˈ])ʦꬶv", "%1ːʣɨkwː"}, {"ʦꬶ", "ːʣɨʔkː"},  
{"([^ˈ])ʦv", "%1ːʣɨwː"}, {"ʦđ", "ːʣɨʔt"},
{"([^ˈ])ʦv", "%1ːʣɨwː"}, {"ʦđ", "ːʣɨʔtː"},
{"đꬶ", "ːdɨʔk"}, {"đꬶv", "ːdɨkwː"}, {"đ([xn])", "ːdɨ%1ː"},
{"đꬶv", "ːdɨkwː"}, {"đꬶ", "ːdɨʔkː"}, {"đ([xn])", "ːdɨ%1ː"},
{"([^ˈ])đv", "%1ːdɨwː"},
{"([^ˈ])đv", "%1ːdɨwː"},
{"sꬶv", "skʔɨwː"}, {"sʔk", "ːsɨʔk"}, {"ɕꬶv", "ɕkɨwː"},
{"sꬶv", "skɨwː"}, {"sʔk", "ːsɨʔkː"}, {"ɕꬶv", "ɕkɨwː"},
{"([lr])(ʔ[kpt])", "ː%1ɨ%2"}, {"ł([mnx])", "ːłɨ%1ː"}, {"ꝺx", "ːðɨɣː"},
{"([lr])(ʔ[kpt])", "ː%1ɨ%"}, {"([mnx])", "ːⱡɨ%1ː"}, {"ꝺx", "ːðɨɣː"},
{"ðꬶ", "ːðɨʔk"}, {"ɣ([nm])", "ːŋɨ%1ː"}, {"([łrð])v", "ː%1ɨwː"},
{"ðꬶ", "ːðɨʔkː"}, {"ɣ([nm])", "ːŋɨ%1ː"}, {"([ⱡrð])v", "ː%1ɨwː"},
{"x([mnl])", "ːɣɨ%1ː"}, {"xv", "ːɣɨwː"}, {"ƀ([msɕ])", "ːbɨ%1ː"},
{"x([mnl])", "ːɣɨ%1ː"}, {"^(ˈ?)xv", "%1ɣɨwː"}, {"([^s])xv", "%1ːɣɨwː"}, {"ƀ([msɕ])", "ːbɨ%1ː"},
{"([^ˈ])ꬶ([msɕl])", "%1ːɡɨ%2ː"}, {".ꬶv", "ːɡɨwː"}, {"ꬶsꬶ", "ːksɨʔk"},
{"([^ˈ])ꬶ([msɕl])", "%1ːɡɨ%2ː"}, {"(.)ꬶv", "%1ːɡɨwː"}, {"ꬶsꬶ", "ːksɨʔkː"},
}
}


Line 100: Line 105:
["ḥ"] = "ʔ", ["į"] = "j",
["ḥ"] = "ʔ", ["į"] = "j",
["ġ"] = "x", ["g"] = "ɡ", -- IPA g
["ġ"] = "x", ["g"] = "ɡ", -- IPA g
["ų"] = "w",
["ų"] = "w", ["ł"] = "ɬ",
}},
}},
{"o̊", "ɯ"}, {"t[ṡɕ]", "ʨ"}, {"dj", "ʥ"}, {"dl", "ł"}, {"kj", "c"}, {"nj", "ɲ"},  {"ts", "ʦ"},
{"", "ʨ"}, {"dj", "ʥ"}, {"dl", ""}, {"kj", "c"}, {"nj", "ɲ"},  {"ts", "ʦ"}, {"o̊", "ɯ"},
-- long consonants
{"([ạẹịọụỵ])", function(v) return unrelaxed[v] .. "" end},
{"mm", "mː"}, {"bb", "pː"}, {"vv", "wː"}, {"nn", "nː"},
{"dʥ", "ʥː"}, {"dd", "tː"}, {"ðð", "ðː"}, {"ss", "sː"},
{"ɕɕ", "ɕː"}, {"rr", "rː"}, {"ll", "lː"}, {"ɡɡ", "kː"},
{"xx", "xː"}, {"nɡ", "ŋː"}, {"hh", "hː"}, {"ʔʔ", "ʔː"},
{"nɲ", "ɲː"}, {"hl", "ɬː"}, {"ƛƛ", "ɬː"},  
-- default all consonants to unstressed. [] with stroke and ꬶ (U+AB36) to tell apart natural and stress-borne
-- default all consonants to unstressed. [] with stroke and ꬶ (U+AB36) to tell apart natural and stress-borne
{"p", "ƀ"}, {"d", "ꝺ"}, {"t", "đ"}, {"ɡ", "ɣ"}, {"k", "ꬶ"},
{"p", "ƀ"}, {"d", "ꝺ"}, {"t", "đ"}, {"ɡ", "ɣ"}, {"k", "ꬶ"},
-- long consonants
{"mm", "mː"}, {"bb", "pː"}, {"vv", "wː"}, {"nn", "nː"},
{"ꝺʥ", "ʥː"}, {"ꝺꝺ", "tː"}, {"ðð", "ðː"}, {"ss", "sː"},
{"ɕɕ", "ɕː"}, {"rr", "rː"}, {"ll", "lː"}, {"ɣɣ", "kː"},
{"xx", "xː"}, {"nɣ", "ŋː"}, {"hh", "hː"}, {"ʔʔ", "ʔː"},
{"nɲ", "ɲː"}, {"hl", "ɬː"}, {"ƛƛ", "ɬː"}, {"đʨ", "ʨː"},
-- default all vowels to open (open-closed distinctions are computed later); long vowels don't have closed counterparts
-- default all vowels to open (open-closed distinctions are computed later); long vowels don't have closed counterparts
{"a", "ɑ"}, {"ę", "æ"}, {"o", "ɔ"}, {"ů", "ø"}, -- õ is conserved to avoid two characters
{"a", "ɑ"}, {"ę", "æ"}, {"o", "ɔ"}, {"ů", "ø"}, -- õ is conserved to avoid two characters
{"ả", "æː"}, {"ẻ", "eː"}, {"ỉ", "iː"}, {"ỏ", "ʊː"}, {"ủ", "uː"}, {"ỷ", "yː"}, {"ẻu", "øː"}, {"õu", "õː̃"},
{"ả", "æː"}, {"ẻu", "øː"}, {"ẻ", "eː"}, {"ỉ", "iː"}, {"ỏ", "ʊː"}, {"ủ", "uː"}, {"ỷ", "yː"}, {"õu", "õː"},
{"ey", ""}, {"ɑy", "æø"}, -- y-final diphthongs
-- diphthongs
{"^(ˈ?)ꬶ([" .. front_vowel .. "])", "%1c%2"},
{"øɑ", "üa"}, {"øi", "üɪ"}, {"", "ʊɔ"},
{"^(ˈ?)ɣ([" .. front_vowel .. "])", "%1ɟ%2"}, -- word-initial [k] and [g] palatalize before front vowels
{"ɑy", "æœ"}, {"ey", "ɛœ"},
{"^(ˈ?[ƀđꬶc])([^mn])", "%1ʰ%2"}, -- voiceless stops word-initially become aspirated
{"^(ˈ?)đʰꬶ", "%1tk"}, {"^(ˈ?)đʰm", "%1tm"}, -- Far Eastern initial consonant clusters
{"(.*·ˈ[ƀđꬶc])", "%1⁽ʰ⁾"}, -- although after a prefix they may or may not
{"^(ˈ?)ɣj([" .. front_vowel .. "])", "%1ʣ%2"}, -- <gį> word-initially and before front vowels is pronounced [d͡z]
{"^(ˈ?)ɣj([" .. back_vowel .. "])", "%1ɟ%2"}, -- and [ɟ] before back vowels
{"^(ˈ)ꬶ([" .. front_vowel .. "])", "%1c%2"},  
-- preaspirated consonants
{"^(ˈ)ɣ([" .. front_vowel .. "])", "%1ɟ%2"}, -- word-initial [k] and [g] palatalize before front vowels
{"[hʔ](ʦ[ꬶx])", "ħ%1"}, {"[hʔ](đ[vx])", "ħ%1"},
{"^(ˈ[ƀđꬶc])", "%"}, -- voiceless stops word-initially become aspirated
{"h(ː[wj])", "ħ%1"}, {"[hʔ]ꬶ", "ħk"}, {"[]đ", "ħt"}, {"[hʔ]ƀ", "ħp"}, {"[hʔ]", "ħd"}, {"[]ɣ", "ħɡ"},
{"^(.*·ˈ[ƀđꬶc])", "%1⁽ʰ⁾"}, -- although after a prefix they may or may not be
{"[hʔ]([pbtdkmnlsrʦ][^" .. UNR .. "])", "ħ%1"},
{"^(ˈ)ɣj([" .. front_vowel .. "])", "%1ʣ%2"}, -- <gį> word-initially and before front vowels is pronounced [d͡z]
{"^(ˈ)ɣj([" .. back_vowel .. "])", "%1ɟ%2"}, -- and [ɟ] before back vowels
 
{"ƀƀ", "ʔp"}, {"đđ", "ʔt"}, {"ꬶꬶ", "ʔk"}, {"ꝺ$", "ʥ"},
{"ƀƀ", "ʔp"}, {"", "ʔp"},
{"bm", "ʔp" .. UNR .. "m"}, {"ꝺn", "ʔt" .. UNR .. "n"}, {"ꬶn", "ʔk" .. UNR .. "ŋ"},
{"đđ", "ʔt"}, {"", "ʔt"},
{"ƀ⁽ʰ⁾", "p⁽ʰ⁾"}, {"đ⁽ʰ⁾", "t⁽ʰ⁾"}, {"ꬶ⁽ʰ⁾", "k⁽ʰ⁾"},
{"ꬶꬶ", "ʔk"}, {"kꬶ", "ʔk"},
{"bm", "ʔp̚m"}, {"ꝺn", "ʔt̚n"}, {"ꬶn", "ʔk̚ŋ"},
-- other stem- and/or word-initial configurations
-- other stem- and/or word-initial configurations
{"đi", "ʨi"}, {"ꝺi", "ʥi"}, {"ɣi", "ɉi"}, {"ɣj", ""}, {"ɣjː", "ɟː"},
{"([^ˈ])đi", "%1ʨi"}, {"([^ˈ])ꝺi", "%1ʥi"}, {"([^ˈ])ɣi", "%1ɟi"}, {"([^ˈ])ɣj", "%1jː"}, {"([^ˈ])ɣjː", "%1ɟː"},
{"ˈƀ", "ˈp"}, {"ˈđ", "ˈt"}, {"ˈꬶ", "ˈk"}, {"ˈꝺ", "ˈd"}, {"ƛ", "ʔł"}, {"ˈɣ", "ˈɡ"}
{"^([ˈˌ]?)ƀ", "%1p"}, {"^([ˈˌ]?)đ", "%1t"}, {"^([ˈˌ]?)ꬶ", "%1k"}, {"^([ˈˌ]?)ꝺ", "%1d"}, {"([ˈˌ])ɣ", "%1ɡ"}
}
}


local rules2 = {
local rules2 = {
-- stressed consonants (coda of stressed syllables without long vowels, diphthongs, or triphthongs)
-- stressed consonants (coda of stressed syllables without long vowels, diphthongs, or triphthongs)
{spat("h"), "%1ʔ%2"}, {spat("ꬶ"), "%1k%2"}, {spat("ƀ"), "%1p%2"}, {spat("đ"), "%1t%2"}, {spat("ꝺ"), "%1d%2"},
{spat("h"), "%1ʔ%2"}, {spat("ꬶ"), "%1k%2"}, {spat("ƀ"), "%1p%2"},
 
{spat("đ"), "%1t%2"}, {spat("ꝺ"), "%1d%2"}, {spat("ɣ"), "%1ɡ%2"},
-- internal consonant clusters
-- internal consonant clusters
{"pp", "ʔp"},
{"[ƀp][ƀp]", "ʔp"},
{"tt", "ʔt"},
{"[đt][đt]", "ʔt"},
{"k[kc]", "ʔk"},-- {"kc", "ʔc"},
{"[ꬶk][ꬶkc]", "ʔk"},-- {"kc", "ʔc"},
{"mn", "mnː"}, {"mʔk", "mkː"},
{"mn", "mnː"}, {"mʔk", "mkː"}, {"m[ƀp]", "mp"}, {"m(s?)[đt]", "m%1t"}, {"m[ꬶk]", "mk"}, {"m[ʔh]", "mh"},
{"[ƀp]([msɕ])", "p%1ː"}, {"[ƀp]r", "px"},
{"[ƀp]([msɕ])", "p%1ː"}, {"[ƀp]r", "px"},
{"b([sɕ])", "p%1"},
{"b([sɕ])", "p%1"},
{"n[ꬶk]", "ŋk"}, {"([ðđʦłɕꬶr])v", "%1wː"},
{"n[ꝺd]", "nd"}, {"n[đt]", "nt"}, {"ŋʔk", "ŋkː"}, {"n[ꬶk]([^ː])", "ꬼk%1"}, {"([ðđʦⱡɕꬶrkt])v", "%1wː"},
{"[đt]n", "tnː"}, {"[đt]r", "tx"}, {"[đt][ꬶk]v", "tkwː"},
{"[đt]n", "tnː"}, {"[đt][ꬶk]v", "tkwː"},
{"(ʦ[tđlmn])", "%1ː"}, {"ʦ[ꬶk]v", "ʦkwː"},
{"ʦ[đt]", "ʦtː"}, {"ʦ[ꬶk]", "ʦkː"}, {"(ʦ[lmn])", "%1ː"}, {"ʦ[ꬶk]v", "ʦkwː"},
{"ʦxv", "ʦxw"}, {"[đt]x", "tːx"},
{"ʦxv", "ʦxw"}, {"[đt]x", "tːx"}, {"[đt][ꬶk]", "tk"}, {"[đt]r", "tx"},
{"(ð[mꬶk])", "%1ː"},
{"ðm", "ðmː"}, {"ð[ꬶk]", "ðkː"}, {"ð[ɣɡ]", "ðɡ"},
{"[ꝺd]x", "ðx"},
{"[ꝺd]x", "ðx"},
{"(s[ꬶk]l)", "%1ː"}, {"sʔk", "skː"}, {"sxv", "sxwː"}, {"s[ꬶk]", "sk"},
{"(s[ꬶk]l)", "%1ː"}, {"sʔk", "skː"}, {"sxv", "sxw"},
{"([lr])ʔ([ptk])", "%1%2ː"},
{"([srl])[đt]", "%1t"}, {"([sɕrl])[ꬶk]", "%1k"}, {"([sɕrl])[ƀp]", "%1p"},  
{"([lr])ʔ([ptk])", "%1%2ː"}, {"l[ɣɡ]", "lɡ"},
{"mʔk", "mkː"},
{"lʦ(x?)v", "ɬʦ%1w"}, {"lʦx", "ɬʦx"},
{"lʦ(x?)v", "ɬʦ%1w"}, {"lʦx", "ɬʦx"},
{"(ł[mnꬶk])", "%1ː"},
{"([mnꬶk])", "%1ː"},
{"([ꬶk][msɕ])", "%1ː"}, {"[ꬶk]s[ꬶk]", "kskː"}, {"[ꬶk]sl", "ksł"},
{"[ꬶk]([msɕ])", "k%1ː"}, {"[ꬶk]s[ꬶk]", "kskː"}, {"[ꬶk]sl", "ksⱡ"},
{"[ɣɡ]([mn])", "ŋ%1ː"}, {"[ɣɡ]([vsl])", "k%1"},
{"[ɣɡ]([mn])", "ŋ%1ː"}, {"[ɣɡ]([vsl])", "k%1"},


-- closed vowels
-- closed vowels
{"(" .. vowel .. "*)(" .. consonant .. consonant .. ")", function(s1, s2) return open_to_closed(s1) .. s2 end},
{"(" .. vowel .. "*)(·?" .. consonant .. "·?ˈ?" .. consonant .. ")", function(s1, s2) return open_to_closed(s1) .. s2 end},
{"(" .. vowel .. "*)(" .. consonant .. ")$", function(s1, s2) return open_to_closed(s1) .. s2 end},
{"(" .. vowel .. "*)(" .. consonant .. ")$", function(s1, s2) return open_to_closed(s1) .. s2 end},
{"(" .. vowel .. "*)(ː?" .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end},
{"(" .. vowel .. "*)(ː?" .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end},
{"(" .. vowel .. "*)(ː?" .. consonant .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end},
{"(" .. vowel .. "*)(ː?" .. consonant .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end},
{"(" .. vowel .. "*)(" .. consonant .. "ː" .. consonant .. ")", function(s1, s2) return open_to_closed(s1) .. s2 end},
{"ɑ$", "a"},
{"ɑ$", "a"},
}
}


local final = {
local final = {
{"k(["..front_vowel.."])", "c%1"}, {"[ɡꬶ](["..front_vowel.."])", "ɟ%1"},
{"k(["..front_vowel.."])", "c%1"}, {"[ɡꬶɣ](["..front_vowel.."])", "ɟ%1"},
{"(" .. vowel .. ")[đt]$", "%1ʔ%1"}, -- -Vt becomes -VʔV (or -Vht, not considered)
{"([iɪ])[đt]$", "%1ʨ"}, {"(" .. vowel .. ")[đt]$", "%1ʔ%1̆"}, {"(" .. vowel .. "ː)[đt]$", "%1ʔ"}, -- -Vt becomes -VʔV̆ or -V̄ʔ
 
--undo ligatures
--undo ligatures
{"ʨ", "t͡ɕ"}, {"ʥ", "d͡ʑ"}, {"ł", "tɬ"}, {"ʣ", "d͡z"}, {"ʦ", "t͡s"},
{"ʨ", "t͡ɕ"}, {"ʥ", "d͡ʑ"}, {"", "tɬ"}, {"ʣ", "d͡z"}, {"ʦ", "t͡s"},
{"ƀ", "p"}, {"ꝺ", "d"}, {"đ", "t"}, {"ꬶ", "ɡ"}, {"ɉ", "ɟ"}, {"õ", "ɔ̃"},
{"ƀ", "b"}, {"ꝺ", "ð"}, {"đ", "d"}, {"ꬶ", "ɡ"}, {"õ", "ɔ̃"},
{"", ""}, {"", ""},
{"ṁ", "m̥"}, {"ṅ", "n̥"}, {"ħ", "h"}, {"ɡ⁽", "k⁽"}, {"", "ŋ̊"}, {"(t⁽ʰ⁾)ɟ", "%1c"},{"(tʼ)ɟ", "%1c"},
 
 
{"k(["..front_vowel.."])", "c%1"}, {"ɡ(["..front_vowel.."])", "ɟ%1"},
{"k(ː?["..front_vowel.."])", "c%1"}, {"ɡ(ː[" ..front_vowel.."])", "c%1"}, {"dɟ", "tc"}, {"ƛ", "tɬʼ"},
-- remove morpheme separator, possible double long vowel markers, and repeated secondary stress markers
-- remove morpheme separator, possible double long vowel markers, and repeated secondary stress markers
{"·", ""}, {"ːː", "ː"}, {"(ˈ[^ˌ]*)ˌ", "%1"},  
{"[·ⱦ]", ""}, {"ːː", "ː"}, {"(ˈ[^ˌ]*)ˌ", "%1"}, {"([^ˈ])-", "%1‿"},
}
}


Line 195: Line 207:


if w then
if w then
term = gsub(term, "ꬶl", "ʔł")
term = gsub(term, "ꬶl", "ʔⱡ")
for _, anap in ipairs(anaptyctic) do
for _, anap in ipairs(anaptyctic) do
term = gsub(term, anap[1], anap[2])
term = gsub(term, anap[1], anap[2])
end
end
elseif e then
elseif e then
term = gsub(term, "([đƀ])r", "%1ʼqʼ")
term = gsub(term, "(.)⁽ʰ⁾", "%1ʼ")
term = gsub(term, "ꬶʼ", "kʼ"); term = gsub(term, "ꬶʼ", "kʼ"); term = gsub(term, "ƀʼ", "pʼ"); term = gsub(term, "đʼ", "tʼ")
term = gsub(term, "ƀr", "pʼqʼ")
term = gsub(term, "đr", "tʼqʼ")
term = gsub(term, "ʦx", "ʦʼqʼ")
term = gsub(term, "ʦx", "ʦʼqʼ")
term = gsub(term, "ꬶl", "klː")
term = gsub(term, "ꬶl", "klː")
Line 213: Line 228:
if w then
if w then
term = gsub(term, "ü", "ɤ")
term = gsub(term, "ü", "ɤ")
elseif r then
term = gsub(term, "ü", "ɵ")
else
else
term = gsub(term, "ü", "œ")
term = gsub(term, "ü", "œ")
Line 262: Line 275:
["e"] = {type = 'boolean', default = true},
["e"] = {type = 'boolean', default = true},
["w"] = {type = 'boolean', default = true},
["w"] = {type = 'boolean', default = true},
["r"] = {type = 'boolean', default = true},
["dia"] = {},
["rs"] = {type = 'boolean'},
}
}
Line 271: Line 285:
local ipa = "* "
local ipa = "* "
if detect_dialect(term) == "e" then
if detect_dialect(term) == "w" then
ipa = ipa .. line_format(separate_word(term, false, true, false), {'Eastern'})
args.e = false; args.a = false
ipa = ipa .. line_format(separate_word(term, false, false, true), {args.dia or 'Western'})
if mw.title.getCurrentTitle().nsText ~= 'Template' then  
if mw.title.getCurrentTitle().nsText ~= 'Template' then  
table.insert(categories, "Eastern Siwa lemmas")
table.insert(categories, "Western Siwa terms")
table.insert(categories, "Siwa lemmas with Eastern IPA pronunciation")
table.insert(categories, "Siwa terms with Western IPA pronunciation")
end
end
elseif detect_dialect(term) == "w" then
elseif detect_dialect(term) == "e" then
ipa = ipa .. line_format(separate_word(term, false, false, true), {'Western'})
args.w = false; args.a = false
ipa = ipa .. line_format(separate_word(term, false, true, false), {args.dia or 'Eastern'})
if mw.title.getCurrentTitle().nsText ~= 'Template' then  
if mw.title.getCurrentTitle().nsText ~= 'Template' then  
table.insert(categories, "Western Siwa lemmas")
table.insert(categories, "Eastern Siwa terms")
table.insert(categories, "Siwa lemmas with Western IPA pronunciation")
table.insert(categories, "Siwa terms with Eastern IPA pronunciation")
end
end
elseif args.a then
elseif args.a then
ipa = ipa .. line_format(separate_word(term, true, false, false), {'Aingo'})
ipa = ipa .. line_format(separate_word(term, true, false, false), {args.dia or 'Aingo'})
end
end
if args.e then
if args.e and separate_word(term, true, false, false) ~= separate_word(term, false, true, false) then
if separate_word(term, true, false, false) ~= separate_word(term, false, true, false) then
if args.a  then
if args.a  then
ipa = ipa .. "\n* "
ipa = ipa .. "\n* "
end
end
ipa = ipa .. line_format(separate_word(term, true, true, false), {args.dia or 'Eastern'})
ipa = ipa .. line_format(separate_word(term, true, true, false), {'Eastern'})
if mw.title.getCurrentTitle().nsText ~= 'Template' then  
if mw.title.getCurrentTitle().nsText ~= 'Template' then  
table.insert(categories, "Siwa terms with Eastern IPA pronunciation")
table.insert(categories, "Siwa lemmas with Eastern IPA pronunciation")
end
end
end
end
end
if args.w then
if args.w and separate_word(term, true, false, false) ~= separate_word(term, false, false, true) then
if separate_word(term, true, false, false) ~= separate_word(term, false, false, true) then
if args.a or args.e then
if args.a or args.e then
ipa = ipa .. "\n* "
ipa = ipa .. "\n* "
end
end
ipa = ipa .. line_format(separate_word(term, true, false, true), {args.dia or 'Western'})
ipa = ipa .. line_format(separate_word(term, true, false, true), {'Western'})
if find(ipa, "ɤ") then
if mw.title.getCurrentTitle().nsText ~= 'Template' then  
ipa = ipa .. "\n** "
table.insert(categories, "Siwa lemmas with Western IPA pronunciation")
ipa = ipa .. line_format(gsub(separate_word(term, false, false, true),"ɤ","ɵ"), {args.dia or 'Regna'})
end
end
if mw.title.getCurrentTitle().nsText ~= 'Template' then  
table.insert(categories, "Siwa terms with Western IPA pronunciation")
end
end
end
end
if args.rs then ipa = gsub(ipa, "[ˌˈ]", "") end
return ipa .. ncategories(categories)
return ipa .. ncategories(categories)

Latest revision as of 13:31, 10 August 2022



local m_sm = mw.loadData("Module:siwa-pron/data")

local sub = mw.ustring.sub
local find = mw.ustring.find
local gmatch = mw.ustring.gmatch
local gsub = mw.ustring.gsub
local u = mw.ustring.char
local split = mw.text.split
local gsplit = mw.text.gsplit

local export = {}

-- Combining mark U+031A. rules1 appends it to the stop in "ʔp"/"ʔt"/"ʔk" before a nasal.
local UNR = u(0x031A) -- COMBINING LEFT ANGLE ABOVE. ̚

--obsolete ligatures and L with stroke used to remove two-character hassle. will replace later
-- Pattern fragment matching exactly one "consonant" character, optionally
-- followed by UNR. Besides real consonants the set also lists the internal
-- markers ʼ and ⱦ and the vowel ɨ inserted by `anaptyctic`, so all of these
-- count as consonants for the closed-vowel rules in rules2.
local consonant = "[mnɲŋpbtdcɟkɡʔvðsɕxɣhħʨʥrlɬⱡłʣʦʔƀꝺđꬶɉʼⱦṁṅꬼɨ]" .. UNR .. "?"
-- Bare character lists (no brackets) so they can be embedded in larger classes.
local front_vowel = "iɪyeøɛœæ"
local back_vowel = "uɔõɑʊɤɯ"
-- Character class matching any single vowel from either list, plus plain "a".
local vowel = "[" .. front_vowel .. back_vowel .. "a]"

-- Maps a dotted-below vowel letter to its plain counterpart. rules1 uses this
-- and appends the marker ⱦ, which `final` removes again.
local unrelaxed = {
	["ạ"] = "a", ["ẹ"] = "e", ["ị"] = "i", ["ọ"] = "o",	["ụ"] = "u", ["ỵ"] = "y",
}

-- Build the pattern that finds `c` directly after the vowel of a syllable
-- carrying the primary stress mark.
--   c: pattern fragment for the consonant to look for (inserted verbatim).
-- Returns a pattern with two captures:
--   %1  optional morpheme separator, the stress mark ˈ, any onset characters,
--       an optional UNR mark and exactly one vowel;
--   %2  the single character following `c`, which must not be ː.
function spat(c)
	local onset = "[mnɲŋpbtdcɟkɡvðsɕxɣhʨʥrlɬłⱡʣjwʦ⁽ʰ⁾ʼʔːƀꝺđꬶɉṁṅꬼ]*" .. UNR .. "?"
	local stressed_nucleus = "(·?ˈ" .. onset .. vowel .. ")"
	local not_long = "([^ː])"
	return stressed_nucleus .. c .. not_long
end

-- Turn a list of category names into wikitext category links.
--   categories: sequence of category names, without the "Category:" prefix.
-- Returns one "[[Category:NAME]]" link per entry, in order and with no
-- separator; an empty list yields the empty string.
local function ncategories(categories)
	local links = {}
	for _, name in ipairs(categories) do
		links[#links + 1] = "[[Category:" .. name .. "]]"
	end
	return table.concat(links)
end

-- Closed-syllable counterpart of each vowel. Vowels that are already closed
-- map to themselves.
local closed_counterpart = {
	["ɑ"] = "a", ["æ"] = "æ", ["e"] = "ɛ", ["i"] = "ɪ",
	["ɔ"] = "ɔ", ["õ"] = "õ", ["u"] = "ʊ", ["y"] = "œ", ["ɯ"] = "ɯ",
	["ø"] = "ü", -- dialectal variation, will be changed later
	["a"] = "a", ["ɛ"] = "ɛ", ["ɪ"] = "ɪ",
	["ʊ"] = "ʊ", ["ü"] = "ü", ["œ"] = "œ",
}

-- Replace every vowel in `v` by its closed-syllable counterpart.
--   v: string of vowel characters (rules2 passes the text captured by `vowel*`,
--      which may be empty).
-- Returns the converted string; an empty input gives an empty string.
local function open_to_closed(v)
	local otc = {}
	for vc in gmatch(v, ".") do
		-- Plain table lookup. A character with no entry (e.g. ɤ, which the
		-- `vowel` class admits but the table does not list) is passed through
		-- unchanged instead of being handed to gsub as a nil replacement,
		-- which raises an error.
		otc[#otc + 1] = closed_counterpart[vc] or vc
	end
	return table.concat(otc)
end

-- Split a word into morphemes at "·" and add stress marks.
--   word: a single word, with morphemes optionally separated by "·".
-- Each morpheme listed in m_sm.prefix gets the secondary stress mark ˌ; a
-- morpheme listed in m_sm.suffix is left unmarked once some earlier morpheme
-- already carries a stress mark; every other morpheme gets the primary stress
-- mark ˈ. If more than one ˈ results, only the last one is kept.
-- Returns the morphemes re-joined with "·".
function export.morphemes(word)
	-- split() returns the whole word as a single piece when there is no "·",
	-- so no separate check for the separator is needed.
	local pss = split(word, "·")

	for i, m in ipairs(pss) do
		-- find() (not gmatch()) is required here: gmatch() returns an iterator
		-- function, which is always truthy, so the test could never fail and a
		-- word made up only of suffix-table morphemes received no stress at all.
		if m_sm.suffix[m] and find(table.concat(pss), "[ˈˌ]") then
			-- suffix following an already stressed morpheme: leave unmarked
		elseif m_sm.prefix[m] then
			pss[i] = "ˌ" .. m
		else
			pss[i] = "ˈ" .. m
		end
	end

	local joined = table.concat(pss, "·")
	local _, n = gsub(joined, "ˈ", "")

	if n >= 2 then
		-- Drop the first n-1 primary stress marks; parentheses discard gsub's
		-- second return value (the substitution count).
		return (gsub(joined, "ˈ", "", n - 1))
	end
	return joined
end

-- Guess the dialect of a term from its spelling.
--   term: the term as written.
-- Returns "w" when the term contains any of ṁ ṅ ł ƛ; otherwise "e" when it
-- starts with tm, tk, sm, mġ or ų, or contains a combining ring above;
-- otherwise nothing (nil). The "w" test takes priority over the "e" tests.
local function detect_dialect(term)
	if find(term, "[ṁṅłƛ]") then
		return "w"
	end

	local eastern_markers = {"^t[mk]", "^sm", "^mġ", "^ų", "̊"}
	for _, marker in ipairs(eastern_markers) do
		if find(term, marker) then
			return "e"
		end
	end
end

-- Ordered list of {pattern, replacement} pairs, applied one after another with
-- gsub by export.crux, and only when its `w` flag is set. Every replacement
-- inserts the vowel ɨ into a consonant cluster. Order matters: a longer
-- cluster (e.g. "đꬶv") is listed before the shorter one it contains ("đꬶ").
-- Patterns starting with ([^ˈ]) skip a cluster that directly follows the
-- primary stress mark.
local anaptyctic = {
	{"mn", "ːmɨnː"}, {"mʔk", "ːmɨʔkː"},
	
	{"([^ˈ])ʦ([nxm])", "%1ːʣɨ%2ː"}, {"([^ˈ])ʦꬶv", "%1ːʣɨkwː"}, {"ʦꬶ", "ːʣɨʔkː"}, 
	{"([^ˈ])ʦv", "%1ːʣɨwː"}, {"ʦđ", "ːʣɨʔtː"},
	
	{"đꬶv", "ːdɨkwː"}, {"đꬶ", "ːdɨʔkː"}, {"đ([xn])", "ːdɨ%1ː"},
	{"([^ˈ])đv", "%1ːdɨwː"},
	
	{"sꬶv", "skɨwː"}, {"sʔk", "ːsɨʔkː"}, {"ɕꬶv", "ɕkɨwː"},
	
	{"([lr])(ʔ[kpt])", "ː%1ɨ%2ː"}, {"ⱡ([mnx])", "ːⱡɨ%1ː"}, {"ꝺx", "ːðɨɣː"},
	
	{"ðꬶ", "ːðɨʔkː"}, {"ɣ([nm])", "ːŋɨ%1ː"}, {"([ⱡrð])v", "ː%1ɨwː"},
	
	{"x([mnl])", "ːɣɨ%1ː"}, {"^(ˈ?)xv", "%1ɣɨwː"}, {"([^s])xv", "%1ːɣɨwː"}, {"ƀ([msɕ])", "ːbɨ%1ː"},
	
	{"([^ˈ])ꬶ([msɕl])", "%1ːɡɨ%2ː"}, {"(.)ꬶv", "%1ːɡɨwː"}, {"ꬶsꬶ", "ːksɨʔkː"},
}

-- First rewrite pass of export.crux: an ordered list of {pattern, replacement}
-- pairs, each applied to the whole word with gsub, in this order. The input is
-- the lower-cased word after export.morphemes has added "·" separators and
-- stress marks. A replacement may be a string, a lookup table or a function.
local rules1 = {
	-- Single-character respellings: every character is looked up in the table;
	-- characters without an entry are left as they are.
	{".", {
		["ḍ"] = "ð", ["ṡ"] = "ɕ",
		["ḥ"] = "ʔ", ["į"] = "j",
		["ġ"] = "x", ["g"] = "ɡ", -- IPA g
		["ų"] = "w", ["ł"] = "ɬ",
	}},
	-- Digraphs collapsed to one-character placeholders (expanded again in `final`).
	{"tɕ", "ʨ"}, {"dj", "ʥ"}, {"dl", "ⱡ"}, {"kj", "c"}, {"nj", "ɲ"},  {"ts", "ʦ"}, {"o̊", "ɯ"},
	
	-- Dotted-below vowels become the plain vowel followed by the marker ⱦ;
	-- ⱦ is in the `consonant` class and is deleted in `final`.
	{"([ạẹịọụỵ])", function(v) return unrelaxed[v] .. "ⱦ" end},
	
	-- default all consonants to unstressed. [] with stroke and ꬶ (U+AB36) to tell apart natural and stress-borne
	{"p", "ƀ"}, {"d", "ꝺ"}, {"t", "đ"}, {"ɡ", "ɣ"}, {"k", "ꬶ"},
	
	-- long consonants
	{"mm", "mː"}, {"bb", "pː"}, {"vv", "wː"}, {"nn", "nː"},
	{"ꝺʥ", "ʥː"}, {"ꝺꝺ", "tː"}, {"ðð", "ðː"}, {"ss", "sː"},
	{"ɕɕ", "ɕː"}, {"rr", "rː"}, {"ll", "lː"}, {"ɣɣ", "kː"},
	{"xx", "xː"}, {"nɣ", "ŋː"}, {"hh", "hː"}, {"ʔʔ", "ʔː"},
	{"nɲ", "ɲː"}, {"hl", "ɬː"}, {"ƛƛ", "ɬː"}, {"đʨ", "ʨː"},
	
	-- default all vowels to open (open-closed distinctions are computed later); long vowels don't have closed counterparts
	{"a", "ɑ"}, {"ę", "æ"}, {"o", "ɔ"}, {"ů", "ø"}, -- õ is conserved to avoid two characters
	-- "ẻu" is listed before "ẻ" so the longer sequence is matched first.
	{"ả", "æː"}, {"ẻu", "øː"}, {"ẻ", "eː"}, {"ỉ", "iː"}, {"ỏ", "ʊː"}, {"ủ", "uː"}, {"ỷ", "yː"}, {"õu", "õː"},
	{"ey", "eø"}, {"ɑy", "æø"}, -- y-final diphthongs
	
	{"^(ˈ?)ꬶ([" .. front_vowel .. "])", "%1c%2"}, 
	{"^(ˈ?)ɣ([" .. front_vowel .. "])", "%1ɟ%2"}, -- word-initial [k] and [g] palatalize before front vowels 
	{"^(ˈ?[ƀđꬶc])([^mn])", "%1ʰ%2"}, -- voiceless stops word-initially become aspirated
	{"^(ˈ?)đʰꬶ", "%1tk"}, {"^(ˈ?)đʰm", "%1tm"}, -- Far Eastern initial consonant clusters
	{"(.*·ˈ[ƀđꬶc])", "%1⁽ʰ⁾"}, -- although after a prefix they may or may not be
	{"^(ˈ?)ɣj([" .. front_vowel .. "])", "%1ʣ%2"}, -- <gį> word-initially and before front vowels is pronounced [d͡z]
	{"^(ˈ?)ɣj([" .. back_vowel .. "])", "%1ɟ%2"}, -- and [ɟ] before back vowels
	
	-- preaspirated consonants
	{"[hʔ](ʦ[ꬶx])", "ħ%1"}, {"[hʔ](đ[vx])", "ħ%1"},
	{"h(ː[wj])", "ħ%1"}, {"[hʔ]ꬶ", "ħk"}, {"[hʔ]đ", "ħt"}, {"[hʔ]ƀ", "ħp"}, {"[hʔ]ꝺ", "ħd"}, {"[hʔ]ɣ", "ħɡ"},
	{"[hʔ]([pbtdkmnlsrʦ][^" .. UNR .. "])", "ħ%1"},
	
	-- Doubled stops and stop + nasal sequences get a leading ʔ; the stop before
	-- a nasal is additionally marked with UNR.
	{"ƀƀ", "ʔp"}, {"đđ", "ʔt"}, {"ꬶꬶ", "ʔk"}, {"ꝺ$", "ʥ"},
	{"bm", "ʔp" .. UNR .. "m"}, {"ꝺn", "ʔt" .. UNR .. "n"}, {"ꬶn", "ʔk" .. UNR .. "ŋ"},
	{"ƀ⁽ʰ⁾", "p⁽ʰ⁾"}, {"đ⁽ʰ⁾", "t⁽ʰ⁾"}, {"ꬶ⁽ʰ⁾", "k⁽ʰ⁾"},  
	
	-- other stem- and/or word-initial configurations
	{"([^ˈ])đi", "%1ʨi"}, {"([^ˈ])ꝺi", "%1ʥi"}, {"([^ˈ])ɣi", "%1ɟi"}, {"([^ˈ])ɣj", "%1jː"}, {"([^ˈ])ɣjː", "%1ɟː"},
	{"^([ˈˌ]?)ƀ", "%1p"}, {"^([ˈˌ]?)đ", "%1t"}, {"^([ˈˌ]?)ꬶ", "%1k"}, {"^([ˈˌ]?)ꝺ", "%1d"}, {"([ˈˌ])ɣ", "%1ɡ"}
}

-- Second rewrite pass of export.crux, run after rules1 and after the
-- dialect-specific substitutions: an ordered list of {pattern, replacement}
-- pairs applied with gsub. Most patterns accept both the placeholder and the
-- plain form of a stop (e.g. [ƀp], [đt], [ꬶk]).
local rules2 = {
	-- stressed consonants (coda of stressed syllables without long vowels, diphthongs, or triphthongs)
	{spat("h"), "%1ʔ%2"}, {spat("ꬶ"), "%1k%2"}, {spat("ƀ"), "%1p%2"},
	{spat("đ"), "%1t%2"}, {spat("ꝺ"), "%1d%2"}, {spat("ɣ"), "%1ɡ%2"},
	
	-- internal consonant clusters
	{"[ƀp][ƀp]", "ʔp"},
	{"[đt][đt]", "ʔt"},
	{"[ꬶk][ꬶkc]", "ʔk"},-- {"kc", "ʔc"},
	{"mn", "mnː"}, {"mʔk", "mkː"}, {"m[ƀp]", "mp"}, {"m(s?)[đt]", "m%1t"}, {"m[ꬶk]", "mk"}, {"m[ʔh]", "mh"},
	{"[ƀp]([msɕ])", "p%1ː"}, {"[ƀp]r", "px"},
	{"b([sɕ])", "p%1"},
	{"n[ꝺd]", "nd"}, {"n[đt]", "nt"}, {"ŋʔk", "ŋkː"}, {"n[ꬶk]([^ː])", "ꬼk%1"}, {"([ðđʦⱡɕꬶrkt])v", "%1wː"},
	{"[đt]n", "tnː"}, {"[đt][ꬶk]v", "tkwː"},
	{"ʦ[đt]", "ʦtː"}, {"ʦ[ꬶk]", "ʦkː"}, {"(ʦ[lmn])", "%1ː"}, {"ʦ[ꬶk]v", "ʦkwː"},
	{"ʦxv", "ʦxw"}, {"[đt]x", "tːx"}, {"[đt][ꬶk]", "tk"}, {"[đt]r", "tx"},
	{"ðm", "ðmː"}, {"ð[ꬶk]", "ðkː"}, {"ð[ɣɡ]", "ðɡ"},
	{"[ꝺd]x", "ðx"},
	{"(s[ꬶk]l)", "%1ː"}, {"sʔk", "skː"}, {"sxv", "sxw"},
	{"([srl])[đt]", "%1t"}, {"([sɕrl])[ꬶk]", "%1k"}, {"([sɕrl])[ƀp]", "%1p"}, 
	{"([lr])ʔ([ptk])", "%1%2ː"}, {"l[ɣɡ]", "lɡ"},
	-- NOTE(review): same rule as the "mʔk" entry further up; it looks redundant
	-- here because "mʔ" has already been rewritten by then - confirm.
	{"mʔk", "mkː"},
	{"lʦ(x?)v", "ɬʦ%1w"}, {"lʦx", "ɬʦx"},
	{"(ⱡ[mnꬶk])", "%1ː"},
	{"[ꬶk]([msɕ])", "k%1ː"}, {"[ꬶk]s[ꬶk]", "kskː"}, {"[ꬶk]sl", "ksⱡ"},
	{"[ɣɡ]([mn])", "ŋ%1ː"}, {"[ɣɡ]([vsl])", "k%1"},

	-- closed vowels
	-- Each rule captures a (possibly empty) run of vowels plus the consonant
	-- context after it and passes the vowels through open_to_closed. Contexts,
	-- in order: two consonants (optionally split by "·" and/or "ˈ"), a single
	-- word-final consonant, one or two consonants followed by ɨ, and a
	-- consonant + ː + consonant sequence.
	{"(" .. vowel .. "*)(·?" .. consonant .. "·?ˈ?" .. consonant .. ")", function(s1, s2) return open_to_closed(s1) .. s2 end},
	{"(" .. vowel .. "*)(" .. consonant .. ")$", function(s1, s2) return open_to_closed(s1) .. s2 end},
	{"(" .. vowel .. "*)(ː?" .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end},
	{"(" .. vowel .. "*)(ː?" .. consonant .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end},
	{"(" .. vowel .. "*)(" .. consonant .. "ː" .. consonant .. ")", function(s1, s2) return open_to_closed(s1) .. s2 end},
	-- word-final ɑ is always written a
	{"ɑ$", "a"},
}

-- Last rewrite pass of export.crux: an ordered list of {pattern, replacement}
-- pairs applied with gsub. It palatalizes velars before front vowels, handles
-- word-final t, expands the one-character placeholders into their IPA
-- spellings and strips the internal markers.
local final = {
	{"k(["..front_vowel.."])", "c%1"}, {"[ɡꬶɣ](["..front_vowel.."])", "ɟ%1"},
	{"([iɪ])[đt]$", "%1ʨ"}, {"(" .. vowel .. ")[đt]$", "%1ʔ%1̆"}, {"(" .. vowel .. "ː)[đt]$", "%1ʔ"}, -- -Vt becomes -VʔV̆ or -V̄ʔ

	--undo ligatures
	{"ʨ", "t͡ɕ"}, {"ʥ", "d͡ʑ"}, {"ⱡ", "tɬ"}, {"ʣ", "d͡z"}, {"ʦ", "t͡s"},
	{"ƀ", "b"}, {"ꝺ", "ð"}, {"đ", "d"}, {"ꬶ", "ɡ"}, {"õ", "ɔ̃"},
	{"ṁ", "m̥"}, {"ṅ", "n̥"}, {"ħ", "h"}, {"ɡ⁽", "k⁽"}, {"ꬼ", "ŋ̊"}, {"(t⁽ʰ⁾)ɟ", "%1c"},{"(tʼ)ɟ", "%1c"},
	 
	-- Second palatalization pass, for velars produced by the expansions above.
	-- NOTE(review): the ɡ rule here yields "c", whereas the first rule of this
	-- table maps ɡ to "ɟ" - confirm this is intended.
	{"k(ː?["..front_vowel.."])", "c%1"}, {"ɡ(ː[" ..front_vowel.."])", "c%1"}, {"dɟ", "tc"}, {"ƛ", "tɬʼ"},
	-- remove morpheme separator, possible double long vowel markers, and repeated secondary stress markers
	{"[·ⱦ]", ""}, {"ːː", "ː"}, {"(ˈ[^ˌ]*)ˌ", "%1"}, {"([^ˈ])-", "%1‿"},
}

-- Convert one written word into its IPA transcription.
--   term: a single word (no spaces); morphemes may be separated by "·".
--   a:    accepted for symmetry with the callers but never read here.
--   e:    truthy to apply the Eastern substitutions (ignored when `w` is set).
--   w:    truthy to apply the Western substitutions.
-- Pipeline: lower-case -> export.morphemes -> rules1 -> dialect-specific
-- substitutions -> rules2 -> dialect-specific value of ü -> final.
-- Returns the transcription as a string, without surrounding brackets.
function export.crux(term, a, e, w)
	-- Apply an ordered list of {pattern, replacement} pairs to `text`.
	local function apply(text, rules)
		for _, rule in ipairs(rules) do
			text = gsub(text, rule[1], rule[2])
		end
		return text
	end

	local word = export.morphemes(mw.ustring.lower(term))
	word = apply(word, rules1)

	if w then
		word = gsub(word, "ꬶl", "ʔⱡ")
		word = apply(word, anaptyctic)
	else
		if e then
			-- The original repeated the "ꬶʼ" substitution twice in a row; the
			-- repeat could not match anything, so it is listed once here.
			word = apply(word, {
				{"(.)⁽ʰ⁾", "%1ʼ"},
				{"ꬶʼ", "kʼ"}, {"ƀʼ", "pʼ"}, {"đʼ", "tʼ"},
				{"ƀr", "pʼqʼ"},
				{"đr", "tʼqʼ"},
				{"ʦx", "ʦʼqʼ"},
			})
		end
		-- shared by the Eastern and the default branch
		word = gsub(word, "ꬶl", "klː")
	end

	word = apply(word, rules2)

	-- ü is the placeholder open_to_closed produces for closed ø
	word = gsub(word, "ü", w and "ɤ" or "œ")

	return apply(word, final)
end

-- Prefix the rendered pronunciations with the linked "IPA(key): " label.
--   items: sequence of tables with a `pron` field, passed on to IPA_span.
-- Returns the label followed by the output of IPA_span.
function format_IPA(items)
	local label = "[[w:IPA chart|IPA]]<sup>([[IPA for Siwa|key]])</sup>:&#32;"
	local spans = IPA_span(items)
	return label .. spans
end

-- Wrap each pronunciation in a styled <span>.
--   items: sequence of tables, each with a string field `pron`.
-- Returns the spans concatenated in order with no separator; an empty list
-- gives the empty string.
function IPA_span(items)
	local bits = {}
	for _, item in ipairs(items) do
		-- The style attribute is closed with \" before ">"; the closing quote
		-- was missing, which left the tag with a malformed attribute.
		local bit = "<span style=\"font-size:110%;font-family:Gentium,'DejaVu Sans','Segoe UI',sans-serif\">" .. item.pron .. "</span>"
		table.insert(bits, bit)
	end
	return table.concat(bits)
end

-- Format one output line: an italic dialect label followed by the bracketed
-- pronunciation.
--   pronunciation: transcription string, without brackets.
--   dialect:       sequence of label strings, joined with ", ".
-- Returns "(''LABELS'') " followed by format_IPA's output for "[pronunciation]".
function line_format(pronunciation, dialect)
	local label = "(''" .. table.concat(dialect, ", ") .. "'')"
	local rendered = format_IPA({{pron = '[' .. pronunciation .. ']'}})
	return label .. ' ' .. rendered
end

-- Transcribe a term that may consist of several space-separated words.
--   term:    the term as written.
--   a, e, w: dialect flags, handed to export.crux unchanged.
-- Returns the transcriptions of the individual words joined by single spaces.
function separate_word(term, a, e, w)
	local pieces = {}

	for word in gsplit(term, " ") do
		pieces[#pieces + 1] = export.crux(word, a, e, w)
	end

	return table.concat(pieces, " ")
end

-- Template entry point: builds the wikitext pronunciation list for a term.
--   frame: the Scribunto frame; arguments are read from the parent frame.
-- Arguments (processed by Module:parameters):
--   1    the term; defaults to the page title, or to a sample word when the
--        current page is in the Template namespace
--   a    boolean, default true: gates the 'Aingo' line
--   e    boolean, default true: gates the 'Eastern' line
--   w    boolean, default true: gates the 'Western' line
--   dia  optional label that replaces the built-in dialect label on every line
--   rs   boolean: strip the stress marks ˈ and ˌ from the whole output
-- Returns the list wikitext followed by the category links. Categories are
-- not collected when the current page is in the Template namespace.
function export.show(frame)
	local params = {
		[1] = { default = mw.title.getCurrentTitle().nsText == 'Template' and "uįo·sauṡṡi" or mw.title.getCurrentTitle().text },
		["a"] = {type = 'boolean', default = true},
		["e"] = {type = 'boolean', default = true},
		["w"] = {type = 'boolean', default = true},
		["dia"] = {},
		["rs"] = {type = 'boolean'},
	}
	
	local args = require("Module:parameters").process(frame:getParent().args, params)
	local term = args[1]
	local categories = {}
	
	local ipa = "* "
	
	-- First line: a spelling recognised as dialect-specific overrides the flags
	-- and yields that dialect's line; otherwise the Aingo line is shown if `a`
	-- is set.
	if detect_dialect(term) == "w" then
		-- NOTE(review): args.w stays true here, so the Western block further
		-- down can append a second Western line directly after this one (no
		-- "\n* " is inserted because args.a and args.e are now false) - confirm
		-- that this is intended.
		args.e = false; args.a = false
		ipa = ipa .. line_format(separate_word(term, false, false, true), {args.dia or 'Western'})
		if mw.title.getCurrentTitle().nsText ~= 'Template' then 
			table.insert(categories, "Western Siwa terms")
			table.insert(categories, "Siwa terms with Western IPA pronunciation")
		end
	elseif detect_dialect(term) == "e" then
		-- NOTE(review): likewise args.e stays true, so the Eastern block below
		-- can append the Eastern line again without a separator - confirm.
		args.w = false; args.a = false
		ipa = ipa .. line_format(separate_word(term, false, true, false), {args.dia or 'Eastern'})
		if mw.title.getCurrentTitle().nsText ~= 'Template' then 
			table.insert(categories, "Eastern Siwa terms")
			table.insert(categories, "Siwa terms with Eastern IPA pronunciation")
		end
	elseif args.a then
		ipa = ipa .. line_format(separate_word(term, true, false, false), {args.dia or 'Aingo'})
	end
	
	-- Eastern line, only when it differs from the default transcription.
	-- export.crux never reads its `a` argument, so passing true or false for it
	-- gives the same result.
	if args.e and separate_word(term, true, false, false) ~= separate_word(term, false, true, false) then
		if args.a  then
			ipa = ipa .. "\n* "
		end
		ipa = ipa .. line_format(separate_word(term, true, true, false), {args.dia or 'Eastern'})
		if mw.title.getCurrentTitle().nsText ~= 'Template' then 
			table.insert(categories, "Siwa terms with Eastern IPA pronunciation")
		end
	end
	
	-- Western line, only when it differs from the default transcription.
	if args.w and separate_word(term, true, false, false) ~= separate_word(term, false, false, true) then
		if args.a or args.e then
			ipa = ipa .. "\n* "
		end
		ipa = ipa .. line_format(separate_word(term, true, false, true), {args.dia or 'Western'})
		-- The search covers everything accumulated in `ipa` so far, not only
		-- the Western line. When ɤ occurs, a nested 'Regna' line is added in
		-- which every ɤ of the Western transcription is replaced by ɵ.
		if find(ipa, "ɤ") then
			ipa = ipa .. "\n** "
			ipa = ipa .. line_format(gsub(separate_word(term, false, false, true),"ɤ","ɵ"), {args.dia or 'Regna'})
		end
		if mw.title.getCurrentTitle().nsText ~= 'Template' then 
			table.insert(categories, "Siwa terms with Western IPA pronunciation")
		end
	end
	
	-- rs: remove primary and secondary stress marks from the entire output
	if args.rs then ipa = gsub(ipa, "[ˌˈ]", "") end
	
return ipa .. ncategories(categories)
end

return export