Module:siwa-pron: Difference between revisions

From Linguifex
Jump to navigation Jump to search
No edit summary
No edit summary
 
(139 intermediate revisions by the same user not shown)
Line 11: Line 11:
local export = {}
local export = {}


local UNRELEASED = u(0x031A) -- COMBINING LEFT ANGLE ABOVE. ̚
local UNR = u(0x031A) -- COMBINING LEFT ANGLE ABOVE. ̚


--obsolete ligatures and L with stroke used to remove two-character hassle. will replace later
--obsolete ligatures and L with stroke used to remove two-character hassle. will replace later
local consonant = "[mnɲŋpbtdcɟkɡʔvðsɕxɣhʨʥrlɬłʣʦʔƀꝺđɣꬶɉ]" .. UNRELEASED .. "?"
local consonant = "[mnɲŋpbtdcɟkɡʔvðsɕxɣhħʨʥrlɬⱡłʣʦʔƀꝺđꬶɉʼⱦṁṅꬼɨ]" .. UNR .. "?"
local front_vowel = "iɪyeøɛœæ"
local front_vowel = "iɪyeøɛœæ"
local back_vowel = "uɔõɑʊɤɯ"
local back_vowel = "uɔõɑʊɤɯ"
local vowel = "[" .. front_vowel .. back_vowel .. "a]"
local vowel = "[" .. front_vowel .. back_vowel .. "a]"
local unrelaxed = {
["ạ"] = "a", ["ẹ"] = "e", ["ị"] = "i", ["ọ"] = "o", ["ụ"] = "u", ["ỵ"] = "y",
}


function spat(c)
function spat(c)
return "(·?ˈ[mnɲŋpbtdcɟkɡʔvðsɕxɣhʨʥrlɬłʣjwʦ⁽ʰ⁾ʔːƀꝺđɣꬶɉ]*"..UNRELEASED.."?"..vowel..")" .. c .. "([^ː])"
return "(·?ˈ[mnɲŋpbtdcɟkɡvðsɕxɣhʨʥrlɬłⱡʣjwʦ⁽ʰ⁾ʼʔːƀꝺđꬶɉṁṅꬼ]*" .. UNR .. "?" .. vowel .. ")" .. c .. "([^ː])"
end
end


Line 63: Line 67:
end
end
return table.concat(pss,"·")
local _, n = gsub(table.concat(pss,"·"), "ˈ", "")
return n>=2 and gsub(table.concat(pss,"·"), "ˈ", "", n-1) or table.concat(pss, "·")
end
end


local function detect_dialect(term)
local function detect_dialect(term)
if find(term, "̊") or find(term, "ṡ$") or find(term, "rg") or find(term, "") then
if find(term, "[ṁṅłƛ]") then
return "w"
elseif find(term, "^t[mk]") or find(term, "^sm") or find(term, "^mġ") or find(term,"") or find(term, "̊") then
return "e"
return "e"
elseif find(term, "[ṁṅłƛ]") then
return "w"
end
end
end
end


local anaptyctic = {
local anaptyctic = {
{"mn", "ːmɨnː"}, {"mʔk", "ːmɨʔk"},
{"mn", "ːmɨnː"}, {"mʔk", "ːmɨʔkː"},
{"([^ˈ])ʦ([nxm])", "%1ːʣɨ%2ː"}, {"ʦꬶ", "ːʣɨʔk"}, {"([^ˈ])ʦꬶv", "%1ːʣɨkwː"},
{"([^ˈ])ʦ([nxm])", "%1ːʣɨ%2ː"}, {"([^ˈ])ʦꬶv", "%1ːʣɨkwː"}, {"ʦꬶ", "ːʣɨʔkː"},  
{"([^ˈ])ʦv", "%1ːʣɨwː"}, {"ʦđ", "ːʣɨʔt"},
{"([^ˈ])ʦv", "%1ːʣɨwː"}, {"ʦđ", "ːʣɨʔtː"},
{"đꬶ", "ːdɨʔk"}, {"đꬶv", "ːdɨkwː"}, {"đ([xn])", "ːdɨ%1ː"},
{"đꬶv", "ːdɨkwː"}, {"đꬶ", "ːdɨʔkː"}, {"đ([xn])", "ːdɨ%1ː"},
{"([^ˈ])đv", "%1ːdɨwː"},
{"([^ˈ])đv", "%1ːdɨwː"},
{"sꬶv", "skʔɨwː"}, {"sʔk", "ːsɨʔk"}, {"ɕꬶv", "ɕkɨwː"},
{"sꬶv", "skɨwː"}, {"sʔk", "ːsɨʔkː"}, {"ɕꬶv", "ɕkɨwː"},
{"([lr])(ʔ[kpt])", "ː%1ɨ%2"}, {"ł([mnx])", "ːłɨ%1ː"}, {"ꝺx", "ːðɨɣː"},
{"([lr])(ʔ[kpt])", "ː%1ɨ%"}, {"([mnx])", "ːⱡɨ%1ː"}, {"ꝺx", "ːðɨɣː"},
{"ðꬶ", "ːðɨʔk"}, {"ɣ([nm])", "ːŋɨ%1ː"}, {"([łrð])v", "ː%1ɨwː"},
{"ðꬶ", "ːðɨʔkː"}, {"ɣ([nm])", "ːŋɨ%1ː"}, {"([ⱡrð])v", "ː%1ɨwː"},
{"x([mnl])", "ːɣɨ%1ː"}, {"xv", "ːɣɨwː"}, {"ƀ([msɕ])", "ːbɨ%1ː"},
{"x([mnl])", "ːɣɨ%1ː"}, {"^(ˈ?)xv", "%1ɣɨwː"}, {"([^s])xv", "%1ːɣɨwː"}, {"ƀ([msɕ])", "ːbɨ%1ː"},
{"([^ˈ])ꬶ([msɕl])", "%1ːɡɨ%2ː"}, {"(.)ꬶv", "%1ːɡɨwː"}, {"ꬶsꬶ", "ːksɨʔk"},
{"([^ˈ])ꬶ([msɕl])", "%1ːɡɨ%2ː"}, {"(.)ꬶv", "%1ːɡɨwː"}, {"ꬶsꬶ", "ːksɨʔkː"},
}
}


Line 99: Line 105:
["ḥ"] = "ʔ", ["į"] = "j",
["ḥ"] = "ʔ", ["į"] = "j",
["ġ"] = "x", ["g"] = "ɡ", -- IPA g
["ġ"] = "x", ["g"] = "ɡ", -- IPA g
["ų"] = "w",
["ų"] = "w", ["ł"] = "ɬ",
}},
}},
{"o̊", "ɯ"}, {"t[ṡɕ]", "ʨ"}, {"dj", "ʥ"}, {"dl", "ł"}, {"kj", "c"}, {"nj", "ɲ"},  {"ts", "ʦ"},
{"", "ʨ"}, {"dj", "ʥ"}, {"dl", ""}, {"kj", "c"}, {"nj", "ɲ"},  {"ts", "ʦ"}, {"o̊", "ɯ"},
-- long consonants
{"([ạẹịọụỵ])", function(v) return unrelaxed[v] .. "" end},
{"mm", "mː"}, {"bb", "pː"}, {"vv", "wː"}, {"nn", "nː"},
{"dʥ", "ʥː"}, {"dd", "tː"}, {"ðð", "ðː"}, {"ss", "sː"},
{"ɕɕ", "ɕː"}, {"rr", "rː"}, {"ll", "lː"}, {"ɡɡ", "kː"},
{"xx", "xː"}, {"nɡ", "ŋː"}, {"hh", "hː"}, {"ʔʔ", "ʔː"},
{"nɲ", "ɲː"}, {"hl", "ɬː"}, {"ƛƛ", "ɬː"},  
-- default all consonants to unstressed. [] with stroke and ꬶ (U+AB36) to tell apart natural and stress-borne
-- default all consonants to unstressed. [] with stroke and ꬶ (U+AB36) to tell apart natural and stress-borne
{"p", "ƀ"}, {"d", "ꝺ"}, {"t", "đ"}, {"ɡ", "ɣ"}, {"k", "ꬶ"},
{"p", "ƀ"}, {"d", "ꝺ"}, {"t", "đ"}, {"ɡ", "ɣ"}, {"k", "ꬶ"},
-- long consonants
{"mm", "mː"}, {"bb", "pː"}, {"vv", "wː"}, {"nn", "nː"},
{"ꝺʥ", "ʥː"}, {"ꝺꝺ", "tː"}, {"ðð", "ðː"}, {"ss", "sː"},
{"ɕɕ", "ɕː"}, {"rr", "rː"}, {"ll", "lː"}, {"ɣɣ", "kː"},
{"xx", "xː"}, {"nɣ", "ŋː"}, {"hh", "hː"}, {"ʔʔ", "ʔː"},
{"nɲ", "ɲː"}, {"hl", "ɬː"}, {"ƛƛ", "ɬː"}, {"đʨ", "ʨː"},
-- default all vowels to open (open-closed distinctions are computed later); long vowels don't have closed counterparts
-- default all vowels to open (open-closed distinctions are computed later); long vowels don't have closed counterparts
{"a", "ɑ"}, {"ę", "æ"}, {"o", "ɔ"}, {"ů", "ø"}, -- õ is conserved to avoid two characters
{"a", "ɑ"}, {"ę", "æ"}, {"o", "ɔ"}, {"ů", "ø"}, -- õ is conserved to avoid two characters
{"ả", "æː"}, {"ẻ", "eː"}, {"ỉ", "iː"}, {"ỏ", "ʊː"}, {"ủ", "uː"}, {"ỷ", "yː"}, {"ẻu", "øː"}, {"õu", "õː̃"},
{"ả", "æː"}, {"ẻu", "øː"}, {"ẻ", "eː"}, {"ỉ", "iː"}, {"ỏ", "ʊː"}, {"ủ", "uː"}, {"ỷ", "yː"}, {"õu", "õː"},
{"ey", ""}, {"ɑy", "æø"}, -- y-final diphthongs
-- diphthongs
{"^(ˈ?)ꬶ([" .. front_vowel .. "])", "%1c%2"},
{"øɑ", "üa"}, {"øi", "üɪ"}, {"", "ʊɔ"},
{"^(ˈ?)ɣ([" .. front_vowel .. "])", "%1ɟ%2"}, -- word-initial [k] and [g] palatalize before front vowels
{"ɑy", "æœ"}, {"ey", "ɛœ"},
{"^(ˈ?[ƀđꬶc])([^mn])", "%1ʰ%2"}, -- voiceless stops word-initially become aspirated
{"^(ˈ?)đʰꬶ", "%1tk"}, {"^(ˈ?)đʰm", "%1tm"}, -- Far Eastern initial consonant clusters
{"(.*·ˈ[ƀđꬶc])", "%1⁽ʰ⁾"}, -- although after a prefix they may or may not
{"^(ˈ?)ɣj([" .. front_vowel .. "])", "%1ʣ%2"}, -- <gį> word-initially and before front vowels is pronounced [d͡z]
{"^(ˈ?)ɣj([" .. back_vowel .. "])", "%1ɟ%2"}, -- and [ɟ] before back vowels
{"^(ˈ)ꬶ([" .. front_vowel .. "])", "%1c%2"},  
-- preaspirated consonants
{"^(ˈ)ɣ([" .. front_vowel .. "])", "%1ɟ%2"}, -- word-initial [k] and [g] palatalize before front vowels
{"[hʔ](ʦ[ꬶx])", "ħ%1"}, {"[hʔ](đ[vx])", "ħ%1"},
{"^(ˈ[ƀđꬶc])", "%"}, -- voiceless stops word-initially become aspirated
{"h(ː[wj])", "ħ%1"}, {"[hʔ]ꬶ", "ħk"}, {"[]đ", "ħt"}, {"[hʔ]ƀ", "ħp"}, {"[hʔ]", "ħd"}, {"[]ɣ", "ħɡ"},
{"^(.*·ˈ[ƀđꬶc])", "%1⁽ʰ⁾"}, -- although after a prefix they may or may not be
{"[hʔ]([pbtdkmnlsrʦ][^" .. UNR .. "])", "ħ%1"},
{"^(ˈ)ɣj([" .. front_vowel .. "])", "%1ʣ%2"}, -- <gį> word-initially and before front vowels is pronounced [d͡z]
{"^(ˈ)ɣj([" .. back_vowel .. "])", "%1ɟ%2"}, -- and [ɟ] before back vowels
 
{"ƀƀ", "ʔp"}, {"đđ", "ʔt"}, {"ꬶꬶ", "ʔk"}, {"ꝺ$", "ʥ"},
{"ƀƀ", "ʔp"}, {"", "ʔp"},
{"bm", "ʔp" .. UNR .. "m"}, {"ꝺn", "ʔt" .. UNR .. "n"}, {"ꬶn", "ʔk" .. UNR .. "ŋ"},
{"đđ", "ʔt"}, {"", "ʔt"},
{"ƀ⁽ʰ⁾", "p⁽ʰ⁾"}, {"đ⁽ʰ⁾", "t⁽ʰ⁾"}, {"ꬶ⁽ʰ⁾", "k⁽ʰ⁾"},
{"ꬶꬶ", "ʔk"}, {"kꬶ", "ʔk"},
{"bm", "ʔp̚m"}, {"ꝺn", "ʔt̚n"}, {"ꬶn", "ʔk̚ŋ"},
-- other stem- and/or word-initial configurations
-- other stem- and/or word-initial configurations
{"đi", "ʨi"}, {"ꝺi", "ʥi"}, {"ɣi", "ɉi"}, {"ɣj", ""}, {"ɣjː", "ɟː"},
{"([^ˈ])đi", "%1ʨi"}, {"([^ˈ])ꝺi", "%1ʥi"}, {"([^ˈ])ɣi", "%1ɟi"}, {"([^ˈ])ɣj", "%1jː"}, {"([^ˈ])ɣjː", "%1ɟː"},
{"ˈƀ", "ˈp"}, {"ˈđ", "ˈt"}, {"ˈꬶ", "ˈk"}, {"ˈꝺ", "ˈd"}, {"ƛ", "ʔł"}, {"ˈɣ", "ˈɡ"}
{"^([ˈˌ]?)ƀ", "%1p"}, {"^([ˈˌ]?)đ", "%1t"}, {"^([ˈˌ]?)ꬶ", "%1k"}, {"^([ˈˌ]?)ꝺ", "%1d"}, {"([ˈˌ])ɣ", "%1ɡ"}
}
}


local rules2 = {
local rules2 = {
-- stressed consonants (coda of stressed syllables without long vowels, diphthongs, or triphthongs)
-- stressed consonants (coda of stressed syllables without long vowels, diphthongs, or triphthongs)
{spat("h"), "%1ʔ%2"}, {spat("ꬶ"), "%1k%2"}, {spat("ƀ"), "%1p%2"}, {spat("đ"), "%1t%2"}, {spat("ꝺ"), "%1d%2"},
{spat("h"), "%1ʔ%2"}, {spat("ꬶ"), "%1k%2"}, {spat("ƀ"), "%1p%2"},
 
{spat("đ"), "%1t%2"}, {spat("ꝺ"), "%1d%2"}, {spat("ɣ"), "%1ɡ%2"},
-- internal consonant clusters
-- internal consonant clusters
{"pp", "ʔp"},
{"[ƀp][ƀp]", "ʔp"},
{"tt", "ʔt"},
{"[đt][đt]", "ʔt"},
{"k[kc]", "ʔk"},-- {"kc", "ʔc"},
{"[ꬶk][ꬶkc]", "ʔk"},-- {"kc", "ʔc"},
{"mn", "mnː"}, {"mʔk", "mkː"},
{"mn", "mnː"}, {"mʔk", "mkː"}, {"m[ƀp]", "mp"}, {"m(s?)[đt]", "m%1t"}, {"m[ꬶk]", "mk"}, {"m[ʔh]", "mh"},
{"[ƀp]([msɕ])", "p%1ː"}, {"[ƀp]r", "px"},
{"[ƀp]([msɕ])", "p%1ː"}, {"[ƀp]r", "px"},
{"b([sɕ])", "p%1"},
{"b([sɕ])", "p%1"},
{"n[ꬶk]", "ŋk"}, {"([ðđʦłɕꬶr])v", "%1wː"},
{"n[ꝺd]", "nd"}, {"n[đt]", "nt"}, {"ŋʔk", "ŋkː"}, {"n[ꬶk]([^ː])", "ꬼk%1"}, {"([ðđʦⱡɕꬶrkt])v", "%1wː"},
{"[đt]n", "tnː"}, {"[đt]r", "tx"}, {"[đt][ꬶk]v", "tkwː"},
{"[đt]n", "tnː"}, {"[đt][ꬶk]v", "tkwː"},
{"(ʦ[tđlmn])", "%1ː"}, {"ʦ[ꬶk]v", "ʦkwː"},
{"ʦ[đt]", "ʦtː"}, {"ʦ[ꬶk]", "ʦkː"}, {"(ʦ[lmn])", "%1ː"}, {"ʦ[ꬶk]v", "ʦkwː"},
{"ʦxv", "ʦxw"}, {"[đt]x", "tːx"},
{"ʦxv", "ʦxw"}, {"[đt]x", "tːx"}, {"[đt][ꬶk]", "tk"}, {"[đt]r", "tx"},
{"(ð[mꬶk])", "%1ː"}, {"ðɣ", "ðɡ"},
{"ðm", "ðmː"}, {"ð[ꬶk]", "ðkː"}, {"ð[ɣɡ]", "ðɡ"},
{"[ꝺd]x", "ðx"},
{"[ꝺd]x", "ðx"},
{"(s[ꬶk]l)", "%1ː"}, {"sʔk", "skː"}, {"sxv", "sxwː"}, {"s[ꬶk]", "sk"},
{"(s[ꬶk]l)", "%1ː"}, {"sʔk", "skː"}, {"sxv", "sxw"},
{"([lr])ʔ([ptk])", "%1%2ː"},
{"([srl])[đt]", "%1t"}, {"([sɕrl])[ꬶk]", "%1k"}, {"([sɕrl])[ƀp]", "%1p"},  
{"([lr])ʔ([ptk])", "%1%2ː"}, {"l[ɣɡ]", "lɡ"},
{"mʔk", "mkː"},
{"lʦ(x?)v", "ɬʦ%1w"}, {"lʦx", "ɬʦx"},
{"lʦ(x?)v", "ɬʦ%1w"}, {"lʦx", "ɬʦx"},
{"(ł[mnꬶk])", "%1ː"},
{"([mnꬶk])", "%1ː"},
{"([ꬶk][msɕ])", "%1ː"}, {"[ꬶk]s[ꬶk]", "kskː"}, {"[ꬶk]sl", "ksł"},
{"[ꬶk]([msɕ])", "k%1ː"}, {"[ꬶk]s[ꬶk]", "kskː"}, {"[ꬶk]sl", "ksⱡ"},
{"[ɣɡ]([mn])", "ŋ%1ː"}, {"[ɣɡ]([vsl])", "k%1"},
{"[ɣɡ]([mn])", "ŋ%1ː"}, {"[ɣɡ]([vsl])", "k%1"},


Line 168: Line 180:
{"(" .. vowel .. "*)(ː?" .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end},
{"(" .. vowel .. "*)(ː?" .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end},
{"(" .. vowel .. "*)(ː?" .. consonant .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end},
{"(" .. vowel .. "*)(ː?" .. consonant .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end},
{"(" .. vowel .. "*)(" .. consonant .. "ː" .. consonant .. ")", function(s1, s2) return open_to_closed(s1) .. s2 end},
{"ɑ$", "a"},
{"ɑ$", "a"},
}
}


local final = {
local final = {
{"[kꬶ](["..front_vowel.."])", "c%1"}, {"[ɡɣ](["..front_vowel.."])", "ɟ%1"},
{"k(["..front_vowel.."])", "c%1"}, {"[ɡꬶɣ](["..front_vowel.."])", "ɟ%1"},
{"(" .. vowel .. ")[đt]$", "%1ʔ%1"}, -- -Vt becomes -VʔV (or -Vht, not considered)
{"([iɪ])[đt]$", "%1ʨ"}, {"(" .. vowel .. ")[đt]$", "%1ʔ%1̆"}, {"(" .. vowel .. "ː)[đt]$", "%1ʔ"}, -- -Vt becomes -VʔV̆ or -V̄ʔ
 
--undo ligatures
--undo ligatures
{"ʨ", "t͡ɕ"}, {"ʥ", "d͡ʑ"}, {"ł", "tɬ"}, {"ʣ", "d͡z"}, {"ʦ", "t͡s"},
{"ʨ", "t͡ɕ"}, {"ʥ", "d͡ʑ"}, {"", "tɬ"}, {"ʣ", "d͡z"}, {"ʦ", "t͡s"},
{"ƀ", "p"}, {"ꝺ", "d"}, {"đ", "t"}, {"ꬶ", "ɡ"}, {"ɉ", "ɟ"}, {"õ", "ɔ̃"},
{"ƀ", "b"}, {"ꝺ", "ð"}, {"đ", "d"}, {"ꬶ", "ɡ"}, {"õ", "ɔ̃"},
{"", ""}, {"", ""},
{"ṁ", "m̥"}, {"ṅ", "n̥"}, {"ħ", "h"}, {"ɡ⁽", "k⁽"}, {"", "ŋ̊"}, {"(t⁽ʰ⁾)ɟ", "%1c"},{"(tʼ)ɟ", "%1c"},
 
 
{"k(["..front_vowel.."])", "c%1"}, {"ɡ(["..front_vowel.."])", "ɟ%1"},
{"k(ː?["..front_vowel.."])", "c%1"}, {"ɡ(ː[" ..front_vowel.."])", "c%1"}, {"dɟ", "tc"}, {"ƛ", "tɬʼ"},
-- remove morpheme separator, possible double long vowel markers, and repeated secondary stress markers
-- remove morpheme separator, possible double long vowel markers, and repeated secondary stress markers
{"·", ""}, {"ːː", "ː"}, {"(ˈ[^ˌ]*)ˌ", "%1"}, {"-", ""},
{"[·ⱦ]", ""}, {"ːː", "ː"}, {"(ˈ[^ˌ]*)ˌ", "%1"}, {"([^ˈ])-", "%1‿"},
}
}


Line 194: Line 207:


if w then
if w then
term = gsub(term, "ꬶl", "ʔł")
term = gsub(term, "ꬶl", "ʔⱡ")
for _, anap in ipairs(anaptyctic) do
for _, anap in ipairs(anaptyctic) do
term = gsub(term, anap[1], anap[2])
term = gsub(term, anap[1], anap[2])
Line 200: Line 213:
elseif e then
elseif e then
term = gsub(term, "(.)⁽ʰ⁾", "%1ʼ")
term = gsub(term, "(.)⁽ʰ⁾", "%1ʼ")
term = gsub(term, "([đƀ])r", "%1ʼqʼ")
term = gsub(term, "ꬶʼ", "kʼ"); term = gsub(term, "ꬶʼ", "kʼ"); term = gsub(term, "ƀʼ", "pʼ"); term = gsub(term, "đʼ", "tʼ")
term = gsub(term, "ƀr", "pʼqʼ")
term = gsub(term, "đr", "tʼqʼ")
term = gsub(term, "ʦx", "ʦʼqʼ")
term = gsub(term, "ʦx", "ʦʼqʼ")
term = gsub(term, "ꬶl", "klː")
term = gsub(term, "ꬶl", "klː")
Line 223: Line 238:
return term
return term
end
end


function format_IPA(items)
function format_IPA(items)
Line 262: Line 275:
["e"] = {type = 'boolean', default = true},
["e"] = {type = 'boolean', default = true},
["w"] = {type = 'boolean', default = true},
["w"] = {type = 'boolean', default = true},
["dia"] = {},
["rs"] = {type = 'boolean'},
}
}
Line 270: Line 285:
local ipa = "* "
local ipa = "* "
if detect_dialect(term) == "e" then
if detect_dialect(term) == "w" then
ipa = ipa .. line_format(separate_word(term, false, true, false), {'Eastern'})
args.e = false; args.a = false
ipa = ipa .. line_format(separate_word(term, false, false, true), {args.dia or 'Western'})
if mw.title.getCurrentTitle().nsText ~= 'Template' then  
if mw.title.getCurrentTitle().nsText ~= 'Template' then  
table.insert(categories, "Eastern Siwa lemmas")
table.insert(categories, "Western Siwa terms")
table.insert(categories, "Siwa lemmas with Eastern IPA pronunciation")
table.insert(categories, "Siwa terms with Western IPA pronunciation")
end
end
elseif detect_dialect(term) == "w" then
elseif detect_dialect(term) == "e" then
ipa = ipa .. line_format(separate_word(term, false, false, true), {'Western'})
args.w = false; args.a = false
ipa = ipa .. line_format(separate_word(term, false, true, false), {args.dia or 'Eastern'})
if mw.title.getCurrentTitle().nsText ~= 'Template' then  
if mw.title.getCurrentTitle().nsText ~= 'Template' then  
table.insert(categories, "Western Siwa lemmas")
table.insert(categories, "Eastern Siwa terms")
table.insert(categories, "Siwa lemmas with Western IPA pronunciation")
table.insert(categories, "Siwa terms with Eastern IPA pronunciation")
end
end
elseif args.a then
elseif args.a then
ipa = ipa .. line_format(separate_word(term, true, false, false), {'Aingo'})
ipa = ipa .. line_format(separate_word(term, true, false, false), {args.dia or 'Aingo'})
end
end
Line 290: Line 307:
ipa = ipa .. "\n* "
ipa = ipa .. "\n* "
end
end
ipa = ipa .. line_format(separate_word(term, true, true, false), {'Eastern'})
ipa = ipa .. line_format(separate_word(term, true, true, false), {args.dia or 'Eastern'})
if mw.title.getCurrentTitle().nsText ~= 'Template' then  
if mw.title.getCurrentTitle().nsText ~= 'Template' then  
table.insert(categories, "Siwa lemmas with Eastern IPA pronunciation")
table.insert(categories, "Siwa terms with Eastern IPA pronunciation")
end
end
end
end
Line 300: Line 317:
ipa = ipa .. "\n* "
ipa = ipa .. "\n* "
end
end
ipa = ipa .. line_format(separate_word(term, true, false, true), {'Western'})
ipa = ipa .. line_format(separate_word(term, true, false, true), {args.dia or 'Western'})
if find(ipa, "ɤ") then
if find(ipa, "ɤ") then
ipa = ipa .. "\n** "
ipa = ipa .. "\n** "
ipa = ipa .. line_format(gsub(separate_word(term, false, false, true),"ɤ","ɵ"), {'Regna'})
ipa = ipa .. line_format(gsub(separate_word(term, false, false, true),"ɤ","ɵ"), {args.dia or 'Regna'})
end
end
if mw.title.getCurrentTitle().nsText ~= 'Template' then  
if mw.title.getCurrentTitle().nsText ~= 'Template' then  
table.insert(categories, "Siwa lemmas with Western IPA pronunciation")
table.insert(categories, "Siwa terms with Western IPA pronunciation")
end
end
end
end
if args.rs then ipa = gsub(ipa, "[ˌˈ]", "") end
return ipa .. ncategories(categories)
return ipa .. ncategories(categories)

Latest revision as of 13:31, 10 August 2022



local m_sm = mw.loadData("Module:siwa-pron/data")

local sub = mw.ustring.sub
local find = mw.ustring.find
local gmatch = mw.ustring.gmatch
local gsub = mw.ustring.gsub
local u = mw.ustring.char
local split = mw.text.split
local gsplit = mw.text.gsplit

-- Table of functions returned by this module (see the `return export` at the end of the file).
local export = {}

-- Combining "unreleased" diacritic; appended to stops by rules1 and allowed
-- (optionally) after any consonant by the `consonant` pattern below.
local UNR = u(0x031A) -- COMBINING LEFT ANGLE ABOVE. ̚

--obsolete ligatures and L with stroke used to remove two-character hassle. will replace later
-- Pattern fragment: one character from the consonant set, optionally followed by UNR.
-- The set also contains internal placeholder characters that the `final` rules
-- rewrite or strip (e.g. ƀ ꝺ đ ꬶ ⱡ ṁ ṅ ꬼ ⱦ) and the inserted vowel ɨ.
local consonant = "[mnɲŋpbtdcɟkɡʔvðsɕxɣhħʨʥrlɬⱡłʣʦʔƀꝺđꬶɉʼⱦṁṅꬼɨ]" .. UNR .. "?"
-- Vowel inventories as bare character lists (no brackets) so they can be spliced
-- into larger character classes.
local front_vowel = "iɪyeøɛœæ"
local back_vowel = "uɔõɑʊɤɯ"
-- Pattern fragment: any single vowel (front, back, or "a").
local vowel = "[" .. front_vowel .. back_vowel .. "a]"

-- Maps a dotted-below vowel letter to its plain counterpart. Used by rules1, which
-- appends the placeholder "ⱦ" after the plain vowel; `final` later deletes "ⱦ".
local unrelaxed = {
	["ạ"] = "a", ["ẹ"] = "e", ["ị"] = "i", ["ọ"] = "o",	["ụ"] = "u", ["ỵ"] = "y",
}

-- Build the pattern used by rules2 for a consonant `c` that immediately follows
-- the vowel of a primary-stressed (ˈ) syllable and is not itself followed by ː.
-- Capture 1 is everything from the optional "·" up to and including the vowel;
-- capture 2 is the single character after `c`.
function spat(c)
	local onset = "[mnɲŋpbtdcɟkɡvðsɕxɣhʨʥrlɬłⱡʣjwʦ⁽ʰ⁾ʼʔːƀꝺđꬶɉṁṅꬼ]*"
	local stressed_nucleus = "(·?ˈ" .. onset .. UNR .. "?" .. vowel .. ")"
	local not_long = "([^ː])"
	return stressed_nucleus .. c .. not_long
end

-- Turn a sequence of category names into concatenated wikitext category links.
-- @param categories  sequence of category names (without the "Category:" prefix)
-- @return            "[[Category:A]][[Category:B]]…", or "" for an empty sequence
local function ncategories(categories)
	local links = {}
	for _, name in ipairs(categories) do
		links[#links + 1] = "[[Category:" .. name .. "]]"
	end
	return table.concat(links)
end

-- Open vowel -> closed-syllable counterpart. Vowels that are already "closed"
-- map to themselves so the conversion is idempotent.
-- Built once at load time instead of on every call.
local CLOSED_VOWEL = {
	["ɑ"] = "a", ["æ"] = "æ", ["e"] = "ɛ", ["i"] = "ɪ",
	["ɔ"] = "ɔ", ["õ"] = "õ", ["u"] = "ʊ", ["y"] = "œ", ["ɯ"] = "ɯ",
	["ø"] = "ü", -- dialectal variation, will be changed later
	["a"] = "a", ["ɛ"] = "ɛ", ["ɪ"] = "ɪ",
	["ʊ"] = "ʊ", ["ü"] = "ü", ["œ"] = "œ",
}

-- Convert every vowel in `v` to its closed-syllable form.
-- @param v  string of vowel characters (may be empty; the rules2 callers capture
--           it with `vowel*`)
-- @return   `v` with each character replaced via CLOSED_VOWEL; a character with
--           no entry (e.g. "ɤ", which is in the vowel class but not in the map)
--           is passed through unchanged instead of raising an error from gsub
--           with a nil replacement.
local function open_to_closed(v)
	local otc = {}
	-- gmatch is mw.ustring.gmatch, so "." yields whole code points, not bytes.
	for vc in gmatch(v, ".") do
		otc[#otc + 1] = CLOSED_VOWEL[vc] or vc
	end
	return table.concat(otc)
end

-- Split a word on the morpheme separator "·" and prefix each morpheme with a
-- stress mark, then rejoin with "·".
--
--   * a morpheme listed in m_sm.suffix is left unmarked, provided some stress
--     mark (ˈ or ˌ) is already present in the word;
--   * otherwise a morpheme listed in m_sm.prefix gets secondary stress (ˌ);
--   * every other morpheme gets primary stress (ˈ).
--
-- If more than one primary stress results, all but the last are removed.
--
-- NOTE: the previous code used `gmatch(...)` as a condition. gmatch returns an
-- iterator function, which is always truthy, so the "is there a stress mark yet"
-- test never ran and a word consisting only of a suffix-homonym came out with no
-- stress at all. `find` performs the test that was intended.
--
-- @param word  lower-cased term, morphemes separated by "·"
-- @return      the term with stress marks inserted
function export.morphemes(word)
	-- split() returns {word} when there is no separator, so no guard is needed.
	local pss = split(word, "·")

	for i, m in ipairs(pss) do
		if m_sm.suffix[m] and find(table.concat(pss), "[ˈˌ]") then
			-- suffix after an already-stressed morpheme: stays unstressed
		elseif m_sm.prefix[m] then
			pss[i] = "ˌ" .. m
		else
			pss[i] = "ˈ" .. m
		end
	end

	local joined = table.concat(pss, "·")
	local _, n = gsub(joined, "ˈ", "")

	if n >= 2 then
		-- drop the first n-1 primary stress marks, keeping only the last one;
		-- the parentheses discard gsub's second return value (the count)
		return (gsub(joined, "ˈ", "", n - 1))
	end
	return joined
end

-- Guess the dialect of a term from its spelling.
-- @param term  the term as written
-- @return      "w" if it contains any of ṁ ṅ ł ƛ; otherwise "e" if it begins with
--              tm, tk, sm, mġ or ų, or contains a combining ring above;
--              otherwise nothing (nil)
local function detect_dialect(term)
	if find(term, "[ṁṅłƛ]") then
		return "w"
	end

	local eastern_marks = {"^t[mk]", "^sm", "^mġ", "^ų", "̊"}
	for _, mark in ipairs(eastern_marks) do
		if find(term, mark) then
			return "e"
		end
	end
end

-- Ordered list of {pattern, replacement} pairs that break up consonant clusters
-- by inserting the vowel ɨ (every replacement below contains ɨ).
-- export.crux applies them in this order with gsub, only when its `w` argument is
-- truthy, after rules1 and before rules2. Order matters: a longer cluster is
-- generally listed before the shorter cluster it begins with (e.g. "đꬶv" before
-- "đꬶ") so the longer one is matched first.
-- Patterns starting with "([^ˈ])" skip a cluster that directly follows the
-- primary stress mark and re-emit the preceding character as %1.
local anaptyctic = {
	-- m-initial clusters
	{"mn", "ːmɨnː"}, {"mʔk", "ːmɨʔkː"},
	
	-- ʦ-initial clusters
	{"([^ˈ])ʦ([nxm])", "%1ːʣɨ%2ː"}, {"([^ˈ])ʦꬶv", "%1ːʣɨkwː"}, {"ʦꬶ", "ːʣɨʔkː"}, 
	{"([^ˈ])ʦv", "%1ːʣɨwː"}, {"ʦđ", "ːʣɨʔtː"},
	
	-- đ-initial clusters
	{"đꬶv", "ːdɨkwː"}, {"đꬶ", "ːdɨʔkː"}, {"đ([xn])", "ːdɨ%1ː"},
	{"([^ˈ])đv", "%1ːdɨwː"},
	
	-- sibilant-initial clusters
	{"sꬶv", "skɨwː"}, {"sʔk", "ːsɨʔkː"}, {"ɕꬶv", "ɕkɨwː"},
	
	-- liquid + glottalized stop, ⱡ + m/n/x, ꝺx
	{"([lr])(ʔ[kpt])", "ː%1ɨ%2ː"}, {"ⱡ([mnx])", "ːⱡɨ%1ː"}, {"ꝺx", "ːðɨɣː"},
	
	{"ðꬶ", "ːðɨʔkː"}, {"ɣ([nm])", "ːŋɨ%1ː"}, {"([ⱡrð])v", "ː%1ɨwː"},
	
	-- x-initial clusters; "xv" is handled separately at the start of the word
	-- (no leading ː) and is left alone after s
	{"x([mnl])", "ːɣɨ%1ː"}, {"^(ˈ?)xv", "%1ɣɨwː"}, {"([^s])xv", "%1ːɣɨwː"}, {"ƀ([msɕ])", "ːbɨ%1ː"},
	
	-- ꬶ-initial clusters
	{"([^ˈ])ꬶ([msɕl])", "%1ːɡɨ%2ː"}, {"(.)ꬶv", "%1ːɡɨwː"}, {"ꬶsꬶ", "ːksɨʔkː"},
}

-- First rewriting pass: ordered {pattern, replacement} pairs that export.crux
-- applies with gsub right after export.morphemes has inserted stress marks and
-- "·" separators. A replacement may be a string, a lookup table (first rule) or a
-- function. The rules convert the orthography to IPA plus a set of one-character
-- placeholders (ƀ ꝺ đ ɣ ꬶ ⱡ ʨ ʥ ʦ ʣ ħ ⱦ …) that later passes resolve.
-- Order matters throughout: later rules match the output of earlier ones.
local rules1 = {
	-- single-character respellings; gsub looks each character up in the table and
	-- leaves characters without an entry unchanged
	{".", {
		["ḍ"] = "ð", ["ṡ"] = "ɕ",
		["ḥ"] = "ʔ", ["į"] = "j",
		["ġ"] = "x", ["g"] = "ɡ", -- IPA g
		["ų"] = "w", ["ł"] = "ɬ",
	}},
	-- digraphs collapsed to single placeholder characters (the inputs are the
	-- outputs of the table above, e.g. "tɕ" comes from "tṡ")
	{"tɕ", "ʨ"}, {"dj", "ʥ"}, {"dl", "ⱡ"}, {"kj", "c"}, {"nj", "ɲ"},  {"ts", "ʦ"}, {"o̊", "ɯ"},
	
	-- dotted-below vowels: replace with the plain vowel followed by the marker "ⱦ"
	-- ("ⱦ" is part of the `consonant` class and is deleted by `final`)
	{"([ạẹịọụỵ])", function(v) return unrelaxed[v] .. "ⱦ" end},
	
	-- default all consonants to unstressed. [] with stroke and ꬶ (U+AB36) to tell apart natural and stress-borne
	{"p", "ƀ"}, {"d", "ꝺ"}, {"t", "đ"}, {"ɡ", "ɣ"}, {"k", "ꬶ"},
	
	-- long consonants
	{"mm", "mː"}, {"bb", "pː"}, {"vv", "wː"}, {"nn", "nː"},
	{"ꝺʥ", "ʥː"}, {"ꝺꝺ", "tː"}, {"ðð", "ðː"}, {"ss", "sː"},
	{"ɕɕ", "ɕː"}, {"rr", "rː"}, {"ll", "lː"}, {"ɣɣ", "kː"},
	{"xx", "xː"}, {"nɣ", "ŋː"}, {"hh", "hː"}, {"ʔʔ", "ʔː"},
	{"nɲ", "ɲː"}, {"hl", "ɬː"}, {"ƛƛ", "ɬː"}, {"đʨ", "ʨː"},
	
	-- default all vowels to open (open-closed distinctions are computed later); long vowels don't have closed counterparts
	{"a", "ɑ"}, {"ę", "æ"}, {"o", "ɔ"}, {"ů", "ø"}, -- õ is conserved to avoid two characters
	-- "ẻu" must precede "ẻ" so the two-letter sequence is matched first
	{"ả", "æː"}, {"ẻu", "øː"}, {"ẻ", "eː"}, {"ỉ", "iː"}, {"ỏ", "ʊː"}, {"ủ", "uː"}, {"ỷ", "yː"}, {"õu", "õː"},
	{"ey", "eø"}, {"ɑy", "æø"}, -- y-final diphthongs
	
	{"^(ˈ?)ꬶ([" .. front_vowel .. "])", "%1c%2"}, 
	{"^(ˈ?)ɣ([" .. front_vowel .. "])", "%1ɟ%2"}, -- word-initial [k] and [g] palatalize before front vowels 
	{"^(ˈ?[ƀđꬶc])([^mn])", "%1ʰ%2"}, -- voiceless stops word-initially become aspirated
	{"^(ˈ?)đʰꬶ", "%1tk"}, {"^(ˈ?)đʰm", "%1tm"}, -- Far Eastern initial consonant clusters
	{"(.*·ˈ[ƀđꬶc])", "%1⁽ʰ⁾"}, -- although after a prefix they may or may not
	{"^(ˈ?)ɣj([" .. front_vowel .. "])", "%1ʣ%2"}, -- <gį> word-initially and before front vowels is pronounced [d͡z]
	{"^(ˈ?)ɣj([" .. back_vowel .. "])", "%1ɟ%2"}, -- and [ɟ] before back vowels
	
	-- preaspirated consonants
	-- (h or ʔ before a stop/cluster becomes the placeholder ħ, which `final` turns back into h)
	{"[hʔ](ʦ[ꬶx])", "ħ%1"}, {"[hʔ](đ[vx])", "ħ%1"},
	{"h(ː[wj])", "ħ%1"}, {"[hʔ]ꬶ", "ħk"}, {"[hʔ]đ", "ħt"}, {"[hʔ]ƀ", "ħp"}, {"[hʔ]ꝺ", "ħd"}, {"[hʔ]ɣ", "ħɡ"},
	{"[hʔ]([pbtdkmnlsrʦ][^" .. UNR .. "])", "ħ%1"},
	
	-- doubled stops and stop + nasal sequences become glottalized (unreleased before the nasal)
	{"ƀƀ", "ʔp"}, {"đđ", "ʔt"}, {"ꬶꬶ", "ʔk"}, {"ꝺ$", "ʥ"},
	{"bm", "ʔp" .. UNR .. "m"}, {"ꝺn", "ʔt" .. UNR .. "n"}, {"ꬶn", "ʔk" .. UNR .. "ŋ"},
	-- resolve placeholders in front of the optional-aspiration mark
	{"ƀ⁽ʰ⁾", "p⁽ʰ⁾"}, {"đ⁽ʰ⁾", "t⁽ʰ⁾"}, {"ꬶ⁽ʰ⁾", "k⁽ʰ⁾"},  
	
	-- other stem- and/or word-initial configurations
	-- NOTE(review): the rule for "ɣj" runs before the rule for "ɣjː", so by the time
	-- the latter is tried every "ɣj" has already been rewritten; it looks
	-- unreachable — confirm the intended order.
	{"([^ˈ])đi", "%1ʨi"}, {"([^ˈ])ꝺi", "%1ʥi"}, {"([^ˈ])ɣi", "%1ɟi"}, {"([^ˈ])ɣj", "%1jː"}, {"([^ˈ])ɣjː", "%1ɟː"},
	-- word-initial placeholders (optionally after a stress mark) become plain stops
	{"^([ˈˌ]?)ƀ", "%1p"}, {"^([ˈˌ]?)đ", "%1t"}, {"^([ˈˌ]?)ꬶ", "%1k"}, {"^([ˈˌ]?)ꝺ", "%1d"}, {"([ˈˌ])ɣ", "%1ɡ"}
}

-- Second rewriting pass: ordered {pattern, replacement} pairs that export.crux
-- applies with gsub after rules1 and after the dialect-specific substitutions.
-- It resolves placeholder consonants in stressed codas and in clusters, then
-- switches vowels to their closed variants via open_to_closed.
-- Most character classes list both the placeholder and the plain consonant
-- (e.g. [ƀp], [đt], [ꬶk]) so a rule matches whether or not an earlier rule has
-- already resolved the placeholder.
local rules2 = {
	-- stressed consonants (coda of stressed syllables without long vowels, diphthongs, or triphthongs)
	-- spat(c) captures the stressed syllable up to its vowel (%1) and the character after c (%2)
	{spat("h"), "%1ʔ%2"}, {spat("ꬶ"), "%1k%2"}, {spat("ƀ"), "%1p%2"},
	{spat("đ"), "%1t%2"}, {spat("ꝺ"), "%1d%2"}, {spat("ɣ"), "%1ɡ%2"},
	
	-- internal consonant clusters
	{"[ƀp][ƀp]", "ʔp"},
	{"[đt][đt]", "ʔt"},
	{"[ꬶk][ꬶkc]", "ʔk"},-- {"kc", "ʔc"},
	{"mn", "mnː"}, {"mʔk", "mkː"}, {"m[ƀp]", "mp"}, {"m(s?)[đt]", "m%1t"}, {"m[ꬶk]", "mk"}, {"m[ʔh]", "mh"},
	{"[ƀp]([msɕ])", "p%1ː"}, {"[ƀp]r", "px"},
	{"b([sɕ])", "p%1"},
	-- "n" + velar becomes the placeholder ꬼ (rewritten to ŋ̊ by `final`) unless the velar is long
	{"n[ꝺd]", "nd"}, {"n[đt]", "nt"}, {"ŋʔk", "ŋkː"}, {"n[ꬶk]([^ː])", "ꬼk%1"}, {"([ðđʦⱡɕꬶrkt])v", "%1wː"},
	{"[đt]n", "tnː"}, {"[đt][ꬶk]v", "tkwː"},
	-- NOTE(review): by this point "v" after ʦ/ꬶ/k has already become "wː" (rule
	-- above) and "ʦ[ꬶk]" is rewritten first on this line, so the "ʦ[ꬶk]v" rule
	-- looks unreachable — confirm.
	{"ʦ[đt]", "ʦtː"}, {"ʦ[ꬶk]", "ʦkː"}, {"(ʦ[lmn])", "%1ː"}, {"ʦ[ꬶk]v", "ʦkwː"},
	{"ʦxv", "ʦxw"}, {"[đt]x", "tːx"}, {"[đt][ꬶk]", "tk"}, {"[đt]r", "tx"},
	{"ðm", "ðmː"}, {"ð[ꬶk]", "ðkː"}, {"ð[ɣɡ]", "ðɡ"},
	{"[ꝺd]x", "ðx"},
	{"(s[ꬶk]l)", "%1ː"}, {"sʔk", "skː"}, {"sxv", "sxw"},
	{"([srl])[đt]", "%1t"}, {"([sɕrl])[ꬶk]", "%1k"}, {"([sɕrl])[ƀp]", "%1p"}, 
	{"([lr])ʔ([ptk])", "%1%2ː"}, {"l[ɣɡ]", "lɡ"},
	-- NOTE(review): same pair as earlier in this table ("mʔk" -> "mkː"); appears redundant
	{"mʔk", "mkː"},
	{"lʦ(x?)v", "ɬʦ%1w"}, {"lʦx", "ɬʦx"},
	{"(ⱡ[mnꬶk])", "%1ː"},
	{"[ꬶk]([msɕ])", "k%1ː"}, {"[ꬶk]s[ꬶk]", "kskː"}, {"[ꬶk]sl", "ksⱡ"},
	{"[ɣɡ]([mn])", "ŋ%1ː"}, {"[ɣɡ]([vsl])", "k%1"},

	-- closed vowels
	-- each rule captures a (possibly empty) vowel run followed by a closing
	-- context and passes the vowels through open_to_closed:
	--   two consonants (optionally across "·" and/or ˈ)
	{"(" .. vowel .. "*)(·?" .. consonant .. "·?ˈ?" .. consonant .. ")", function(s1, s2) return open_to_closed(s1) .. s2 end},
	--   a single word-final consonant
	{"(" .. vowel .. "*)(" .. consonant .. ")$", function(s1, s2) return open_to_closed(s1) .. s2 end},
	--   (ː)? + one or two consonants + the inserted vowel ɨ
	{"(" .. vowel .. "*)(ː?" .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end},
	{"(" .. vowel .. "*)(ː?" .. consonant .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end},
	--   a long consonant followed by another consonant
	{"(" .. vowel .. "*)(" .. consonant .. "ː" .. consonant .. ")", function(s1, s2) return open_to_closed(s1) .. s2 end},
	-- word-final ɑ is always a
	{"ɑ$", "a"},
}

-- Last rewriting pass: ordered {pattern, replacement} pairs that export.crux
-- applies with gsub after rules2 and the ü substitution. It handles remaining
-- palatalization and word-final t, expands the one-character placeholders back
-- into proper IPA, and cleans up separators and markers.
local final = {
	-- velars before front vowels become palatals
	{"k(["..front_vowel.."])", "c%1"}, {"[ɡꬶɣ](["..front_vowel.."])", "ɟ%1"},
	{"([iɪ])[đt]$", "%1ʨ"}, {"(" .. vowel .. ")[đt]$", "%1ʔ%1̆"}, {"(" .. vowel .. "ː)[đt]$", "%1ʔ"}, -- -Vt becomes -VʔV̆ or -V̄ʔ

	--undo ligatures
	{"ʨ", "t͡ɕ"}, {"ʥ", "d͡ʑ"}, {"ⱡ", "tɬ"}, {"ʣ", "d͡z"}, {"ʦ", "t͡s"},
	-- placeholders still present at this point are written as voiced consonants
	{"ƀ", "b"}, {"ꝺ", "ð"}, {"đ", "d"}, {"ꬶ", "ɡ"}, {"õ", "ɔ̃"},
	{"ṁ", "m̥"}, {"ṅ", "n̥"}, {"ħ", "h"}, {"ɡ⁽", "k⁽"}, {"ꬼ", "ŋ̊"}, {"(t⁽ʰ⁾)ɟ", "%1c"},{"(tʼ)ɟ", "%1c"},
	 
	-- second palatalization sweep, now that the placeholders above have been expanded
	{"k(ː?["..front_vowel.."])", "c%1"}, {"ɡ(ː[" ..front_vowel.."])", "c%1"}, {"dɟ", "tc"}, {"ƛ", "tɬʼ"},
	-- remove morpheme separator, possible double long vowel markers, and repeated secondary stress markers
	-- (also deletes the "ⱦ" marker added by rules1, and turns a hyphen that does
	-- not directly follow ˈ into the tie ‿)
	{"[·ⱦ]", ""}, {"ːː", "ː"}, {"(ˈ[^ˌ]*)ˌ", "%1"}, {"([^ˈ])-", "%1‿"},
}

-- Convert one orthographic word to its IPA transcription for one dialect.
-- Pipeline: lower-case -> export.morphemes (stress marks) -> rules1 ->
-- dialect-specific substitutions -> rules2 -> ü resolution -> final.
-- @param term  a single word (no spaces)
-- @param a     accepted for symmetry with the callers; not read by this function
-- @param e     truthy to apply the Eastern substitutions (only used when `w` is falsy)
-- @param w     truthy to apply the Western substitutions (takes precedence over `e`)
-- @return      the transcription, without surrounding brackets
function export.crux(term, a, e, w)
	-- Apply an ordered list of {pattern, replacement} pairs to `text`.
	local function run(text, rules)
		for i = 1, #rules do
			local rule = rules[i]
			text = gsub(text, rule[1], rule[2])
		end
		return text
	end

	term = export.morphemes(mw.ustring.lower(term))
	term = run(term, rules1)

	if w then
		-- Western: glottalize ꬶl, then break up clusters with ɨ
		term = gsub(term, "ꬶl", "ʔⱡ")
		term = run(term, anaptyctic)
	elseif e then
		-- Eastern: optional aspiration becomes ʼ, some clusters become ejective + qʼ
		term = run(term, {
			{"(.)⁽ʰ⁾", "%1ʼ"},
			{"ꬶʼ", "kʼ"}, {"ƀʼ", "pʼ"}, {"đʼ", "tʼ"},
			{"ƀr", "pʼqʼ"},
			{"đr", "tʼqʼ"},
			{"ʦx", "ʦʼqʼ"},
			{"ꬶl", "klː"},
		})
	else
		term = gsub(term, "ꬶl", "klː")
	end

	term = run(term, rules2)

	-- the ü placeholder produced by open_to_closed is dialect-dependent
	term = gsub(term, "ü", w and "ɤ" or "œ")

	return run(term, final)
end

-- Prefix the formatted pronunciation spans with the linked "IPA(key):" label.
-- @param items  sequence of tables with a `pron` field, passed through to IPA_span
-- @return       wikitext string
function format_IPA(items)
	local label = "[[w:IPA chart|IPA]]<sup>([[IPA for Siwa|key]])</sup>:&#32;"
	local spans = IPA_span(items)
	return label .. spans
end

-- Wrap each pronunciation in a styled <span> and concatenate the results.
-- @param items  sequence of tables, each with a string field `pron`
-- @return       concatenated HTML spans ("" for an empty sequence)
function IPA_span(items)
	-- The style attribute value is closed with a double quote before ">";
	-- previously the closing quote was missing, producing malformed HTML.
	local open_tag = "<span style=\"font-size:110%;font-family:Gentium,'DejaVu Sans','Segoe UI',sans-serif\">"
	local bits = {}
	for _, item in ipairs(items) do
		bits[#bits + 1] = open_tag .. item.pron .. "</span>"
	end
	return table.concat(bits)
end

-- Format one output line: an italicised dialect label followed by the bracketed
-- pronunciation with its IPA label.
-- @param pronunciation  transcription without brackets
-- @param dialect        sequence of dialect names, joined with ", "
-- @return               wikitext string "(''Dialect'') IPA(key): [pron]"
function line_format(pronunciation, dialect)
	local formatted = format_IPA({ {pron = '[' .. pronunciation .. ']'} })
	local label = "(''" .. table.concat(dialect, ", ") .. "'')"
	return label .. ' ' .. formatted
end

-- Transcribe a (possibly multi-word) term by running export.crux on each
-- space-separated word and rejoining the results with single spaces.
-- @param term     the term; words separated by " "
-- @param a, e, w  dialect flags forwarded unchanged to export.crux
-- @return         the transcription of the whole term
function separate_word(term, a, e, w)
	local words = split(term, " ")
	for i = 1, #words do
		-- extra parentheses keep exactly one value from crux
		words[i] = (export.crux(words[i], a, e, w))
	end
	return table.concat(words, " ")
end

-- Template entry point: build the bulleted pronunciation list (and categories)
-- for a term.
--
-- Parameters read from the parent frame:
--   1    the term; defaults to the page title (or a sample word in the Template namespace)
--   a/e/w  booleans (default true) enabling the Aingo / Eastern / Western lines
--   dia  optional label that replaces the dialect name on every line
--   rs   boolean; when set, all stress marks are stripped from the output
--
-- Behaviour:
--   * If detect_dialect recognises the spelling as Western or Eastern, only that
--     dialect's line is produced (once).
--   * Otherwise the Aingo line is produced if `a` is set, and the Eastern and
--     Western lines are produced if enabled and different from the Aingo
--     transcription.
--   * A Western transcription containing ɤ gets a nested "Regna" line with ɤ -> ɵ.
--
-- Fixes relative to the previous implementation:
--   * with a detected dialect, the later Eastern/Western block could fire as
--     well and append the same line a second time with no "\n* " separator;
--   * the separator was chosen from the a/e flags rather than from what had
--     actually been emitted, which could leave an empty bullet;
--   * each transcription is now computed once (export.crux does not read its
--     `a` argument, so the a=true and a=false variants were identical).
function export.show(frame)
	local title = mw.title.getCurrentTitle()
	local in_template_ns = title.nsText == 'Template'

	local params = {
		[1] = { default = in_template_ns and "uįo·sauṡṡi" or title.text },
		["a"] = {type = 'boolean', default = true},
		["e"] = {type = 'boolean', default = true},
		["w"] = {type = 'boolean', default = true},
		["dia"] = {},
		["rs"] = {type = 'boolean'},
	}

	local args = require("Module:parameters").process(frame:getParent().args, params)
	local term = args[1]
	local categories = {}
	local lines = {} -- one entry per top-level bullet

	-- Categories are suppressed on template pages.
	local function categorize(cat)
		if not in_template_ns then
			table.insert(categories, cat)
		end
	end

	local function eastern_line(pron)
		categorize("Siwa terms with Eastern IPA pronunciation")
		return line_format(pron, {args.dia or 'Eastern'})
	end

	-- Western line, plus a nested Regna line when the transcription contains ɤ.
	local function western_line(pron)
		categorize("Siwa terms with Western IPA pronunciation")
		local line = line_format(pron, {args.dia or 'Western'})
		if find(pron, "ɤ") then
			-- parentheses drop gsub's second return value (the count)
			local regna = (gsub(pron, "ɤ", "ɵ"))
			line = line .. "\n** " .. line_format(regna, {args.dia or 'Regna'})
		end
		return line
	end

	local dialect = detect_dialect(term)
	if dialect == "w" then
		categorize("Western Siwa terms")
		table.insert(lines, western_line(separate_word(term, false, false, true)))
	elseif dialect == "e" then
		categorize("Eastern Siwa terms")
		table.insert(lines, eastern_line(separate_word(term, false, true, false)))
	else
		local aingo = separate_word(term, true, false, false)
		if args.a then
			table.insert(lines, line_format(aingo, {args.dia or 'Aingo'}))
		end
		if args.e then
			local eastern = separate_word(term, false, true, false)
			if eastern ~= aingo then
				table.insert(lines, eastern_line(eastern))
			end
		end
		if args.w then
			local western = separate_word(term, false, false, true)
			if western ~= aingo then
				table.insert(lines, western_line(western))
			end
		end
	end

	local ipa = "* " .. table.concat(lines, "\n* ")

	if args.rs then
		ipa = gsub(ipa, "[ˌˈ]", "")
	end

	return ipa .. ncategories(categories)
end

return export