45,633
edits
No edit summary |
No edit summary |
||
(187 intermediate revisions by the same user not shown) | |||
Line 1: | Line 1: | ||
local m_sm = mw.loadData("Module:siwa-pron/data") | local m_sm = mw.loadData("Module:siwa-pron/data") | ||
Line 14: | Line 11: | ||
local export = {} | local export = {} | ||
local | local UNR = u(0x031A) -- COMBINING LEFT ANGLE ABOVE. ̚ | ||
--obsolete ligatures and L with stroke used to remove two-character hassle. will replace later | --obsolete ligatures and L with stroke used to remove two-character hassle. will replace later | ||
local consonant = "[ | local consonant = "[mnɲŋpbtdcɟkɡʔvðsɕxɣhħʨʥrlɬⱡłʣʦʔƀꝺđꬶɉʼⱦṁṅꬼɨ]" .. UNR .. "?" | ||
local front_vowel = "iɪyeøɛœæ" | local front_vowel = "iɪyeøɛœæ" | ||
local back_vowel = "uɔõɑʊɤɯ" | local back_vowel = "uɔõɑʊɤɯ" | ||
local vowel = "[" .. front_vowel .. back_vowel .. "a]" | local vowel = "[" .. front_vowel .. back_vowel .. "a]" | ||
local | local unrelaxed = { | ||
["ạ"] = "a", ["ẹ"] = "e", ["ị"] = "i", ["ọ"] = "o", ["ụ"] = "u", ["ỵ"] = "y", | |||
} | |||
function spat(c) | |||
return "(·?ˈ[mnɲŋpbtdcɟkɡvðsɕxɣhʨʥrlɬłⱡʣjwʦ⁽ʰ⁾ʼʔːƀꝺđꬶɉṁṅꬼ]*" .. UNR .. "?" .. vowel .. ")" .. c .. "([^ː])" | |||
end | |||
local function ncategories(categories) | local function ncategories(categories) | ||
Line 40: | Line 38: | ||
local function open_to_closed(v) | local function open_to_closed(v) | ||
local otc = {} | local otc = {} | ||
local switch = {["ɑ"] = "a", ["e"] = "ɛ", ["i"] = "ɪ", ["ɔ"] = "ɔ", ["õ"] = "õ", ["u"] = "ʊ", ["y"] = "œ", ["ɯ"] = "ɯ", | local switch = {["ɑ"] = "a", ["æ"] = "æ", ["e"] = "ɛ", ["i"] = "ɪ", | ||
["ø"] = "ü", | ["ɔ"] = "ɔ", ["õ"] = "õ", ["u"] = "ʊ", ["y"] = "œ", ["ɯ"] = "ɯ", | ||
["ø"] = "ü", -- dialectal variation, will be changed later | |||
["a"] = "a", ["ɛ"] = "ɛ", ["ɪ"] = "ɪ", | |||
["ʊ"] = "ʊ", ["ü"] = "ü", ["œ"] = "œ",} | |||
for vc in gmatch(v, ".") do | for vc in gmatch(v, ".") do | ||
vc = gsub(vc, vc, switch[vc]) | vc = gsub(vc, vc, switch[vc]) | ||
Line 66: | Line 67: | ||
end | end | ||
return table.concat(pss,"·") | local _, n = gsub(table.concat(pss,"·"), "ˈ", "") | ||
return n>=2 and gsub(table.concat(pss,"·"), "ˈ", "", n-1) or table.concat(pss, "·") | |||
end | end | ||
local function detect_dialect(term) | local function detect_dialect(term) | ||
if find(term, " | if find(term, "[ṁṅłƛ]") then | ||
return "w" | |||
elseif find(term, "^t[mk]") or find(term, "^sm") or find(term, "^mġ") or find(term,"^ų") or find(term, "̊") then | |||
return "e" | return "e" | ||
end | end | ||
end | end | ||
local anaptyctic = { | local anaptyctic = { | ||
{"mn", "ːmɨnː"}, {"mʔk", " | {"mn", "ːmɨnː"}, {"mʔk", "ːmɨʔkː"}, | ||
{"([^ˈ])ʦ([nxm])", "%1ːʣɨ%2ː | {"([^ˈ])ʦ([nxm])", "%1ːʣɨ%2ː"}, {"([^ˈ])ʦꬶv", "%1ːʣɨkwː"}, {"ʦꬶ", "ːʣɨʔkː"}, | ||
{"([^ˈ])ʦv", "%1ːʣɨwː"}, {"ʦđ", " | {"([^ˈ])ʦv", "%1ːʣɨwː"}, {"ʦđ", "ːʣɨʔtː"}, | ||
{" | {"đꬶv", "ːdɨkwː"}, {"đꬶ", "ːdɨʔkː"}, {"đ([xn])", "ːdɨ%1ː"}, | ||
{"([^ˈ])đv", "%1ːdɨwː"}, | {"([^ˈ])đv", "%1ːdɨwː"}, | ||
{"sꬶv", " | {"sꬶv", "skɨwː"}, {"sʔk", "ːsɨʔkː"}, {"ɕꬶv", "ɕkɨwː"}, | ||
{"([lr])(ʔ[kpt])", "ː%1ɨ% | {"([lr])(ʔ[kpt])", "ː%1ɨ%2ː"}, {"ⱡ([mnx])", "ːⱡɨ%1ː"}, {"ꝺx", "ːðɨɣː"}, | ||
{"ðꬶ", " | {"ðꬶ", "ːðɨʔkː"}, {"ɣ([nm])", "ːŋɨ%1ː"}, {"([ⱡrð])v", "ː%1ɨwː"}, | ||
{"x([mnl])", "ːɣɨ%1ː"}, {"xv", " | {"x([mnl])", "ːɣɨ%1ː"}, {"^(ˈ?)xv", "%1ɣɨwː"}, {"([^s])xv", "%1ːɣɨwː"}, {"ƀ([msɕ])", "ːbɨ%1ː"}, | ||
{"([^ˈ])ꬶ([msɕl])", "%1ːɡɨ%2ː"}, {".ꬶv", " | {"([^ˈ])ꬶ([msɕl])", "%1ːɡɨ%2ː"}, {"(.)ꬶv", "%1ːɡɨwː"}, {"ꬶsꬶ", "ːksɨʔkː"}, | ||
} | } | ||
Line 102: | Line 105: | ||
["ḥ"] = "ʔ", ["į"] = "j", | ["ḥ"] = "ʔ", ["į"] = "j", | ||
["ġ"] = "x", ["g"] = "ɡ", -- IPA g | ["ġ"] = "x", ["g"] = "ɡ", -- IPA g | ||
["ų"] = "w", | ["ų"] = "w", ["ł"] = "ɬ", | ||
}}, | }}, | ||
{"tɕ", "ʨ"}, {"dj", "ʥ"}, {"dl", "ⱡ"}, {"kj", "c"}, {"nj", "ɲ"}, {"ts", "ʦ"}, {"o̊", "ɯ"}, | |||
{"([ạẹịọụỵ])", function(v) return unrelaxed[v] .. "ⱦ" end}, | |||
-- default all consonants to unstressed. [] with stroke and ꬶ (U+AB36) to tell apart natural and stress-borne | |||
{"p", "ƀ"}, {"d", "ꝺ"}, {"t", "đ"}, {"ɡ", "ɣ"}, {"k", "ꬶ"}, | |||
-- long consonants | -- long consonants | ||
{"mm", "mː"}, {"bb", "pː"}, {"vv", "wː"}, {"nn", "nː"}, | {"mm", "mː"}, {"bb", "pː"}, {"vv", "wː"}, {"nn", "nː"}, | ||
{" | {"ꝺʥ", "ʥː"}, {"ꝺꝺ", "tː"}, {"ðð", "ðː"}, {"ss", "sː"}, | ||
{"ɕɕ", "ɕː"}, {"rr", "rː"}, {"ll", "lː"}, {" | {"ɕɕ", "ɕː"}, {"rr", "rː"}, {"ll", "lː"}, {"ɣɣ", "kː"}, | ||
{"xx", "xː"}, {" | {"xx", "xː"}, {"nɣ", "ŋː"}, {"hh", "hː"}, {"ʔʔ", "ʔː"}, | ||
{"nɲ", "ɲː"}, {"hl", "ɬː"}, {"ƛƛ", "ɬː"}, | {"nɲ", "ɲː"}, {"hl", "ɬː"}, {"ƛƛ", "ɬː"}, {"đʨ", "ʨː"}, | ||
{" | -- default all vowels to open (open-closed distinctions are computed later); long vowels don't have closed counterparts | ||
{"a", "ɑ"}, {"ę", "æ"}, {"o", "ɔ"}, {"ů", "ø"}, -- õ is conserved to avoid two characters | |||
{"ả", "æː"}, {"ẻu", "øː"}, {"ẻ", "eː"}, {"ỉ", "iː"}, {"ỏ", "ʊː"}, {"ủ", "uː"}, {"ỷ", "yː"}, {"õu", "õː"}, | |||
{"ey", "eø"}, {"ɑy", "æø"}, -- y-final diphthongs | |||
-- | {"^(ˈ?)ꬶ([" .. front_vowel .. "])", "%1c%2"}, | ||
{" | {"^(ˈ?)ɣ([" .. front_vowel .. "])", "%1ɟ%2"}, -- word-initial [k] and [g] palatalize before front vowels | ||
{"^(ˈ?[ƀđꬶc])([^mn])", "%1ʰ%2"}, -- voiceless stops word-initially become aspirated | |||
{"^(ˈ?)đʰꬶ", "%1tk"}, {"^(ˈ?)đʰm", "%1tm"}, -- Far Eastern initial consonant clusters | |||
{"(.*·ˈ[ƀđꬶc])", "%1⁽ʰ⁾"}, -- although after a prefix they may or may not | |||
{"^(ˈ?)ɣj([" .. front_vowel .. "])", "%1ʣ%2"}, -- <gį> word-initially and before front vowels is pronounced [d͡z] | |||
{"^(ˈ?)ɣj([" .. back_vowel .. "])", "%1ɟ%2"}, -- and [ɟ] before back vowels | |||
-- | -- preaspirated consonants | ||
{" | {"[hʔ](ʦ[ꬶx])", "ħ%1"}, {"[hʔ](đ[vx])", "ħ%1"}, | ||
{" | {"h(ː[wj])", "ħ%1"}, {"[hʔ]ꬶ", "ħk"}, {"[hʔ]đ", "ħt"}, {"[hʔ]ƀ", "ħp"}, {"[hʔ]ꝺ", "ħd"}, {"[hʔ]ɣ", "ħɡ"}, | ||
{"[hʔ]([pbtdkmnlsrʦ][^" .. UNR .. "])", "ħ%1"}, | |||
{" | {"ƀƀ", "ʔp"}, {"đđ", "ʔt"}, {"ꬶꬶ", "ʔk"}, {"ꝺ$", "ʥ"}, | ||
{"bm", "ʔp" .. UNR .. "m"}, {"ꝺn", "ʔt" .. UNR .. "n"}, {"ꬶn", "ʔk" .. UNR .. "ŋ"}, | |||
{"ƀ⁽ʰ⁾", "p⁽ʰ⁾"}, {"đ⁽ʰ⁾", "t⁽ʰ⁾"}, {"ꬶ⁽ʰ⁾", "k⁽ʰ⁾"}, | |||
{" | |||
{" | |||
-- other stem- and/or word-initial configurations | -- other stem- and/or word-initial configurations | ||
{"đi", " | {"([^ˈ])đi", "%1ʨi"}, {"([^ˈ])ꝺi", "%1ʥi"}, {"([^ˈ])ɣi", "%1ɟi"}, {"([^ˈ])ɣj", "%1jː"}, {"([^ˈ])ɣjː", "%1ɟː"}, | ||
{" | {"^([ˈˌ]?)ƀ", "%1p"}, {"^([ˈˌ]?)đ", "%1t"}, {"^([ˈˌ]?)ꬶ", "%1k"}, {"^([ˈˌ]?)ꝺ", "%1d"}, {"([ˈˌ])ɣ", "%1ɡ"} | ||
} | } | ||
local rules2 = { | local rules2 = { | ||
-- stressed consonants (coda of stressed syllables without long vowels, diphthongs, or triphthongs) | -- stressed consonants (coda of stressed syllables without long vowels, diphthongs, or triphthongs) | ||
{ | {spat("h"), "%1ʔ%2"}, {spat("ꬶ"), "%1k%2"}, {spat("ƀ"), "%1p%2"}, | ||
{spat("đ"), "%1t%2"}, {spat("ꝺ"), "%1d%2"}, {spat("ɣ"), "%1ɡ%2"}, | |||
-- internal consonant clusters | -- internal consonant clusters | ||
{" | {"[ƀp][ƀp]", "ʔp"}, | ||
{" | {"[đt][đt]", "ʔt"}, | ||
{" | {"[ꬶk][ꬶkc]", "ʔk"},-- {"kc", "ʔc"}, | ||
{"mn", "mnː"}, {"mʔk", "mkː"}, | {"mn", "mnː"}, {"mʔk", "mkː"}, {"m[ƀp]", "mp"}, {"m(s?)[đt]", "m%1t"}, {"m[ꬶk]", "mk"}, {"m[ʔh]", "mh"}, | ||
{"[ƀp]([msɕ])", "p%1ː"}, {"[ƀp]r", "px"}, | {"[ƀp]([msɕ])", "p%1ː"}, {"[ƀp]r", "px"}, | ||
{"b([sɕ])", "p%1"}, | {"b([sɕ])", "p%1"}, | ||
{"n[ꬶk]", " | {"n[ꝺd]", "nd"}, {"n[đt]", "nt"}, {"ŋʔk", "ŋkː"}, {"n[ꬶk]([^ː])", "ꬼk%1"}, {"([ðđʦⱡɕꬶrkt])v", "%1wː"}, | ||
{"[đt]n", "tnː"}, {"[đt] | {"[đt]n", "tnː"}, {"[đt][ꬶk]v", "tkwː"}, | ||
{"ʦ[đt]", "ʦtː"}, {"ʦ[ꬶk]", "ʦkː"}, {"(ʦ[lmn])", "%1ː"}, {"ʦ[ꬶk]v", "ʦkwː"}, | |||
{"ʦxv", "ʦxw"}, {"[đt]x", "tːx"}, | {"ʦxv", "ʦxw"}, {"[đt]x", "tːx"}, {"[đt][ꬶk]", "tk"}, {"[đt]r", "tx"}, | ||
{" | {"ðm", "ðmː"}, {"ð[ꬶk]", "ðkː"}, {"ð[ɣɡ]", "ðɡ"}, | ||
{"[ꝺd]x", "ðx"}, | {"[ꝺd]x", "ðx"}, | ||
{"(s[ꬶk]l)", "%1ː"}, {"sʔk", "skː"}, {"sxv", " | {"(s[ꬶk]l)", "%1ː"}, {"sʔk", "skː"}, {"sxv", "sxw"}, | ||
{"([lr])ʔ([ptk])", "%1%2ː"}, | {"([srl])[đt]", "%1t"}, {"([sɕrl])[ꬶk]", "%1k"}, {"([sɕrl])[ƀp]", "%1p"}, | ||
{"([lr])ʔ([ptk])", "%1%2ː"}, {"l[ɣɡ]", "lɡ"}, | |||
{"mʔk", "mkː"}, | |||
{"lʦ(x?)v", "ɬʦ%1w"}, {"lʦx", "ɬʦx"}, | {"lʦ(x?)v", "ɬʦ%1w"}, {"lʦx", "ɬʦx"}, | ||
{"( | {"(ⱡ[mnꬶk])", "%1ː"}, | ||
{" | {"[ꬶk]([msɕ])", "k%1ː"}, {"[ꬶk]s[ꬶk]", "kskː"}, {"[ꬶk]sl", "ksⱡ"}, | ||
{"[ɣɡ]([mn])", "ŋ%1ː"}, {"[ɣɡ]([vsl])", "k%1"}, | {"[ɣɡ]([mn])", "ŋ%1ː"}, {"[ɣɡ]([vsl])", "k%1"}, | ||
-- closed vowels | -- closed vowels | ||
{"(" .. vowel .. "*)(" .. consonant .. consonant .. ")", function(s1, s2) return open_to_closed(s1) .. s2 end}, | {"(" .. vowel .. "*)(·?" .. consonant .. "·?ˈ?" .. consonant .. ")", function(s1, s2) return open_to_closed(s1) .. s2 end}, | ||
{"(" .. vowel .. "*)(" .. consonant .. ")$", function(s1, s2) return open_to_closed(s1) .. s2 end}, | {"(" .. vowel .. "*)(" .. consonant .. ")$", function(s1, s2) return open_to_closed(s1) .. s2 end}, | ||
{"(" .. vowel .. "*)(ː?" .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end}, | {"(" .. vowel .. "*)(ː?" .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end}, | ||
{"(" .. vowel .. "*)(ː?" .. consonant .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end}, | {"(" .. vowel .. "*)(ː?" .. consonant .. consonant .. "ɨ)", function(s1, s2) return open_to_closed(s1) .. s2 end}, | ||
{"(" .. vowel .. "*)(" .. consonant .. "ː" .. consonant .. ")", function(s1, s2) return open_to_closed(s1) .. s2 end}, | |||
{"ɑ$", "a"}, | {"ɑ$", "a"}, | ||
} | } | ||
local final = { | local final = { | ||
{"k(["..front_vowel.."])", "c%1"}, {" | {"k(["..front_vowel.."])", "c%1"}, {"[ɡꬶɣ](["..front_vowel.."])", "ɟ%1"}, | ||
{"([iɪ])[đt]$", "%1ʨ"}, {"(" .. vowel .. ")[đt]$", "%1ʔ%1̆"}, {"(" .. vowel .. "ː)[đt]$", "%1ʔ"}, -- -Vt becomes -VʔV̆ or -V̄ʔ | |||
--undo ligatures | --undo ligatures | ||
{"ʨ", "t͡ɕ"}, {"ʥ", "d͡ʑ"}, {" | {"ʨ", "t͡ɕ"}, {"ʥ", "d͡ʑ"}, {"ⱡ", "tɬ"}, {"ʣ", "d͡z"}, {"ʦ", "t͡s"}, | ||
{"ƀ", " | {"ƀ", "b"}, {"ꝺ", "ð"}, {"đ", "d"}, {"ꬶ", "ɡ"}, {"õ", "ɔ̃"}, | ||
{"ṁ", "m̥"}, {"ṅ", "n̥"}, {"ħ", "h"}, {"ɡ⁽", "k⁽"}, {"ꬼ", "ŋ̊"}, {"(t⁽ʰ⁾)ɟ", "%1c"},{"(tʼ)ɟ", "%1c"}, | |||
{"k(["..front_vowel.."])", "c%1"}, {"ɡ(["..front_vowel.."])", " | {"k(ː?["..front_vowel.."])", "c%1"}, {"ɡ(ː[" ..front_vowel.."])", "c%1"}, {"dɟ", "tc"}, {"ƛ", "tɬʼ"}, | ||
-- remove morpheme separator | -- remove morpheme separator, possible double long vowel markers, and repeated secondary stress markers | ||
{" | {"[·ⱦ]", ""}, {"ːː", "ː"}, {"(ˈ[^ˌ]*)ˌ", "%1"}, {"([^ˈ])-", "%1‿"}, | ||
} | } | ||
Line 194: | Line 207: | ||
if w then | if w then | ||
term = gsub(term, "ꬶl", " | term = gsub(term, "ꬶl", "ʔⱡ") | ||
for _, anap in ipairs(anaptyctic) do | for _, anap in ipairs(anaptyctic) do | ||
term = gsub(term, anap[1], anap[2]) | term = gsub(term, anap[1], anap[2]) | ||
end | end | ||
elseif e then | elseif e then | ||
term = gsub(term, "( | term = gsub(term, "(.)⁽ʰ⁾", "%1ʼ") | ||
term = gsub(term, "ꬶʼ", "kʼ"); term = gsub(term, "ꬶʼ", "kʼ"); term = gsub(term, "ƀʼ", "pʼ"); term = gsub(term, "đʼ", "tʼ") | |||
term = gsub(term, "ƀr", "pʼqʼ") | |||
term = gsub(term, "đr", "tʼqʼ") | |||
term = gsub(term, "ʦx", "ʦʼqʼ") | term = gsub(term, "ʦx", "ʦʼqʼ") | ||
term = gsub(term, "ꬶl", "klː") | term = gsub(term, "ꬶl", "klː") | ||
Line 259: | Line 275: | ||
["e"] = {type = 'boolean', default = true}, | ["e"] = {type = 'boolean', default = true}, | ||
["w"] = {type = 'boolean', default = true}, | ["w"] = {type = 'boolean', default = true}, | ||
["dia"] = {}, | |||
["rs"] = {type = 'boolean'}, | |||
} | } | ||
Line 267: | Line 285: | ||
local ipa = "* " | local ipa = "* " | ||
if detect_dialect(term) == " | if detect_dialect(term) == "w" then | ||
ipa = ipa .. line_format(separate_word(term, false, true | args.e = false; args.a = false | ||
ipa = ipa .. line_format(separate_word(term, false, false, true), {args.dia or 'Western'}) | |||
if mw.title.getCurrentTitle().nsText ~= 'Template' then | if mw.title.getCurrentTitle().nsText ~= 'Template' then | ||
table.insert(categories, " | table.insert(categories, "Western Siwa terms") | ||
table.insert(categories, "Siwa | table.insert(categories, "Siwa terms with Western IPA pronunciation") | ||
end | end | ||
elseif detect_dialect(term) == " | elseif detect_dialect(term) == "e" then | ||
ipa = ipa .. line_format(separate_word(term, false, false | args.w = false; args.a = false | ||
ipa = ipa .. line_format(separate_word(term, false, true, false), {args.dia or 'Eastern'}) | |||
if mw.title.getCurrentTitle().nsText ~= 'Template' then | if mw.title.getCurrentTitle().nsText ~= 'Template' then | ||
table.insert(categories, " | table.insert(categories, "Eastern Siwa terms") | ||
table.insert(categories, "Siwa | table.insert(categories, "Siwa terms with Eastern IPA pronunciation") | ||
end | end | ||
elseif args.a then | elseif args.a then | ||
ipa = ipa .. line_format(separate_word(term, true, false, false), {'Aingo'}) | ipa = ipa .. line_format(separate_word(term, true, false, false), {args.dia or 'Aingo'}) | ||
end | end | ||
if args.e | if args.e and separate_word(term, true, false, false) ~= separate_word(term, false, true, false) then | ||
if args.a then | |||
ipa = ipa .. "\n* " | |||
end | |||
ipa = ipa .. line_format(separate_word(term, true, true, false), {args.dia or 'Eastern'}) | |||
if mw.title.getCurrentTitle().nsText ~= 'Template' then | |||
table.insert(categories, "Siwa terms with Eastern IPA pronunciation") | |||
end | end | ||
end | end | ||
if args.w | if args.w and separate_word(term, true, false, false) ~= separate_word(term, false, false, true) then | ||
if args.a or args.e then | |||
ipa = ipa .. "\n* " | |||
end | |||
ipa = ipa .. line_format(separate_word(term, true, false, true), {args.dia or 'Western'}) | |||
if find(ipa, "ɤ") then | |||
if mw.title.getCurrentTitle().nsText ~= 'Template' then | ipa = ipa .. "\n** " | ||
ipa = ipa .. line_format(gsub(separate_word(term, false, false, true),"ɤ","ɵ"), {args.dia or 'Regna'}) | |||
end | |||
if mw.title.getCurrentTitle().nsText ~= 'Template' then | |||
table.insert(categories, "Siwa terms with Western IPA pronunciation") | |||
end | end | ||
end | end | ||
if args.rs then ipa = gsub(ipa, "[ˌˈ]", "") end | |||
return ipa .. ncategories(categories) | return ipa .. ncategories(categories) |