45,337
edits
No edit summary |
No edit summary |
||
Line 4: | Line 4: | ||
local m_su = require("Module:string utilities") | local m_su = require("Module:string utilities") | ||
local m_table = require("Module:table") | local m_table = require("Module:table") | ||
local m_sm = | local m_sm = mw.loadData("Module:siwa-pron/data") | ||
local sub = mw.ustring.sub | local sub = mw.ustring.sub | ||
Line 12: | Line 12: | ||
local u = mw.ustring.char | local u = mw.ustring.char | ||
local split = mw.text.split | local split = mw.text.split | ||
-- Combining diacritics spliced into the patterns below.
local UNRELEASED = u(0x031A) -- COMBINING LEFT ANGLE ABOVE. ̚
local NASALIZED = u(0x0303) -- COMBINING TILDE. ̃

-- Single-codepoint stand-ins (obsolete ligatures, stroked letters) are used so
-- that every segment is one character inside a set; they are meant to be
-- replaced later.
local consonant = "[mnɲŋpbtdcɟkɡʔvðsɕxɣhʨʥrlɬłʣjwʦʔƀꝺđɣꬶɉ]"
	.. UNRELEASED .. "?"

-- Vowel inventories, kept as bare character lists so they can be embedded in
-- bracket sets by the rules.
local front_vowel = "iɪyeøɛœæa"
local back_vowel = "uɔ" .. NASALIZED .. "?ɑʊ"
-- NOTE(review): back_vowel is placed inside "[...]" here, where the "?" and the
-- combining tilde become ordinary set members rather than an optional suffix
-- of "ɔ" -- confirm this is intended.
local vowel = "[" .. front_vowel .. back_vowel .. "ɨ]"

-- spat1..spat5 share everything except the middle capture: an optional "·",
-- the stress mark, any run of onset characters, an optional UNRELEASED mark and
-- one vowel (capture 1); then the target consonant (capture 2); then one
-- character other than "ː" (capture 3).
local spat_head = "(·?ˈ[mnɲŋpbtdcɟkɡʔvðsɕxɣhʨʥrlɬłʣjwʦ⁽ʰ⁾ʔːƀꝺđɣꬶɉ]*"
	.. UNRELEASED .. "?" .. vowel .. ")("
local spat_tail = ")([^ː])"
local spat1 = spat_head .. "h" .. spat_tail
local spat2 = spat_head .. "ꬶ" .. spat_tail
local spat3 = spat_head .. "ƀ" .. spat_tail
local spat4 = spat_head .. "đ" .. spat_tail
local spat5 = spat_head .. "ꝺ" .. spat_tail
-- Convert every open vowel in `v` to its closed counterpart.
--
-- `v` is walked one character at a time (as delivered by the file-level
-- `gmatch`). Characters that have an entry in the mapping are swapped; anything
-- else (for example "æ", which has no closed form listed here) is passed
-- through unchanged. Previously an unmapped character handed `nil` to gsub as
-- the replacement, which raises an error, and each character was also used as
-- its own pattern, which misbehaves for pattern-magic characters.
--
-- v: string of vowel characters; may be empty.
-- Returns the converted string (empty when `v` is empty).
local function open_to_closed(v)
	local switch = {
		["ɑ"] = "a", ["e"] = "ɛ", ["i"] = "ɪ", ["ɔ"] = "ɔ",
		["u"] = "ʊ", ["y"] = "œ", ["ø"] = "œ",
	}
	local otc = {}
	for vc in gmatch(v, ".") do
		-- Plain table lookup; fall back to the character itself when unmapped.
		otc[#otc + 1] = switch[vc] or vc
	end
	return table.concat(otc)
end
-- Ordered list of rewrite groups that turn a spelled term into IPA.
--
-- Each group is a table mapping a pattern to a replacement (a string with
-- %n back-references, or a function). export.crux walks the groups in
-- sequence and, inside a group, feeds every pattern/replacement pair to gsub
-- using pairs().
-- NOTE(review): pairs() order is unspecified, so overlapping keys in one group
-- (e.g. "ẻ" / "ẻu", "õ" / "õu", "ɣj" / "ɣjː") can give different results
-- depending on which is visited first -- confirm whether these need to be
-- split into separate groups.
-- NOTE(review): export.crux iterates a variable named `rules`; confirm that
-- name is bound to this table.
export.rules = {
	{ -- ligatures and ł
		["t[ṡɕ]"] = "ʨ", ["ṡ"] = "ɕ", ["į"] = "j", ["dj"] = "ʥ",
		["ḍ"] = "ð", ["dl"] = "ł", ["kj"] = "c", ["ḥ"] = "ʔ",
		["nj"] = "ɲ", ["ġ"] = "x", ["ts"] = "ʦ", ["g"] = "ɡ", -- IPA g
	},
	{ -- long consonants
		["mm"] = "mː", ["bb"] = "pː", ["vv"] = "wː", ["nn"] = "nː",
		["dʥ"] = "ʥː", ["dd"] = "tː", ["ðð"] = "ðː", ["ss"] = "sː",
		["ɕɕ"] = "ɕː", ["rr"] = "rː", ["ll"] = "lː", ["ɡɡ"] = "kː",
		["xx"] = "xː", ["nɡ"] = "ŋː", ["hh"] = "hː", ["ʔʔ"] = "ʔː",
		["nɲ"] = "ɲː", ["hl"] = "ɬː",
	},
	{ -- default all consonants to unstressed. Letters with stroke and ꬶ (U+AB36)
	  -- are used to tell apart natural and stress-borne consonants.
		["p"] = "ƀ", ["d"] = "ꝺ", ["t"] = "đ",
		["ɡ"] = "ɣ", ["k"] = "ꬶ",
	},
	{ -- default all vowels as open (open-closed distinctions are computed later)
		["a"] = "ɑ", ["ả"] = "æː",
		["ę"] = "æ",
		["ẻ"] = "eː",
		["ỉ"] = "iː",
		["o"] = "ɔ", ["ỏ"] = "ʊː",
		["ủ"] = "uː",
		["ỷ"] = "yː",
		["ů"] = "ø", ["ẻu"] = "øː",
		["õ"] = "ɔ̃", ["õu"] = "ɔ̃ː̃",
	},
	{ -- word-initial adjustments (patterns anchored at the start of the term)
		["^(ˈ)ꬶ([" .. front_vowel .. "])"] = "%1c%2", --word-initial [k] palatalizes before front-vowels
		["^(ˈ[ƀđꬶc])"] = "%1ʰ", --voiceless stops word-initially become aspirated
		["^(.*·ˈ[ƀđꬶc])"] = "%1⁽ʰ⁾",
		["^(ˈ)ɣj([" .. front_vowel .. "])"] = "%1ʣ%2", --<gį> word-initially and before front vowels is pronounced [d͡z]
		["^(ˈ)ɣj([" .. back_vowel .. "])"] = "%1ɟ%2",
		["^(ˈ)ɣ([" .. front_vowel .. "])"] = "%1ɟ%2",
	},
	{ -- stops directly after the stress mark, and palatalization before i / j
		["ˈƀ"] = "ˈp", ["ˈđ"] = "ˈt", ["ˈꬶ"] = "ˈk", ["ˈꝺ"] = "ˈd",
		["đi"] = "ʨi", ["ꝺi"] = "ʥi", ["ɣi"] = "ɉi", ["ɣj"] = "jː", ["ɣjː"] = "ɟː",
	},
	{ -- consonant following the vowel of a stressed syllable (see spat1..spat5)
		[spat1] = "%1ʔ%3", [spat2] = "%1k%3", [spat3] = "%1p%3", [spat4] = "%1t%3", [spat5] = "%1ð%3"
	},
	{ -- consonant clusters
		["ƀƀ"] = "ʔp", ["pƀ"] = "ʔp",
		["đđ"] = "ʔt", ["tđ"] = "ʔt",
		["ꬶꬶ"] = "ʔk", ["kꬶ"] = "ʔk",
		["bm"] = "ʔp̚m", ["ꝺn"] = "ʔt̚n", ["ꬶn"] = "ʔk̚ŋ",
		["mn"] = "mnː", ["mʔk"] = "mkː",
		["(p[msɕ])"] = "%1ː", ["pr"] = "px",
		["b([sɕ])"] = "p%1",
		["nꬶ"] = "ŋk", ["([ðđʦłɕꬶ])v"] = "%1wː",
		["đn"] = "tnː", ["đr"] = "tx", ["đꬶv"] = "tkwː",
		["(ʦ[đlmn])"] = "%1ː", ["ʦꬶv"] = "ʦkwː",
		["ʦxv"] = "ʦxw", ["đx"] = "tːx",
		["(ð[mꬶ])"] = "%1ː",
		["ꝺx"] = "ðx",
		["(sk[l])"] = "%1ː", ["sʔk"] = "skː", ["sxv"] = "sxwː",
		["([lr])ʔ([ptk])"] = "%1%2ː", ["rv"] = "rwː",
		["lʦ(x?)v"] = "ɬʦ%1w", ["lʦx"] = "ɬʦx",
		["(ł[mnꬶ])"] = "%1ː",
		["(ꬶ[msɕl])"] = "%1ː",
		["ꬶsꬶ"] = "kskː", ["ꬶsl"] = "ksł",
		-- The second rule used to emit a constant "kv", discarding the captured
		-- consonant so that ɣs and ɣl also came out as "kv"; it now keeps the
		-- captured consonant (ɣv still yields "kv").
		["ɣ([mn])"] = "ŋ%1ː", ["ɣ([vsl])"] = "k%1",
	},
	{ -- close the vowels that precede two consonants or a word-final consonant
		["(" .. vowel .. "*)(" .. consonant .. consonant .. ")"] = function(s1, s2) return open_to_closed(s1) .. s2 end,
		["(" .. vowel .. "*)(" .. consonant .. ")$"] = function(s1, s2) return open_to_closed(s1) .. s2 end,
		["ɑ$"] = "a",
	},
	{
		["į"] = "j", ["l(ʦx)v"] = "ɬ%1w",
		["(" .. vowel .. ")đ$"] = "%1ʔ%1", -- -Vt becomes -VʔV (or -Vht, not considered)
	},
	{ -- undo ligatures
		["ʨ"] = "t͡ɕ", ["ʥ"] = "d͡ʑ", ["ł"] = "tɬ", ["ʣ"] = "d͡z", ["ʦ"] = "t͡s",
		["ƀ"] = "p", ["ꝺ"] = "d", ["đ"] = "t", ["ꬶ"] = "ɡ", ["ɉ"] = "ɟ"
	},
	{
		["·"] = "", -- remove morpheme separator
	},
}
function export.morphemes(word) | function export.morphemes(word) | ||
Line 34: | Line 144: | ||
function export.crux(term) | function export.crux(term) | ||
local IPA = {} | local IPA = {} | ||
term=mw.ustring.lower(term) | term=mw.ustring.lower(term) | ||
term=export.morphemes(term) | term=export.morphemes(term) | ||
for _, rule in ipairs( | for _, rule in ipairs(rules) do | ||
for regex, replacement in pairs(rule) do | for regex, replacement in pairs(rule) do | ||
term = gsub(term, regex, replacement) | term = gsub(term, regex, replacement) |