|
|
Line 38: |
Line 38: |
| end | | end |
|
| |
|
| local function open_to_closed(v, a) | | local function open_to_closed(v, w) |
| local otc = {} | | local otc = {} |
| local switch = {["ɑ"] = "a", ["e"] = "ɛ", ["i"] = "ɪ", ["ɔ"] = "ɔ", ["u"] = "ʊ", ["y"] = "œ", | | local switch = {["ɑ"] = "a", ["e"] = "ɛ", ["i"] = "ɪ", ["ɔ"] = "ɔ", ["u"] = "ʊ", ["y"] = "œ", |
| ["ø"] = function(s1) | | ["ø"] = w and "ɤ" or "œ"} |
| if(a=="w") then s1="ɤ"
| |
| else s1="œ" end
| |
| return s1 end,}
| |
|
| |
|
| for vc in gmatch(v, ".") do | | for vc in gmatch(v, ".") do |
Line 52: |
Line 49: |
| return table.concat(otc) | | return table.concat(otc) |
| end | | end |
|
| |
| local rules = {
| |
| { --ligatures and ł
| |
| ["t[ṡɕ]"] = "ʨ", ["ṡ"] = "ɕ", ["į"] = "j", ["dj"] = "ʥ",
| |
| ["ḍ"] = "ð", ["dl"] = "ł", ["kj"] = "c", ["ḥ"] = "ʔ",
| |
| ["nj"] = "ɲ", ["ġ"] = "x", ["ts"] = "ʦ", ["g"] = "ɡ", -- IPA g
| |
| },
| |
| { --long consonants
| |
| ["mm"] = "mː", ["bb"] = "pː", ["vv"] = "wː", ["nn"] = "nː",
| |
| ["dʥ"] = "ʥː", ["dd"] = "tː", ["ðð"] = "ðː", ["ss"] = "sː",
| |
| ["ɕɕ"] = "ɕː", ["rr"] = "rː", ["ll"] = "lː", ["ɡɡ"] = "kː",
| |
| ["xx"] = "xː", ["nɡ"] = "ŋː", ["hh"] = "hː", ["ʔʔ"] = "ʔː",
| |
| ["nɲ"] = "ɲː", ["hl"] = "ɬː",
| |
| },
| |
| { --[[default all consonants to unstressed. [] with stroke and ꬶ (U+AB36)
| |
| to tell apart natural and stress-borne]]
| |
| ["p"] = "ƀ", ["d"] = "ꝺ", ["t"] = "đ",
| |
| ["ɡ"] = "ɣ", ["k"] = "ꬶ",
| |
| },
| |
| { --default all vowels as open (open-closed distinctions are computed later)
| |
| ["a"] = "ɑ", ["ả"] = "æː",
| |
| ["ę"] = "æ",
| |
| ["ẻ"] = "eː",
| |
| ["ỉ"] = "iː",
| |
| ["o"] = "ɔ", ["ỏ"] = "ʊː",
| |
| ["ủ"] = "uː",
| |
| ["ỷ"] = "yː",
| |
| ["ů"] = "ø", ["ẻu"] = "øː",
| |
| ["õ"] = "ɔ̃", ["õu"] = "ɔ̃ː̃",
| |
| },
| |
| {
| |
| ["^(ˈ)ꬶ([" .. front_vowel .. "])"] = "%1c%2", --word-initial [k] palatalizes before front-vowels
| |
| ["^(ˈ[ƀđꬶc])"] = "%1ʰ", --voiceless stops word-initially become aspirated
| |
| ["^(.*·ˈ[ƀđꬶc])"] = "%1⁽ʰ⁾",
| |
| ["^(ˈ)ɣj([" .. front_vowel .. "])"] = "%1ʣ%2", --<gį> word-initially and before front vowels is pronounced [d͡z]
| |
| ["^(ˈ)ɣj([" .. back_vowel .. "])"] = "%1ɟ%2",
| |
| ["^(ˈ)ɣ([" .. front_vowel .. "])"] = "%1ɟ%2",
| |
| },
| |
| {
| |
| ["ˈƀ"] = "ˈp", ["ˈđ"] = "ˈt", ["ˈꬶ"] = "ˈk", ["ˈꝺ"] = "ˈd",
| |
| ["đi"] = "ʨi", ["ꝺi"] = "ʥi", ["ɣi"] = "ɉi", ["ɣj"] = "jː", ["ɣjː"] = "ɟː",
| |
| },
| |
| {
| |
| [spat1] = "%1ʔ%3", [spat2] = "%1k%3", [spat3] = "%1p%3", [spat4] = "%1t%3", [spat5] = "%1ð%3"
| |
| },
| |
| {
| |
| ["ƀƀ"] = "ʔp", ["pƀ"] = "ʔp",
| |
| ["đđ"] = "ʔt", ["tđ"] = "ʔt",
| |
| ["ꬶꬶ"] = "ʔk", ["kꬶ"] = "ʔk",
| |
| ["bm"] = "ʔp̚m", ["ꝺn"] = "ʔt̚n", ["ꬶn"] = "ʔk̚ŋ",
| |
| ["mn"] = "mnː", ["mʔk"] = "mkː",
| |
| ["(p[msɕ])"] = "%1ː", ["pr"] = "px",
| |
| ["b([sɕ])"] = "p%1",
| |
| ["nꬶ"] = "ŋk", ["([ðđʦłɕꬶ])v"] = "%1wː",
| |
| ["đn"] = "tnː", ["đr"] = "tx", ["đꬶv"] = "tkwː",
| |
| ["(ʦ[đlmn])"] = "%1ː", ["ʦꬶv"] = "ʦkwː",
| |
| ["ʦxv"] = "ʦxw", ["đx"] = "tːx",
| |
| ["(ð[mꬶ])"] = "%1ː",
| |
| ["ꝺx"] = "ðx",
| |
| ["(sk[l])"] = "%1ː", ["sʔk"] = "skː", ["sxv"] = "sxwː",
| |
| ["([lr])ʔ([ptk])"] = "%1%2ː", ["rv"] = "rwː",
| |
| ["lʦ(x?)v"] = "ɬʦ%1w", ["lʦx"] = "ɬʦx",
| |
| ["(ł[mnꬶ])"] = "%1ː",
| |
| ["(ꬶ[msɕl])"] = "%1ː",
| |
| ["ꬶsꬶ"] = "kskː", ["ꬶsl"] = "ksł",
| |
| ["ɣ([mn])"] = "ŋ%1ː", ["ɣ([vsl])"] = "kv",
| |
| },
| |
| {
| |
| ["(" .. vowel .. "*)(" .. consonant .. consonant .. ")"] = function(s1, s2) return open_to_closed(s1) .. s2 end,
| |
| ["(" .. vowel .. "*)(" .. consonant .. ")$"] = function(s1, s2) return open_to_closed(s1) .. s2 end,
| |
| ["ɑ$"] = "a",
| |
| },
| |
| {
| |
| ["į"] = "j", ["l(ʦx)v"] = "ɬ%1w",
| |
| ["(" .. vowel .. ")đ$"] = "%1ʔ%1", -- -Vt becomes -VʔV (or -Vht, not considered)
| |
| },
| |
| { --undo ligatures
| |
| ["ʨ"] = "t͡ɕ", ["ʥ"] = "d͡ʑ", ["ł"] = "tɬ", ["ʣ"] = "d͡z", ["ʦ"] = "t͡s",
| |
| ["ƀ"] = "p", ["ꝺ"] = "d", ["đ"] = "t", ["ꬶ"] = "ɡ", ["ɉ"] = "ɟ"
| |
| },
| |
| {
| |
| ["·"] = "", --remove morpheme separator
| |
| },
| |
| }
| |
|
| |
| local wrules = {
| |
| { --ligatures and ł
| |
| ["t[ṡɕ]"] = "ʨ", ["ṡ"] = "ɕ", ["į"] = "j", ["dj"] = "ʥ",
| |
| ["ḍ"] = "ð", ["dl"] = "ł", ["kj"] = "c", ["ḥ"] = "ʔ",
| |
| ["nj"] = "ɲ", ["ġ"] = "x", ["ts"] = "ʦ", ["g"] = "ɡ", -- IPA g
| |
| },
| |
| { --long consonants
| |
| ["mm"] = "mː", ["bb"] = "pː", ["vv"] = "wː", ["nn"] = "nː",
| |
| ["dʥ"] = "ʥː", ["dd"] = "tː", ["ðð"] = "ðː", ["ss"] = "sː",
| |
| ["ɕɕ"] = "ɕː", ["rr"] = "rː", ["ll"] = "lː", ["ɡɡ"] = "kː",
| |
| ["xx"] = "xː", ["nɡ"] = "ŋː", ["hh"] = "hː", ["ʔʔ"] = "ʔː",
| |
| ["nɲ"] = "ɲː", ["hl"] = "ɬː",
| |
| },
| |
| { --[[default all consonants to unstressed. [] with stroke and ꬶ (U+AB36)
| |
| to tell apart natural and stress-borne]]
| |
| ["p"] = "ƀ", ["d"] = "ꝺ", ["t"] = "đ",
| |
| ["ɡ"] = "ɣ", ["k"] = "ꬶ",
| |
| },
| |
| { --default all vowels as open (open-closed distinctions are computed later)
| |
| ["a"] = "ɑ", ["ả"] = "æː",
| |
| ["ę"] = "æ",
| |
| ["ẻ"] = "eː",
| |
| ["ỉ"] = "iː",
| |
| ["o"] = "ɔ", ["ỏ"] = "ʊː",
| |
| ["ủ"] = "uː",
| |
| ["ỷ"] = "yː",
| |
| ["ů"] = "ø", ["ẻu"] = "øː",
| |
| ["õ"] = "ɔ̃", ["õu"] = "ɔ̃ː̃",
| |
| },
| |
| {
| |
| ["^(ˈ)ꬶ([" .. front_vowel .. "])"] = "%1c%2", --word-initial [k] palatalizes before front-vowels
| |
| ["^(ˈ[ƀđꬶc])"] = "%1ʰ", --voiceless stops word-initially become aspirated
| |
| ["^(.*·ˈ[ƀđꬶc])"] = "%1⁽ʰ⁾",
| |
| ["^(ˈ)ɣj([" .. front_vowel .. "])"] = "%1ʣ%2", --<gį> word-initially and before front vowels is pronounced [d͡z]
| |
| ["^(ˈ)ɣj([" .. back_vowel .. "])"] = "%1ɟ%2",
| |
| ["^(ˈ)ɣ([" .. front_vowel .. "])"] = "%1ɟ%2",
| |
| },
| |
| {
| |
| ["ˈƀ"] = "ˈp", ["ˈđ"] = "ˈt", ["ˈꬶ"] = "ˈk", ["ˈꝺ"] = "ˈd",
| |
| ["đi"] = "ʨi", ["ꝺi"] = "ʥi", ["ɣi"] = "ɉi", ["ɣj"] = "jː", ["ɣjː"] = "ɟː",
| |
| },
| |
| {
| |
| [spat1] = "%1ʔ%3", [spat2] = "%1k%3", [spat3] = "%1p%3", [spat4] = "%1t%3", [spat5] = "%1ð%3"
| |
| },
| |
| {
| |
| ["ƀƀ"] = "ʔp", ["pƀ"] = "ʔp",
| |
| ["đđ"] = "ʔt", ["tđ"] = "ʔt",
| |
| ["ꬶꬶ"] = "ʔk", ["kꬶ"] = "ʔk",
| |
| ["bm"] = "ʔp̚m", ["ꝺn"] = "ʔt̚n", ["ꬶn"] = "ʔk̚ŋ",
| |
| ["mn"] = "mnː", ["mʔk"] = "mkː",
| |
| ["(p[msɕ])"] = "%1ː", ["pr"] = "px",
| |
| ["b([sɕ])"] = "p%1",
| |
| ["nꬶ"] = "ŋk", ["([ðđʦłɕꬶ])v"] = "%1wː",
| |
| ["đn"] = "tnː", ["đr"] = "tx", ["đꬶv"] = "tkwː",
| |
| ["(ʦ[đlmn])"] = "%1ː", ["ʦꬶv"] = "ʦkwː",
| |
| ["ʦxv"] = "ʦxw", ["đx"] = "tːx",
| |
| ["(ð[mꬶ])"] = "%1ː",
| |
| ["ꝺx"] = "ðx",
| |
| ["(sk[l])"] = "%1ː", ["sʔk"] = "skː", ["sxv"] = "sxwː",
| |
| ["([lr])ʔ([ptk])"] = "%1%2ː", ["rv"] = "rwː",
| |
| ["lʦ(x?)v"] = "ɬʦ%1w", ["lʦx"] = "ɬʦx",
| |
| ["(ł[mnꬶ])"] = "%1ː",
| |
| ["(ꬶ[msɕl])"] = "%1ː",
| |
| ["ꬶsꬶ"] = "kskː", ["ꬶsl"] = "ksł",
| |
| ["ɣ([mn])"] = "ŋ%1ː", ["ɣ([vsl])"] = "kv",
| |
| },
| |
| {
| |
| ["(" .. vowel .. "*)(" .. consonant .. consonant .. ")"] = function(s1, s2) return open_to_closed(s1,"w") .. s2 end,
| |
| ["(" .. vowel .. "*)(" .. consonant .. ")$"] = function(s1, s2) return open_to_closed(s1,"w") .. s2 end,
| |
| ["ɑ$"] = "a",
| |
| },
| |
| {
| |
| ["į"] = "j", ["l(ʦx)v"] = "ɬ%1w",
| |
| ["(" .. vowel .. ")đ$"] = "%1ʔ%1", -- -Vt becomes -VʔV (or -Vht, not considered)
| |
| },
| |
| { --undo ligatures
| |
| ["ʨ"] = "t͡ɕ", ["ʥ"] = "d͡ʑ", ["ł"] = "tɬ", ["ʣ"] = "d͡z", ["ʦ"] = "t͡s",
| |
| ["ƀ"] = "p", ["ꝺ"] = "d", ["đ"] = "t", ["ꬶ"] = "ɡ", ["ɉ"] = "ɟ"
| |
| },
| |
| {
| |
| ["·"] = "", --remove morpheme separator
| |
| },
| |
| }
| |
|
| |
|
| function export.morphemes(word) | | function export.morphemes(word) |
Line 241: |
Line 70: |
|
| |
|
| function export.crux(term, e, w) | | function export.crux(term, e, w) |
|
| |
| term=mw.ustring.lower(term) | | term=mw.ustring.lower(term) |
| term=export.morphemes(term) | | term=export.morphemes(term) |
| | |
| | local rules = { |
| | {".", { |
| | ["ḍ"] = "ð", ["ṡ"] = "ɕ", |
| | ["ḥ"] = "ʔ", ["į"] = "j", |
| | ["ġ"] = "x", ["g"] = "ɡ", -- IPA g |
| | }}, |
| | {"t[ṡɕ]", "ʨ"}, {"dj", "ʥ"}, {"dl", "ł"}, {"kj", "c"}, {"nj", "ɲ"}, {"ts", "ʦ"}, |
| | |
| | -- long consonants |
| | {"mm", "mː"}, {"bb", "pː"}, {"vv", "wː"}, {"nn", "nː"}, |
| | {"dʥ", "ʥː"}, {"dd", "tː"}, {"ðð", "ðː"}, {"ss", "sː"}, |
| | {"ɕɕ", "ɕː"}, {"rr", "rː"}, {"ll", "lː"}, {"ɡɡ", "kː"}, |
| | {"xx", "xː"}, {"nɡ", "ŋː"}, {"hh", "hː"}, {"ʔʔ", "ʔː"}, |
| | {"nɲ", "ɲː"}, {"hl", "ɬː"}, |
| | |
| | -- default all consonants to unstressed. [] with stroke and ꬶ (U+AB36) to tell apart natural and stress-borne |
| | {"p", "ƀ"}, {"d", "ꝺ"}, {"t", "đ"}, {"ɡ", "ɣ"}, {"k", "ꬶ"}, |
| | |
| | -- default all vowels to open (open-closed distinctions are computed later); long vowels don't have closed counterparts |
| | {"a", "ɑ"}, {"ę", "æ"}, {"o", "ɔ"}, {"ů", "ø"}, {"õ", "ɔ̃"}, |
| | {"ả", "æː"}, {"ẻ", "eː"}, {"ỉ", "iː"}, {"ỏ", "ʊː"}, {"ủ", "uː"}, {"ỷ", "yː"}, {"ẻu", "øː"}, {"õu", "ɔ̃ː̃"}, |
| | |
| | {"^(ˈ)ꬶ([" .. front_vowel .. "])", "%1c%2"}, -- word-initial [k] palatalizes before front-vowels |
| | {"^(ˈ[ƀđꬶc])", "%1ʰ"}, -- voiceless stops word-initially become aspirated |
| | {"^(.*·ˈ[ƀđꬶc])", "%1⁽ʰ⁾"}, -- although after a prefix they may or may not be |
| | {"^(ˈ)ɣj([" .. front_vowel .. "])", "%1ʣ%2"}, -- <gį> word-initially and before front vowels is pronounced [d͡z] |
| | {"^(ˈ)ɣj([" .. back_vowel .. "])", "%1ɟ%2"}, -- and [ɟ] before back vowels |
| | {"^(ˈ)ɣ([" .. front_vowel .. "])", "%1ɟ%2"}, -- ɣ > ɟ / #_[+back] |
| | |
| | -- other stem- and/or word-initial configurations |
| | {"đi", "ʨi"}, {"ꝺi", "ʥi"}, {"ɣi", "ɉi"}, {"ɣj", "jː"}, {"ɣjː", "ɟː"}, |
| | {"ˈƀ", "ˈp"}, {"ˈđ", "ˈt"}, {"ˈꬶ", "ˈk"}, {"ˈꝺ", "ˈd"}, |
| | |
| | -- stressed consonants (coda of stressed syllables without long vowels, diphthongs, or triphthongs) |
| | {spat1, "%1ʔ%3"}, {spat2, "%1k%3"}, {spat3, "%1p%3"}, {spat4, "%1t%3"}, {spat5, "%1ð%3"}, |
| | |
| | -- internal consonant clusters |
| | {"ƀƀ", "ʔp"}, {"pƀ", "ʔp"}, |
| | {"đđ", "ʔt"}, {"tđ", "ʔt"}, |
| | {"ꬶꬶ", "ʔk"}, {"kꬶ", "ʔk"}, |
| | {"bm", "ʔp̚m"}, {"ꝺn", "ʔt̚n"}, {"ꬶn", "ʔk̚ŋ"}, |
| | {"mn", "mnː"}, {"mʔk", "mkː"}, |
| | {"(p[msɕ])", "%1ː"}, {"pr", "px"}, |
| | {"b([sɕ])", "p%1"}, |
| | {"nꬶ", "ŋk"}, {"([ðđʦłɕꬶr])v", "%1wː"}, |
| | {"đn", "tnː"}, {"đr", "tx"}, {"đꬶv", "tkwː"}, |
| | {"(ʦ[đlmn])", "%1ː"}, {"ʦꬶv", "ʦkwː"}, |
| | {"ʦxv", "ʦxw"}, {"đx", "tːx"}, |
| | {"(ð[mꬶ])", "%1ː"}, |
| | {"ꝺx", "ðx"}, |
| | {"(skl)", "%1ː"}, {"sʔk", "skː"}, {"sxv", "sxwː"}, |
| | {"([lr])ʔ([ptk])", "%1%2ː"}, |
| | {"lʦ(x?)v", "ɬʦ%1w"}, {"lʦx", "ɬʦx"}, |
| | {"(ł[mnꬶ])", "%1ː"}, |
| | {"(ꬶ[msɕ])", "%1ː"}, {"ꬶl", "ʔł", {"w"}}, {"ꬶl", "klː"}, |
| | {"ꬶsꬶ", "kskː"}, {"ꬶsl", "ksł"}, |
| | {"ɣ([mn])", "ŋ%1ː"}, {"ɣ([vsl])", "k%1"}, |
| | |
| | -- closed vowels |
| | {"(" .. vowel .. "*)(" .. consonant .. consonant .. ")", function(s1, s2) return open_to_closed(s1, w) .. s2 end, {"w"}}, |
| | {"(" .. vowel .. "*)(" .. consonant .. consonant .. ")", function(s1, s2) return open_to_closed(s1) .. s2 end}, |
| | {"(" .. vowel .. "*)(" .. consonant .. ")$", function(s1, s2) return open_to_closed(s1, w) .. s2 end, {"w"}}, |
| | {"(" .. vowel .. "*)(" .. consonant .. ")$", function(s1, s2) return open_to_closed(s1) .. s2 end}, |
| | {"ɑ$", "a"}, |
| | {"(" .. vowel .. ")đ$", "%1ʔ%1"}, -- -Vt becomes -VʔV (or -Vht, not considered) |
| | |
| | --undo ligatures |
| | {"ʨ", "t͡ɕ"}, {"ʥ", "d͡ʑ"}, {"ł", "tɬ"}, {"ʣ", "d͡z"}, {"ʦ", "t͡s"}, |
| | {"ƀ", "p"}, {"ꝺ", "d"}, {"đ", "t"}, {"ꬶ", "ɡ"}, {"ɉ", "ɟ"}, |
| | |
| | -- remove morpheme separator and possible double long vowel markers |
| | {"·", ""}, {"ːː", "ː"}, |
| | } |
| | | |
| for _, rule in ipairs(rules) do | | for _, rule in ipairs(rules) do |
| for regex, replacement in pairs(rule) do | | local fordialect = rule[3] |
| term = gsub(term, regex, replacement) | | if not fordialect or m_table.contains(fordialect, "w") then |
| | word = rsub(word, rule[1], rule[2]) |
| end | | end |
| end | | end |