|
|
| (93 intermediate revisions by the same user not shown) |
| Line 1: |
Line 1: |
| local data = {} | | local gsub = mw.ustring.gsub |
|
| |
|
| data.stressed_vowels = {
| | local export = {} |
| ["ả"] = "a", ["a[ui]"] = "a", ["oa"] = "a", ["[eẻę]"] = "e", ["e[iu]"] = "e",
| |
| ["ay"] = "e", ["[iỉ]"] = "i", ["i[aeou]"] = "i", ["[oỏõ]"] = "o", ["[oõ]u"] = "o",
| |
| ["oi"] = "o", ["[uủ]"] = "u", ["u[oi]"] = "u", ["[yỷ]"] = "y", ["ů[ai]?"] = "y", ["ẻu"] = "y", ["ey"] = "y",
| |
| } | |
|
| |
|
| data.digraphs_to_single = {
--- Apply the first lenition rule that changes the given word.
-- When the word contains a span delimited by two "·" marks, a rule only
-- applies if it matches exactly between a pair of marks, and the marks are
-- consumed by the replacement. Otherwise rules are matched anywhere in the
-- word. Only the first occurrence is replaced, and the function returns as
-- soon as one rule has altered the word.
-- @param word the string to lenite
-- @return the lenited string, or `word` itself when no rule applies
function export.lenition(word)
	--local word = frame:getParent().args[1] -- for testing
	local v = "([aeiouyůõảẻỉỏủỷę·])"
	local lenition_patterns = {
		--geminated voiced stops
		["bb"] = "b", ["dd"] = "d", ["gg"] = "g", ["ġġ"] = "ġ",
		--long sonorants
		["gį"] = "į", ["mm"] = "m", ["ll"] = "l", ["nn"] = "n", ["rr"] = "r",
		--uvular stops
		["bġ"] = "p", ["pr"] = "p", ["dġ"] = "t", ["tr"] = "t",
		--glottalized stops
		["bm"] = "m", ["dn"] = "n", ["kn"] = "ng",
		--consonant clusters
		["dl"] = "l", ["([lr])pp"] = "%1p", ["([lr])tt"] = "%1t", ["([lrms])kk"] = "%1k",["ps"] = "bs", ["k([lvs])"] = "g%1",
		--voiced consonants (▫, arbitrary character to compute diphthongs further down)
		["vv"] = "ų", ["d([aảou])"] = "l%1", ["(.)[dġ]([ieůy])"] = "%1▫%2", [v.."[vųbhḥgd]"..v] = "%1▫%2", ["ġ([aou])"] = "vv%1", ["ġo̊"] = "vvo̊",
		--Ci#
		["ri"] = "ṡi", ["nįi"] = "gįi", ["hhį"] = "ṡ",
		--long voiceless consonants
		["ḍb"] = "p", ["ḍḍ"] = "hh", ["ḍg"] = "k",
		--semi-vowels
		["[ou]ų"] = "ů▫", ["ių"] = "y▫", ["[óú]ų"] = "ẻu▫", ["íų"] = "ỷ▫",
	}

	-- The word never changes inside the loop, so decide once whether rules
	-- have to be wrapped in the "·" delimiters.
	local marker = word:match("·.*·") and "·" or ""

	-- NOTE(review): pairs() visits keys in an unspecified order, so when more
	-- than one rule could match, which one wins is not determined by the order
	-- the rules are written in above — confirm this is acceptable.
	for pattern, replacement in pairs(lenition_patterns) do
		local candidate = gsub(word, marker .. pattern .. marker, replacement, 1)
		if candidate ~= word then
			return candidate
		end
	end

	return word
end
|
| |
|
| data.lenition_patterns = {
| | export.diphthongs = { |
| ["bb"] = "b", ["dd"] = "d", ["gg"] = "g", ["ɟ"] = "į", ["mm"] = "m", ["ll"] = "l", ["nn"] = "n", | | ["[aę]▫[aę]"] = "ả", ["e▫e"] = "ẻ", ["i▫i"] = "ỉ", ["u▫[yuů]"] = "ủ", ["[yů]▫u"] = "ủ", |
| ["rr"] = "r", ["bġ"] = "p", ["pr"] = "p", ["dġ"] = "t", ["tr"] = "t", ["ḍb"] = "p", ["ð"] = "hh", | | ["a▫e"] = "ai", ["a▫o"] = "au", ["ę▫[uů]"] = "ay", |
| ["ḍg"] = "k", ["bm"] = "m", ["dn"] = "n", ["kn"] = "ng", ["([lr])pp"] = "%1p", ["([lr])tt"] = "%1t", ["([lrms])kk"] = "%1k", | | ["ę▫[ei]"] = "ei", ["ę▫o"] = "eu", |
| ["k([lvs])"] = "g%1", ["ps"] = "bs", ["[vųbhḥg]"] = "", ["d[aou]"] = "l", ["ġ[aou]"] = "vv", ["[dġ][eůy]"] = "", ["[rġ]i"] = "ṡi", | | ["e▫[aę]"] = "ia", ["e▫o"] = "io", ["i▫y"] = "iů", |
| ["di"] = "", ["ɲi"] = "ɟi", ["ħį"] = "ṡ", ["[ou]ų"] = "ů", ["ɬ"] = "l", | | ["[uo]▫e"] = "oi", ["o▫ů"] = "ou", |
| | ["[yů]▫o"] = "uo", ["o▫o"] = "ỏ", |
| | ["y▫y"] = "ỷ", ["ů▫ů"] = "ẻu", |
| | ["ẻu▫i"] = "ůbi", |
| | ["([ae])▫ů"] = "%1y", ["([iou])▫ę"] = "%1a", |
| | ["[yů]▫[aę]"] = "ůa", ["[yů]▫[ei]"] = "ůi", -- to screen out stressed vs. unstressed |
| } | | } |
| | |
--- Run the `export.diphthongs` replacements over a word and drop "▫" marks.
-- Every pattern in `export.diphthongs` is applied to the whole word. When
-- `stress` is truthy, "ů" followed by one of a/ę/e/i is then rewritten as "ỷ".
-- Finally any remaining "▫" placeholder characters are removed.
-- @param word the string to process
-- @param stress truthy to enable the extra "ů…" -> "ỷ" rewrite
-- @return the processed string
function export.coalescence(word, stress)
	local result = word

	-- NOTE(review): pairs() iterates in an unspecified order; if two patterns
	-- can apply to overlapping text the outcome depends on that order.
	for pattern, replacement in pairs(export.diphthongs) do
		result = gsub(result, pattern, replacement)
	end

	if stress then
		result = gsub(result, "ů[aęei]", "ỷ")
	end

	-- Keep only the string result of gsub (it also returns a match count).
	local stripped = gsub(result, "▫", "")

	-- Disabled: triphthong handling via export.triphthong_coalescence.
	--[[for regex, repl in pairs(export.triphthong_coalescence) do
		stripped = gsub(stripped, regex, repl)
	end]]

	return stripped
end
|
| |
|
| --[[
| | export.stressed_vowels = { |
| I'm sure someone more experienced than I am (not too difficult to achieve) | | ["a"] = "a", ["ả"] = "a", ["au"] = "a", ["ai"] = "a", ["oa"] = "a", |
| would do this more elegantly and more efficiently, but I couldn't come up with anything better.
| | ["e"] = "e", ["ẻ"] = "e", ["ę"] = "e", ["ei"] = "e", ["ay"] = "e", ["eu"] = "e", |
| ]] | | ["i"] = "i", ["ỉ"] = "i", ["ia"] = "i", ["ie"] = "i", ["io"] = "i", ["iu"] = "i", |
| | | ["o"] = "o", ["ỏ"] = "o", ["õ"] = "o", ["õu"] = "o", ["oi"] = "o", ["ou"] = "o", |
| data.endings = {}
| | ["u"] = "u", ["ủ"] = "u", ["uo"] = "u", ["ui"] = "u", |
| -- [gender]-[declension]-[subgroup]
| | ["y"] = "y", ["ỷ"] = "y", ["ů"] = "y", ["ẻu"] = "y", ["ey"] = "y", ["ůa"] = "y", ["ůi"] = "y", |
| data.endings.a = {
| |
| ["m"] = "i-a-mV", ["n"] = "i-a-nV", ["p"] = "i-a-pV", ["b"] = "i-a-bV", ["t"] = "i-a-tV", | |
| ["r"] = "i-a-rV", ["vv"] = "i-a-vvV", ["lk"] = "i-a-CkV", ["rk"] = "i-a-CkV", ["sk"] = "i-a-CkV", | |
| } | | } |
|
| |
|
| -- [gender]-[declension]-[final vowel(s)]-[subgroup]
| | export.digraphs_to_single = { |
| data.endings.e = {
| | ["ts"] = "ʦ", ["tṡ"] = "ʨ", ["dį"] = "ʥ", ["ng"] = "ŋ", ["nį"] = "ɲ", |
| ["na"] = "i-e-ae-n", ["ne"] = "i-e-ae-n", | | ["kį"] = "c", ["gį"] = "ɟ", ["hh"] = "ħ", ["ḍḍ"] = "ð", ["dl"] = "ɬ", ["o̊"] = "ȯ", |
| ["ba"] = "i-e-ae-b", ["be"] = "i-e-ae-b",
| | ["õu"] = "ở", ["ẻu"] = "ử", |
| ["ka"] = "i-e-ae-k", ["ke"] = "i-e-ae-k",
| |
| ["ma"] = "i-e-ae-m", ["me"] = "i-e-ae-m",
| |
| ["va"] = "i-e-ae-v", ["ve"] = "i-e-ae-v",
| |
| ["tsa"] = "i-e-ae-ts", ["tse"] = "i-e-ae-ts",
| |
| ["ta"] = "i-e-ae-t", ["te"] = "i-e-ae-t",
| |
| ["sa"] = "i-e-ae-s", ["se"] = "i-e-ae-s",
| |
| ["la"] = "i-e-ae-l", ["le"] = "i-e-ae-l",
| |
| ["ha"] = "i-e-ae-h/ġ", ["he"] = "i-e-ae-h/ġ", ["ġa"] = "i-e-ae-h/ġ", ["ġe"] = "i-e-ae-h/ġ",
| |
| ["a"] = "i-e-ae", ["e"] = "i-e-ae",
| |
| ["mi"] = "i-e-i-m",
| |
| ["pi"] = "i-e-i-p/b", ["bi"] = "i-e-i-p/b",
| |
| ["vi"] = "i-e-i-v", | |
| ["ni"] = "i-e-i-n",
| |
| ["ri"] = "i-e-i-r",
| |
| ["li"] = "i-e-i-l",
| |
| ["ki"] = "i-e-i-k/g", ["gi"] = "i-e-i-k/g",
| |
| ["i"] = "i-e-i", | |
| ["o"] = "i-e-o",
| |
| -- U = front vowel
| |
| ["mu"] = "i-e-U-m/n", ["my"] = "i-e-U-m/n", ["mů"] = "i-e-U-m/n", ["nu"] = "i-e-U-m/n", ["ny"] = "i-e-U-m/n", ["nů"] = "i-e-U-m/n",
| |
| ["ku"] = "i-e-U-k", ["ky"] = "i-e-U-k", ["ků"] = "i-e-U-k",
| |
| ["u"] = "i-e-U", ["y"] = "i-e-U", ["ů"] = "i-e-U",
| |
| } | | } |
|
| |
|
| -- [gender]-[declension]-[subgroup]
| | export.triphthong_coalescence = { |
| data.endings.i = {
| | ["([aoue])i([aoue])"] = "%1į%2", ["([aoue])ii"] = "%1gįi", ["([aoue])ỉ"] = "%1gįi", ["ai[yů]"] = "ey", |
| ["m"] = "i-i-mV", ["n"] = "i-i-nV", ["um"] = "i-i-uomnV", ["om"] = "i-i-uomnV", ["un"] = "i-i-uomnV", ["on"] = "i-i-uomnV", | | ["([eo])i[yů]"] = "%1įů", |
| ["v"] = "i-i-vV", ["g"] = "i-i-gV", ["s"] = "i-i-gV", ["r"] = "i-i-rV", ["h"] = "i-i-hV", ["k"] = "i-i-kV",
| | ["ue([aoue])"] = "uį%1", ["uẻ"] = "uįe", ["uei"] = "ugįi", ["u[ei][yů]"] = "ůgįů", |
| ["l"] = "i-i-lV", ["i"] = "i-i-i", -- else -V | | ["ie([aouy])"] = "igį%1", ["iee"] = "iẻ", ["ieů"] = "igįů", |
| }
| | ["ůi([aoe])"] = "ůį%1", ["ůi[uyů]"] = "ůgįů", ["ůii"] = "ůgįi", ["ůỉ"] = "ugįi", |
| | | ["ay([ae])"] = "ęm%1", ["ay([oi])"] = "ęb%1", ["ay[uyů]"] = "ębů", ["aỷ"] = "ębů", |
| -- [gender]-[declension]-[final vowel(s)]-[subgroup]
| | ["uo([aue])"] = "um%1", ["uo([oi])"] = "ub%1", ["uỏ"] = "ubo", ["uo[yů]"] = "ůbů", |
| data.endings.o = {
| | ["au([aueů])"] = "am%1", ["aủ"] = "amu", ["au([oi])"] = "ab%1", ["auy"] = "amů", |
| ["ba"] = "i-o-ae-b/t", ["be"] = "i-o-ae-b/t", ["ta"] = "i-o-ae-b/t", ["te"] = "i-o-ae-b/t", | | ["o[ua]([aueů])"] = "om%1", ["oả"] = "oma", ["oủ"] = "omu", ["o[ua]([oi])"] = "ob%1", ["o[ua]y"] = "omů", |
| ["va"] = "i-o-ae-v/m/n", ["ve"] = "i-o-ae-v/m/n", ["ma"] = "i-o-ae-v/m/n", ["me"] = "i-o-ae-v/m/n", ["na"] = "i-o-ae-v/m/n", ["ne"] = "i-o-ae-v/m/n",
| | ["e[uy]([ae])"] = "ům%1", ["e[uy]([oi])"] = "ůb%1", ["e[uy][uyů]"] = "ůbů", ["e[ủỷ]"] = "ůbů", |
| ["ra"] = "i-o-ae-r/h/g", ["re"] = "i-o-ae-r/h/g", ["ha"] = "i-o-ae-r/h/g", ["he"] = "i-o-ae-r/h/g", ["ga"] = "i-o-ae-r/h/g", ["ge"] = "i-o-ae-r/h/g", | | ["iu([aeů])"] = "ivv%1", ["iu([oui])"] = "ib%1", ["iủ"] = "ibu", ["iuy"] = "ivvů", |
| ["ka"] = "i-o-ae-k", ["ke"] = "i-o-ae-k", | | ["ả([aoue])"] = "ęį%1", ["ải"] = "ęgįi", ["[ảẻ][yů]"] = "ey", |
| ["la"] = "i-o-ae-l", ["le"] = "i-o-ae-l",
| | ["ỏ([aueů])"] = "om%1", ["ỏ([oi])"] = "ob%1", ["ỏy"] = "omů", |
| ["mi"] = "i-o-i-m/n/k/g", ["ni"] = "i-o-i-m/n/k/g", ["ki"] = "i-o-i-m/n/k/g", ["gi"] = "i-o-i-m/n/k/g",
| | ["ủ([ae])"] = "um%1", ["ủ([oui])"] = "ub%1", ["ủ[ůy]"] = "ůbů", |
| ["lki"] = "i-o-i-lk", | | ["ẻu([ae])"] = "ům%1", ["ẻu([oi])"] = "ůb%1", ["ẻu[uyů]"] = "ůbů", ["ẻủ"] = "ůbů", |
| ["rki"] = "i-o-i-rk",
| | ["ẻ([ao])"] = "evv%1", ["ẻ([ei])"] = "egį%1", |
| ["ski"] = "i-o-i-sk", ["sġi"] = "i-o-i-sk",
| | ["ỉ([aou])"] = "ivv%1", ["ỉ([ey])"] = "igį%1", ["ỉi"] = "iddįi", ["iỉ"] = "iddįi", ["ỉů"] = "igįy", |
| ["li"] = "i-o-i-l",
| | ["ỷ([ae])"] = "ym%1", ["ỷ([oi])"] = "yb%1", ["ỷ[uůy]"] = "yby", ["yỷ"] = "yby", |
| ["ri"] = "i-o-i-r/h", ["hi"] = "i-o-i-r/h",
| |
| ["ṡi"] = "i-o-i-ṡ", ["hhįi"] = "i-o-i-ṡ",
| |
| ["b"] = "i-o-ouyů-b/p/t/d", ["p"] = "i-o-ouyů-b/p/t/d", ["t"] = "i-o-ouyů-b/p/t/d", ["d"] = "i-o-ouyů-b/p/t/d", | |
| ["v"] = "i-o-ouyů-v/m/n", ["m"] = "i-o-ouyů-v/m/n", ["n"] = "i-o-ouyů-v/m/n",
| |
| ["r"] = "i-o-ouyů-r/h/g/k", ["h"] = "i-o-ouyů-r/h/g/k", ["g"] = "i-o-ouyů-r/h/g/k", ["k"] = "i-o-ouyů-r/h/g/k",
| |
| ["l"] = "i-o-ouyů-l", | |
| ["a"] = "i-o-ae", ["e"] = "i-o-ae",
| |
| ["i"] = "i-o-i", --else -ouyů
| |
| }
| |
| | |
| | |
| data.endings.u = {
| |
| ["ba"] = "i-u-aei-b/p/t/d", ["pa"] = "i-u-aei-b/p/t/d", ["ta"] = "i-u-aei-b/p/t/d", ["da"] = "i-u-aei-b/p/t/d",
| |
| ["be"] = "i-u-aei-b/p/t/d", ["pe"] = "i-u-aei-b/p/t/d", ["te"] = "i-u-aei-b/p/t/d", ["de"] = "i-u-aei-b/p/t/d", | |
| ["bi"] = "i-u-aei-b/p/t/d", ["pi"] = "i-u-aei-b/p/t/d", ["ti"] = "i-u-aei-b/p/t/d", ["di"] = "i-u-aei-b/p/t/d",
| |
| ["va"] = "", ["ma"] = "", ["na"] = "",
| |
| ["ve"] = "", ["me"] = "", ["ne"] = "",
| |
| ["vi"] = "", ["mi"] = "", ["ni"] = "", | |
| ["ra"] = "", ["ha"] = "", ["ga"] = "", ["ka"] = "",
| |
| ["re"] = "", ["he"] = "", ["ge"] = "", ["ke"] = "",
| |
| ["ri"] = "", ["hi"] = "", ["gi"] = "", ["ki"] = "",
| |
| ["la"] = "", ["le"] = "", ["li"] = "", | |
| ["a"] = "", ["e"] = "", ["i"] = "",
| |
| ["bo"] = "", ["bu"] = "", ["by"] = "", ["bů"] = "", | |
| ["po"] = "", ["pu"] = "", ["py"] = "", ["pů"] = "",
| |
| ["to"] = "", ["tu"] = "", ["ty"] = "", ["tů"] = "", | |
| ["do"] = "", ["du"] = "", ["dy"] = "", ["dů"] = "",
| |
| ["vo"] = "", ["vu"] = "", ["vy"] = "", ["vů"] = "", | |
| ["mo"] = "", ["mu"] = "", ["my"] = "", ["mů"] = "",
| |
| ["no"] = "", ["nu"] = "", ["ny"] = "", ["nů"] = "", | |
| ["ro"] = "", ["ru"] = "", ["ry"] = "", ["rů"] = "",
| |
| ["ho"] = "", ["hu"] = "", ["hy"] = "", ["hů"] = "", | |
| ["go"] = "", ["gu"] = "", ["gy"] = "", ["gů"] = "", | |
| ["ko"] = "", ["ku"] = "", ["ky"] = "", ["ků"] = "",
| |
| ["lo"] = "", ["lu"] = "", ["ly"] = "", ["lů"] = "", | |
| ["o"] = "", ["u"] = "", ["y"] = "", ["ů"] = "",
| |
|
| |
| } | | } |
|
| |
|
| return data | | return export |