Module:mg-pron: Difference between revisions
Jump to navigation
Jump to search
No edit summary |
No edit summary |
||
(87 intermediate revisions by the same user not shown) | |||
Line 8: | Line 8: | ||
local gsplit = mw.text.gsplit | local gsplit = mw.text.gsplit | ||
local | local N = u(0x0303) -- COMBINING TILDE, ̃ | ||
local NONSYLL = u(0x032F) -- COMBINING INVERTED BREVE BELOW, ̯ | local NONSYLL = u(0x032F) -- COMBINING INVERTED BREVE BELOW, ̯ | ||
local ADV = u(0x031F) -- COMBINING PLUS SIGN BELOW, ̟ | local ADV = u(0x031F) -- COMBINING PLUS SIGN BELOW, ̟ | ||
local RET = u(0x0320) -- COMBINING MINUS SIGN BELOW, ̠ | local RET = u(0x0320) -- COMBINING MINUS SIGN BELOW, ̠ | ||
local CEN = u(0x0308) -- COMBINING DIAERESIS, ̈ | local CEN = u(0x0308) -- COMBINING DIAERESIS, ̈ | ||
local ACUTE = u(0x0301) -- COMBINING ACUTE ACCENT, ́ | |||
local velar = "[kɡɣɫw]"; local palatal = "[ɲʧʃʎ]" | local velar = "[kɡɣɫw]"; local palatal = "[ɲʧʃʎ]" | ||
local consonants = "[ | local consonants = "[bkdhjlmnɲprɾstʃθβðɡɣzʧɫʎɓ]" | ||
local vowels = "[ | local vowels = "[áéíóúaɑɐeɪɛiɔʊouwJąĄ" .. ADV .. RET .. CEN .. ACUTE .. "]" | ||
local voiced = "[ | local voiced = "[bdhʎjmnɲβðɡɣzɓ]" | ||
local function same(foo, bar) | local function same(foo, bar) | ||
Line 23: | Line 24: | ||
foo, bar = match(foo, "^."), match(bar, "^.") -- sort out the letter | foo, bar = match(foo, "^."), match(bar, "^.") -- sort out the letter | ||
return foo == bar and true or false | return foo == bar and true or false | ||
end | |||
local function remove_acute(str, with_stress) | |||
str = mw.ustring.toNFD(str) | |||
str = gsub(str, ACUTE, "") | |||
str = mw.ustring.toNFC(str) | |||
return str == "Ą" and "ą" or (with_stress and "ˈ" or "") .. str | |||
end | end | ||
Line 30: | Line 39: | ||
local prelims = { | local prelims = { | ||
{"r", "ɾ"}, {"ch", "ʧ"}, {"qu", "k"}, {"il", "ʎ"}, {"ñ", "ɲ"}, {"c", "k"}, | {"r", "ɾ"}, {"ch", "ʧ"}, {"qu", "k"}, {"il", "ʎ"}, {"ñ", "ɲ"}, {"c", "k"}, --{"an$", "ɐn"}, | ||
{"[bv]", "β"}, {"gu?", "ɣ"}, {"d", "ð"}, | {"[bv]", "β"}, {"gu?", "ɣ"}, {"d", "ð"}, {"ʎ$", "il"}, {"an$", "ą"}, {"án$", "Ą"}, | ||
{"z", "θ"}, {"x", "ʃ"}, {"(" .. vowels .. ")i(" .. vowels .. ")", "%1j%2"}, | {"z", "θ"}, {"x", "ʃ"}, {"(" .. vowels .. ")i(" .. vowels .. ")", "%1j%2"}, {"ṡ", "z"}, | ||
{"(" .. vowels .. ")u", "%1w"}, {"u(" .. vowels .. ")", "w%1"}, {"(" .. vowels .. ")i", "%1J"}, {" | {"(" .. vowels .. ")u", "%1w"}, {"u(" .. vowels .. ")", "w%1"}, {"(" .. vowels .. ")i", "%1J"}, {"i(" .. vowels .. ")", "J%1"}, | ||
} | } | ||
Line 43: | Line 52: | ||
term = gsub(term, "·(" .. consonants .. ")·", "%1·") | term = gsub(term, "·(" .. consonants .. ")·", "%1·") | ||
local syll = split(term, "·") | local syll = split(term, "·"); local noa = {} | ||
local monosyll = {["n"] = "ˈ", ["pron"] = "", ["particle"] = "(ˈ)", ["prep"] = "(ˈ)", ["conj"] = "(ˈ)"} | local monosyll = {["n"] = "ˈ", ["pron"] = "", ["particle"] = "(ˈ)", ["prep"] = "(ˈ)", ["conj"] = "(ˈ)"} | ||
if #syll == 1 then | if #syll == 1 then | ||
if not pos then error('Part of speech needed to determine stress') end | if not pos then error('Part of speech needed to determine stress') end | ||
syll[1] = monosyll[pos] .. syll[1] | syll[1] = remove_acute(syll[1]) | ||
syll[1] = (monosyll[pos] or "(ˈ)") .. syll[1] | |||
else | else | ||
syll[#syll - 1] = "ˈ" .. syll[#syll - 1] | if match(term, "[áéíóúĄ]") then | ||
for _, s in ipairs(syll) do | |||
s = remove_acute(s, match(s, "[áéíóúĄ]") and true or false) | |||
table.insert(noa, s) | |||
end | |||
else | |||
syll[#syll - 1] = "ˈ" .. syll[#syll - 1] | |||
end | |||
end | end | ||
return table.concat(syll, "·") | return table.concat(#noa > 1 and noa or syll, "·") | ||
end | end | ||
local rules = { | local rules = { | ||
{"([ɾs])·([ɾs])", function(s1, s2) return same(s1, s2) and " | -- rr and ss clusters, preaspirated | ||
{".$", {["a"] = "ɐ", ["e"] = "ɪ", ["o"] = "ʊ", ["n"] = | {"([ɾs])·(ˈ?)([ɾs])", function(s1, st, s2) return same(s1, s2) and "·" .. st .. "ʰ" .. s1 or s1 .. st .. s2 end}, | ||
{".$", {["a"] = "ɐ", ["e"] = "ɪ", ["o"] = "ʊ", ["n"] = N}}, | |||
{"l·", "ɫ·"}, {"l$", "ɫ"}, -- velarized [l] | {"l·", "ɫ·"}, {"l$", "ɫ"}, -- velarized [l] | ||
{"a(" .. velar .. ")", "ɑ" .. ADV .. "%1"}, {"(" .. velar .. ")a", "%1ɑ" .. ADV}, -- [a] retracts next to velar consonants | {"a(" .. velar .. ")", "ɑ" .. ADV .. "%1"}, {"(" .. velar .. ")a", "%1ɑ" .. ADV}, -- [a] retracts next to velar consonants | ||
{"a(" .. palatal .. ")", "a" .. RET .. "%1"}, {"(" .. palatal .. "a)", "%1" .. RET}, -- and advances next to palatal | {"a(" .. palatal .. ")", "a" .. RET .. "%1"}, {"(" .. palatal .. "a)", "%1" .. RET}, -- and advances next to palatal | ||
{"s·(" .. voiced .. ")", "z·%1"}, {"(" .. vowels .. ")·s(" .. vowels .. ")", "%1·z%2"}, -- [s]-voicing | {"s·(" .. voiced .. ")", "z·%1"}, {"(" .. vowels .. ")·s(" .. vowels .. ")", "%1·z%2"}, {"%ss", "‿z"}, -- [s]-voicing | ||
{" | {"[Jj]m$", "y" .. N}, {"m$", "u" .. N}, {"n·", N .. "·"}, -- [m] and [n] behave as nasalizers in codas and word-finally | ||
{"e(" .. consonants .. ")·(" .. consonants .. ")", "ɛ%1·%2"}, {"o(" .. consonants .. ")·(" .. consonants .. ")", "ɔ%1·%2"}, | {"e(" .. N .. "?)·(" .. consonants .. ")", "ɛ%1·%2"}, {"o(" .. N .. "?)·(" .. consonants .. ")", "ɔ%1·%2"}, | ||
{"e(" .. consonants .. ")?·(" .. consonants .. "[ | {"e(" .. consonants .. ")·(" .. consonants .. ")", "ɛ%1·%2"}, {"o(" .. consonants .. ")·(" .. consonants .. ")", "ɔ%1·%2"}, | ||
{"e(" .. N .. "?)?·(" .. consonants .. "[aɐɑąĄ])", "ɛ%1·%2"}, {"o(" .. N .. "?)?·(" .. consonants .. "[aɐɑąĄ])", "ɔ%1·%2"}, | |||
{"e(" .. N .. "?" .. consonants .. ")?·(" .. consonants .. "[aɐɑąĄ])", "ɛ%1·%2"}, {"o(" .. N .. "?" .. consonants .. ")?·(" .. consonants .. "[aɐɑąĄ])", "ɔ%1·%2"}, | |||
{"(" .. vowels .. "i)", "%1" .. NONSYLL}, {"w", "u" .. NONSYLL}, {"J", "i" .. NONSYLL}, | {"(" .. vowels .. "i)", "%1" .. NONSYLL}, {"w", "u" .. NONSYLL}, {"J", "i" .. NONSYLL}, | ||
Line 73: | Line 94: | ||
{"(" .. consonants .. ")(·ˈ?)([βðɣ])", function(foo, bar, baz) return foo .. bar .. desoften[baz] end}, | {"(" .. consonants .. ")(·ˈ?)([βðɣ])", function(foo, bar, baz) return foo .. bar .. desoften[baz] end}, | ||
{"ʧ", "(t)ʃ"}, {"a([^" .. RET .. "])", "a" .. CEN .. "%1"}, {" | {"ʧ", "(t)ʃ"}, {"·ˈ", "ˈ"}, {"·", "."}, | ||
--{"a([^" .. RET .. "])", "a" .. CEN .. "%1"}, | |||
--{"([ɑa][" .. CEN .. RET .. ADV .. "])(ː?)" .. N, "%1" .. N .. "%2"}, {"a" .. CEN .. N, "a" .. RET .. N}, | |||
{"i" .. NONSYLL, "j"}, {"u" .. NONSYLL, "w"}, | |||
} | } | ||
Line 89: | Line 115: | ||
end | end | ||
term = gsub(term, "[ɡɣ]([ei])", (g and "j" or "h") .. "%1") | |||
term = gsub(term, "([ɡɣ])", g and "%1" or "h") | term = gsub(term, "([ɡɣ])", g and "%1" or "h") | ||
term = gsub(term, "hɑ" .. ADV, "ha" .. CEN) | term = gsub(term, "hɑ" .. ADV, "ha" .. CEN) | ||
term = gsub(term, "(ˈ..?a" .. CEN .. ")", "% | term = gsub(term, "a" .. CEN .. "(" .. velar .. ")", "ɑ" .. ADV .. "%1") | ||
term = gsub(term, "(ˈ.?.?[aɑ])([" .. ADV .. RET .. CEN .. "]?)([^.]*)", "%1%2ː%3") | |||
term = gsub(term, "ːⁿ", "ⁿː") | |||
term = gsub(term, "a([^" .. CEN .. ADV .. RET .. "])", "a" .. CEN .. "%1") | |||
term = gsub(term, "ą", "a" .. CEN .. N); term = gsub(term, "Ą", "a" .. CEN .. N .. "ː") | |||
term = gsub(term, "ɓ", "β") -- soft mutation | |||
return term | return term | ||
Line 130: | Line 162: | ||
[1] = { default = mw.title.getCurrentTitle().text }, -- PAGENAME | [1] = { default = mw.title.getCurrentTitle().text }, -- PAGENAME | ||
[2] = {}, | [2] = {}, | ||
["pos"] = {}, | |||
} | } | ||
local args = require("Module:parameters").process(frame:getParent().args, params) | local args = require("Module:parameters").process(frame:getParent().args, params) | ||
local term = frame.args[1] or mw.title.getCurrentTitle().nsText == 'Template' and "gueizuñe" or args[1] | local term = frame.args[1] or mw.title.getCurrentTitle().nsText == 'Template' and "gueizuñe" or args[1] | ||
local pos = frame.args[2] or args[2] | local pos = frame.args[2] or args.pos or args[2] | ||
local is_g = match(term, "g") | local is_g = match(term, "g") | ||
Line 141: | Line 174: | ||
if is_g then | if is_g then | ||
ipa = ipa .. "\n* " .. line_format(separate_word(term, pos, true), {' | ipa = ipa .. "\n* " .. line_format(separate_word(term, pos, true), {'without [[w:gheada|gheada]]'}) | ||
end | end | ||
Latest revision as of 07:40, 23 October 2023
- The following documentation is located at Module:mg-pron/doc.[edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
local sub = mw.ustring.sub
local find = mw.ustring.find
local match = mw.ustring.match
local gmatch = mw.ustring.gmatch
local gsub = mw.ustring.gsub
local u = mw.ustring.char
local split = mw.text.split
local gsplit = mw.text.gsplit
-- Combining diacritics, built from their code points so they can be spliced
-- into patterns and replacement strings below.
local N = u(0x0303) -- COMBINING TILDE, ̃
local NONSYLL = u(0x032F) -- COMBINING INVERTED BREVE BELOW, ̯
local ADV = u(0x031F) -- COMBINING PLUS SIGN BELOW, ̟
local RET = u(0x0320) -- COMBINING MINUS SIGN BELOW, ̠
local CEN = u(0x0308) -- COMBINING DIAERESIS, ̈
local ACUTE = u(0x0301) -- COMBINING ACUTE ACCENT, ́
-- Character classes (already bracketed) used as building blocks for patterns.
-- These operate on the intermediate transcription, so they contain IPA
-- symbols and internal placeholders rather than only orthographic letters.
local velar = "[kɡɣɫw]"; local palatal = "[ɲʧʃʎ]"
local consonants = "[bkdhjlmnɲprɾstʃθβðɡɣzʧɫʎɓ]"
-- "J", "ą" and "Ą" are placeholders produced by `prelims`; the combining
-- marks are included so a vowel plus its diacritic still counts as vocalic.
local vowels = "[áéíóúaɑɐeɪɛiɔʊouwJąĄ" .. ADV .. RET .. CEN .. ACUTE .. "]"
-- Segments that trigger voicing of a preceding syllable-final [s] in `rules`.
local voiced = "[bdhʎjmnɲβðɡɣzɓ]"
-- Reports whether two strings begin with the same base letter.
-- Both arguments are canonically decomposed first so that a diacritic on the
-- first letter does not prevent it from comparing equal to the bare letter.
-- Returns a boolean.
local function same(foo, bar)
	local first_of_foo = match(mw.ustring.toNFD(foo), "^.")
	local first_of_bar = match(mw.ustring.toNFD(bar), "^.")
	return first_of_foo == first_of_bar
end
-- Strips every combining acute accent from `str`.
-- str:         a syllable, possibly carrying an orthographic stress mark.
-- with_stress: when truthy, the IPA stress mark "ˈ" is prepended to the result.
-- A syllable that is exactly the placeholder "Ą" is returned as "ą" with no
-- stress mark, regardless of `with_stress`.
local function remove_acute(str, with_stress)
	-- Decompose so the acute is a separate code point, drop it, recompose.
	local decomposed = mw.ustring.toNFD(str)
	local unaccented = gsub(decomposed, ACUTE, "")
	local recomposed = mw.ustring.toNFC(unaccented)

	if recomposed == "Ą" then
		return "ą"
	end

	local prefix = ""
	if with_stress then
		prefix = "ˈ"
	end
	return prefix .. recomposed
end
-- Table of public functions; returned at the end of the module.
local export = {}
-- Maps each fricative to the stop that `rules` substitutes for it
-- word-initially and when it is adjacent to a consonant across a syllable
-- boundary.
local desoften = {["β"] = "b", ["ð"] = "d", ["ɣ"] = "ɡ"}
-- Ordered {pattern, replacement} pairs applied by export.crux to the
-- lower-cased spelling before syllabification. Order matters: later entries
-- see the output of earlier ones (e.g. "il" becomes "ʎ", then a word-final
-- "ʎ" is turned back into "il").
-- "ą" / "Ą" stand for word-final "an" / "án" and are expanded at the end of
-- export.crux; "w" and "J" mark the glide members of vowel sequences and are
-- rewritten later by `rules`.
local prelims = {
{"r", "ɾ"}, {"ch", "ʧ"}, {"qu", "k"}, {"il", "ʎ"}, {"ñ", "ɲ"}, {"c", "k"}, --{"an$", "ɐn"},
{"[bv]", "β"}, {"gu?", "ɣ"}, {"d", "ð"}, {"ʎ$", "il"}, {"an$", "ą"}, {"án$", "Ą"},
{"z", "θ"}, {"x", "ʃ"}, {"(" .. vowels .. ")i(" .. vowels .. ")", "%1j%2"}, {"ṡ", "z"},
{"(" .. vowels .. ")u", "%1w"}, {"u(" .. vowels .. ")", "w%1"}, {"(" .. vowels .. ")i", "%1J"}, {"i(" .. vowels .. ")", "J%1"},
}
-- Splits a pre-processed word into syllables separated by "·" and marks the
-- stressed syllable with "ˈ".
-- term: the word after the `prelims` substitutions.
-- pos:  part of speech; only consulted for monosyllables, where it selects
--       the stress prefix (unknown values fall back to "(ˈ)").
-- Stress assignment:
--   * one syllable: prefix chosen from the part of speech; raises an error
--     if `pos` is missing;
--   * an acute-accented vowel (or "Ą") anywhere in the word: each syllable
--     containing one is stressed, and the accents are stripped;
--   * otherwise: the penultimate syllable is stressed.
-- Returns the syllabified string.
local function syllabify(term, pos)
	local cons_run = "(" .. consonants .. "*)"
	local vowel_run = "(" .. vowels .. "*)"
	local one_cons = "(" .. consonants .. ")"

	-- Put a boundary after every consonant-run + vowel-run, then tidy up
	-- doubled and trailing boundaries.
	term = gsub(term, cons_run .. vowel_run, "%1%2·")
	term = gsub(term, "··", "·")
	term = gsub(term, "·$", "")
	-- Of two consonants opening a syllable, the first joins the previous one.
	term = gsub(term, "·" .. one_cons .. one_cons .. vowel_run, "%1·%2%3")
	-- A lone consonant left as its own syllable is attached to the preceding one.
	term = gsub(term, "·" .. one_cons .. "$", "%1")
	term = gsub(term, "·" .. one_cons .. "·", "%1·")

	local syll = split(term, "·")

	if #syll == 1 then
		if not pos then error('Part of speech needed to determine stress') end
		local monosyll = {["n"] = "ˈ", ["pron"] = "", ["particle"] = "(ˈ)", ["prep"] = "(ˈ)", ["conj"] = "(ˈ)"}
		return (monosyll[pos] or "(ˈ)") .. remove_acute(syll[1])
	end

	if match(term, "[áéíóúĄ]") then
		-- Stress is written in the spelling: move it onto the IPA mark.
		local noa = {}
		for i = 1, #syll do
			local accented = match(syll[i], "[áéíóúĄ]") ~= nil
			noa[#noa + 1] = remove_acute(syll[i], accented)
		end
		return table.concat(noa, "·")
	end

	-- Default: penultimate stress.
	local penult = #syll - 1
	syll[penult] = "ˈ" .. syll[penult]
	return table.concat(syll, "·")
end
-- Ordered {pattern, replacement} pairs that export.crux applies, via
-- mw.ustring.gsub, to the syllabified word ("·" separates syllables, "ˈ"
-- marks stress). A replacement may be a string, a lookup table or a function.
-- Order is significant: each rule sees the output of all earlier ones.
local rules = {
	-- rr and ss clusters, preaspirated
	-- Identical letters across a boundary collapse to "ʰ" + the letter in the
	-- following syllable. A mixed cluster (ɾ·s or s·ɾ) is returned unchanged;
	-- the "·" has to be re-emitted because it is part of the match but not of
	-- any capture, otherwise the syllable boundary is lost.
	{"([ɾs])·(ˈ?)([ɾs])", function(s1, st, s2) return same(s1, s2) and "·" .. st .. "ʰ" .. s1 or s1 .. "·" .. st .. s2 end},
	-- Word-final a/e/o are reduced and a final n becomes the nasal tilde;
	-- any other final character is not a key of the table and is left as is.
	{".$", {["a"] = "ɐ", ["e"] = "ɪ", ["o"] = "ʊ", ["n"] = N}},
	{"l·", "ɫ·"}, {"l$", "ɫ"}, -- velarized [l]
	{"a(" .. velar .. ")", "ɑ" .. ADV .. "%1"}, {"(" .. velar .. ")a", "%1ɑ" .. ADV}, -- [a] retracts next to velar consonants
	{"a(" .. palatal .. ")", "a" .. RET .. "%1"}, {"(" .. palatal .. "a)", "%1" .. RET}, -- and advances next to palatal
	{"s·(" .. voiced .. ")", "z·%1"}, {"(" .. vowels .. ")·s(" .. vowels .. ")", "%1·z%2"}, {"%ss", "‿z"}, -- [s]-voicing
	{"[Jj]m$", "y" .. N}, {"m$", "u" .. N}, {"n·", N .. "·"}, -- [m] and [n] behave as nasalizers in codas and word-finally
	-- e/o become ɛ/ɔ before a syllable boundary followed by a consonant,
	-- with an optional nasal tilde or a coda consonant in between.
	{"e(" .. N .. "?)·(" .. consonants .. ")", "ɛ%1·%2"}, {"o(" .. N .. "?)·(" .. consonants .. ")", "ɔ%1·%2"},
	{"e(" .. consonants .. ")·(" .. consonants .. ")", "ɛ%1·%2"}, {"o(" .. consonants .. ")·(" .. consonants .. ")", "ɔ%1·%2"},
	-- NOTE(review): Lua patterns cannot quantify a capture, so the "?" after
	-- ")" in the next four patterns is presumably matched as a literal "?"
	-- and these rules never fire on normal input -- confirm and rewrite if
	-- an optional group was intended.
	{"e(" .. N .. "?)?·(" .. consonants .. "[aɐɑąĄ])", "ɛ%1·%2"}, {"o(" .. N .. "?)?·(" .. consonants .. "[aɐɑąĄ])", "ɔ%1·%2"},
	{"e(" .. N .. "?" .. consonants .. ")?·(" .. consonants .. "[aɐɑąĄ])", "ɛ%1·%2"}, {"o(" .. N .. "?" .. consonants .. ")?·(" .. consonants .. "[aɐɑąĄ])", "ɔ%1·%2"},
	-- Glide placeholders become a vowel plus the non-syllabic diacritic.
	{"(" .. vowels .. "i)", "%1" .. NONSYLL}, {"w", "u" .. NONSYLL}, {"J", "i" .. NONSYLL},
	-- β/ð/ɣ are replaced by b/d/ɡ word-initially and next to a consonant
	-- across a syllable boundary.
	{"^(ˈ?)([βðɣ])", function(foo, bar) return foo .. desoften[bar] end},
	{"([βðɣ])(·ˈ?)(" .. consonants .. ")", function(foo, bar, baz) return desoften[foo] .. bar .. baz end},
	{"(" .. consonants .. ")(·ˈ?)([βðɣ])", function(foo, bar, baz) return foo .. bar .. desoften[baz] end},
	-- Final notation: the stress mark replaces a boundary it follows, and the
	-- remaining boundaries are written as ".".
	{"ʧ", "(t)ʃ"}, {"·ˈ", "ˈ"}, {"·", "."},
	--{"a([^" .. RET .. "])", "a" .. CEN .. "%1"},
	--{"([ɑa][" .. CEN .. RET .. ADV .. "])(ː?)" .. N, "%1" .. N .. "%2"}, {"a" .. CEN .. N, "a" .. RET .. N},
	{"i" .. NONSYLL, "j"}, {"u" .. NONSYLL, "w"},
}
-- Converts a single word to its phonetic transcription.
-- term: the word as spelled (case is ignored).
-- pos:  part of speech, passed on to syllabify for monosyllable stress.
-- g:    truthy to keep [ɡ]/[ɣ] (written "j" before e/i); otherwise they are
--       replaced by "h".
-- Returns the transcription without enclosing brackets.
function export.crux(term, pos, g)
	local word = mw.ustring.lower(term)

	for i = 1, #prelims do
		word = gsub(word, prelims[i][1], prelims[i][2])
	end

	word = syllabify(word, pos)

	for i = 1, #rules do
		word = gsub(word, rules[i][1], rules[i][2])
	end

	-- Final adjustments; order matters, each step sees the previous result.
	local finishing = {
		-- Treatment of ɡ/ɣ, first before e/i, then everywhere else.
		{"[ɡɣ]([ei])", (g and "j" or "h") .. "%1"},
		{"([ɡɣ])", g and "%1" or "h"},
		-- After "h" the a-vowel is written centralized; a centralized a
		-- before a velar is written as ɑ with the plus-sign-below mark.
		{"hɑ" .. ADV, "ha" .. CEN},
		{"a" .. CEN .. "(" .. velar .. ")", "ɑ" .. ADV .. "%1"},
		-- Add a length mark after a stressed a/ɑ (at most two characters
		-- after the stress mark) and its optional diacritic.
		{"(ˈ.?.?[aɑ])([" .. ADV .. RET .. CEN .. "]?)([^.]*)", "%1%2ː%3"},
		{"ːⁿ", "ⁿː"},
		-- Any a not already carrying one of the three diacritics is centralized.
		{"a([^" .. CEN .. ADV .. RET .. "])", "a" .. CEN .. "%1"},
		-- Expand the word-final "an" / "án" placeholders.
		{"ą", "a" .. CEN .. N},
		{"Ą", "a" .. CEN .. N .. "ː"},
		{"ɓ", "β"}, -- soft mutation
	}
	for i = 1, #finishing do
		word = gsub(word, finishing[i][1], finishing[i][2])
	end

	return word
end
-- Wraps each pronunciation in a styled <span> and concatenates the results.
-- items: a sequence of tables, each with a string field `pron`.
-- Returns the concatenated HTML; an empty sequence gives "".
-- NOTE(review): this function is defined as a global; it is kept that way so
-- existing references keep working, but `local` would be the safer default.
function IPA_span(items)
	-- The style attribute's closing quote was previously missing, which left
	-- the attribute unterminated and the tag malformed.
	local open_tag = "<span style=\"font-size:110%;font-family:'Gentium','DejaVu Sans','Segoe UI',sans-serif\">"
	local bits = {}
	for _, item in ipairs(items) do
		bits[#bits + 1] = open_tag .. item.pron .. "</span>"
	end
	return table.concat(bits)
end
-- Prefixes the rendered pronunciations with the linked "IPA(key): " label.
-- items: sequence of tables with a `pron` field, passed on to IPA_span.
-- Returns wikitext.
function format_IPA(items)
	local label = "[[w:IPA chart|IPA]]<sup>([[IPA for Modern Gallaecian|key]])</sup>: "
	local spans = IPA_span(items)
	return label .. spans
end
-- Builds one line of output: an italicised, parenthesised qualifier followed
-- by the pronunciation in square brackets.
-- pronunciation: transcription string without brackets.
-- register:      sequence of qualifier strings, joined with ", ".
-- Returns wikitext.
function line_format(pronunciation, register)
	local bracketed = '[' .. pronunciation .. ']'
	local rendered = format_IPA({{pron = bracketed}})
	local qualifier = "(''" .. table.concat(register, ", ") .. "'')"
	return qualifier .. ' ' .. rendered
end
-- Transcribes a phrase word by word.
-- term: one or more words separated by single spaces.
-- pos, g: passed unchanged to export.crux for every word.
-- Returns the individual transcriptions joined with spaces.
function separate_word(term, pos, g)
	local pieces = {}
	for word in gsplit(term, " ") do
		pieces[#pieces + 1] = export.crux(word, pos, g)
	end
	return table.concat(pieces, " ")
end
-- Template entry point: produces the pronunciation list for a term.
-- frame: the frame object of the invocation. The term is taken from the
--        invocation's own first argument, else (on Template pages) a fixed
--        sample word, else the parent frame's first argument, which defaults
--        to the page name. The part of speech is taken from the invocation's
--        second argument, else the parent's `pos`, else the parent's second
--        argument.
-- Returns a bulleted wikitext list with one line for the standard
-- pronunciation and, if the term contains "g", a second line without gheada.
function export.show(frame)
	local title = mw.title.getCurrentTitle()
	local params = {
		[1] = { default = title.text }, -- PAGENAME
		[2] = {},
		["pos"] = {},
	}
	local args = require("Module:parameters").process(frame:getParent().args, params)

	local term = frame.args[1]
	if not term then
		if title.nsText == 'Template' then
			term = "gueizuñe"
		else
			term = args[1]
		end
	end
	local pos = frame.args[2] or args.pos or args[2]
	local is_g = match(term, "g")

	local lines = {"* " .. line_format(separate_word(term, pos), {'Standard Calá'})}
	if is_g then
		lines[#lines + 1] = "* " .. line_format(separate_word(term, pos, true), {'without [[w:gheada|gheada]]'})
	end
	return table.concat(lines, "\n")
end
return export