Module:mtev-pronunc: Difference between revisions
Jump to navigation
Jump to search
Created page with "local export = {} local u = require("Module:string/char") local gsub = mw.ustring.gsub local match = mw.ustring.match local ACUTE = u(0x0301) local COARTIC = u(0x0361) local DENTAL = u(0x032A) local FLAP = u(0x0306) local NORELEASE = u(0x031A) local SYLLABIC = u(0x0329) local NASAL = u(0x0303) local m_IPA = require("Module:IPA") local lang = require("Module:languages").getByCode("sa") local m_a = require("Module:accent qualifier") local consonants..." |
No edit summary |
||
| (2 intermediate revisions by the same user not shown) | |||
| Line 14: | Line 14: | ||
local m_IPA = require("Module:IPA") | local m_IPA = require("Module:IPA") | ||
local lang = require("Module:languages").getByCode(" | local lang = require("Module:languages").getByCode("mtev") | ||
local m_a = require("Module:accent qualifier") | local m_a = require("Module:accent qualifier") | ||
| Line 70: | Line 70: | ||
local vowel_list = { | local vowel_list = { | ||
[" | ["ɑ"] = true, ["ɑː"] = true, ["i"] = true, ["iː"] = true, ["u"] = true, | ||
[" | ["uː"] = true, ["e"] = true, ["eː"] = true, ["o"] = true, ["oː"] = true, | ||
} | } | ||
| Line 112: | Line 112: | ||
local tt = { | local tt = { | ||
-- vowels | -- vowels | ||
["अ"] = " | ["अ"] = "ɑ", ["आ"] = "ɑː", ["इ"] = "i", ["ई"] = "iː", ["उ"] = "u", ["ऊ"] = "uː", | ||
["ए"] = "e", ["ऐ"] = "eː", ["ओ"] = "o", ["औ"] = "oː", | |||
-- visarga | -- visarga | ||
["ः"] = "h", | ["ः"] = "h", | ||
| Line 124: | Line 124: | ||
--Vedic extensions | --Vedic extensions | ||
['ᳵ'] = "x", ['ᳶ'] = "ɸ", | ['ᳵ'] = "x", ['ᳶ'] = "ɸ", | ||
} | } | ||
| Line 203: | Line 198: | ||
-- it is not necessary to include 'l' in the pattern for short vowels as it doesn't occur as a vowel in syllable coda and as consonantal 'l' would then be erroneously included | -- it is not necessary to include 'l' in the pattern for short vowels as it doesn't occur as a vowel in syllable coda and as consonantal 'l' would then be erroneously included | ||
local short_vowel_patt = "^[ | local short_vowel_patt = "^[ɑiur]" .. SYLLABIC .. "?" .. ACUTE .. "?$" | ||
-- Classic stress accent | -- Classic stress accent | ||
| Line 259: | Line 254: | ||
text = gsub( | text = gsub( | ||
text, | text, | ||
"([ | "([ɑiurleo])(" .. SYLLABIC .. "?)(" .. ACUTE .. "?)(ː?)([jw]?)ṃ", | ||
"%1%2" .. NASAL .. "%3%4%5" | "%1%2" .. NASAL .. "%3%4%5" | ||
) | ) | ||
| Line 276: | Line 271: | ||
table.insert(t, consonants[c]) | table.insert(t, consonants[c]) | ||
if not diacritics[chars[i + 1]] then | if not diacritics[chars[i + 1]] then | ||
table.insert(t, " | table.insert(t, "ɑ") | ||
end | end | ||
elseif c == "्" then | elseif c == "्" then | ||
| Line 294: | Line 289: | ||
word = gsub(word, "ɭ̆([.']?)ɦ", "%1ɭ̆ʱ") | word = gsub(word, "ɭ̆([.']?)ɦ", "%1ɭ̆ʱ") | ||
return word | return word | ||
end | end | ||
| Line 349: | Line 338: | ||
local superscript = { | local superscript = { | ||
["ɑ"] = "ɑ̆", | ["ɑ"] = "ɑ̆", | ||
["e"] = "ĕ", | ["e"] = "ĕ", | ||
| Line 362: | Line 350: | ||
text = abhinidhana_phonemic(text) | text = abhinidhana_phonemic(text) | ||
-- Classical | |||
-- Classical | |||
local cla_phnm = text | local cla_phnm = text | ||
cla_phnm = gsub(cla_phnm, "[ | cla_phnm = gsub(cla_phnm, "[éóíú" .. ACUTE .. "]", { | ||
["é"] = "e", ["ó"] = "o", ["í"] = "i", ["ú"] = "u | ["é"] = "e", ["ó"] = "o", ["í"] = "i", | ||
["ú"] = "u", [ACUTE] = "" | |||
}) | }) | ||
-- Add dental diacritic to t, d, tʰ, dʱ, n, l, s. | -- Add dental diacritic to t, d, tʰ, dʱ, n, l, s. | ||
cla_phnm = gsub( | cla_phnm = gsub( | ||
| Line 408: | Line 370: | ||
local cla_phnt = abhinidhana_phonetic(cla_phnm) | local cla_phnt = abhinidhana_phonetic(cla_phnm) | ||
cla_phnt = gsub( | cla_phnt = gsub( | ||
cla_phnt, | cla_phnt, | ||
"([ | "([ɑeoiu])(" .. NASAL .. "?)(ː?)([jw]?)h$", | ||
function (vow, nas, length, glide) | function (vow, nas, length, glide) | ||
return vow .. nas .. length .. glide .. "h" .. superscript[vow] | return vow .. nas .. length .. glide .. "h" .. superscript[vow] | ||
| Line 420: | Line 380: | ||
cla_phnt = gsub( | cla_phnt = gsub( | ||
cla_phnt, | cla_phnt, | ||
"([ | "([ɑeoiu])(" .. NASAL .. "?)(ː?)([jw]?)h ", | ||
function (vow, nas, length, glide) | function (vow, nas, length, glide) | ||
return vow .. nas .. length .. glide .. "h" .. superscript[vow] .. " " | return vow .. nas .. length .. glide .. "h" .. superscript[vow] .. " " | ||
| Line 427: | Line 387: | ||
dialects['cla'] = { | dialects['cla'] = { | ||
label = " | label = "Middle Tevvic", | ||
phonemic = cla_phnm, | phonemic = cla_phnm, | ||
phonetic = cla_phnt, | phonetic = cla_phnt, | ||
| Line 435: | Line 395: | ||
end | end | ||
local function make_table(dialects | local function make_table(dialects, noclassical, nolabel) | ||
local dial_types = { | local dial_types = {'cla'} | ||
if noclassical then | if noclassical then | ||
| Line 493: | Line 449: | ||
w = {default = mw.loadData("Module:headword/data").pagename}, | w = {default = mw.loadData("Module:headword/data").pagename}, | ||
a = {list = true, allow_holes = true, type = 'number'}, | a = {list = true, allow_holes = true, type = 'number'}, | ||
noclassical = {type = 'boolean'}, | noclassical = {type = 'boolean'}, | ||
nolabel = {type = 'boolean'}, | nolabel = {type = 'boolean'}, | ||
| Line 506: | Line 461: | ||
local dialects = make_dialects(text) | local dialects = make_dialects(text) | ||
return make_table(dialects | return make_table(dialects, args.noclassical, args.nolabel) | ||
end | end | ||
return export | return export | ||
Latest revision as of 03:23, 6 June 2026
- This module lacks a documentation subpage. Please create it.
- Useful links: subpage list • links • transclusions • testcases • sandbox
local export = {}
local u = require("Module:string/char")
local gsub = mw.ustring.gsub
local match = mw.ustring.match
-- Combining marks used when assembling IPA strings.
-- NOTE(review): `u` is Module:string/char; presumably it returns the character
-- for the given code point - confirm against that module.
local ACUTE = u(0x0301) -- U+0301 COMBINING ACUTE ACCENT
local COARTIC = u(0x0361) -- U+0361 COMBINING DOUBLE INVERTED BREVE (affricate tie bar)
local DENTAL = u(0x032A) -- U+032A COMBINING BRIDGE BELOW
local FLAP = u(0x0306) -- U+0306 COMBINING BREVE; not referenced elsewhere in the visible module
local NORELEASE = u(0x031A) -- U+031A COMBINING LEFT ANGLE ABOVE
local SYLLABIC = u(0x0329) -- U+0329 COMBINING VERTICAL LINE BELOW
local NASAL = u(0x0303) -- U+0303 COMBINING TILDE
local m_IPA = require("Module:IPA")
local lang = require("Module:languages").getByCode("mtev")
local m_a = require("Module:accent qualifier")
-- Devanagari consonant letters mapped to their IPA values.
-- Each row lists: voiceless, voiced, voiceless aspirated, voiced aspirated, nasal.
local consonants = {
    -- velar row
    ["क"] = "k", ["ग"] = "ɡ", ["ख"] = "kʰ", ["घ"] = "ɡʱ", ["ङ"] = "ŋ",
    -- palatal row
    ["च"] = "t͡ɕ", ["ज"] = "d͡ʑ", ["छ"] = "t͡ɕʰ", ["झ"] = "d͡ʑʱ", ["ञ"] = "ɲ",
    -- dental row
    ["त"] = "t", ["द"] = "d", ["थ"] = "tʰ", ["ध"] = "dʱ", ["न"] = "n",
    -- retroflex row
    ["ट"] = "ʈ", ["ड"] = "ɖ", ["ठ"] = "ʈʰ", ["ढ"] = "ɖʱ", ["ण"] = "ɳ",
    -- labial row
    ["प"] = "p", ["ब"] = "b", ["फ"] = "pʰ", ["भ"] = "bʱ", ["म"] = "m",
    -- approximants and flap
    ["य"] = "j", ["र"] = "ɾ", ["ल"] = "l", ["व"] = "ʋ",
    -- fricatives
    ["श"] = "ɕ", ["ष"] = "ʂ", ["स"] = "s", ["ह"] = "ɦ",
    -- letters written with a nukta
    ["ज़"] = "z", ["झ़"] = "ʑ", ["ढ़"] = "ʐ",
}
-- Dependent vowel signs and the virama, mapped to IPA.
-- The virama maps to "" (a truthy value in Lua), so a consonant followed by it
-- is still treated as "followed by a diacritic" and gets no inherent vowel.
local diacritics = {
    ["ा"] = "ɑː",
    ["ि"] = "i", ["ी"] = "iː",
    ["ु"] = "u", ["ू"] = "uː",
    ["े"] = "e", ["ै"] = "eː",
    ["ो"] = "o", ["ौ"] = "oː",
    ["्"] = "",
}
-- Set of phoneme strings that count as vowels (syllable nuclei) in syllabify.
local vowel_list = {}
for _, vowel in ipairs({ "ɑ", "ɑː", "i", "iː", "u", "uː", "e", "eː", "o", "oː" }) do
    vowel_list[vowel] = true
end
-- Set of phoneme strings treated as stops when deciding how to split a
-- three-consonant onset in shift_to_codas.
local stop_list = {}
for _, stop in ipairs({
    "k", "ɡ", "kʰ", "ɡʱ",
    "t͡ɕ", "d͡ʑ", "t͡ɕʰ", "d͡ʑʱ",
    "t", "d", "tʰ", "dʱ",
    "ʈ", "ɖ", "ʈʰ", "ɖʱ",
    "p", "b", "pʰ", "bʱ",
    "z", "ʑ", "ʐ",
}) do
    stop_list[stop] = true
end
-- Sonority rank of each consonant phoneme (1 = least sonorous), used by
-- shift_to_codas to find the least sonorous consonant of a long cluster.
-- The position of a class in the list below is its rank.
local consonant_sonority = {}
for rank, class in ipairs({
    -- 1: voiceless stops and affricates
    { "k", "kʰ", "t͡ɕ", "t͡ɕʰ", "t", "tʰ", "ʈ", "ʈʰ", "p", "pʰ" },
    -- 2: voiceless fricatives
    { "ɕ", "ʂ", "s", "h", "x", "ɸ" },
    -- 3: voiced stops and affricates
    { "ɡ", "ɡʱ", "d͡ʑ", "d͡ʑʱ", "d", "dʱ", "ɖ", "ɖʱ", "b", "bʱ" },
    -- 4: voiced fricatives
    { "ɦ" },
    -- 5: nasals
    { "ŋ", "ɲ", "n", "ɳ", "m", "m̐", "ṃ" },
    -- 6: flaps
    { "ɾ" },
    -- 7: laterals
    { "l", "ɭ̆", "ɭ̆ʱ" },
    -- 8: glides
    { "j", "ʋ" },
}) do
    for _, phoneme in ipairs(class) do
        consonant_sonority[phoneme] = rank
    end
end
-- Remaining Devanagari signs (everything that is neither a consonant letter
-- nor a dependent vowel sign) mapped to their transcription.
local tt = {
    -- independent vowel letters
    ["अ"] = "ɑ",
    ["आ"] = "ɑː",
    ["इ"] = "i",
    ["ई"] = "iː",
    ["उ"] = "u",
    ["ऊ"] = "uː",
    ["ए"] = "e",
    ["ऐ"] = "eː",
    ["ओ"] = "o",
    ["औ"] = "oː",
    -- visarga
    ["ः"] = "h",
    -- chandrabindu
    ["ँ"] = "m̐",
    -- anusvara (resolved later by anusvara())
    ["ं"] = "ṃ",
    -- avagraha: written but has no sound of its own
    ["ऽ"] = "",
    -- Vedic extensions
    ["ᳵ"] = "x",
    ["ᳶ"] = "ɸ",
}
--- Move leading consonants of each non-initial syllable into the coda of the
-- preceding syllable, using the Weerasinghe-Wasala-Gamage method.
-- @param syllables list of syllables; each syllable is a list of phoneme
--        strings whose last element is the vowel (as built by syllabify)
-- @return the same `syllables` table, modified in place
local function shift_to_codas(syllables)
    for idx, syll in ipairs(syllables) do
        local size = #syll
        local to_move = 0
        if idx == 1 or size < 3 then
            -- Nothing to do: the first syllable has no predecessor, and coda
            -- movement is only needed for onset clusters of 2 or more.
        elseif size == 3 then
            -- V.CCV => VC.CV
            to_move = 1
        elseif size == 4 then
            local pre_vowel = syll[size - 1]
            if pre_vowel == "ɾ" or pre_vowel == "j" or (stop_list[syll[1]] and stop_list[syll[2]]) then
                -- V.CCrV or V.CCyV => VC.CrV or VC.CyV
                -- if the first two consonants are stops, VC.CCV
                to_move = 1
            else
                -- V.CCCV => VCC.CV
                to_move = 2
            end
        else
            -- 4 consonants or more
            local pre_vowel = syll[size - 1]
            if pre_vowel == "ɾ" or pre_vowel == "j" then
                to_move = size - 3
            else
                -- Find the index of the consonant of least sonority.
                -- Phonemes with no entry in consonant_sonority (for example
                -- "z", which `consonants` can produce) are ranked as infinitely
                -- sonorous so they are never picked; comparing a nil rank
                -- would otherwise raise an error.
                to_move = size - 1
                local min_son = consonant_sonority[pre_vowel] or math.huge
                for j = size - 1, 1, -1 do
                    local son = consonant_sonority[syll[j]] or math.huge
                    if son < min_son then
                        to_move = j
                        min_son = son
                    end
                end
            end
        end
        -- idx > 1 whenever to_move > 0, so the previous syllable exists
        for _ = 1, to_move do
            table.insert(syllables[idx - 1], table.remove(syll, 1))
        end
    end
    return syllables
end
--- Group a flat phoneme list into syllables and join them with ".".
-- @param remainder list of phoneme strings (as built by convert_word); it is
--        only read, not consumed
-- @param accent optional 1-based index of the syllable that carries the pitch
--        accent; ignored when it does not address an existing syllable
-- @return the syllabified word as a single string
local function syllabify(remainder, accent)
    local syllables = {}
    local syll = {}
    for _, phoneme in ipairs(remainder) do
        table.insert(syll, phoneme)
        if vowel_list[phoneme] then
            -- a vowel closes the syllable that is being collected
            table.insert(syllables, syll)
            syll = {}
        end
    end
    -- store whatever consonants remain
    local final_cons = syll
    -- Vedic pitch accent.
    -- The module defines no `accent_vowel` lookup table, so the acute is
    -- inserted directly after the vowel's base letter (before any length
    -- mark); that is the position the vowel patterns in anusvara allow for it.
    local accented = accent ~= nil and syllables[accent]
    if accented then
        local vowel = accented[#accented]
        accented[#accented] = (gsub(vowel, "^(.)", "%1" .. ACUTE))
    end
    syllables = shift_to_codas(syllables)
    -- Classic stress accent (disabled)
    -- it is not necessary to include 'l' in the pattern for short vowels as it doesn't occur as a vowel in syllable coda and as consonantal 'l' would then be erroneously included
    -- local short_vowel_patt = "^[ɑiur]" .. SYLLABIC .. "?" .. ACUTE .. "?$"
    -- local num_sylls = #syllables
    -- if num_sylls == 2 then
    -- table.insert(syllables[1], 1, 'ˈ')
    -- elseif num_sylls == 3 then
    -- -- if the final segment of the second syllable is not a short vowel, stress the second syllable
    -- if match(syllables[2][#syllables[2]], short_vowel_patt) == nil then
    -- table.insert(syllables[2], 1, 'ˈ')
    -- -- else stress the third
    -- else
    -- table.insert(syllables[1], 1, 'ˈ')
    -- end
    -- elseif num_sylls >= 4 then
    -- if match(syllables[num_sylls - 1][#syllables[num_sylls - 1]], short_vowel_patt) == nil then
    -- table.insert(syllables[num_sylls - 1], 1, 'ˈ')
    -- elseif match(syllables[num_sylls - 2][#syllables[num_sylls - 2]], short_vowel_patt) == nil then
    -- table.insert(syllables[num_sylls - 2], 1, 'ˈ')
    -- else
    -- table.insert(syllables[num_sylls - 3], 1, 'ˈ')
    -- end
    -- end
    -- If there are phonemes left, then the word ends in a consonant
    if #final_cons > 0 then
        if #syllables == 0 then
            -- The word contains no vowel at all (e.g. a lone consonant with
            -- virama): keep the consonants as one syllable instead of
            -- indexing a non-existent last syllable.
            table.insert(syllables, final_cons)
        else
            -- Add them to the last syllable
            local last = syllables[#syllables]
            for _, phoneme in ipairs(final_cons) do
                table.insert(last, phoneme)
            end
        end
    end
    for i, phonemes in ipairs(syllables) do
        syllables[i] = table.concat(phonemes, "")
    end
    return table.concat(syllables, ".")
end
-- Homorganic nasal that an anusvara turns into before each consonant.
-- Every row ends with the nasal of that place mapped to itself; the palatal
-- row previously had an empty-string key where "ɲ" belongs.
local anu_to_nasals = {
    ["k"] = "ŋ", ["ɡ"] = "ŋ", ["ŋ"] = "ŋ",
    ["t͡ɕ"] = "ɲ", ["d͡ʑ"] = "ɲ", ["ɲ"] = "ɲ",
    ["t"] = "n", ["d"] = "n", ["n"] = "n",
    ["ʈ"] = "ɳ", ["ɖ"] = "ɳ", ["ɳ"] = "ɳ",
    ["p"] = "m", ["b"] = "m", ["m"] = "m",
}
--- Resolve the anusvara placeholder "ṃ" in a transcription.
-- In order: "ṃ" at the very end of the text becomes "m"; "ṃ" before a stop or
-- nasal becomes the homorganic nasal; "ṃ" after a vowel nasalises that vowel;
-- any "ṃ" still left becomes "ɴ".
-- @param text transcription string
-- @return the transcription with every "ṃ" replaced
local function anusvara(text)
    text = gsub(text, "ṃ$", "m")
    text = gsub(
        text,
        "ṃ([ %.ˈ]?)([kɡtdnʈɖpbm])([" .. DENTAL .. COARTIC .. "]?)([ɕʑ]?)",
        function(div, cons, mark, fric)
            -- Affricates are keyed by the whole sequence (stop + tie bar +
            -- fricative). Any other combination, e.g. a plain stop that is
            -- merely followed by ɕ/ʑ, has no key of its own, so fall back to
            -- the stop alone instead of concatenating nil.
            local nasal = anu_to_nasals[cons .. mark .. fric] or anu_to_nasals[cons]
            if not nasal then
                -- returning nil leaves the matched text unchanged
                return nil
            end
            return nasal .. div .. cons .. mark .. fric
        end
    )
    -- vowel (+ syllabic mark, acute, length, glide) followed by ṃ: put the
    -- nasal mark on the vowel and drop the ṃ
    text = gsub(
        text,
        "([ɑiurleo])(" .. SYLLABIC .. "?)(" .. ACUTE .. "?)(ː?)([jw]?)ṃ",
        "%1%2" .. NASAL .. "%3%4%5"
    )
    text = gsub(text, "ṃ", "ɴ")
    return text
end
--- Transcribe one Devanagari word into a syllabified phoneme string.
-- @param word the word in Devanagari
-- @param accent optional accent position, passed through to syllabify
-- @return the syllabified transcription
local function convert_word(word, accent)
    local NUKTA = u(0x093C) -- U+093C DEVANAGARI SIGN NUKTA
    local chars = {}
    gsub(word, ".", function(c) table.insert(chars, c) end)
    local t = {}
    local i = 1
    while i <= #chars do
        local c = chars[i]
        -- The word is walked one code point at a time, but the nukta letters
        -- in `consonants` are keyed as base letter + nukta. Join the pair
        -- here so those entries can actually match.
        if chars[i + 1] == NUKTA and consonants[c .. NUKTA] then
            c = c .. NUKTA
            i = i + 1
        end
        if consonants[c] then
            table.insert(t, consonants[c])
            -- inherent vowel, unless a vowel sign or virama follows
            if not diacritics[chars[i + 1]] then
                table.insert(t, "ɑ")
            end
        elseif c == "्" then
            -- do nothing
        elseif diacritics[c] then
            table.insert(t, diacritics[c])
        elseif tt[c] and tt[c] ~= "" then
            -- Signs that map to "" (the avagraha) are silent; inserting an
            -- empty phoneme would be counted as a consonant by syllabify.
            table.insert(t, tt[c])
        end
        i = i + 1
    end
    word = syllabify(t, accent)
    word = gsub(word, "%.ˈ", "ˈ")
    -- correction for ळ्ह = ɭ̆ʱ
    word = gsub(word, "ɭ̆([.']?)ɦ", "%1ɭ̆ʱ")
    return word
end
--- Transcribe a space-separated phrase word by word.
-- @param words the phrase; words are separated by single spaces
-- @param accents table indexed by word position giving each word's accent
--        (entries may be nil)
-- @return the transcribed words joined by single spaces
local function convert_words(words, accents)
    local result = {}
    local word_num = 0
    for word in mw.text.gsplit(words, " ") do
        word_num = word_num + 1
        result[word_num] = convert_word(word, accents[word_num])
    end
    -- return directly: the original assigned to an undeclared `text`,
    -- which created a global variable
    return table.concat(result, " ")
end
--- Apply the phonological processes to a transcription.
-- At present the only process is anusvara resolution.
-- @param text transcription string
-- @return the processed transcription
local function phon_procs(text)
    local resolved = anusvara(text)
    return resolved
end
--- De-aspirate and de-affricate consonants that stand before a stop.
-- @param text transcription string
-- @return the transcription with those clusters simplified
local function abhinidhana_phonemic(text)
    local stop = "([kɡtdʈɖpb])"
    -- optional word space, syllable dot or stress mark between the consonants
    local boundary = "([ %.ˈ]?)"
    -- a stop (with optional dental mark) loses its aspiration before a stop
    local deaspirated = gsub(
        text,
        stop .. "(" .. DENTAL .. "?)[ʰʱ]?" .. boundary .. stop,
        "%1%2%3%4"
    )
    -- an affricate (optionally aspirated) keeps only its stop part before a stop
    local deaffricated = gsub(
        deaspirated,
        "([td])" .. COARTIC .. "[ɕʑ][ʰʱ]?" .. boundary .. stop,
        "%1%2%3"
    )
    return deaffricated
end
--- Mark a stop that stands before another stop with the no-audible-release
-- diacritic.
-- @param text transcription string
-- @return the transcription with the diacritic inserted after the first stop
--         (and after its dental mark, when present)
local function abhinidhana_phonetic(text)
    local stop = "([kɡtdʈɖpb])"
    local pattern = stop .. "(" .. DENTAL .. "?)([ %.ˈ]?)" .. stop
    local replacement = "%1%2" .. NORELEASE .. "%3%4"
    local marked = gsub(text, pattern, replacement)
    return marked
end
-- Echo vowel appended after a visarga in make_dialects: each base vowel is
-- mapped to the same vowel carrying a breve.
local superscript = {
    ["ɑ"] = "ɑ̆", ["e"] = "ĕ", ["o"] = "ŏ",
    ["i"] = "ĭ", ["u"] = "ŭ",
}
--- Build the phonemic and phonetic transcriptions for each dialect.
-- Only one dialect, keyed 'cla', is produced.
-- @param text transcription as returned by phon_procs
-- @return table mapping dialect key to { label, phonemic, phonetic }
local function make_dialects(text)
    local phonemic = abhinidhana_phonemic(text)
    -- Classical: drop the pitch accent, whether it is part of a precomposed
    -- vowel or a separate combining acute
    phonemic = gsub(phonemic, "[éóíú" .. ACUTE .. "]", {
        ["é"] = "e", ["ó"] = "o", ["í"] = "i",
        ["ú"] = "u", [ACUTE] = ""
    })
    -- Add dental diacritic to t, d, tʰ, dʱ, n, l, s.
    phonemic = gsub(
        phonemic,
        "([td]" .. COARTIC .. "?[ɕʑ]?)([ʰʱ]?)",
        function(base_consonant, aspiration)
            -- anything longer than a bare t/d is an affricate: leave it alone
            if base_consonant ~= "t" and base_consonant ~= "d" then
                return nil
            end
            return base_consonant .. DENTAL .. aspiration
        end
    )
    phonemic = gsub(phonemic, "([nls])", "%1" .. DENTAL)

    local phonetic = abhinidhana_phonetic(phonemic)
    -- A visarga at the end of the text or before a space is followed by an
    -- echo of the preceding vowel.
    local vowel_then_h = "([ɑeoiu])(" .. NASAL .. "?)(ː?)([jw]?)h"
    local function with_echo(vow, nas, length, glide)
        return vow .. nas .. length .. glide .. "h" .. superscript[vow]
    end
    phonetic = gsub(phonetic, vowel_then_h .. "$", with_echo)
    phonetic = gsub(phonetic, vowel_then_h .. " ", function(vow, nas, length, glide)
        return with_echo(vow, nas, length, glide) .. " "
    end)

    return {
        cla = {
            label = "Middle Tevvic",
            phonemic = phonemic,
            phonetic = phonetic,
        },
    }
end
--- Format the dialect transcriptions as wikitext.
-- @param dialects table as returned by make_dialects
-- @param noclassical when true, the 'cla' dialect is left out
-- @param nolabel when true and exactly one dialect is shown, return the bare
--        IPA without the bullet and dialect label
-- @return wikitext string; "" when no dialect is left to show
local function make_table(dialects, noclassical, nolabel)
    -- The original removed index 2 of a one-element list, so `noclassical`
    -- had no effect; build the list so that the flag is honoured.
    local dial_types = {}
    if not noclassical then
        table.insert(dial_types, 'cla')
    end
    if #dial_types == 0 then
        return ""
    end

    -- /phonemic/ always; [phonetic] only when it differs
    local function ipa_items(dial)
        local entry = dialects[dial]
        local items = {{pron = '/' .. entry.phonemic .. '/'}}
        if entry.phonemic ~= entry.phonetic then
            table.insert(items, {pron = '[' .. entry.phonetic .. ']'})
        end
        return items
    end

    if #dial_types == 1 and nolabel then
        return m_IPA.format_IPA_full { lang = lang, items = ipa_items(dial_types[1]) }
    end

    -- one bulleted, labelled line per dialect
    local lines = {}
    for _, dial in ipairs(dial_types) do
        table.insert(lines, table.concat{
            '\n* ',
            m_a.format_qualifiers(lang, {dialects[dial].label}),
            ' ',
            m_IPA.format_IPA_full { lang = lang, items = ipa_items(dial) },
        })
    end
    return table.concat(lines, "")
end
--- Template entry point: transcribe the requested word(s) and format the result.
-- Template parameters (read from the parent frame):
--   w / 1        text to transcribe; defaults to the page name
--   a            list of numbers (holes allowed), one accent position per word
--   noclassical  boolean, passed to make_table
--   nolabel      boolean, passed to make_table
-- @param frame the Scribunto frame object
-- @return wikitext produced by make_table
function export.show(frame)
    local spec = {
        [1] = {alias_of = 'w'},
        w = {default = mw.loadData("Module:headword/data").pagename},
        a = {list = true, allow_holes = true, type = 'number'},
        noclassical = {type = 'boolean'},
        nolabel = {type = 'boolean'},
    }
    local m_parameters = require("Module:parameters")
    local args = m_parameters.process(frame:getParent().args, spec)
    local transcription = phon_procs(convert_words(args.w, args.a))
    return make_table(make_dialects(transcription), args.noclassical, args.nolabel)
end
return export