Module:av-pron: Difference between revisions
Jump to navigation
Jump to search
No edit summary |
No edit summary |
||
Line 7: | Line 7: | ||
local circumflex = mw.ustring.char(0x302) | local circumflex = mw.ustring.char(0x302) | ||
local acute_or_grave = "[" .. acute .. grave .. "]" | local acute_or_grave = "[" .. acute .. grave .. "]" | ||
local vowels = " | local vowels = "aeiou" | ||
local vowel = "[" .. vowels .. "]" | local vowel = "[" .. vowels .. "]" | ||
local vowel_or_semivowel = "[" .. vowels .. "jw]" | local vowel_or_semivowel = "[" .. vowels .. "jw]" | ||
local not_vowel = "[^" .. vowels .. "]" | local not_vowel = "[^" .. vowels .. "]" | ||
local front = "[ | local front = "[eij]" | ||
local fronted = mw.ustring.char(0x031F) | local fronted = mw.ustring.char(0x031F) | ||
local voiced_consonant = "[bdɡlmnrv]" | local voiced_consonant = "[bdɡlmnrv]" | ||
Line 21: | Line 21: | ||
-- voiced_z must be a table of integer indices, a boolean, or nil. | -- voiced_z must be a table of integer indices, a boolean, or nil. | ||
function export.to_phonemic(word | function export.to_phonemic(word, single_character_affricates) | ||
word = mw.ustring.lower(word):gsub("'", "") | word = mw.ustring.lower(word):gsub("'", "") | ||
-- Decompose combining characters: for instance, è → e + ◌̀ | -- Decompose combining characters: for instance, è → e + ◌̀ | ||
local decomposed = mw.ustring.toNFD(word):gsub(" | local decomposed = mw.ustring.toNFD(word):gsub("ch", "k"):gsub("ng$", "ŋ") | ||
-- Transcriptions must contain an acute or grave, to indicate stress position. | -- Transcriptions must contain an acute or grave, to indicate stress position. | ||
Line 58: | Line 49: | ||
"a(" .. grave .. "?)w", | "a(" .. grave .. "?)w", | ||
{ [""] = vowel_count == 1 and "ɔ" or "o", [grave] = "ɔ"}) | { [""] = vowel_count == 1 and "ɔ" or "o", [grave] = "ɔ"}) | ||
-- Handle gl and gn. | -- Handle gl and gn. | ||
transcription = mw.ustring.gsub(transcription, | transcription = mw.ustring.gsub(transcription, | ||
"(g[ | "(g[n])(.?)()", | ||
function (digraph, after, pos) | function (digraph, after, pos) | ||
local consonant | local consonant | ||
if digraph == "gn" then | if digraph == "gn" then | ||
consonant = "ɲ" | consonant = "ɲ" | ||
local following = mw.ustring.sub(transcription, pos, pos) | local following = mw.ustring.sub(transcription, pos, pos) | ||
Line 121: | Line 79: | ||
end | end | ||
-- | -- t is /t͡s/ before i. | ||
local consonant | local consonant | ||
if (next == "e | if (next == "i") then | ||
if first == "t" then | |||
consonant = "ʦ" | |||
else | |||
consonant = consonant | |||
end | |||
end | |||
-- c, g are soft before e, i. | |||
local consonant | |||
if (next == "e" or next == "i") then | |||
if first == "c" then | if first == "c" then | ||
consonant = "ʧ" | consonant = "ʧ" | ||
Line 135: | Line 103: | ||
consonant = "ɡ" | consonant = "ɡ" | ||
end | end | ||
end | end | ||
return consonant .. next | return consonant .. next | ||
end) | end) | ||
-- u or i (without accent) before another vowel is a semivowel. | -- u or i (without accent) before another vowel is a semivowel. | ||
Line 163: | Line 124: | ||
-- sc before e, i is /ʃ/, doubled after a vowel. | -- sc before e, i is /ʃ/, doubled after a vowel. | ||
transcription = transcription:gsub("sʧ", "ʃ") | transcription = transcription:gsub("sʧ", "ʃ") | ||
-- Move stress before syllable onset, and add syllable breaks. | -- Move stress before syllable onset, and add syllable breaks. | ||
Line 282: | Line 167: | ||
-- Incomplete and currently not used by any templates. | -- Incomplete and currently not used by any templates. | ||
function export.to_phonetic(word | function export.to_phonetic(word) | ||
local phonetic = export.to_phonemic(word | local phonetic = export.to_phonemic(word) | ||
-- Vowels longer in stressed, open, non-word-final syllables. | -- Vowels longer in stressed, open, non-word-final syllables. | ||
Line 293: | Line 178: | ||
phonetic = mw.ustring.gsub(phonetic, | phonetic = mw.ustring.gsub(phonetic, | ||
"n([%.ˈ]?[ɡk])", "ŋ%1") | "n([%.ˈ]?[ɡk])", "ŋ%1") | ||
return phonetic | return phonetic | ||
Line 320: | Line 198: | ||
local Array = require "Module:array" | local Array = require "Module:array" | ||
local transcriptions = Array(args[1]) | local transcriptions = Array(args[1]) | ||
:map( | :map( | ||
function (word, i) | function (word, i) | ||
return { pron = "/" .. export.to_phonemic(word | return { pron = "/" .. export.to_phonemic(word) .. "/" } | ||
end) | end) | ||
Revision as of 15:19, 9 November 2019
Documentation for this module may be created at Module:av-pron/doc
local export = {}

-- Marks that appear in the finished transcription.
local stress = "ˈ"
local long = "ː"

-- Combining diacritics, as separate characters.
local acute = mw.ustring.char(0x301)
local grave = mw.ustring.char(0x300)
local circumflex = mw.ustring.char(0x302)
local acute_or_grave = ("[%s%s]"):format(acute, grave)

-- Pattern fragments (character classes) built from the vowel letters.
local vowels = "aeiou"
local vowel = ("[%s]"):format(vowels)
local vowel_or_semivowel = ("[%sjw]"):format(vowels)
local not_vowel = ("[^%s]"):format(vowels)
local front = "[eij]"
local fronted = mw.ustring.char(0x31F)
local voiced_consonant = "[bdɡlmnrv]"

-- Inside to_phonemic every affricate is a single character (ʦ, ʣ, ʧ, ʤ);
-- this table gives the tie-bar spelling each one stands for.
local full_affricates = {
	["ʦ"] = "t͡s",
	["ʣ"] = "d͡z",
	["ʧ"] = "t͡ʃ",
	["ʤ"] = "d͡ʒ",
}
-- Convert a spelled word to a phonemic transcription.
--
-- word: spelling to transcribe. It is lowercased, apostrophes are removed and
--   it is decomposed (NFD), so that è becomes e + combining grave.
-- single_character_affricates: when truthy, affricates are left as the single
--   characters ʦ, ʣ, ʧ, ʤ; otherwise they are expanded to t͡s, d͡z, t͡ʃ, d͡ʒ.
--
-- Returns the transcription string, without enclosing slashes.
-- Raises an error for the letter sequences "cg" and "gc".
--
-- NOTE(review): nothing in this function turns an acute or grave accent into
-- the stress mark; the syllabification step only reacts to a stress mark that
-- is already present in the string. Confirm whether a step is missing.
function export.to_phonemic(word, single_character_affricates)
	-- Assignment keeps only the first result of gsub (the string).
	word = mw.ustring.lower(word):gsub("'", "")

	-- Decompose combining characters: for instance, è → e + ◌̀
	local decomposed = mw.ustring.toNFD(word):gsub("ch", "k"):gsub("ng$", "ŋ")

	-- Words without an acute or grave get a default accent.
	-- This does not handle phrases containing more than one stressed word.
	-- vowel_count stays nil when the word already carries an accent.
	local vowel_count
	if not mw.ustring.find(decomposed, acute_or_grave) then
		-- Allow monosyllabic unstressed words.
		local _, count = decomposed:gsub(vowel, "%0")
		vowel_count = count

		if vowel_count ~= 1 then
			-- Add acute accent on second-to-last vowel; an unaccented i or u
			-- directly before the last vowel is not counted as a vowel here.
			decomposed = mw.ustring.gsub(decomposed,
				"(" .. vowel .. ")(" .. not_vowel .. "*[iu]?" .. vowel .. not_vowel .. "*)$",
				"%1" .. acute .. "%2")
		end
	end

	local transcription = decomposed

	-- Assume that aw is English: ɔ with a grave or in an unaccented
	-- one-vowel word, o otherwise.
	transcription = mw.ustring.gsub(
		transcription,
		"a(" .. grave .. "?)w",
		{ [""] = vowel_count == 1 and "ɔ" or "o", [grave] = "ɔ" })

	-- Handle gn: it becomes ɲ. The character right after gn is dropped when
	-- the character after that one is a vowel letter.
	-- NOTE(review): this drops any character, not only i (gnao → ɲo);
	-- confirm that this is intended.
	transcription = mw.ustring.gsub(transcription,
		"(g[n])(.?)()",
		function (digraph, after, pos)
			-- pos indexes the string as it was before this substitution.
			local following = mw.ustring.sub(transcription, pos, pos)
			-- Plain find: `following` is arbitrary input, not a pattern, so
			-- characters such as "." or "%" must not be interpreted.
			if following ~= "" and vowels:find(following, 1, true) then
				after = ""
			end
			return "ɲ" .. after
		end)

	-- Handle other cases of c, g. A doubled letter and a following h are
	-- consumed by the match and yield a single consonant.
	transcription = mw.ustring.gsub(transcription,
		"(([cg])([cg]?)(h?))(.?)",
		function (cluster, first, double, h, following)
			-- Don't allow the combinations cg, gc.
			-- Or do something else?
			if double ~= "" and double ~= first then
				error("Invalid sequence " .. first .. double .. ".")
			end

			-- c, g are soft before e, i.
			local soft = following == "e" or following == "i"
			local consonant
			if first == "c" then
				consonant = soft and "ʧ" or "k"
			else
				consonant = soft and "ʤ" or "ɡ"
			end

			return consonant .. following
		end)

	-- t is /t͡s/ before i. This has to be its own substitution: the c/g
	-- pattern above never captures a t, so a check placed inside that
	-- callback can never fire.
	transcription = mw.ustring.gsub(transcription, "ti", "ʦi")

	-- u or i (without accent) before another vowel is a semivowel.
	-- ci, gi + vowel, gli, qu must be dealt with beforehand.
	transcription = mw.ustring.gsub(transcription,
		"([iu])(" .. vowel .. ")",
		function (glide, nucleus)
			return (glide == "i" and "j" or "w") .. nucleus
		end)

	-- sc before e, i is /ʃ/.
	transcription = transcription:gsub("sʧ", "ʃ")

	-- Move stress before syllable onset, and add syllable breaks.
	-- This rule may need refinement.
	transcription = mw.ustring.gsub(transcription,
		"()(" .. not_vowel .. "?)([^" .. vowels .. stress .. "]*)(" .. stress
			.. "?)(" .. vowel .. ")",
		function (position, first, rest, syllable_divider, nucleus)
			-- beginning of word, that is, at the moment, beginning of string
			if position == 1 then
				return syllable_divider .. first .. rest .. nucleus
			end

			-- No stress mark here: use an ordinary syllable break.
			if syllable_divider == "" then
				syllable_divider = "."
			end

			if rest == "" then
				-- Single consonant (or none): it opens the new syllable.
				return syllable_divider .. first .. nucleus
			else
				-- Cluster: the first consonant closes the previous syllable.
				return first .. syllable_divider .. rest .. nucleus
			end
		end)

	if not single_character_affricates then
		-- Expand each affricate; when more affricate characters follow across
		-- an optional divider, only the stop element is written before it.
		transcription = mw.ustring.gsub(transcription, "([ʦʣʧʤ])([%." .. stress .. "]*)([ʦʣʧʤ]*)",
			function (affricate1, divider, affricate2)
				local full_affricate = full_affricates[affricate1]

				if affricate2 ~= "" then
					return mw.ustring.sub(full_affricate, 1, 1) .. divider .. full_affricate
				end

				return full_affricate .. divider
			end)
	end

	-- Drop h, hyphens and circumflexes.
	transcription = mw.ustring.gsub(transcription, "[h%-" .. circumflex .. "]", "")
	-- A stress mark already marks a syllable boundary.
	transcription = transcription:gsub("%.ˈ", "ˈ")

	return transcription
end
-- Phonetic transcription derived from the phonemic one.
-- Incomplete and currently not used by any templates.
--
-- word: spelling, passed unchanged to export.to_phonemic.
-- Returns the adjusted transcription string.
function export.to_phonetic(word)
	local result = export.to_phonemic(word)

	-- Vowels longer in stressed, open, non-word-final syllables: the stressed
	-- vowel must be followed by another vowel or by a syllable break.
	local stressed_open_nucleus =
		"(" .. stress .. not_vowel .. "*" .. vowel .. ")([" .. vowels .. "%.])"
	result = mw.ustring.gsub(result, stressed_open_nucleus, "%1" .. long .. "%2")

	-- /n/ before /ɡ/ or /k/ is [ŋ], also across a syllable break or stress mark.
	result = mw.ustring.gsub(result, "n([%.ˈ]?[ɡk])", "ŋ%1")

	return result
end
-- Template entry point: transcribes every word given to the calling template
-- and returns the result of Module:IPA's format_IPA_full.
--
-- frame: the frame object; the arguments are read from its parent frame.
function export.show(frame)
	local m_IPA = require "Module:IPA"

	local process = require "Module:parameters".process
	local parent_args = frame:getParent().args
	local param_spec = {
		-- words to transcribe; the page title is used when none are given
		[1] = { list = true, default = mw.title.getCurrentTitle().text },

		-- each parameter a series of numbers separated by commas,
		-- or a boolean, indicating that a particular z is voiced or
		-- that all of them are
		-- (accepted, but not read anywhere in this function)
		voiced = { list = true },
	}
	local args = process(parent_args, param_spec)

	-- One entry per word, with the transcription wrapped in slashes.
	local function to_pron_entry(word)
		return { pron = "/" .. export.to_phonemic(word) .. "/" }
	end

	local Array = require "Module:array"
	local transcriptions = Array(args[1]):map(to_pron_entry)

	local lang = require "Module:languages".getByCode "it"
	return m_IPA.format_IPA_full(lang, transcriptions)
end
-- Hand the table of public functions back to whatever loads this module.
return export