Module:av-pron: Difference between revisions

4,270 bytes removed ,  9 November 2019
no edit summary
No edit summary
No edit summary
Line 7: Line 7:
local circumflex = mw.ustring.char(0x302)
local circumflex = mw.ustring.char(0x302)
local acute_or_grave = "[" .. acute .. grave .. "]"
local acute_or_grave = "[" .. acute .. grave .. "]"
local vowels = "aeɛioɔu"
local vowels = "aeiou"
local vowel = "[" .. vowels .. "]"
local vowel = "[" .. vowels .. "]"
local vowel_or_semivowel = "[" .. vowels .. "jw]"
local vowel_or_semivowel = "[" .. vowels .. "jw]"
local not_vowel = "[^" .. vowels .. "]"
local not_vowel = "[^" .. vowels .. "]"
local front = "[eɛij]"
local front = "[eij]"
local fronted = mw.ustring.char(0x031F)
local fronted = mw.ustring.char(0x031F)
local voiced_consonant = "[bdɡlmnrv]"
local voiced_consonant = "[bdɡlmnrv]"
Line 21: Line 21:


-- voiced_z must be a table of integer indices, a boolean, or nil.
-- voiced_z must be a table of integer indices, a boolean, or nil.
function export.to_phonemic(word, voiced_z, single_character_affricates)
function export.to_phonemic(word, single_character_affricates)
word = mw.ustring.lower(word):gsub("'", "")
word = mw.ustring.lower(word):gsub("'", "")
-- Decompose combining characters: for instance, è → e + ◌̀
-- Decompose combining characters: for instance, è → e + ◌̀
local decomposed = mw.ustring.toNFD(word):gsub("x", "ks"):gsub("y", "i")
local decomposed = mw.ustring.toNFD(word):gsub("ch", "k"):gsub("ng$", "ŋ")
:gsub("ck", "k"):gsub("sh", "ʃ"):gsub("ng$", "ŋ")
local all_z_voiced
if type(voiced_z) == "boolean" then
all_z_voiced = voiced_z
voiced_z = nil
else
require "libraryUtil".checkTypeMulti("to_IPA", 2, voiced_z,
{ "table", "boolean", "nil" })
end
-- Transcriptions must contain an acute or grave, to indicate stress position.
-- Transcriptions must contain an acute or grave, to indicate stress position.
Line 58: Line 49:
"a(" .. grave .. "?)w",
"a(" .. grave .. "?)w",
{ [""] = vowel_count == 1 and "ɔ" or "o", [grave] = "ɔ"})
{ [""] = vowel_count == 1 and "ɔ" or "o", [grave] = "ɔ"})
-- Handle è, ò.
transcription = transcription:gsub("([eo])(" .. grave .. ")",
function (vowel, accent)
return ({ e = "ɛ", o = "ɔ" })[vowel] .. accent
end) -- e or o followed by grave
-- ci, gi + vowel
-- Do ci, gi + e, é, è sometimes contain /j/?
transcription = mw.ustring.gsub(transcription,
"([cg])([cg]?)i(" .. vowel .. ")",
function (consonant, double, vowel)
local out_consonant
if consonant == "c" then
out_consonant = "ʧ"
else
out_consonant = "ʤ"
end
if double ~= "" then
if double ~= consonant then
error("Invalid sequence " .. consonant .. double .. ".")
end
out_consonant = out_consonant .. out_consonant
end
return out_consonant .. vowel
end)
-- Handle gl and gn.
-- Handle gl and gn.
transcription = mw.ustring.gsub(transcription,
transcription = mw.ustring.gsub(transcription,
"(g[nl])(.?)()",
"(g[n])(.?)()",
function (digraph, after, pos)
function (digraph, after, pos)
local consonant
local consonant
if digraph == "gn" then
if digraph == "gn" then
consonant = "ɲ"
consonant = "ɲ"
-- gli is /ʎi/, or /ʎ/ before a vowel
elseif after == "i" then
consonant = "ʎ"
local following = mw.ustring.sub(transcription, pos, pos)
local following = mw.ustring.sub(transcription, pos, pos)
Line 121: Line 79:
end
end
-- c, g is soft before e, i.
-- t is /t͡s/ before i.
local consonant
local consonant
if (next == "e" or next == "ɛ" or next == "i") and second ~= "h" then
if (next == "i") then
if first == "t" then
consonant = "ʦ"
else
consonant = consonant
end
end
-- c, g are soft before e, i.
local consonant
if (next == "e" or next == "i") then
if first == "c" then
if first == "c" then
consonant = "ʧ"
consonant = "ʧ"
Line 135: Line 103:
consonant = "ɡ"
consonant = "ɡ"
end
end
end
if double ~= "" then
consonant = consonant .. consonant
end
end
return consonant .. next
return consonant .. next
end)
end)
-- ⟨qu⟩ represents /kw/.
transcription = transcription:gsub("qu", "kw")
-- u or i (without accent) before another vowel is a semivowel.
-- u or i (without accent) before another vowel is a semivowel.
Line 163: Line 124:
-- sc before e, i is /ʃ/, doubled after a vowel.
-- sc before e, i is /ʃ/, doubled after a vowel.
transcription = transcription:gsub("sʧ", "ʃ")
transcription = transcription:gsub("sʧ", "ʃ")
-- ⟨z⟩ represents /t͡s/ or /d͡z/; no way to determine which.
-- For now, /t͡s/ is the default.
local before_izzare = mw.ustring.match(
transcription,
"(.-" .. vowel .. not_vowel .. "*)izza" .. acute_or_grave .. "?re$")
if before_izzare then
transcription = before_izzare
end
local z_index = 0
transcription = mw.ustring.gsub(
transcription,
"()(z+)(.?)",
function (pos, z, after)
local length = #z
if length > 2 then
error("Too many z's in a row!")
end
z_index = z_index + 1
local voiced = voiced_z and require "Module:table".contains(voiced_z, z_index)
or all_z_voiced
if pos == 1 then
if mw.ustring.find(transcription, "^[ij]" .. acute_or_grave .. "?" .. vowel, pos + #z) then
voiced = false
elseif mw.ustring.find(transcription, "^" .. vowel .. acute_or_grave .. "?" .. vowel, pos + #z) then
voiced = true
end
-- check whether followed by two vowels
-- check onset of next syllable
else
if mw.ustring.find(after, vowel_or_semivowel) then
local before = mw.ustring.sub(transcription, pos - 2, pos - 1)
if mw.ustring.find(before, vowel_or_semivowel .. acute_or_grave .. "?$") then
if length == 1 and mw.ustring.find(after, vowel)
and mw.ustring.find(before, vowel) then
voiced = true
end
length = 2
end
if mw.ustring.sub(transcription, pos + #z, pos + #z + 1) == "i" .. circumflex then
voiced = false
end
end
end
return (voiced and "ʣ" or "ʦ"):rep(length) .. after
end)
if before_izzare then
transcription = transcription .. mw.ustring.toNFD("iʣʣàre")
end
-- Replace acute and grave with stress mark.
transcription = mw.ustring.gsub(transcription,
"(" .. vowel .. ")" .. acute_or_grave, stress .. "%1")
-- Single ⟨s⟩ between vowels is /z/.
transcription = mw.ustring.gsub(transcription,
"(" .. vowel .. ")s(" .. stress .. "?" .. vowel .. ")", "%1z%2")
-- ⟨s⟩ immediately before a voiced consonant is always /z/
transcription = mw.ustring.gsub(transcription,
"s(" .. voiced_consonant .. ")", "z%1")
-- After a vowel, /ʃ ʎ ɲ/ are doubled.
-- [[w:Italian phonology]] says word-internally, [[w:Help:IPA/Italian]] says
-- after a vowel.
transcription = mw.ustring.gsub(transcription,
"(" .. vowel .. ")([ʃʎɲ])", "%1%2%2")
-- Move stress before syllable onset, and add syllable breaks.
-- Move stress before syllable onset, and add syllable breaks.
Line 282: Line 167:


-- Incomplete and currently not used by any templates.
-- Incomplete and currently not used by any templates.
function export.to_phonetic(word, voiced_z)
function export.to_phonetic(word)
local phonetic = export.to_phonemic(word, voiced_z)
local phonetic = export.to_phonemic(word)
-- Vowels longer in stressed, open, non-word-final syllables.
-- Vowels longer in stressed, open, non-word-final syllables.
Line 293: Line 178:
phonetic = mw.ustring.gsub(phonetic,
phonetic = mw.ustring.gsub(phonetic,
"n([%.ˈ]?[ɡk])", "ŋ%1")
"n([%.ˈ]?[ɡk])", "ŋ%1")
-- Imperfect: doesn't convert geminated k, g properly.
phonetic = mw.ustring.gsub(phonetic,
"([kg])(" .. front .. ")",
"%1" .. fronted .. "%2")
:gsub("a", "ä")
:gsub("n", "n̺") -- Converts n before a consonant, which is incorrect.
return phonetic
return phonetic
Line 320: Line 198:
local Array = require "Module:array"
local Array = require "Module:array"
local voiced_z = Array(args.voiced)
:map(function (param)
param = Array(mw.text.split(param, "%s*,%s*"))
:map(
function (item, i)
return tonumber(item)
or i == 1 and require "Module:yesno"(item) -- Rejects false values.
or error("Invalid input '" .. item .."' in |voiced= parameter. "
.. "Expected number or boolean.")
end)
if not param[2] and type(param[1]) == "boolean" then
param = param[1]
end
return param
end)
local transcriptions = Array(args[1])
local transcriptions = Array(args[1])
:map(
:map(
function (word, i)
function (word, i)
return { pron = "/" .. export.to_phonemic(word, voiced_z[i]) .. "/" }
return { pron = "/" .. export.to_phonemic(word) .. "/" }
end)
end)