Module:av-pron: Difference between revisions

From Linguifex
Jump to navigation Jump to search
No edit summary
No edit summary
Line 15: Line 15:
--"c" & "g" before "i" and "e" and all that stuff
--"c" & "g" before "i" and "e" and all that stuff
word = mw.ustring.gsub(word, "c([ie])" .. "%1")
word = mw.ustring.gsub(word, "c([ie])" .. "t͡ʃ%1")
word = mw.ustring.gsub(word, "g([ie])", "x%1")
word = mw.ustring.gsub(word, "g([ie])", "d͡ʒ%1")
word = mw.ustring.gsub(word, "t([i])" .. "t͡s%1")
word = mw.ustring.gsub(word, "sc([ie])" .. "ʃ%1")


table.insert(debug, word)
table.insert(debug, word)
--alphabet-to-phoneme
--alphabet-to-phoneme
word = mw.ustring.gsub(word, "qu", "c")
word = mw.ustring.gsub(word, "ch", "k")  
word = mw.ustring.gsub(word, "ch", "ʃ") --not the real sound
word = mw.ustring.gsub(word, "sc","ʃ")
word = mw.ustring.gsub(word, '[cgjñrvy]',
word = mw.ustring.gsub(word, '[cg]',
--['g']='ɡ':  U+0067 LATIN SMALL LETTER G → U+0261 LATIN SMALL LETTER SCRIPT G
--['g']='ɡ':  U+0067 LATIN SMALL LETTER G → U+0261 LATIN SMALL LETTER SCRIPT G
{['c']='k', ['g']='ɡ', ['gn']='ɲ', })
{['c']='k', ['g']='ɡ'})
-- trill in #r, lr, nr, rr
local match_count = 0
word = mw.ustring.gsub(
word,
'(.?)ɾ(.?)',
function (before, after)
match_count = match_count + 1
-- mw.log(word, before, after)
if match_count == 1 and before == '' or before == 'l' or before == 'n'
or after ~= '' and ('bdfɡklʎmnɲpstxzʃɟ'):match(after) then
return before .. 'r' .. after
elseif before == 'ɾ' then
return 'r' .. after
elseif after == 'ɾ' then
return before .. 'r'
end
end)
word = mw.ustring.gsub(word, 'n([bm])', 'm%1')
word = mw.ustring.gsub(word, 'n([bm])', 'm%1')

Revision as of 22:08, 8 November 2019

Documentation for this module may be created at Module:av-pron/doc

-- Module table; the public entry points are attached to it below and it is
-- returned at the end of the file.
local export = {}
	
	-- NOTE(review): the header of the enclosing function is not visible in this
	-- listing, so `word`, `do_debug` and `debug` have no visible declaration
	-- here -- confirm they are parameters/locals and not accidental globals.

	-- When called with a table (presumably a Scribunto frame -- TODO confirm),
	-- take the word from args[1] and the debug switch from args[4].
	if type(word) == 'table' then
		do_debug = word.args[4]
		word = word.args[1]
	end
	-- Untouched copy of the input; only referenced by a commented-out log call
	-- further down.
	local orig_word = word
	-- Lower-case the input, falling back to the current page title when no
	-- word was supplied.
	word = mw.ustring.lower(word or mw.title.getCurrentTitle().text)
	-- Drop every character that is not one of the listed letters or the "."
	-- syllable separator. Note that "h", "j", "q", "z" and accented vowels are
	-- not in the list, so they are removed here.
	word = mw.ustring.gsub(word, "[^abcdefgilmnoprstuv.]", "")
	
	-- Record the normalised spelling as the first debug snapshot.
	table.insert(debug, word)
	
	-- Character classes used to build the syllabification patterns.
	local V = "[aeiou]" -- vowel
	local C = "[^aeiou.]" -- consonant
	
	--"c" & "g" before "i" and "e" and all that stuff
	-- Each rule passes the pattern and the replacement as two separate
	-- arguments. (Joining them with ".." produced a single pattern string and
	-- left gsub without a replacement argument.)
	-- "sc" is rewritten first: if the plain "c" rule ran before it, the "c"
	-- would already be consumed and the "sc" rule could never match.
	word = mw.ustring.gsub(word, "sc([ie])", "ʃ%1")
	word = mw.ustring.gsub(word, "c([ie])", "t͡ʃ%1")
	word = mw.ustring.gsub(word, "g([ie])", "d͡ʒ%1")
	-- "t" before "i" becomes the affricate t͡s.
	word = mw.ustring.gsub(word, "t([i])", "t͡s%1")
	

	-- Debug snapshot after the context-sensitive consonant rules.
	table.insert(debug, word)
	
	--alphabet-to-phoneme
	-- Context-free letter-to-sound replacements applied to whatever the
	-- earlier rules left untouched.
	-- NOTE(review): the character filter applied to `word` earlier removes
	-- "h" and "z", and none of the replacements above introduce them, so the
	-- "ch" rule and the "z" rule below cannot match as written -- confirm
	-- whether those letters should be allowed through the filter.
	word = mw.ustring.gsub(word, "ch", "k") 
	word = mw.ustring.gsub(word, "sc","ʃ")
	-- Remaining "c" and "g" become the plain velar stops.
	word = mw.ustring.gsub(word, '[cg]',
		--['g']='ɡ':  U+0067 LATIN SMALL LETTER G → U+0261 LATIN SMALL LETTER SCRIPT G
		{['c']='k', ['g']='ɡ'})
	
	-- "n" before "b" or "m" is written as "m".
	word = mw.ustring.gsub(word, 'n([bm])', 'm%1')
	-- `LatinAmerica` selects between keeping "z" and using "θ".
	word = mw.ustring.gsub(word, 'z', LatinAmerica and 'z' or 'θ') -- not the real LatAm sound
	
	-- Debug snapshot after the letter-to-sound replacements.
	table.insert(debug, word)
	
	--syllable division
	-- Inserts "." between syllables. The first pattern previously concatenated
	-- an undefined `W` (glide class), which raises an error when the pattern
	-- string is built. The optional glide is dropped here: the character
	-- filter applied to `word` earlier removes "j" and "w", so a glide class
	-- could not have matched anything anyway.
	-- Each rule is applied twice, presumably because gsub matches cannot
	-- overlap and a vowel may have to serve as the end of one match and the
	-- start of the next.

	-- V.CV: a single consonant between vowels starts the next syllable.
	for _ = 1, 2 do
		word = mw.ustring.gsub(word,
			"(" .. V .. ")(" .. C .. V .. ")",
			"%1.%2")
	end
	-- VC.CV: two consonants between vowels are split down the middle.
	for _ = 1, 2 do
		word = mw.ustring.gsub(word,
			"(" .. V .. C .. ")(" .. C .. V .. ")",
			"%1.%2")
	end
	-- VC.CCV: with three consonants, the first one closes the syllable.
	for _ = 1, 2 do
		word = mw.ustring.gsub(word,
			"(" .. V .. C .. ")(" .. C .. C .. V .. ")",
			"%1.%2")
	end
	-- A stop followed by "l" or "ɾ" is kept together at the syllable start.
	word = mw.ustring.gsub(word, "([pbktdɡ])%.([lɾ])", ".%1%2")
	-- An "s" caught between two consonants goes with the preceding syllable.
	word = mw.ustring.gsub(word, "(" .. C .. ")%.s(" .. C .. ")", "%1s.%2")
	-- Adjacent non-high vowels, and doubled "i" / "u", are split into
	-- separate syllables.
	word = mw.ustring.gsub(word, "([aeo])([aeo])", "%1.%2")
	word = mw.ustring.gsub(word, "([i])([i])", "%1.%2")
	word = mw.ustring.gsub(word, "([u])([u])", "%1.%2")

	-- Debug snapshot of the syllabified word.
	table.insert(debug, word)
	
	--syllables nasalized if ending with "n", voiceless consonants in syllable-final position to voiced
	-- `syllables` was used below without ever being defined, so `#syllables`
	-- raised an error. It is now built by splitting the word at the "."
	-- boundaries inserted by the syllable-division step.
	-- NOTE(review): confirm this is the intended source of `syllables`.
	local syllables = mw.text.split(word, '%.')
	local remove_accent = { ['á'] = 'a', ['é'] = 'e', ['í'] = 'i', ['ó'] = 'o', ['ú'] = 'u'}
	local nasalize = { ['a'] = 'ã', ['e'] = 'ẽ', ['i'] = 'ĩ', ['o'] = 'õ', ['u'] = 'ũ' }
	for i = 1, #syllables do
		-- Strip acute accents from vowels.
		syllables[i] = mw.ustring.gsub(syllables[i], '[áéíóú]', remove_accent)
		-- Phonetic mode only: nasalise the vowels of a syllable that ends in a
		-- nasal, optionally followed by one more consonant.
		if phonetic and mw.ustring.find(syllables[i], '[mnɲ]' .. C .. '?$') then
			syllables[i] = mw.ustring.gsub(syllables[i], '[aeiou]', nasalize)
		end
		-- Voice a syllable-final voiceless stop.
		syllables[i] = mw.ustring.gsub(syllables[i], '[ptk]$', { ['p'] = 'b', ['t'] = 'd', ['k'] = 'ɡ' })
	end
	-- Re-join without a separator, as the original statement did; the "."
	-- syllable boundaries are therefore not present in `word` from here on.
	word = table.concat(syllables)
	
	--real sound of LatAm Z
	-- Any "z" left in the word is rewritten as "s".
	word = mw.ustring.gsub(word, 'z', 's')
	--secondary stress
	-- When more than one stress mark is present, earlier primary marks (ˈ) are
	-- demoted to secondary (ˌ) so that only a later one stays primary.
	-- NOTE(review): no visible code inserts ˈ or ˌ into `word`, so these three
	-- rules only have an effect if stress marks are added elsewhere -- confirm.
	word = mw.ustring.gsub(word, 'ˈ(.+)ˈ', 'ˌ%1ˈ')
	word = mw.ustring.gsub(word, 'ˈ(.+)ˌ', 'ˌ%1ˌ')
	word = mw.ustring.gsub(word, 'ˌ(.+)ˈ(.+)ˈ', 'ˌ%1ˌ%2ˈ')

	--phonetic transcription
	-- Allophonic detail, applied only when `phonetic` is truthy. The steps are
	-- order-dependent: each one operates on the output of the previous one.
	if phonetic then
		--θ, s, f before voiced consonants
		local voiced = 'mnɲbdɟɡʎ'
		local r = 'ɾr'
		-- Voiced counterpart of each fricative handled below.
		local tovoiced = {
			['θ'] = 'θ̬',
			['s'] = 'z',
			['f'] = 'v',
		}
		-- gsub callback: replaces the fricative by its voiced counterpart and
		-- keeps the following (optional stress mark + consonant) unchanged.
		local function voice(sound, following)
			return tovoiced[sound]..following
		end
		-- θ and s voice before voiced consonants and before the rhotics;
		-- f voices before voiced consonants only.
		word = mw.ustring.gsub(word, '([θs])([ˈˌ]?['..voiced..r..'])', voice)
		word = mw.ustring.gsub(word, '(f)([ˈˌ]?['..voiced..'])', voice)
		
		local stop_to_fricative = {['b']='β', ['d']='ð', ['ɟ']='ʝ', ['ɡ']='ɣ'}
		local fricative_to_stop = {['β']='b', ['ð']='d', ['ʝ']='ɟ', ['ɣ']='ɡ'}
		--lots of allophones going on
		-- First turn every voiced stop into its fricative counterpart, then
		-- turn selected ones back into stops depending on what precedes them.
		word = mw.ustring.gsub(word, '[bdɟɡ]', stop_to_fricative)
		word = mw.ustring.gsub(
			word,
			'()([ˈˌ]?)([βðɣʝ])',
			function (pos, stress, fricative)
				-- Matching the character before the fricative in the pattern
				-- doesn't work because sometimes there are two fricatives in
				-- a row.
				-- `pos` is the position where the match starts (before the
				-- optional stress mark); `before` is the character just
				-- before it, or false at the very start of the word.
				local before = pos > 1 and mw.ustring.sub(word, pos - 1, pos - 1)
				-- mw.log(orig_word, before, stress, fricative)
				-- Restore the stop at the start of the word, after a nasal
				-- (for ɣ and β), or after a lateral or nasal (for ð and ʝ).
				if not before or (fricative == 'ɣ' or fricative == 'β') and ('mnɲ'):find(before)
						or (fricative == 'ð' or fricative == 'ʝ') and ('lʎmnɲ'):find(before) then
					return stress .. fricative_to_stop[fricative]
				end -- else no change
			end)
		-- Mark t and d as dental by adding the diacritic after them.
		word = mw.ustring.gsub(word, '[td]', {['t']='t̪', ['d']='d̪'})
		--nasal assimilation before consonants
		local labiodental, dentialveolar, dental, alveolopalatal, palatal, velar =
			'ɱ', 'n̪', 'n̟', 'nʲ', 'ɲ', 'ŋ'
		-- Maps the consonant that follows "n" to the nasal used before it.
		local nasal_assimilation = {
			['f'] = labiodental,
			['t'] = dentialveolar, ['d'] = dentialveolar,
			['θ'] = dental,
			['ʃ'] = alveolopalatal,
			['ɟ'] = palatal, ['ʎ'] = palatal,
			['k'] = velar, ['x'] = velar, ['ɡ'] = velar,
		}
		
		-- The pattern needs a character after the "n", so a word-final "n" is
		-- left alone; characters not listed in the table keep a plain "n".
		word = mw.ustring.gsub(
			word,
			'n([ˈˌ]?)(.)',
			function (stress, following)
				return (nasal_assimilation[following] or 'n') .. stress .. following
			end)
		--lateral assimilation before consonants
		-- Same idea for "l": its variant is chosen by the following character.
		word = mw.ustring.gsub(
			word,
			'l([ˈˌ]?)(.)',
			function (stress, following)
				local l = 'l'
				if following == 't' or following == 'd' then -- dentialveolar
					l = 'l̪'
				elseif following == 'θ' then -- dental
					l = 'l̟'
				elseif following == 'ʃ' then -- alveolopalatal
					l = 'lʲ'
				end
				return l .. stress .. following
			end)
		--semivowels
		-- A vowel followed by an i-like or u-like vowel gets the combining
		-- mark from the replacement string appended after the pair.
		word = mw.ustring.gsub(word, '([aeouãẽõũ][iïĩ])', '%1̯')
		word = mw.ustring.gsub(word, '([aeioãẽĩõ][uũ])', '%1̯')
	end
	
	-- Debug snapshot before the final symbol clean-up.
	table.insert(debug, word)
	
	-- Final clean-up: remove "h" and expand the single-character placeholders
	-- into their full transcriptions.
	-- NOTE(review): every ʃ becomes t͡ʃ here, including ʃ produced by the
	-- "sc" rules earlier -- confirm that is intended. No visible code
	-- introduces "h", ɟ or ï, so those three rules look inert as written.
	word = mw.ustring.gsub(word, 'h', '') --silent "h"
	word = mw.ustring.gsub(word, 'ʃ', 't͡ʃ') --fake "ch" to real "ch"
	word = mw.ustring.gsub(word, 'ɟ', 'ɟ͡ʝ') --fake "y" to real "y"
	word = mw.ustring.gsub(word, 'ï', 'i') --fake "y$" to real "y$"
	
	-- With do_debug == 'yes' the collected snapshots are appended directly
	-- (empty separator) to the result; otherwise only the transcription is
	-- returned.
	if do_debug == 'yes' then
		return word .. table.concat(debug, "")
	else
		return word
	end


--- Phonetic-transcription entry point.
-- Forwards `frame` to export.show with the two flags (false, true).
-- NOTE(review): the local names below assume export.show's second and third
-- parameters are the Latin-America switch and the phonetic switch; its
-- header is not visible here -- confirm.
-- @param frame value passed through unchanged to export.show
-- @return whatever export.show returns
function export.phonetic(frame)
	local latin_america, phonetic = false, true
	return export.show(frame, latin_america, phonetic)
end

--- Phonetic-transcription entry point with the Latin-America flag set.
-- Forwards `frame` to export.show with the two flags (true, true).
-- NOTE(review): the local names below assume export.show's second and third
-- parameters are the Latin-America switch and the phonetic switch; its
-- header is not visible here -- confirm.
-- @param frame value passed through unchanged to export.show
-- @return whatever export.show returns
function export.phoneticLatinAmerica(frame)
	local latin_america, phonetic = true, true
	return export.show(frame, latin_america, phonetic)
end

-- Hand the module table (with the entry points defined above) to the caller.
return export