Module:av-pron

From Linguifex
Revision as of 21:46, 8 November 2019 by Sware (talk | contribs) (Created page with "local export = {} if type(word) == 'table' then do_debug = word.args[4] word = word.args[1] end local orig_word = word word = mw.ustring.lower(word or mw.title.getCu...")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

Documentation for this module may be created at Module:av-pron/doc

local export = {}

--- Turn a spelled word into a pronunciation transcription.
--
-- The routine lower-cases the word, strips every character outside the
-- accepted alphabet, rewrites letters into phoneme symbols, inserts syllable
-- breaks, places stress marks and, when `phonetic` is truthy, applies the
-- allophonic rules (voicing, stop/fricative alternation, nasal and lateral
-- assimilation, semivowels).
--
-- @param word          string to transcribe, or a table with an `args` field
--                      (in that case `args[1]` is the word and `args[4]` the
--                      debug flag); when nil the current page title is used
-- @param LatinAmerica  truthy: "c" before "i"/"e" (and "z") end up as "s";
--                      falsy: they end up as "θ"
-- @param phonetic      truthy: also run the allophonic rules
-- @param do_debug      the string 'yes' appends the recorded intermediate
--                      forms to the returned transcription
-- @return the transcription as a string
function export.show(word, LatinAmerica, phonetic, do_debug)
	-- Intermediate forms for the optional debug output. This must be a local
	-- table: the bare name `debug` would refer to Lua's debug library.
	local trace = {}

	if type(word) == 'table' then
		do_debug = word.args[4]
		word = word.args[1]
	end
	word = mw.ustring.lower(word or mw.title.getCurrentTitle().text)
	-- Keep only the accepted letters and the "." separator.
	word = mw.ustring.gsub(word, "[^abcdefgilmnoprstuv.]", "")

	table.insert(trace, word)

	local V = "[aeiou]" -- vowel
	-- NOTE(review): W was referenced by the syllable rules but never defined,
	-- which made the concatenation fail on nil. "[jw]" is an assumed glide
	-- class; the filter above removes "j" and "w", so this optional class
	-- cannot match at that point and the exact choice does not alter output.
	local W = "[jw]" -- glide
	local C = "[^aeiou.]" -- consonant
	--determining whether "y" is a consonant or a vowel + diphthongs, "-mente" suffix
	-- (the filter above drops "y" and "h", so only the "-mente" rule can fire)
	word = mw.ustring.gsub(word, "y(" .. C .. ")", "i%1")
	word = mw.ustring.gsub(word, "y(" .. V .. ")", "ɟ%1") -- not the real sound
	word = mw.ustring.gsub(word, "hi(" .. V .. ")", "ɟ%1")
	word = mw.ustring.gsub(word, "y$", "ï")
	word = mw.ustring.gsub(word, "mente$", "ménte")

	--"c" & "g" before "i" and "e" and all that stuff
	-- NOTE(review): this call used to have its replacement concatenated onto
	-- the pattern, leaving gsub without a third argument. The replacement
	-- below is a reconstruction: the later "z" rules need a source of "z",
	-- and the input filter removes the letter itself -- confirm intent.
	word = mw.ustring.gsub(word, "c([ie])", (LatinAmerica and "z" or "θ") .. "%1")
	word = mw.ustring.gsub(word, "g([ie])", "x%1")

	table.insert(trace, word)

	--alphabet-to-phoneme
	word = mw.ustring.gsub(word, "qu", "c")
	word = mw.ustring.gsub(word, "ch", "ʃ") --not the real sound
	-- Characters without an entry in the table are left unchanged by gsub.
	-- The 'gn' key can never be looked up because the pattern matches one
	-- character at a time.
	word = mw.ustring.gsub(word, '[cgjñrvy]',
		--['g']='ɡ':  U+0067 LATIN SMALL LETTER G → U+0261 LATIN SMALL LETTER SCRIPT G
		{['c']='k', ['g']='ɡ', ['gn']='ɲ', })

	-- trill in #r, lr, nr, rr
	-- NOTE(review): nothing above produces "ɾ" any more, so this pass
	-- currently has nothing to match -- confirm whether "r" should map to it.
	local match_count = 0
	word = mw.ustring.gsub(
		word,
		'(.?)ɾ(.?)',
		function (before, after)
			match_count = match_count + 1
			-- Plain find: `after` is data, not a pattern (it may be ".").
			if match_count == 1 and before == '' or before == 'l' or before == 'n'
					or after ~= '' and ('bdfɡklʎmnɲpstxzʃɟ'):find(after, 1, true) then
				return before .. 'r' .. after
			elseif before == 'ɾ' then
				return 'r' .. after
			elseif after == 'ɾ' then
				return before .. 'r'
			end
			-- returning nothing keeps the matched text as it was
		end)

	word = mw.ustring.gsub(word, 'n([bm])', 'm%1')
	word = mw.ustring.gsub(word, 'z', LatinAmerica and 'z' or 'θ') -- not the real LatAm sound

	table.insert(trace, word)

	--syllable division
	-- Each rule runs twice because one pass cannot handle overlapping matches.
	for _ = 1, 2 do
		word = mw.ustring.gsub(word,
			"(" .. V .. ")(" .. C .. W .. "?" .. V .. ")",
			"%1.%2")
	end
	for _ = 1, 2 do
		word = mw.ustring.gsub(word,
			"(" .. V .. C .. ")(" .. C .. V .. ")",
			"%1.%2")
	end
	for _ = 1, 2 do
		word = mw.ustring.gsub(word,
			"(" .. V .. C .. ")(" .. C .. C .. V .. ")",
			"%1.%2")
	end
	word = mw.ustring.gsub(word, "([pbktdɡ])%.([lɾ])", ".%1%2")
	word = mw.ustring.gsub(word, "(" .. C .. ")%.s(" .. C .. ")", "%1s.%2")
	word = mw.ustring.gsub(word, "([aeoáéíóú])([aeoáéíóú])", "%1.%2")
	word = mw.ustring.gsub(word, "([ií])([ií])", "%1.%2")
	word = mw.ustring.gsub(word, "([uú])([uú])", "%1.%2")

	table.insert(trace, word)

	--diphthongs
	word = mw.ustring.gsub(word, 'ih?([aeouáéóú])', 'j%1')
	word = mw.ustring.gsub(word, 'uh?([aeioáéíó])', 'w%1')

	table.insert(trace, word)

	--accentuation
	local syllables = mw.text.split(word, "%.")
	if mw.ustring.find(word, "[áéíóú]") then
		-- an accented vowel marks its own syllable as stressed
		for i = 1, #syllables do
			if mw.ustring.find(syllables[i], "[áéíóú]") then
				syllables[i] = "ˈ"..syllables[i]
			end
		end
	else
		if mw.ustring.find(word, "[^aeiouns]$") then
			-- ends in a consonant other than n/s: stress the last syllable
			syllables[#syllables] = "ˈ" .. syllables[#syllables]
		else
			-- otherwise stress the second-to-last syllable, if there is one
			if #syllables > 1 then
				syllables[#syllables-1] = "ˈ" .. syllables[#syllables-1]
			end
		end
	end

	table.insert(trace, word)

	--syllables nasalized if ending with "n", voiceless consonants in syllable-final position to voiced
	local remove_accent = { ['á'] = 'a', ['é'] = 'e', ['í'] = 'i', ['ó'] = 'o', ['ú'] = 'u'}
	local nasalize = { ['a'] = 'ã', ['e'] = 'ẽ', ['i'] = 'ĩ', ['o'] = 'õ', ['u'] = 'ũ' }
	for i = 1, #syllables do
		syllables[i] = mw.ustring.gsub(syllables[i], '[áéíóú]', remove_accent)
		if phonetic and mw.ustring.find(syllables[i], '[mnɲ]' .. C .. '?$') then
			syllables[i] = mw.ustring.gsub(syllables[i], '[aeiou]', nasalize)
		end
		syllables[i] = mw.ustring.gsub(syllables[i], '[ptk]$', { ['p'] = 'b', ['t'] = 'd', ['k'] = 'ɡ' })
	end
	-- joined without a separator: the syllable dots are dropped from here on
	word = table.concat(syllables)

	--real sound of LatAm Z
	word = mw.ustring.gsub(word, 'z', 's')
	--secondary stress
	word = mw.ustring.gsub(word, 'ˈ(.+)ˈ', 'ˌ%1ˈ')
	word = mw.ustring.gsub(word, 'ˈ(.+)ˌ', 'ˌ%1ˌ')
	word = mw.ustring.gsub(word, 'ˌ(.+)ˈ(.+)ˈ', 'ˌ%1ˌ%2ˈ')

	--phonetic transcription
	if phonetic then
		--θ, s, f before voiced consonants
		local voiced = 'mnɲbdɟɡʎ'
		local r = 'ɾr'
		local tovoiced = {
			['θ'] = 'θ̬',
			['s'] = 'z',
			['f'] = 'v',
		}
		local function voice(sound, following)
			return tovoiced[sound]..following
		end
		word = mw.ustring.gsub(word, '([θs])([ˈˌ]?['..voiced..r..'])', voice)
		word = mw.ustring.gsub(word, '(f)([ˈˌ]?['..voiced..'])', voice)

		local stop_to_fricative = {['b']='β', ['d']='ð', ['ɟ']='ʝ', ['ɡ']='ɣ'}
		local fricative_to_stop = {['β']='b', ['ð']='d', ['ʝ']='ɟ', ['ɣ']='ɡ'}
		--lots of allophones going on
		-- First turn every voiced stop into a fricative, then restore the
		-- stop word-initially and after the listed nasals/laterals.
		word = mw.ustring.gsub(word, '[bdɟɡ]', stop_to_fricative)
		word = mw.ustring.gsub(
			word,
			'()([ˈˌ]?)([βðɣʝ])',
			function (pos, stress, fricative)
				-- Matching the character before the fricative in the pattern
				-- doesn't work because sometimes there are two fricatives in
				-- a row.
				local before = pos > 1 and mw.ustring.sub(word, pos - 1, pos - 1)
				if not before or (fricative == 'ɣ' or fricative == 'β') and ('mnɲ'):find(before)
						or (fricative == 'ð' or fricative == 'ʝ') and ('lʎmnɲ'):find(before) then
					return stress .. fricative_to_stop[fricative]
				end -- else no change
			end)
		word = mw.ustring.gsub(word, '[td]', {['t']='t̪', ['d']='d̪'})
		--nasal assimilation before consonants
		local labiodental, dentialveolar, dental, alveolopalatal, palatal, velar =
			'ɱ', 'n̪', 'n̟', 'nʲ', 'ɲ', 'ŋ'
		local nasal_assimilation = {
			['f'] = labiodental,
			['t'] = dentialveolar, ['d'] = dentialveolar,
			['θ'] = dental,
			['ʃ'] = alveolopalatal,
			['ɟ'] = palatal, ['ʎ'] = palatal,
			['k'] = velar, ['x'] = velar, ['ɡ'] = velar,
		}

		word = mw.ustring.gsub(
			word,
			'n([ˈˌ]?)(.)',
			function (stress, following)
				return (nasal_assimilation[following] or 'n') .. stress .. following
			end)
		--lateral assimilation before consonants
		word = mw.ustring.gsub(
			word,
			'l([ˈˌ]?)(.)',
			function (stress, following)
				local l = 'l'
				if following == 't' or following == 'd' then -- dentialveolar
					l = 'l̪'
				elseif following == 'θ' then -- dental
					l = 'l̟'
				elseif following == 'ʃ' then -- alveolopalatal
					l = 'lʲ'
				end
				return l .. stress .. following
			end)
		--semivowels
		word = mw.ustring.gsub(word, '([aeouãẽõũ][iïĩ])', '%1̯')
		word = mw.ustring.gsub(word, '([aeioãẽĩõ][uũ])', '%1̯')
	end

	table.insert(trace, word)

	-- Replace the one-character placeholders used during processing.
	word = mw.ustring.gsub(word, 'h', '') --silent "h"
	word = mw.ustring.gsub(word, 'ʃ', 't͡ʃ') --fake "ch" to real "ch"
	word = mw.ustring.gsub(word, 'ɟ', 'ɟ͡ʝ') --fake "y" to real "y"
	word = mw.ustring.gsub(word, 'ï', 'i') --fake "y$" to real "y$"

	if do_debug == 'yes' then
		return word .. table.concat(trace, "")
	else
		return word
	end
end


--- Entry point that delegates to export.show, passing false as the second
-- argument and true as the third.
-- @param frame  forwarded untouched as the first argument of export.show
-- @return whatever export.show returns
function export.phonetic(frame)
	local latin_america, phonetic = false, true
	return export.show(frame, latin_america, phonetic)
end

--- Entry point that delegates to export.show, passing true as both the
-- second and the third argument.
-- @param frame  forwarded untouched as the first argument of export.show
-- @return whatever export.show returns
function export.phoneticLatinAmerica(frame)
	local latin_america, phonetic = true, true
	return export.show(frame, latin_america, phonetic)
end

-- Hand the table of entry points back to whoever loads this module.
return export