|
|
(5 intermediate revisions by the same user not shown) |
Line 1: |
Line 1: |
| local export = {}
| |
|
| |
| if type(word) == 'table' then
| |
| do_debug = word.args[4]
| |
| word = word.args[1]
| |
| end
| |
| local orig_word = word
| |
| word = mw.ustring.lower(word or mw.title.getCurrentTitle().text)
| |
| word = mw.ustring.gsub(word, "[^abcdefgilmnoprstuv.]", "")
| |
|
| |
| table.insert(debug, word)
| |
|
| |
| local V = "[aeiou]" -- vowel
| |
| local C = "[^aeiou.]" -- consonant
| |
|
| |
| -- Soft consonants before front vowels: "sc", "c" and "g" before "i"/"e",
| -- and "t" before "i". The matched vowel is kept via the %1 back-reference.
| -- Each call passes pattern and replacement as separate arguments; joining
| -- them with ".." would hand gsub a single string and no replacement.
| |
| -- "sc" must be handled before plain "c": once the "c" rule has run there is
| -- no "c" left in front of "i"/"e" for the "sc" pattern to match.
| -- NOTE(review): "ʃ" is rewritten again near the end of this function
| -- (the 'ʃ' -> 't͡ʃ' step) -- confirm that is still wanted for this output.
| |
| word = mw.ustring.gsub(word, "sc([ie])", "ʃ%1")
| |
| word = mw.ustring.gsub(word, "c([ie])", "t͡ʃ%1")
| |
| word = mw.ustring.gsub(word, "g([ie])", "d͡ʒ%1")
| |
| word = mw.ustring.gsub(word, "t([i])", "t͡s%1")
| |
|
| |
|
| |
|
| table.insert(debug, word)
| |
|
| |
| --alphabet-to-phoneme
| |
| word = mw.ustring.gsub(word, "ch", "k")
| |
| word = mw.ustring.gsub(word, "sc","ʃ")
| |
| word = mw.ustring.gsub(word, '[cg]',
| |
| --['g']='ɡ': U+0067 LATIN SMALL LETTER G → U+0261 LATIN SMALL LETTER SCRIPT G
| |
| {['c']='k', ['g']='ɡ'})
| |
|
| |
| word = mw.ustring.gsub(word, 'n([bm])', 'm%1')
| |
| word = mw.ustring.gsub(word, 'z', LatinAmerica and 'z' or 'θ') -- not the real LatAm sound
| |
|
| |
| table.insert(debug, word)
| |
|
| |
| --syllable division
| |
| for _ = 1, 2 do
| |
| word = mw.ustring.gsub(word,
| |
| "(" .. V .. ")(" .. C .. W .. "?" .. V .. ")",
| |
| "%1.%2")
| |
| end
| |
| for _ = 1, 2 do
| |
| word = mw.ustring.gsub(word,
| |
| "(" .. V .. C .. ")(" .. C .. V .. ")",
| |
| "%1.%2")
| |
| end
| |
| for _ = 1, 2 do
| |
| word = mw.ustring.gsub(word,
| |
| "(" .. V .. C .. ")(" .. C .. C .. V .. ")",
| |
| "%1.%2")
| |
| end
| |
| word = mw.ustring.gsub(word, "([pbktdɡ])%.([lɾ])", ".%1%2")
| |
| word = mw.ustring.gsub(word, "(" .. C .. ")%.s(" .. C .. ")", "%1s.%2")
| |
| word = mw.ustring.gsub(word, "([aeo])([aeo])", "%1.%2")
| |
| word = mw.ustring.gsub(word, "([i])([i])", "%1.%2")
| |
| word = mw.ustring.gsub(word, "([u])([u])", "%1.%2")
| |
|
| |
| table.insert(debug, word)
| |
|
| |
| --syllables nasalized if ending with "n", voiceless consonants in syllable-final position to voiced
| |
| local remove_accent = { ['á'] = 'a', ['é'] = 'e', ['í'] = 'i', ['ó'] = 'o', ['ú'] = 'u'}
| |
| local nasalize = { ['a'] = 'ã', ['e'] = 'ẽ', ['i'] = 'ĩ', ['o'] = 'õ', ['u'] = 'ũ' }
| |
| for i = 1, #syllables do
| |
| syllables[i] = mw.ustring.gsub(syllables[i], '[áéíóú]', remove_accent)
| |
| if phonetic and mw.ustring.find(syllables[i], '[mnɲ]' .. C .. '?$') then
| |
| syllables[i] = mw.ustring.gsub(syllables[i], '[aeiou]', nasalize)
| |
| end
| |
| syllables[i] = mw.ustring.gsub(syllables[i], '[ptk]$', { ['p'] = 'b', ['t'] = 'd', ['k'] = 'ɡ' })
| |
| end
| |
| word = table.concat(syllables)
| |
|
| |
| --real sound of LatAm Z
| |
| word = mw.ustring.gsub(word, 'z', 's')
| |
| --secondary stress
| |
| word = mw.ustring.gsub(word, 'ˈ(.+)ˈ', 'ˌ%1ˈ')
| |
| word = mw.ustring.gsub(word, 'ˈ(.+)ˌ', 'ˌ%1ˌ')
| |
| word = mw.ustring.gsub(word, 'ˌ(.+)ˈ(.+)ˈ', 'ˌ%1ˌ%2ˈ')
| |
|
| |
| --phonetic transcription
| |
| if phonetic then
| |
| --θ, s, f before voiced consonants
| |
| local voiced = 'mnɲbdɟɡʎ'
| |
| local r = 'ɾr'
| |
| local tovoiced = {
| |
| ['θ'] = 'θ̬',
| |
| ['s'] = 'z',
| |
| ['f'] = 'v',
| |
| }
| |
| local function voice(sound, following)
| |
| return tovoiced[sound]..following
| |
| end
| |
| word = mw.ustring.gsub(word, '([θs])([ˈˌ]?['..voiced..r..'])', voice)
| |
| word = mw.ustring.gsub(word, '(f)([ˈˌ]?['..voiced..'])', voice)
| |
|
| |
| local stop_to_fricative = {['b']='β', ['d']='ð', ['ɟ']='ʝ', ['ɡ']='ɣ'}
| |
| local fricative_to_stop = {['β']='b', ['ð']='d', ['ʝ']='ɟ', ['ɣ']='ɡ'}
| |
| --lots of allophones going on
| |
| word = mw.ustring.gsub(word, '[bdɟɡ]', stop_to_fricative)
| |
| word = mw.ustring.gsub(
| |
| word,
| |
| '()([ˈˌ]?)([βðɣʝ])',
| |
| function (pos, stress, fricative)
| |
| -- Matching the character before the fricative in the pattern
| |
| -- doesn't work because sometimes there are two fricatives in
| |
| -- a row.
| |
| local before = pos > 1 and mw.ustring.sub(word, pos - 1, pos - 1)
| |
| -- mw.log(orig_word, before, stress, fricative)
| |
| if not before or (fricative == 'ɣ' or fricative == 'β') and ('mnɲ'):find(before)
| |
| or (fricative == 'ð' or fricative == 'ʝ') and ('lʎmnɲ'):find(before) then
| |
| return stress .. fricative_to_stop[fricative]
| |
| end -- else no change
| |
| end)
| |
| word = mw.ustring.gsub(word, '[td]', {['t']='t̪', ['d']='d̪'})
| |
| --nasal assimilation before consonants
| |
| local labiodental, dentialveolar, dental, alveolopalatal, palatal, velar =
| |
| 'ɱ', 'n̪', 'n̟', 'nʲ', 'ɲ', 'ŋ'
| |
| local nasal_assimilation = {
| |
| ['f'] = labiodental,
| |
| ['t'] = dentialveolar, ['d'] = dentialveolar,
| |
| ['θ'] = dental,
| |
| ['ʃ'] = alveolopalatal,
| |
| ['ɟ'] = palatal, ['ʎ'] = palatal,
| |
| ['k'] = velar, ['x'] = velar, ['ɡ'] = velar,
| |
| }
| |
|
| |
| word = mw.ustring.gsub(
| |
| word,
| |
| 'n([ˈˌ]?)(.)',
| |
| function (stress, following)
| |
| return (nasal_assimilation[following] or 'n') .. stress .. following
| |
| end)
| |
| --lateral assimilation before consonants
| |
| word = mw.ustring.gsub(
| |
| word,
| |
| 'l([ˈˌ]?)(.)',
| |
| function (stress, following)
| |
| local l = 'l'
| |
| if following == 't' or following == 'd' then -- dentialveolar
| |
| l = 'l̪'
| |
| elseif following == 'θ' then -- dental
| |
| l = 'l̟'
| |
| elseif following == 'ʃ' then -- alveolopalatal
| |
| l = 'lʲ'
| |
| end
| |
| return l .. stress .. following
| |
| end)
| |
| --semivowels
| |
| word = mw.ustring.gsub(word, '([aeouãẽõũ][iïĩ])', '%1̯')
| |
| word = mw.ustring.gsub(word, '([aeioãẽĩõ][uũ])', '%1̯')
| |
| end
| |
|
| |
| table.insert(debug, word)
| |
|
| |
| word = mw.ustring.gsub(word, 'h', '') --silent "h"
| |
| word = mw.ustring.gsub(word, 'ʃ', 't͡ʃ') --fake "ch" to real "ch"
| |
| word = mw.ustring.gsub(word, 'ɟ', 'ɟ͡ʝ') --fake "y" to real "y"
| |
| word = mw.ustring.gsub(word, 'ï', 'i') --fake "y$" to real "y$"
| |
|
| |
| if do_debug == 'yes' then
| |
| return word .. table.concat(debug, "")
| |
| else
| |
| return word
| |
| end
| |
|
| |
|
| |
| -- Entry point that forwards `frame` to export.show with the fixed flag pair
| -- (false, true). The signature of export.show is not visible in this part of
| -- the file; presumably the flags are its LatinAmerica and phonetic switches.
| function export.phonetic(frame)
| |
| local latam_flag, phonetic_flag = false, true
| |
| return export.show(frame, latam_flag, phonetic_flag)
| |
| end
| |
|
| |
| -- Entry point that forwards `frame` to export.show with the fixed flag pair
| -- (true, true). The signature of export.show is not visible in this part of
| -- the file; presumably the flags are its LatinAmerica and phonetic switches.
| function export.phoneticLatinAmerica(frame)
| |
| local latam_flag, phonetic_flag = true, true
| |
| return export.show(frame, latam_flag, phonetic_flag)
| |
| end
| |
|
| |
| return export
| |