48,355
edits
Tag: Undo |
No edit summary |
||
| Line 3: | Line 3: | ||
local force_cat = false -- for testing | local force_cat = false -- for testing | ||
local pron_qualifier_module = "Module:pron qualifier" | local pron_qualifier_module = "Module:pron qualifier" | ||
local qualifier_module = "Module:qualifier" | local qualifier_module = "Module:qualifier" | ||
local references_module = "Module:references" | local references_module = "Module:references" | ||
local string_utilities_module = "Module:string utilities" | |||
local syllables_module = "Module:syllables" | local syllables_module = "Module:syllables" | ||
local utilities_module = "Module:utilities" | local utilities_module = "Module:utilities" | ||
local m_data = mw.loadData("Module:IPA/data") | |||
local m_str_utils = require(string_utilities_module) | |||
local m_syllables -- [[Module:syllables]]; loaded below if needed | local m_syllables -- [[Module:syllables]]; loaded below if needed | ||
local m_symbols = mw.loadData("Module:IPA/data/symbols") | |||
local concat = table.concat | local concat = table.concat | ||
local decode_entities = m_str_utils.decode_entities | |||
local find = string.find | local find = string.find | ||
local gmatch = m_str_utils.gmatch | local gmatch = m_str_utils.gmatch | ||
| Line 23: | Line 26: | ||
local sub = string.sub | local sub = string.sub | ||
local u = m_str_utils.char | local u = m_str_utils.char | ||
local ugsub = m_str_utils.gsub | local ugsub = m_str_utils.gsub | ||
local umatch = m_str_utils.match | local umatch = m_str_utils.match | ||
| Line 113: | Line 115: | ||
prefix_text = '<span class="error">' .. err .. '</span>' | prefix_text = '<span class="error">' .. err .. '</span>' | ||
else | else | ||
prefix_text = langname .. " pronunciation" | if hasKey[lang:getCode()] then | ||
prefix_text = "Appendix:" .. langname .. " pronunciation" | |||
else | |||
prefix_text = "wikipedia:" .. langname .. " phonology" | |||
end | |||
prefix_text = "[[" .. prefix_text .. "|key]]" | prefix_text = "[[" .. prefix_text .. "|key]]" | ||
end | end | ||
| Line 155: | Line 161: | ||
local function determine_repr(pron) | local function determine_repr(pron) | ||
local | local reconstructed | ||
-- remove initial asterisk before representation marks, used on some Reconstruction pages | -- remove initial asterisk before representation marks, used on some Reconstruction pages | ||
if sub(pron, 1, 1) == "*" then | if sub(pron, 1, 1) == "*" then | ||
| Line 163: | Line 168: | ||
pron = sub(pron, 2) | pron = sub(pron, 2) | ||
end | end | ||
local | local opening = match(pron, "^.[\128-\191]*") | ||
local data = m_data.representation_types[opening] | |||
if data then | |||
local closing = data[2] | |||
if data and match(pron, closing .. "$", #opening + 1) then | |||
return data[1], opening, closing, reconstructed | |||
local | |||
if | |||
end | end | ||
end | end | ||
return | return nil, "", "", reconstructed | ||
end | end | ||
local function hasInvalidSeparators(transcription) | local function hasInvalidSeparators(transcription) | ||
if | if umatch(transcription, "%.[ˈˌ]") or umatch(transcription, "[ˈˌ][ .]") then | ||
return true | return true | ||
else | else | ||
| Line 339: | Line 331: | ||
local langcode = lang:getCode() | local langcode = lang:getCode() | ||
if m_data.langs_to_generate_syllable_count_categories[langcode] then | if m_data.langs_to_generate_syllable_count_categories[langcode] then | ||
local phonemic, phonetic = split_phonemic_phonetic(item.pron) | local phonemic, phonetic, use_it = split_phonemic_phonetic(item.pron) | ||
local | local repr = determine_repr(phonemic) | ||
if not phonetic then -- not a '/.../ [...]' combined pronunciation | if not phonetic then -- not a '/.../ [...]' combined pronunciation | ||
if m_data.langs_to_use_phonetic_notation[langcode] then | if m_data.langs_to_use_phonetic_notation[langcode] then | ||
use_it = repr == "phonetic" and phonemic or nil | use_it = repr == "phonetic" and phonemic or nil | ||
| Line 367: | Line 358: | ||
-- into its parts in order to process. | -- into its parts in order to process. | ||
if lang:getCode() == "en" and hasInvalidSeparators(item.pron) then | if lang:getCode() == "en" and hasInvalidSeparators(item.pron) then | ||
insert(categories, "IPA | insert(categories, "English IPA pronunciations with invalid separators") | ||
end | end | ||
end | end | ||
| Line 383: | Line 374: | ||
]=] | ]=] | ||
local function format_one_IPA(lang, pron, err, categories) | local function format_one_IPA(lang, pron, err, categories) | ||
-- | -- Disallow wikilinks. | ||
if match(pron, "%[%[.-%]%]") then | |||
error("IPA input must not contain wikilinks.") | |||
end | end | ||
pron = decode_entities(pron) | |||
-- | -- Detect the type of transcription. | ||
local repr, opening, closing, reconstructed = determine_repr(pron) | |||
-- Strip any reconstruction asterisk and representation marks. | |||
pron = sub(pron, #opening + 1 + (reconstructed and 1 or 0), -#closing - 1) | |||
if not repr then | |||
insert(categories, "IPA pronunciations with invalid representation marks") | insert(categories, "IPA pronunciations with invalid representation marks") | ||
-- insert(err, "invalid representation marks") | -- insert(err, "invalid representation marks") | ||
| Line 419: | Line 397: | ||
-- Check for obsolete and nonstandard symbols | -- Check for obsolete and nonstandard symbols | ||
for | for _, symbol in ipairs(m_data.nonstandard) do | ||
local result | local result | ||
for nonstandard in gmatch(pron, symbol) do | for nonstandard in gmatch(pron, symbol) do | ||
| Line 442: | Line 420: | ||
3. bolding | 3. bolding | ||
4. italics | 4. italics | ||
5 | 5. asterisk at beginning of transcription | ||
6. comma followed by spacing characters | |||
7. superscripts enclosed in superscript parentheses ]] | |||
local found_HTML | local found_HTML | ||
local result = gsub( | local result = gsub(pron, "<(%a+)[^>]*>([^<]+)</%1>", | ||
function(tagName, content) | function(tagName, content) | ||
found_HTML = true | found_HTML = true | ||
| Line 454: | Line 431: | ||
result = gsub(result, "'''([^']*)'''", "%1") | result = gsub(result, "'''([^']*)'''", "%1") | ||
result = gsub(result, "''([^']*)''", "%1") | result = gsub(result, "''([^']*)''", "%1") | ||
result = gsub(result, "^%*", "") | result = gsub(result, "^%*", "") | ||
result = ugsub(result, ",%s+", "") | result = ugsub(result, ",%s+", "") | ||
| Line 466: | Line 442: | ||
pron = gsub(pron, vs15, "") | pron = gsub(pron, vs15, "") | ||
end | end | ||
pron = ugsub(pron, | pron = ugsub(pron, vs15_class, "%0" .. vs15) | ||
end | end | ||
| Line 505: | Line 481: | ||
end | end | ||
if repr == "phonemic" or repr == "rhyme" | if (repr == "phonemic" or repr == "rhyme") and lang and m_data.phonemes[lang:getCode()] then | ||
local valid_phonemes = m_data.phonemes[lang:getCode()] | |||
local rest = pron | |||
local phonemes = {} | |||
while #rest > 0 do | |||
local longestmatch, longestmatch_len = "", 0 | |||
local rest_init = sub(rest, 1, 1) | |||
if rest_init == "(" or rest_init == ")" then | |||
longestmatch = rest_init | |||
longestmatch_len = 1 | |||
else | |||
for _, phoneme in ipairs(valid_phonemes) do | |||
local phoneme_len = len(phoneme) | |||
if phoneme_len > longestmatch_len and usub(rest, 1, phoneme_len) == phoneme then | |||
longestmatch = phoneme | |||
longestmatch_len = len(longestmatch) | |||
end | end | ||
end | end | ||
end | |||
if longestmatch_len > 0 then | |||
insert(phonemes, longestmatch) | |||
rest = usub(rest, longestmatch_len + 1) | |||
else | |||
local phoneme = usub(rest, 1, 1) | |||
insert(phonemes, "<span style=\"color: red\">" .. phoneme .. "</span>") | |||
rest = usub(rest, 2) | |||
insert(categories, "IPA pronunciations with invalid phonemes/" .. lang:getCode()) | |||
end | end | ||
end | end | ||
pron = concat(phonemes) | |||
pron = | |||
end | end | ||
return pron | return (reconstructed and "*" or "") .. opening .. pron .. closing | ||
end | end | ||