48,404
edits
No edit summary |
No edit summary |
||
| Line 5: | Line 5: | ||
--[=[ | --[=[ | ||
A list of representation types (e.g. /foo/ for phonemic and [bar] for phonetic), | A list of representation types (e.g. /foo/ for phonemic and [bar] for phonetic), | ||
given as a table. The key is the opening | given as a table. The key is the opening character, the first value the | ||
representation type, and the second value the closing symbol. | representation type, and the second value the closing symbol.]=] | ||
]=] | |||
data.representation_types = { | data.representation_types = { | ||
["/"] = {"phonemic", "/"}, | ["/"] = {"phonemic", "/"}, | ||
| Line 14: | Line 13: | ||
["⟨"] = {"orthographic", "⟩"}, | ["⟨"] = {"orthographic", "⟩"}, | ||
["-"] = {"rhyme", ""}, | ["-"] = {"rhyme", ""}, | ||
} | |||
--[=[ | |||
A list of convenience inputs for certain representation types. The key is the | |||
opening character, and the table is a three-item array consisting of (1) an | |||
mw.ustring.gsub pattern which is anchored to the start and end of the string, | |||
with a single capture group that excludes the characters to be substituted, | |||
(2) a corresponding replacement pattern to be used with the pattern, and (3) the | |||
replacement opening character.]=] | |||
data.representation_subs = { | |||
["<"] = {"^<(.*)>$", "⟨%1⟩", "⟨"}, | |||
["/"] = {"^//(.*)//$", "⫽%1⫽", "⫽"}, | |||
} | } | ||
| Line 22: | Line 33: | ||
the generated pronunciation links to such pages; for other languages, it links | the generated pronunciation links to such pages; for other languages, it links | ||
to the "LANG phonology" page in Wikipedia (which may or may not exist). | to the "LANG phonology" page in Wikipedia (which may or may not exist). | ||
[[Module:IPA]] is responsible for this linking; see format_IPA_full(). | [[Module:IPA]] is responsible for this linking; see format_IPA_full().]=] | ||
]=] | |||
data.langs_with_infopages = list_to_set{ | data.langs_with_infopages = list_to_set{ | ||
"acw", | "acw", | ||
| Line 56: | Line 66: | ||
"ga", | "ga", | ||
"gd", | "gd", | ||
"gmh", | |||
"gmw-msc", | |||
"got", | "got", | ||
"he", | "he", | ||
| Line 122: | Line 134: | ||
"wlm", | "wlm", | ||
"yi", | "yi", | ||
"yrl", | |||
"yue", | "yue", | ||
"zlw-mas" | "zlw-mas" | ||
| Line 141: | Line 154: | ||
NOTE: There are some additional languages that have these categories. | NOTE: There are some additional languages that have these categories. | ||
For example: | For example: | ||
* Thai words have these categories added by [[Module:th-pron]]. | * Thai words have these categories added by [[Module:th-pron]].]=] | ||
]=] | |||
data.diphthongs = { | data.diphthongs = { | ||
["cs"] = { -- [[w:Czech phonology#Diphthongs]] | ["cs"] = { -- [[w:Czech phonology#Diphthongs]] | ||
| Line 161: | Line 173: | ||
"[aʌ][ʊɪ]ə", -- May be a disyllabic sequence in some or all dialects? | "[aʌ][ʊɪ]ə", -- May be a disyllabic sequence in some or all dialects? | ||
}, | }, | ||
["eo"] = { | |||
"[aeiou][iu]̯", | |||
}, | |||
["grc"] = { | ["grc"] = { | ||
"[aeyo]i", | "[aeyo]i", | ||
| Line 179: | Line 194: | ||
"[aeɛoɔu]i", | "[aeɛoɔu]i", | ||
"[aeɛioɔ]u", | "[aeɛioɔ]u", | ||
}, | }, | ||
["lb"] = { | ["lb"] = { | ||
| Line 196: | Line 206: | ||
"LANG #-syllable words", e.g. [[:Category:Russian 3-syllable words]], should be | "LANG #-syllable words", e.g. [[:Category:Russian 3-syllable words]], should be | ||
generated. Do not list languages here if they have an entry above under | generated. Do not list languages here if they have an entry above under | ||
`data.diphthongs`; such languages are automatically added to this list. | `data.diphthongs`; such languages are automatically added to this list.]=] | ||
]=] | |||
local langs_to_generate_syllable_count_categories = list_to_set{ | local langs_to_generate_syllable_count_categories = list_to_set{ | ||
"ar", -- Arabic has diphthongs, but they are transcribed | "ar", -- Arabic has diphthongs, but they are transcribed | ||
| Line 203: | Line 212: | ||
"ary", -- Moroccan Arabic has diphthongs, but they are transcribed | "ary", -- Moroccan Arabic has diphthongs, but they are transcribed | ||
-- with semivowel symbols. | -- with semivowel symbols. | ||
"bg", -- Bulgarian has diphthongs with /j/ and marginally with /w/, | |||
-- but these are semivowels. | |||
"ca", -- Catalan has diphthongs, but they are generally transcribed using | "ca", -- Catalan has diphthongs, but they are generally transcribed using | ||
-- /w/ and /j/, so do not need to be listed (see [[w:Catalan language#Diphthongs and triphthongs]]). | -- /w/ and /j/, so do not need to be listed (see [[w:Catalan language#Diphthongs and triphthongs]]). | ||
"es", -- Spanish has diphthongs, but they are transcribed with i̯ etc. | "es", -- Spanish has diphthongs, but they are transcribed with i̯ etc. | ||
"eu", -- Basque has diphthongs, but they are transcribed with i̯ and u̯. | |||
"fi", -- Finnish has diphthongs, but they are now automatically transcribed with | "fi", -- Finnish has diphthongs, but they are now automatically transcribed with | ||
-- the nonsyllabic diacritic | -- the nonsyllabic diacritic | ||
| Line 216: | Line 228: | ||
"kmr", | "kmr", | ||
"ku", | "ku", | ||
"la", -- All diphthongs transcribed with e̯ or /j/ etc. | |||
"mk", | "mk", | ||
"ms", -- Malay has diphthongs, but they are transcribed with i̯ or /j/ etc. | "ms", -- Malay has diphthongs, but they are transcribed with i̯ or /j/ etc. | ||
| Line 222: | Line 235: | ||
"pl", -- No diphthongs, properly speaking; sequences of a vowel and /w/ or /j/ though. | "pl", -- No diphthongs, properly speaking; sequences of a vowel and /w/ or /j/ though. | ||
"pt", -- Portuguese has diphthongs, but they are transcribed with i̯ or /j/ etc. | "pt", -- Portuguese has diphthongs, but they are transcribed with i̯ or /j/ etc. | ||
"rsk", -- No diphthongs but there are sequences of vowel and /j/ or /w/. | |||
"ru", -- No diphthongs, properly speaking; sequences of a vowel and /j/ though. | "ru", -- No diphthongs, properly speaking; sequences of a vowel and /j/ though. | ||
"sk", -- Slovak has rising diphthongs, /i̯e, i̯a, i̯u, u̯o/, which are probably always spelled with the nonsyllabic diacritic, so do not need to be listed. | "sk", -- Slovak has rising diphthongs, /i̯e, i̯a, i̯u, u̯o/, which are probably always spelled with the nonsyllabic diacritic, so do not need to be listed. | ||
| Line 238: | Line 252: | ||
-- Languages for which the phonetic (not phonemic) notation is used to compute syllable counts. | -- Languages for which the phonetic (not phonemic) notation is used to compute syllable counts. | ||
data.langs_to_use_phonetic_notation = list_to_set{ | data.langs_to_use_phonetic_notation = list_to_set{ | ||
"bg", | |||
"es", | "es", | ||
"id", | "id", | ||
"la", | |||
"mk", | "mk", | ||
"ms", | "ms", | ||
"rsk", | |||
"ru", | "ru", | ||
} | } | ||
| Line 250: | Line 267: | ||
so we can't put them in the line below. ]] | so we can't put them in the line below. ]] | ||
"ɑ̢", "ɔ̗", "ɔ̖", | "ɑ̢", "ɔ̗", "ɔ̖", | ||
"[? | "[?ƍσƺƪƞƛłščžǰǧǯẋⱻʚω∅ØȣᴀᴇⱻQKPT]" | ||
} | } | ||
| Line 276: | Line 293: | ||
"a", "b", "d", "d͡ʒ", "d͡z", "e", "f", "h", "i", "j", "k", | "a", "b", "d", "d͡ʒ", "d͡z", "e", "f", "h", "i", "j", "k", | ||
"l", "m", "n", "o", "p", "r", "s", "t", "t͡s", "t͡ʃ", | "l", "m", "n", "o", "p", "r", "s", "t", "t͡s", "t͡ʃ", | ||
"u", "v", "w", "x", "z", "ɡ", "ʃ", "ʒ", | "u", "u̯", "v", "w", "x", "z", "ɡ", "ʃ", "ʒ", | ||
"ˈ", ".", " ", "-", | "ˈ", ".", " ", "-", | ||
} | } | ||