Module:IPA/data: Difference between revisions

Line 5:

--[=[

A list of representation types (e.g. /foo/ for phonemic and [bar] for phonetic),

given as a table. The key is the opening ~~symbol~~, the first value the

given as a table. The key is the opening character, the first value the

representation type, and the second value the closing symbol.

representation type, and the second value the closing symbol.]=]

]=]

data.representation_types = {

["/"] = {"phonemic", "/"},

Line 14:

Line 13:

["⟨"] = {"orthographic", "⟩"},

["-"] = {"rhyme", ""},

}

--[=[

A list of convenience inputs for certain representation types. The key is the

opening character, and the table is a three-item array consisting of (1) an

mw.ustring.gsub pattern which is anchored to the start and end of the string,

with a single capture group that excludes the characters to be substituted,

(2) a corresponding replacement pattern to be used with the pattern, and (3) the

replacement opening character.]=]

data.representation_subs = {

["<"] = {"^<(.*)>$", "⟨%1⟩", "⟨"},

["/"] = {"^//(.*)//$", "⫽%1⫽", "⫽"},

}

Line 22:

Line 33:

the generated pronunciation links to such pages; for other languages, it links

to the "LANG phonology" page in Wikipedia (which may or may not exist).

[[Module:IPA]] is responsible for this linking; see format_IPA_full().

[[Module:IPA]] is responsible for this linking; see format_IPA_full().]=]

]=]

data.langs_with_infopages = list_to_set{

"acw",

Line 56:

Line 66:

"ga",

"gd",

"gmh",

"gmw-msc",

"got",

"he",

Line 122:

Line 134:

"wlm",

"yi",

"yrl",

"yue",

"zlw-mas"

Line 141:

Line 154:

NOTE: There are some additional languages that have these categories.

For example:

* Thai words have these categories added by [[Module:th-pron]].

* Thai words have these categories added by [[Module:th-pron]].]=]

]=]

data.diphthongs = {

["cs"] = { -- [[w:Czech phonology#Diphthongs]]

Line 161:

Line 173:

"[aʌ][ʊɪ]ə", -- May be a disyllabic sequence in some or all dialects?

},

["eo"] = {

"[aeiou][iu]̯",

},

["grc"] = {

"[aeyo]i",

Line 179:

Line 194:

"[aeɛoɔu]i",

"[aeɛioɔ]u",

},

~~["la"] = {~~

~~"[eaou]i",~~

~~"[eao]u",~~

~~"[ao]e",~~

},

["lb"] = {

Line 196:

Line 206:

"LANG #-syllable words", e.g. [[:Category:Russian 3-syllable words]], should be

generated. Do not list languages here if they have an entry above under

`data.diphthongs`; such languages are automatically added to this list.

`data.diphthongs`; such languages are automatically added to this list.]=]

]=]

local langs_to_generate_syllable_count_categories = list_to_set{

"ar", -- Arabic has diphthongs, but they are transcribed

Line 203:

Line 212:

"ary", -- Moroccan Arabic has diphthongs, but they are transcribed

-- with semivowel symbols.

"bg", -- Bulgarian has diphthongs with /j/ and marginally with /w/,

-- but these are semivowels.

"ca", -- Catalan has diphthongs, but they are generally transcribed using

-- /w/ and /j/, so do not need to be listed (see [[w:Catalan language#Diphthongs and triphthongs]]).

"es", -- Spanish has diphthongs, but they are transcribed with i̯ etc.

"eu", -- Basque has diphthongs, but they are transcribed with i̯ and u̯.

"fi", -- Finnish has diphthongs, but they are now automatically transcribed with

-- the nonsyllabic diacritic

Line 216:

Line 228:

"kmr",

"ku",

"la", -- All diphthongs transcribed with e̯ or /j/ etc.

"mk",

"ms", -- Malay has diphthongs, but they are transcribed with i̯ or /j/ etc.

Line 222:

Line 235:

"pl", -- No diphthongs, properly speaking; sequences of a vowel and /w/ or /j/ though.

"pt", -- Portuguese has diphthongs, but they are transcribed with i̯ or /j/ etc.

"rsk", -- No diphthongs but there are sequences of vowel and /j/ or /w/.

"ru", -- No diphthongs, properly speaking; sequences of a vowel and /j/ though.

"sk", -- Slovak has rising diphthongs, /i̯e, i̯a, i̯u, u̯o/, which are probably always spelled with the nonsyllabic diacritic, so do not need to be listed.

Line 238:

Line 252:

-- Languages that use phonetic rather than phonemic notation to compute syllable counts.

data.langs_to_use_phonetic_notation = list_to_set{

"bg",

"es",

"id",

"la",

"mk",

"ms",

"rsk",

"ru",

}

Line 250:

Line 267:

so we can't put them in the line below. ]]

"ɑ̢", "ɔ̗", "ɔ̖",

"[?~~ƍσƺƪƞƛłščžǰǧǯẋᵻᵿⱻʚω∅ØȣᴀᴇⱻQKPT~~]"

"[?ƍσƺƪƞƛłščžǰǧǯẋⱻʚω∅ØȣᴀᴇⱻQKPT]"

}

Line 276:

Line 293:

"a", "b", "d", "d͡ʒ", "d͡z", "e", "f", "h", "i", "j", "k",

"l", "m", "n", "o", "p", "r", "s", "t", "t͡s", "t͡ʃ",

"u", "v", "w", "x", "z", "ɡ", "ʃ", "ʒ",

"u", "u̯", "v", "w", "x", "z", "ɡ", "ʃ", "ʒ",

"ˈ", ".", " ", "-",

}

@@ Line 5: / Line 5: @@
 --[=[
 A list of representation types (e.g. /foo/ for phonemic and [bar] for phonetic),
-given as a table. The key is the opening symbol, the first value the
+given as a table. The key is the opening character, the first value the
-representation type, and the second value the closing symbol.
+representation type, and the second value the closing symbol.]=]
-]=]
 data.representation_types = {
 	["/"] = {"phonemic", "/"},
@@ Line 14: / Line 13: @@
 	["⟨"] = {"orthographic", "⟩"},
 	["-"] = {"rhyme", ""},
+}
+--[=[
+A list of convenience inputs for certain representation types. The key is the
+opening character, and the table is a three-item array consisting of (1) an
+mw.ustring.gsub pattern which is anchored to the start and end of the string,
+with a single capture group that excludes the characters to be substituted,
+(2) a corresponding replacement pattern to be used with the pattern, and (3) the
+replacement opening character.]=]
+data.representation_subs = {
+	["<"] = {"^<(.*)>$", "⟨%1⟩", "⟨"},
+	["/"] = {"^//(.*)//$", "⫽%1⫽", "⫽"},
 }
@@ Line 22: / Line 33: @@
 the generated pronunciation links to such pages; for other languages, it links
 to the "LANG phonology" page in Wikipedia (which may or may not exist).
-[[Module:IPA]] is responsible for this linking; see format_IPA_full().
+[[Module:IPA]] is responsible for this linking; see format_IPA_full().]=]
-]=]
 data.langs_with_infopages = list_to_set{
 	"acw",
@@ Line 56: / Line 66: @@
 	"ga",
 	"gd",
+    "gmh",
+    "gmw-msc",
 	"got",
 	"he",
@@ Line 122: / Line 134: @@
 	"wlm",
 	"yi",
+	"yrl",
 	"yue",
 	"zlw-mas"
@@ Line 141: / Line 154: @@
 NOTE: There are some additional languages that have these categories.
 For example:
-* Thai words have these categories added by [[Module:th-pron]].
+* Thai words have these categories added by [[Module:th-pron]].]=]
-]=]
 data.diphthongs = {
 	["cs"] = { -- [[w:Czech phonology#Diphthongs]]
@@ Line 161: / Line 173: @@
 		"[aʌ][ʊɪ]ə",	-- May be a disyllabic sequence in some or all dialects?
 		},
+	["eo"] = {
+		"[aeiou][iu]̯",
+	},
 	["grc"] = {
 		"[aeyo]i",
@@ Line 179: / Line 194: @@
 		"[aeɛoɔu]i",
 		"[aeɛioɔ]u",
-		},
-	["la"] = {
-		"[eaou]i",
-		"[eao]u",
-		"[ao]e",
 		},
 	["lb"] = {
@@ Line 196: / Line 206: @@
 "LANG #-syllable words", e.g. [[:Category:Russian 3-syllable words]], should be
 generated. Do not list languages here if they have an entry above under
-`data.diphthongs`; such languages are automatically added to this list.
+`data.diphthongs`; such languages are automatically added to this list.]=]
-]=]
 local langs_to_generate_syllable_count_categories = list_to_set{
 	"ar",	-- Arabic has diphthongs, but they are transcribed
@@ Line 203: / Line 212: @@
 	"ary",	-- Moroccan Arabic has diphthongs, but they are transcribed
 			-- with semivowel symbols.
+	"bg",   -- Bulgarian has diphthongs with /j/ and marginally with /w/,
+	        -- but these are semivowels.
 	"ca",	-- Catalan has diphthongs, but they are generally transcribed using
 			-- /w/ and /j/, so do not need to be listed (see [[w:Catalan language#Diphthongs and triphthongs]]).
 	"es",	-- Spanish has diphthongs, but they are transcribed with i̯ etc.
+	"eu",   -- Basque has diphthongs, but they are transcribed with i̯ and u̯.
 	"fi",	-- Finnish has diphthongs, but they are now automatically transcribed with
 			-- the nonsyllabic diacritic
@@ Line 216: / Line 228: @@
 	"kmr",
 	"ku",
+	"la",	-- All diphthongs transcribed with e̯ or /j/ etc.
 	"mk",
 	"ms",	-- Malay has diphthongs, but they are transcribed with i̯ or /j/ etc.
@@ Line 222: / Line 235: @@
 	"pl",	-- No diphthongs, properly speaking; sequences of a vowel and /w/ or /j/ though.
 	"pt",	-- Portuguese has diphthongs, but they are transcribed with i̯ or /j/ etc.
+	"rsk",	-- No diphthongs but there are sequences of vowel and /j/ or /w/.
 	"ru",	-- No diphthongs, properly speaking; sequences of a vowel and /j/ though.
 	"sk",	-- Slovak has rising diphthongs, /i̯e, i̯a, i̯u, u̯o/, which are probably always spelled with the nonsyllabic diacritic, so do not need to be listed.
@@ Line 238: / Line 252: @@
 -- Languages that use phonetic rather than phonemic notation to compute syllable counts.
 data.langs_to_use_phonetic_notation = list_to_set{
+    "bg",
 	"es",
 	"id",
+	"la",
 	"mk",
 	"ms",
+	"rsk",
 	"ru",
 }
@@ Line 250: / Line 267: @@
 			so we can't put them in the line below.		]]
 	"ɑ̢", "ɔ̗", "ɔ̖",
-	"[?ƍσƺƪƞƛłščžǰǧǯẋᵻᵿⱻʚω∅ØȣᴀᴇⱻQKPT]"
+	"[?ƍσƺƪƞƛłščžǰǧǯẋⱻʚω∅ØȣᴀᴇⱻQKPT]"
 }
@@ Line 276: / Line 293: @@
 	"a", "b", "d", "d͡ʒ", "d͡z", "e", "f", "h", "i", "j", "k",
 	"l", "m", "n", "o", "p", "r", "s", "t", "t͡s", "t͡ʃ",
-	"u", "v", "w", "x", "z", "ɡ", "ʃ", "ʒ",
+	"u", "u̯", "v", "w", "x", "z", "ɡ", "ʃ", "ʒ",
 	"ˈ", ".", " ", "-",
 }