Module:IPA/data: Difference between revisions

(One intermediate revision by the same user not shown)

Line 1:

local list_to_set = require("Module:table").listToSet

local data = {}

--[=[

A list of representation types (e.g. /foo/ for phonemic and [bar] for phonetic),

given as a table. The key is the opening character, the first value the

representation type, and the second value the closing symbol.]=]

data.representation_types = {

["/"] = {"phonemic", "/"},

["["] = {"phonetic", "]"},

["⫽"] = {"morphophonemic", "⫽"},

["⟨"] = {"orthographic", "⟩"},

["-"] = {"rhyme", ""},

}

--[=[

A list of convenience inputs for certain representation types. The key is the

opening character, and the table is a three-item array consisting of (1) an

mw.ustring.gsub pattern which is anchored to the start and end of the string,

with a single capture group that excludes the characters to be substituted,

(2) a corresponding replacement pattern to be used with the pattern, and (3) the

replacement opening character.]=]

data.representation_subs = {

["<"] = {"^<(.*)>$", "⟨%1⟩", "⟨"},

["/"] = {"^//(.*)//$", "⫽%1⫽", "⫽"},

}

--[=[

Line 7:

Line 33:

the generated pronunciation links to such pages; for other languages, it links

to the "LANG phonology" page in Wikipedia (which may or may not exist).

[[Module:IPA]] is responsible for this linking; see format_IPA_full().

[[Module:IPA]] is responsible for this linking; see format_IPA_full().]=]

]=]

data.langs_with_infopages = list_to_set{

~~local~~ langs_with_infopages = {

"acw",

"ady",

Line 41:

Line 66:

"ga",

"gd",

"gmh",

"gmw-msc",

"got",

"he",

Line 47:

Line 74:

"hu",

"hy",

"id",

"ii",

"is",

Line 107:

Line 134:

"wlm",

"yi",

"yrl",

"yue",

"zlw-mas"

}

~~data.langs_with_infopages = {}~~

~~-- Convert the list in `langs_with_infopages` to a set.~~

~~for _, langcode in ipairs(langs_with_infopages) do~~

~~data.langs_with_infopages[langcode] = true~~

~~end~~

--[=[

Line 133:

Line 154:

NOTE: There are some additional languages that have these categories.

For example:

* Thai words have these categories added by [[Module:th-pron]].

* Thai words have these categories added by [[Module:th-pron]].]=]

]=]

data.diphthongs = {

["cs"] = { -- [[w:Czech phonology#Diphthongs]]

Line 153:

Line 173:

"[aʌ][ʊɪ]ə", -- May be a disyllabic sequence in some or all dialects?

},

["eo"] = {

"[aeiou][iu]̯",

},

["grc"] = {

"[aeyo]i",

Line 171:

Line 194:

"[aeɛoɔu]i",

"[aeɛioɔ]u",

},

~~["la"] = {~~

~~"[eaou]i",~~

~~"[eao]u",~~

~~"[ao]e",~~

},

["lb"] = {

Line 188:

Line 206:

"LANG #-syllable words", e.g. [[:Category:Russian 3-syllable words]], should be

generated. Do not list languages here if they have an entry above under

`data.diphthongs`; such languages are automatically added to this list.

`data.diphthongs`; such languages are automatically added to this list.]=]

]=]

local langs_to_generate_syllable_count_categories = list_to_set{

local langs_to_generate_syllable_count_categories = {

"ar", -- Arabic has diphthongs, but they are transcribed

-- with semivowel symbols.

"ary", -- Moroccan Arabic has diphthongs, but they are transcribed

-- with semivowel symbols.

"bg", -- Bulgarian has diphthongs with /j/ and marginally with /w/,

-- but these are semivowels.

"ca", -- Catalan has diphthongs, but they are generally transcribed using

-- /w/ and /j/, so do not need to be listed (see [[w:Catalan language#Diphthongs and triphthongs]]).

"es", -- Spanish has diphthongs, but they are transcribed with i̯ etc.

"eu", -- Basque has diphthongs, but they are transcribed with i̯ and u̯.

"fi", -- Finnish has diphthongs, but they are now automatically transcribed with

-- the nonsyllabic diacritic

"fr", -- French has diphthongs, but they are transcribed

-- with semivowel symbols: [[w:French phonology#Glides and diphthongs]].

"id", -- Indonesian has diphthongs, but they are transcribed with i̯ or /j/ etc.

"hnn",

"id", -- Indonesian has diphthongs, but they are transcribed with i̯ or /j/ etc.

"ka",

"kmr",

"kne",

"kmr",

"ku",

"la", -- All diphthongs transcribed with e̯ or /j/ etc.

"mk",

"ms", -- Malay has diphthongs, but they are transcribed with i̯ or /j/ etc.

"mt", -- Maltese has diphthongs, but they are transcribed

-- with semivowel symbols.

"pl", -- No diphthongs, properly speaking; sequences of a vowel and /w/ or /j/ though.

"pt", -- Portuguese has diphthongs, but they are transcribed with i̯ or /j/ etc.

"rsk", -- No diphthongs but there are sequences of vowel and /j/ or /w/.

"ru", -- No diphthongs, properly speaking; sequences of a vowel and /j/ though.

"sk", -- Slovak has rising diphthongs, /i̯e, i̯a, i̯u, u̯o/, which are probably always spelled with the nonsyllabic diacritic, so do not need to be listed.

"sl", -- No diphthongs, properly speaking; sequences of a vowel, /j/ and /w/ though

"sq", -- [[w:Albanian language#Vowels]] doesn't mention anything about diphthongs.

"tl", -- Tagalog has diphthongs, but they are transcribed with i̯ or /j/ etc.

"tl", -- Tagalog has diphthongs, but they are transcribed with i̯ or /j/ etc

"tsg",

"ug", -- No diphthongs.

}

~~data.langs_to_generate_syllable_count_categories = {}~~

~~-- Convert the list in `langs_to_generate_syllable_count_categories` to a set.~~

~~for _, langcode in ipairs(langs_to_generate_syllable_count_categories) do~~

~~data.langs_to_generate_syllable_count_categories[langcode] = true~~

~~end~~

-- Also add languages listed under `data.diphthongs`.

for langcode, _ in pairs(data.diphthongs) do

~~data.~~langs_to_generate_syllable_count_categories[langcode] = true

langs_to_generate_syllable_count_categories[langcode] = true

end

data.langs_to_generate_syllable_count_categories = langs_to_generate_syllable_count_categories

-- Languages to use the phonetic, not phonemic, notation to compute syllable counts.

~~local~~ langs_to_use_phonetic_notation = {

data.langs_to_use_phonetic_notation = list_to_set{

"bg",

"es",

"id",

"la",

"mk",

"ms",

"rsk",

"ru",

}

~~data.langs_to_use_phonetic_notation = {}~~

~~-- Convert the list in `langs_to_use_phonetic_notation` to a set.~~

~~for _, langcode in ipairs(langs_to_use_phonetic_notation) do~~

~~data.langs_to_use_phonetic_notation[langcode] = true~~

~~end~~

-- Non-standard or obsolete IPA symbols.

Line 252:

Line 267:

so we can't put them in the line below. ]]

"ɑ̢", "ɔ̗", "ɔ̖",

"[?~~ƍσƺƪƞƛłščžǰǧǯẋᵻᵿⱻʚω∅ØȣᴀᴇⱻQKPT~~]"

"[?ƍσƺƪƞƛłščžǰǧǯẋⱻʚω∅ØȣᴀᴇⱻQKPT]"

}

Line 278:

Line 293:

"a", "b", "d", "d͡ʒ", "d͡z", "e", "f", "h", "i", "j", "k",

"l", "m", "n", "o", "p", "r", "s", "t", "t͡s", "t͡ʃ",

"u", "v", "w", "x", "z", "ɡ", "ʃ", "ʒ",

"u", "u̯", "v", "w", "x", "z", "ɡ", "ʃ", "ʒ",

"ˈ", ".", " ", "-",

}

@@ Line 1: / Line 1: @@
+local list_to_set = require("Module:table").listToSet
 local data = {}
+--[=[
+A list of representation types (e.g. /foo/ for phonemic and [bar] for phonetic),
+given as a table. The key is the opening character, the first value the
+representation type, and the second value the closing symbol.]=]
+data.representation_types = {
+	["/"] = {"phonemic", "/"},
+	["["] = {"phonetic", "]"},
+	["⫽"] = {"morphophonemic", "⫽"},
+	["⟨"] = {"orthographic", "⟩"},
+	["-"] = {"rhyme", ""},
+}
+--[=[
+A list of convenience inputs for certain representation types. The key is the
+opening character, and the table is a three-item array consisting of (1) an
+mw.ustring.gsub pattern which is anchored to the start and end of the string,
+with a single capture group that excludes the characters to be substituted,
+(2) a corresponding replacement pattern to be used with the pattern, and (3) the
+replacement opening character.]=]
+data.representation_subs = {
+	["<"] = {"^<(.*)>$", "⟨%1⟩", "⟨"},
+	["/"] = {"^//(.*)//$", "⫽%1⫽", "⫽"},
+}
 --[=[
@@ Line 7: / Line 33: @@
 the generated pronunciation links to such pages; for other languages, it links
 to the "LANG phonology" page in Wikipedia (which may or may not exist).
-[[Module:IPA]] is responsible for this linking; see format_IPA_full().
+[[Module:IPA]] is responsible for this linking; see format_IPA_full().]=]
-]=]
+data.langs_with_infopages = list_to_set{
-local langs_with_infopages = {
 	"acw",
 	"ady",
@@ Line 41: / Line 66: @@
 	"ga",
 	"gd",
+    "gmh",
+    "gmw-msc",
 	"got",
 	"he",
@@ Line 47: / Line 74: @@
 	"hu",
 	"hy",
-    "id",
+	"id",
 	"ii",
 	"is",
@@ Line 107: / Line 134: @@
 	"wlm",
 	"yi",
+	"yrl",
 	"yue",
 	"zlw-mas"
 }
-data.langs_with_infopages = {}
--- Convert the list in `langs_with_infopages` to a set.
-for _, langcode in ipairs(langs_with_infopages) do
-	data.langs_with_infopages[langcode] = true
-end
 --[=[
@@ Line 133: / Line 154: @@
 NOTE: There are some additional languages that have these categories.
 For example:
-* Thai words have these categories added by [[Module:th-pron]].
+* Thai words have these categories added by [[Module:th-pron]].]=]
-]=]
 data.diphthongs = {
 	["cs"] = { -- [[w:Czech phonology#Diphthongs]]
@@ Line 153: / Line 173: @@
 		"[aʌ][ʊɪ]ə",	-- May be a disyllabic sequence in some or all dialects?
 		},
+	["eo"] = {
+		"[aeiou][iu]̯",
+	},
 	["grc"] = {
 		"[aeyo]i",
@@ Line 171: / Line 194: @@
 		"[aeɛoɔu]i",
 		"[aeɛioɔ]u",
-		},
-	["la"] = {
-		"[eaou]i",
-		"[eao]u",
-		"[ao]e",
 		},
 	["lb"] = {
@@ Line 188: / Line 206: @@
 "LANG #-syllable words", e.g. [[:Category:Russian 3-syllable words]], should be
 generated. Do not list languages here if they have an entry above under
-`data.diphthongs`; such languages are automatically added to this list.
+`data.diphthongs`; such languages are automatically added to this list.]=]
-]=]
+local langs_to_generate_syllable_count_categories = list_to_set{
-local langs_to_generate_syllable_count_categories = {
 	"ar",	-- Arabic has diphthongs, but they are transcribed
 			-- with semivowel symbols.
 	"ary",	-- Moroccan Arabic has diphthongs, but they are transcribed
 			-- with semivowel symbols.
+	"bg",   -- Bulgarian has diphthongs with /j/ and marginally with /w/,
+	        -- but these are semivowels.
 	"ca",	-- Catalan has diphthongs, but they are generally transcribed using
 			-- /w/ and /j/, so do not need to be listed (see [[w:Catalan language#Diphthongs and triphthongs]]).
 	"es",	-- Spanish has diphthongs, but they are transcribed with i̯ etc.
+	"eu",   -- Basque has diphthongs, but they are transcribed with i̯ and u̯.
 	"fi",	-- Finnish has diphthongs, but they are now automatically transcribed with
 			-- the nonsyllabic diacritic
 	"fr",	-- French has diphthongs, but they are transcribed
 			-- with semivowel symbols: [[w:French phonology#Glides and diphthongs]].
-    "id",   -- Indonesian has diphthongs, but they are transcribed with i̯ or /j/ etc.
+	"hnn",
+	"id",	-- Indonesian has diphthongs, but they are transcribed with i̯ or /j/ etc.
 	"ka",
-    "kmr",
+	"kne",
+	"kmr",
 	"ku",
+	"la",	-- All diphthongs transcribed with e̯ or /j/ etc.
 	"mk",
-    "ms",   -- Malay has diphthongs, but they are transcribed with i̯ or /j/ etc.
+	"ms",	-- Malay has diphthongs, but they are transcribed with i̯ or /j/ etc.
-    "mt",	-- Maltese has diphthongs, but they are transcribed
+	"mt",	-- Maltese has diphthongs, but they are transcribed
 			-- with semivowel symbols.
-	"pl",   -- No diphthongs, properly speaking; sequences of a vowel and /w/ or /j/ though.
+	"pl",	-- No diphthongs, properly speaking; sequences of a vowel and /w/ or /j/ though.
 	"pt",	-- Portuguese has diphthongs, but they are transcribed with i̯ or /j/ etc.
+	"rsk",	-- No diphthongs but there are sequences of vowel and /j/ or /w/.
 	"ru",	-- No diphthongs, properly speaking; sequences of a vowel and /j/ though.
 	"sk",	-- Slovak has rising diphthongs, /i̯e, i̯a, i̯u, u̯o/, which are probably always spelled with the nonsyllabic diacritic, so do not need to be listed.
 	"sl",	-- No diphthongs, properly speaking; sequences of a vowel, /j/ and /w/ though
 	"sq",	-- [[w:Albanian language#Vowels]] doesn't mention anything about diphthongs.
-    "tl",   -- Tagalog has diphthongs, but they are transcribed with i̯ or /j/ etc.
+	"tl",	-- Tagalog has diphthongs, but they are transcribed with i̯ or /j/ etc
+	"tsg",
 	"ug",	-- No diphthongs.
 }
-data.langs_to_generate_syllable_count_categories = {}
--- Convert the list in `langs_to_generate_syllable_count_categories` to a set.
-for _, langcode in ipairs(langs_to_generate_syllable_count_categories) do
-	data.langs_to_generate_syllable_count_categories[langcode] = true
-end
 -- Also add languages listed under `data.diphthongs`.
 for langcode, _ in pairs(data.diphthongs) do
-	data.langs_to_generate_syllable_count_categories[langcode] = true
+	langs_to_generate_syllable_count_categories[langcode] = true
 end
+data.langs_to_generate_syllable_count_categories = langs_to_generate_syllable_count_categories
 -- Languages to use the phonetic, not phonemic, notation to compute syllable counts.
-local langs_to_use_phonetic_notation = {
+data.langs_to_use_phonetic_notation = list_to_set{
+    "bg",
 	"es",
+	"id",
+	"la",
 	"mk",
+	"ms",
+	"rsk",
 	"ru",
 }
-data.langs_to_use_phonetic_notation = {}
--- Convert the list in `langs_to_use_phonetic_notation` to a set.
-for _, langcode in ipairs(langs_to_use_phonetic_notation) do
-	data.langs_to_use_phonetic_notation[langcode] = true
-end
 -- Non-standard or obsolete IPA symbols.
@@ Line 252: / Line 267: @@
 			so we can't put them in the line below.		]]
 	"ɑ̢", "ɔ̗", "ɔ̖",
-	"[?ƍσƺƪƞƛłščžǰǧǯẋᵻᵿⱻʚω∅ØȣᴀᴇⱻQKPT]"
+	"[?ƍσƺƪƞƛłščžǰǧǯẋⱻʚω∅ØȣᴀᴇⱻQKPT]"
 }
@@ Line 278: / Line 293: @@
 	"a", "b", "d", "d͡ʒ", "d͡z", "e", "f", "h", "i", "j", "k",
 	"l", "m", "n", "o", "p", "r", "s", "t", "t͡s", "t͡ʃ",
-	"u", "v", "w", "x", "z", "ɡ", "ʃ", "ʒ",
+	"u", "u̯", "v", "w", "x", "z", "ɡ", "ʃ", "ʒ",
 	"ˈ", ".", " ", "-",
 }