Module:grc-utilities/data: Difference between revisions

Created page with "local data = {} local U = mw.ustring.char local macron = U(0x304) local spacing_macron = U(0xAF) local modifier_macron = U(0x2C9) local breve = U(0x306) local spacing_breve =..."
 
No edit summary
 
Line 1: Line 1:
local data = {}
local data = {}


local U = mw.ustring.char
local concat = table.concat
local insert = table.insert
 
local U = require("Module:string/char")
local macron = U(0x304)
local macron = U(0x304)
local spacing_macron = U(0xAF)
local spacing_macron = U(0xAF)
Line 16: Line 19:
local coronis = U(0x343)
local coronis = U(0x343)
local subscript = U(0x345)
local subscript = U(0x345)
local undertie = mw.ustring.char(0x35C) -- actually "combining double breve below"
local undertie = U(0x35C) -- actually "combining double breve below"


data["diacritics"] = {
data["diacritics"] = {
Line 35: Line 38:
}
}


data.diacritics.all = ""
local diacritics_all, diacritics_combining = {}, {}
for name, diacritic in pairs(data.diacritics) do
for _, diacritic in pairs(data.diacritics) do
data.diacritics.all = data.diacritics.all .. diacritic
insert(diacritics_all, diacritic)
if not (diacritic == spacing_macron or diacritic == modifier_macron or diacritic == spacing_breve) then
insert(diacritics_combining, diacritic)
end
end
end
diacritics_all = concat(diacritics_all)
data.diacritics.all = diacritics_all
diacritics_combining = concat(diacritics_combining)
data.diacritics.combining = diacritics_combining


data["named"] = data["diacritics"]
data["named"] = data["diacritics"]


data["diacritic"] = "[" .. data.diacritics.all .. "]"
data["diacritic"] = "[" .. diacritics_all .. "]"
data["combining_diacritic"] = "[" .. diacritics_combining .. "]"
data["all"] = data["diacritic"]
data["all"] = data["diacritic"]


Line 53: Line 64:
data["groups"] = data["diacritic_groups"]
data["groups"] = data["diacritic_groups"]
data["diacritic_groups"]["accents"] = data["groups"][3]
data["diacritic_groups"]["accents"] = data["groups"][3]
-- Lua patterns for vowel-length marks.
-- `optional` matches an optional macron followed by an optional breve (so it
-- can match the empty string). `mandatory` prefixes it with a frontier
-- pattern whose set is {macron, breve}, so the match has to begin on one of
-- those two marks.
local length_optional = macron .. "?" .. breve .. "?"
data["length"] = {
	optional = length_optional,
	mandatory = "%f[" .. macron .. breve .. "]" .. length_optional,
}


data["diacritic_order"] = {
data["diacritic_order"] = {
[macron] = 1,
[macron] = 1,
[breve] = 1,
[breve] = 2,
[rough] = 2,
[rough] = 3,
[smooth] = 2,
[smooth] = 3,
[diaeresis] = 2,
[diaeresis] = 3,
[acute] = 3,
[acute] = 4,
[grave] = 3,
[grave] = 4,
[circum] = 3,
[circum] = 4,
[subscript] = 4,
[subscript] = 5,
}
}


Line 75: Line 91:
["᾿"] = smooth, -- smooth breathing, modifier letter apostrophe, coronis, combining coronis
["᾿"] = smooth, -- smooth breathing, modifier letter apostrophe, coronis, combining coronis
["ʼ"] = smooth,
["ʼ"] = smooth,
[coronis] = smooth,
["´"] = acute, -- acute
["´"] = acute, -- acute
["`"] = grave, -- grave
["`"] = grave, -- grave
Line 81: Line 96:
["ˆ"] = circum,
["ˆ"] = circum,
[Latin_circum] = circum,
[Latin_circum] = circum,
["῎"] = smooth ..  acute, -- smooth and acute
["῍"] = smooth ..  grave, -- smooth and grave
["῏"] = smooth ..  circum, -- smooth and circumflex
["῞"] = rough ..  acute, -- rough and acute
["῝"] = rough ..  grave, -- rough and grave
["῟"] = rough ..  circum, -- rough and circumflex
["¨"] = diaeresis,
["¨"] = diaeresis,
["΅"] = diaeresis ..  acute,
["῭"] = diaeresis ..  grave,
["῁"] = diaeresis ..  circum,
}
}
data["conversions"] = data["diacritical_conversions"]


data["consonants"] = "ΒβΓγΔδΖζΘθΚκΛλΜμΝνΞξΠπΡρΣσςΤτΦφΧχΨψ"
-- Canonical letter forms
-- Maps variant or symbol letter forms (keys) to the form treated as canonical
-- (values). Keys and values are plain strings; a value may be longer than one
-- character (see the kai entries).
-- NOTE(review): presumably consumed as a substitution table by the module
-- that loads this data; confirm against the caller.
data["canonical"] = {
-- Capital forms.
["ϴ"] = "Θ",
-- The kai abbreviation expands to the full word; the key that carries a
-- grave mark maps to the grave-accented spelling.
["Ϗ"] = "Καί",
["Ϗ̀"] = "Καὶ",
["Ϟ"] = "Ϙ",
["Ϲ"] = "Σ",
["ϒ"] = "Υ",
["ϓ"] = "Ύ",
["ϔ"] = "Ϋ",
["Ϡ"] = "Ͳ",
 
-- Lowercase forms.
["ϐ"] = "β",
["ϵ"] = "ε",
["ϑ"] = "θ",
["ϰ"] = "κ",
-- Lowercase kai abbreviation, handled like the capital one above.
["ϗ"] = "καί",
["ϗ̀"] = "καὶ",
["ϖ"] = "π",
["ϟ"] = "ϙ",
["ϱ"] = "ρ",
-- Both the final sigma and the lunate sigma map to the same plain sigma.
["ς"] = "σ",
["ϲ"] = "σ",
["ϕ"] = "φ",
["ϡ"] = "ͳ",
}
 
data["consonants"] = "ΒβΓγΔδϜϝͶͷϚϛΖζͰͱΘθͿϳΚκΛλΜμΝνΞξΠπϺϻϘϙϞϟΡρΣσςϹϲΤτΦφΧχΨψͲͳϠϡϷϸ"
data["consonant"] = "[" .. data.consonants .. "]"
data["consonant"] = "[" .. data.consonants .. "]"
data["vowels"] = "ΑαΕεΗηΙιΟοΥυΩω"
data["vowels"] = "ΑαΕεΗηΙιΟοΥυΩω"
data["vowel"] = "[" .. data.vowels .. "]"
data["vowel"] = "[" .. data.vowels .. "]"
-- The diacritics listed below, joined into one string with no separator,
-- plus a Lua character class ("[...]") built from that same string.
local combining_list = {
	macron, breve,
	rough, smooth, diaeresis,
	acute, grave, circum,
	subscript,
}
data["combining_diacritics"] = table.concat(combining_list)
data["combining_diacritic"] = "[" .. data["combining_diacritics"] .. "]"


-- Basic letters with and without diacritics
-- Basic letters with and without diacritics
local letters_with_diacritics = 'ΆΈ-ώϜϝἀ-ᾼῂ-ῌῐ-' ..
data.word_characters = "%*'’" .. require("Module:scripts").getByCode("Polyt"):getCharacters() .. diacritics_combining .. undertie
-- capital iota with oxia, normalized to capital iota with tonos if entered
-- literally in a string
U(0x1FDB) ..
'Ὶῠ-Ῥῲ-ῼ'
data.word_characters = letters_with_diacritics .. data.combining_diacritics ..
undertie
data.word_character = "[" .. data.word_characters .. "]"
data.word_character = "[" .. data.word_characters .. "]"


return data
return data