Module:grc-utilities/data: Difference between revisions

From Linguifex
Jump to navigation Jump to search
Created page with "local data = {} local U = mw.ustring.char local macron = U(0x304) local spacing_macron = U(0xAF) local modifier_macron = U(0x2C9) local breve = U(0x306) local spacing_breve =..."
 
No edit summary
 
Line 1: Line 1:
local data = {}
local data = {}


local U = mw.ustring.char
local concat = table.concat
local insert = table.insert
 
local U = require("Module:string/char")
local macron = U(0x304)
local macron = U(0x304)
local spacing_macron = U(0xAF)
local spacing_macron = U(0xAF)
Line 16: Line 19:
local coronis = U(0x343)
local coronis = U(0x343)
local subscript = U(0x345)
local subscript = U(0x345)
local undertie = mw.ustring.char(0x35C) -- actually "combining double breve below"
local undertie = U(0x35C) -- actually "combining double breve below"


data["diacritics"] = {
data["diacritics"] = {
Line 35: Line 38:
}
}


data.diacritics.all = ""
local diacritics_all, diacritics_combining = {}, {}
for name, diacritic in pairs(data.diacritics) do
for _, diacritic in pairs(data.diacritics) do
data.diacritics.all = data.diacritics.all .. diacritic
insert(diacritics_all, diacritic)
if not (diacritic == spacing_macron or diacritic == modifier_macron or diacritic == spacing_breve) then
insert(diacritics_combining, diacritic)
end
end
end
diacritics_all = concat(diacritics_all)
data.diacritics.all = diacritics_all
diacritics_combining = concat(diacritics_combining)
data.diacritics.combining = diacritics_combining


data["named"] = data["diacritics"]
data["named"] = data["diacritics"]


data["diacritic"] = "[" .. data.diacritics.all .. "]"
data["diacritic"] = "[" .. diacritics_all .. "]"
data["combining_diacritic"] = "[" .. diacritics_combining .. "]"
data["all"] = data["diacritic"]
data["all"] = data["diacritic"]


Line 53: Line 64:
data["groups"] = data["diacritic_groups"]
data["groups"] = data["diacritic_groups"]
data["diacritic_groups"]["accents"] = data["groups"][3]
data["diacritic_groups"]["accents"] = data["groups"][3]
data["length"] = {
optional = macron .. "?" .. breve .. "?"
}
data["length"]["mandatory"] = "%f[" .. macron .. breve .. "]" .. data["length"]["optional"]


data["diacritic_order"] = {
data["diacritic_order"] = {
[macron] = 1,
[macron] = 1,
[breve] = 1,
[breve] = 2,
[rough] = 2,
[rough] = 3,
[smooth] = 2,
[smooth] = 3,
[diaeresis] = 2,
[diaeresis] = 3,
[acute] = 3,
[acute] = 4,
[grave] = 3,
[grave] = 4,
[circum] = 3,
[circum] = 4,
[subscript] = 4,
[subscript] = 5,
}
}


Line 75: Line 91:
["᾿"] = smooth, -- smooth breathing, modifier letter apostrophe, coronis, combining coronis
["᾿"] = smooth, -- smooth breathing, modifier letter apostrophe, coronis, combining coronis
["ʼ"] = smooth,
["ʼ"] = smooth,
[coronis] = smooth,
["´"] = acute, -- acute
["´"] = acute, -- acute
["`"] = grave, -- grave
["`"] = grave, -- grave
Line 81: Line 96:
["ˆ"] = circum,
["ˆ"] = circum,
[Latin_circum] = circum,
[Latin_circum] = circum,
["῎"] = smooth ..  acute, -- smooth and acute
["῍"] = smooth ..  grave, -- smooth and grave
["῏"] = smooth ..  circum, -- smooth and circumflex
["῞"] = rough ..  acute, -- rough and acute
["῝"] = rough ..  grave, -- rough and grave
["῟"] = rough ..  circum, -- rough and circumflex
["¨"] = diaeresis,
["¨"] = diaeresis,
["΅"] = diaeresis ..  acute,
["῭"] = diaeresis ..  grave,
["῁"] = diaeresis ..  circum,
}
}
data["conversions"] = data["diacritical_conversions"]


data["consonants"] = "ΒβΓγΔδΖζΘθΚκΛλΜμΝνΞξΠπΡρΣσςΤτΦφΧχΨψ"
-- Canonical letter forms
data["canonical"] = {
["ϴ"] = "Θ",
["Ϗ"] = "Καί",
["Ϗ̀"] = "Καὶ",
["Ϟ"] = "Ϙ",
["Ϲ"] = "Σ",
["ϒ"] = "Υ",
["ϓ"] = "Ύ",
["ϔ"] = "Ϋ",
["Ϡ"] = "Ͳ",
 
["ϐ"] = "β",
["ϵ"] = "ε",
["ϑ"] = "θ",
["ϰ"] = "κ",
["ϗ"] = "καί",
["ϗ̀"] = "καὶ",
["ϖ"] = "π",
["ϟ"] = "ϙ",
["ϱ"] = "ρ",
["ς"] = "σ",
["ϲ"] = "σ",
["ϕ"] = "φ",
["ϡ"] = "ͳ",
}
 
data["consonants"] = "ΒβΓγΔδϜϝͶͷϚϛΖζͰͱΘθͿϳΚκΛλΜμΝνΞξΠπϺϻϘϙϞϟΡρΣσςϹϲΤτΦφΧχΨψͲͳϠϡϷϸ"
data["consonant"] = "[" .. data.consonants .. "]"
data["consonant"] = "[" .. data.consonants .. "]"
data["vowels"] = "ΑαΕεΗηΙιΟοΥυΩω"
data["vowels"] = "ΑαΕεΗηΙιΟοΥυΩω"
data["vowel"] = "[" .. data.vowels .. "]"
data["vowel"] = "[" .. data.vowels .. "]"
data["combining_diacritics"] = table.concat{
macron, breve,
rough, smooth, diaeresis,
acute, grave, circum,
subscript
}
data["combining_diacritic"] = "[" .. data.combining_diacritics .. "]"


-- Basic letters with and without diacritics
-- Basic letters with and without diacritics
local letters_with_diacritics = 'ΆΈ-ώϜϝἀ-ᾼῂ-ῌῐ-' ..
data.word_characters = "%*'’" .. require("Module:scripts").getByCode("Polyt"):getCharacters() .. diacritics_combining .. undertie
-- capital iota with oxia, normalized to capital iota with tonos if entered
-- literally in a string
U(0x1FDB) ..
'Ὶῠ-Ῥῲ-ῼ'
data.word_characters = letters_with_diacritics .. data.combining_diacritics ..
undertie
data.word_character = "[" .. data.word_characters .. "]"
data.word_character = "[" .. data.word_characters .. "]"


return data
return data

Latest revision as of 14:27, 7 May 2026



-- Table of data that this module builds up below and finally returns.
local data = {}

-- Local aliases for the table-library functions used when building strings.
local concat = table.concat
local insert = table.insert

-- U is called below with a code point number and its result is used as a
-- string (it is concatenated and used as a table key).
-- NOTE(review): the implementation of Module:string/char is not visible
-- here; presumably it behaves like mw.ustring.char -- confirm.
local U = require("Module:string/char")
local macron = U(0x304) -- U+0304 combining macron
local spacing_macron = U(0xAF) -- U+00AF macron (spacing)
local modifier_macron = U(0x2C9) -- U+02C9 modifier letter macron
local breve = U(0x306) -- U+0306 combining breve
local spacing_breve = U(0x2D8) -- U+02D8 breve (spacing)
local rough = U(0x314) -- U+0314 combining reversed comma above (rough breathing)
local smooth = U(0x313) -- U+0313 combining comma above (smooth breathing)
local diaeresis = U(0x308) -- U+0308 combining diaeresis
local acute = U(0x301) -- U+0301 combining acute accent
local grave = U(0x300) -- U+0300 combining grave accent
local circum = U(0x342) -- U+0342 combining Greek perispomeni
local Latin_circum = U(0x302) -- U+0302 combining circumflex accent
local coronis = U(0x343) -- U+0343 combining Greek koronis
local subscript = U(0x345) -- U+0345 combining Greek ypogegrammeni (iota subscript)
local undertie = U(0x35C) -- U+035C, actually "combining double breve below"

-- Diacritics by name: every key is the name of a diacritic and every value
-- is the character stored in the local variable of the same name.
data.diacritics = {
	macron = macron,
	spacing_macron = spacing_macron,
	modifier_macron = modifier_macron,
	breve = breve,
	spacing_breve = spacing_breve,
	rough = rough,
	smooth = smooth,
	diaeresis = diaeresis,
	acute = acute,
	grave = grave,
	circum = circum,
	Latin_circum = Latin_circum,
	coronis = coronis,
	subscript = subscript,
}

-- Build two strings from the named diacritics:
--   diacritics_all       -- every diacritic in data.diacritics
--   diacritics_combining -- the same, minus the three spacing/modifier forms
--                           (spacing_macron, modifier_macron, spacing_breve)
-- pairs() visits keys in an unspecified order, so the names are collected
-- and sorted first; this makes the character order of both strings the same
-- on every load instead of depending on the table's internal layout.
local diacritic_names = {}
for name in pairs(data.diacritics) do
	diacritic_names[#diacritic_names + 1] = name
end
table.sort(diacritic_names)

local diacritics_all, diacritics_combining = {}, {}
for _, name in ipairs(diacritic_names) do
	local diacritic = data.diacritics[name]
	insert(diacritics_all, diacritic)
	if not (diacritic == spacing_macron or diacritic == modifier_macron or diacritic == spacing_breve) then
		insert(diacritics_combining, diacritic)
	end
end

-- Replace the temporary lists by the joined strings and also expose them as
-- the "all" and "combining" fields of data.diacritics. These fields are added
-- only after the traversal above, so they are not themselves included.
diacritics_all = concat(diacritics_all)
data.diacritics.all = diacritics_all
diacritics_combining = concat(diacritics_combining)
data.diacritics.combining = diacritics_combining

-- "named" is a second name for the very same diacritics table (not a copy).
data.named = data.diacritics

-- Bracketed versions of the two diacritic strings, i.e. the strings wrapped
-- in "[" and "]" so they can serve as pattern character classes.
-- "all" holds the same string as "diacritic".
data.diacritic = "[" .. diacritics_all .. "]"
data.all = data.diacritic
data.combining_diacritic = "[" .. diacritics_combining .. "]"

-- Diacritic groups, indexed 1 to 4. The first three are bracketed character
-- classes; the fourth is the bare subscript character. The same table is
-- reachable as both data.diacritic_groups and data.groups, and its "accents"
-- field repeats group 3 (acute, grave, circum).
do
	local groups = {
		"[" .. macron .. breve .. "]",
		"[" .. diaeresis .. smooth .. rough .. "]",
		"[" .. acute .. grave .. circum .. "]",
		subscript,
	}
	groups.accents = groups[3]

	data.diacritic_groups = groups
	data.groups = groups
end

-- Pattern fragments for the length marks (macron and breve).
--   optional:  macron followed by "?", then breve followed by "?".
--   mandatory: the optional fragment preceded by a frontier pattern "%f[...]"
--              whose set contains macron and breve.
do
	local optional_length = macron .. "?" .. breve .. "?"
	data.length = {
		optional = optional_length,
		mandatory = "%f[" .. macron .. breve .. "]" .. optional_length,
	}
end

-- Maps each combining diacritic to a rank number from 1 to 5. Diacritics
-- listed in the same inner table share a rank; the rank is the position of
-- that inner table in the list.
do
	local ranked_groups = {
		{ macron },
		{ breve },
		{ rough, smooth, diaeresis },
		{ acute, grave, circum },
		{ subscript },
	}

	local order = {}
	for rank, members in ipairs(ranked_groups) do
		for _, diacritic in ipairs(members) do
			order[diacritic] = rank
		end
	end
	data.diacritic_order = order
end

-- Lookup table from a diacritic variant (key) to the combining diacritic
-- that this module uses in its place (value). All values are the combining
-- characters defined at the top of the file.
data["diacritical_conversions"] = {
	-- Convert spacing to combining diacritics
	[spacing_macron] = macron, -- spacing macron
	[modifier_macron] = macron, -- modifier letter macron
	[spacing_breve] = breve, -- spacing breve
	["῾"] = rough, -- rough breathing, modifier letter reversed comma
	["ʽ"] = rough,
	-- smooth breathing, modifier letter apostrophe
	-- NOTE(review): this comment used to list "coronis, combining coronis" as
	-- well, but no key using the `coronis` local appears in this table --
	-- confirm whether that mapping is intentionally absent.
	["᾿"] = smooth,
	["ʼ"] = smooth,
	["´"] = acute, -- acute
	["`"] = grave, -- grave
	["῀"] = circum, -- Greek circumflex (perispomeni), circumflex, combining circumflex
	["ˆ"] = circum,
	[Latin_circum] = circum, -- the combining (Latin) circumflex is replaced by the Greek one
	["¨"] = diaeresis, -- diaeresis
}

-- Canonical letter forms
-- Lookup table from a variant or symbol form of a letter (key) to the string
-- treated as its canonical form (value). Most entries map one character to
-- one character; the kai-symbol entries map to a whole word, and the keys
-- written with a following combining mark map to the accented spelling.
-- Capital forms come first, lowercase forms after the blank line.
data["canonical"] = {
	["ϴ"] = "Θ",
	["Ϗ"] = "Καί",
	["Ϗ̀"] = "Καὶ",
	["Ϟ"] = "Ϙ",
	["Ϲ"] = "Σ",
	["ϒ"] = "Υ",
	["ϓ"] = "Ύ",
	["ϔ"] = "Ϋ",
	["Ϡ"] = "Ͳ",

	["ϐ"] = "β",
	["ϵ"] = "ε",
	["ϑ"] = "θ",
	["ϰ"] = "κ",
	["ϗ"] = "καί",
	["ϗ̀"] = "καὶ",
	["ϖ"] = "π",
	["ϟ"] = "ϙ",
	["ϱ"] = "ρ",
	["ς"] = "σ", -- final sigma is mapped to the ordinary sigma
	["ϲ"] = "σ",
	["ϕ"] = "φ",
	["ϡ"] = "ͳ",
}

-- Plain strings listing the consonant and vowel letters (capital and
-- lowercase), followed by the same strings wrapped in "[" and "]".
data.consonants = "ΒβΓγΔδϜϝͶͷϚϛΖζͰͱΘθͿϳΚκΛλΜμΝνΞξΠπϺϻϘϙϞϟΡρΣσςϹϲΤτΦφΧχΨψͲͳϠϡϷϸ"
data.vowels = "ΑαΕεΗηΙιΟοΥυΩω"

data.consonant = "[" .. data.consonants .. "]"
data.vowel = "[" .. data.vowels .. "]"

-- Characters that may occur in a word, as one string made of four parts:
--   1. "%*'’"  -- an escaped asterisk and two apostrophe characters
--   2. whatever getCharacters() returns for the script with code "Polyt"
--      (NOTE(review): the format of that string is defined in Module:scripts,
--      which is not visible here)
--   3. the combining diacritics collected earlier in this file
--   4. the undertie character
-- data.word_character is the same string wrapped in "[" and "]".
local polytonic_script = require("Module:scripts").getByCode("Polyt")
local polytonic_characters = polytonic_script:getCharacters()

data.word_characters = "%*'’" .. polytonic_characters .. diacritics_combining .. undertie
data.word_character = "[" .. data.word_characters .. "]"

-- Hand the finished data table to whoever loads this module.
return data