Module:sa-utilities/translit/IAST-to-SLP1

From Linguifex
Revision as of 12:44, 21 April 2026 by Sware (talk | contribs) (1 revision imported)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

Documentation for this module may be created at Module:sa-utilities/translit/IAST-to-SLP1/doc

local export = {}

local gsub = string.gsub
local toNFC = mw.ustring.toNFC
local U = require("Module:string/char")
local ugsub = mw.ustring.gsub
local ulower = mw.ustring.lower

local acute = U(0x301) -- combining acute
local grave = U(0x300) -- combining grave

-- Per-character replacements for accented input, applied one code point at a
-- time by change_accent. An acute accent becomes a trailing "/" and a grave
-- accent a trailing "\". Precomposed accented vowels are split into the bare
-- vowel plus the mark; a free-standing combining accent (on a letter that has
-- no precomposed accented form) becomes the mark alone.
local composed_accents = {
	['á'] = 'a/', ['à'] = 'a\\',
	['í'] = 'i/', ['ì'] = 'i\\',
	['ú'] = 'u/', ['ù'] = 'u\\',
	['é'] = 'e/', ['è'] = 'e\\',
	['ó'] = 'o/', ['ò'] = 'o\\',
	-- NFC normalisation (done in export.tr) composes a diaeresis vowel plus
	-- a combining accent into one code point, which would otherwise match
	-- no key here and pass through untouched. The diaeresis vowel is kept
	-- in the output so the later "a[iu]" diphthong pass does not merge it
	-- with a preceding "a"; the final single-character pass then maps
	-- ï/ü to plain i/u.
	[U(0x1E2F)] = U(0xEF) .. '/',  -- i with diaeresis and acute
	[U(0x1D8)] = U(0xFC) .. '/',   -- u with diaeresis and acute
	[U(0x1DC)] = U(0xFC) .. '\\',  -- u with diaeresis and grave
	[acute] = '/', [grave] = '\\',
}

-- Accent mark that replaces an accented "a" when change_accent moves the
-- accent to the end of an "ai"/"au" sequence.
local normalize_accents = {
	["á"] = "/",  -- acute
	["à"] = "\\", -- grave
}

--- Rewrite accented characters as base letter plus "/" (acute) or "\" (grave).
-- @param text string to convert
-- @return the converted string
local function change_accent(text)
	-- An accent written on the "a" of "ai"/"au" is moved behind the second
	-- letter, so the two letters stay adjacent for later processing.
	local function shift_accent(accented_a, second)
		return "a" .. second .. normalize_accents[accented_a]
	end
	local shifted = ugsub(text, '([áà])([iu])', shift_accent)
	-- Every remaining accented character is looked up individually;
	-- characters without an entry are left unchanged.
	return (ugsub(shifted, '.', composed_accents))
end

--- Resolve the ambiguous letter "ḷ" into "x" or "L".
-- @param text string in which accents have already been rewritten as "/" and "\"
-- @return the string with every "ḷ" replaced
local function detect_lateral_flap(text)
	-- "ḷ" that is preceded by neither a vowel nor an accent mark and is not
	-- followed by a vowel.
	local isolated_l = "%f[aāiīuūeoṛṝḷḹ/\\]ḷ%f[^aāiīuūeoṛṝḷḹ]"

	-- The jihvamuliya is converted first, which frees "x" for use as an
	-- output symbol in the next step.
	local result = gsub(text, "x", "Z")
	-- "ḷ" at the start before a non-vowel, or between two non-vowels, is
	-- assumed to be "x".
	result = ugsub(result, isolated_l, "x")
	-- Every other "ḷ" is assumed to be "L".
	return (gsub(result, "ḷ", "L"))
end

-- Replacement tables used by export.tr, one per substitution pass, indexed
-- 1 to 4 in the order the passes run.
local tt = {
	-- [1] aspirated consonants
	{
		kh = "K", gh = "G",
		ch = "C", jh = "J",
		["ṭh"] = "W", ["ḍh"] = "Q",
		th = "T", dh = "D",
		ph = "P", bh = "B",
	},

	-- [2] diphthongs
	{
		ai = "E",
		au = "O",
	},

	-- [3] chandrabindu
	{
		["m̐"] = "~",
	},

	-- [4] single characters
	-- "ḷ" and "x" have no entry here: detect_lateral_flap replaces both
	-- before this table is used.
	{
		-- consonants
		["ṅ"] = "N",
		["ñ"] = "Y",
		["ṭ"] = "w",
		["ḍ"] = "q",
		["ṇ"] = "R",
		["ś"] = "S",
		["ṣ"] = "z",
		["ḻ"] = "L",

		-- vowels
		["ā"] = "A",
		["ī"] = "I",
		["ū"] = "U",
		["ï"] = "i",
		["ü"] = "u",
		["ṛ"] = "f",
		["ṝ"] = "F",
		["ḹ"] = "X",
		-- FIXME: "l̤" should also map to "X", but l̤ is two chars.

		-- avagraha
		["ʼ"] = "'",
		["’"] = "'",

		-- other
		["ṃ"] = "M",
		["ṁ"] = "M",
		["ḥ"] = "H",
		["ẖ"] = "Z",
		f = "V",
		["ḫ"] = "V",
	},
}

--- Transliterate IAST text to SLP1.
-- @param text the IAST string
-- @param lang accepted but not used by this function
-- @param sc accepted but not used by this function
-- @return the transliterated string
function export.tr(text, lang, sc)
	-- TODO: normalise IAST variants first, and propagate it to displayed
	-- transliteration as well.
	-- FIXME: the input is not lowercased (ulower) because doing so breaks
	-- conjugation tables, for some reason.
	local normalized = toNFC(text)

	-- Accents and the ambiguous "ḷ" are resolved before the table passes.
	local out = detect_lateral_flap(change_accent(normalized))

	-- Multi-character sequences go first, so the final pass can work on
	-- one character at a time.
	out = ugsub(out, '[kgcjṭḍtdpb]h', tt[1])
	out = gsub(out, 'a[iu]', tt[2])
	out = gsub(out, 'm̐', tt[3])
	return (ugsub(out, '.', tt[4]))
end

return export