Module:zle-ort-Arab-translit

Revision as of 15:02, 8 March 2026 by wikt>AshFox (Undid revision 89786228 by AshFox (talk))
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

Documentation for this module may be created at Module:zle-ort-Arab-translit/doc

local export = {}

-- Shorthands for the mw.ustring functions used throughout this module.
local ustring = mw.ustring
local gsub, match, sub = ustring.gsub, ustring.match, ustring.sub
local U = ustring.char

-- Arabic diacritics, built from their code points.
local fatha, damma, kasra = U(0x64E), U(0x64F), U(0x650)
local shadda, sukun = U(0x651), U(0x652)
local sup_alif, sub_alif = U(0x670), U(0x656)
-- Every vowel mark in one string, ready to be dropped into a pattern set.
local vowels = table.concat({fatha, sup_alif, kasra, sub_alif, damma})

-- Arabic and Arabic-Extended A, written as two ranges for use inside a
-- pattern set.
local range = U(0x600).."-"..U(0x6FF)..U(0x8A0).."-"..U(0x8FF)

-- Latin diacritics
local dot_above = U(0x0307)

-- Single-character transliterations, keyed by the Arabic-script character.
-- An empty string means the character produces no output of its own.
local replacements = {
	-- vowels
	["آ"] = "ã",
	[sup_alif] = "ȧ",
	[sub_alif] = "i",
	[fatha] = "e",
	[kasra] = "i",
	[damma] = "u",
	[sukun] = "",
	-- consonants
	["ب"] = "b",
	["و"] = "v",
	["ه"] = "h",
	["غ"] = "g",
	["د"] = "d",
	["ج"] = "dž",
	["ࢮ"] = "dzʹ",
	["ژ"] = "ž",
	["ض"] = "z",
	["ظ"] = "ẓ",
	["ز"] = "zʹ",
	["ذ"] = "ẓʹ",
	["ي"] = "j",
	["ى"] = "j",
	["ق"] = "k",
	["ك"] = "kʹ",
	["ل"] = "l",
	["م"] = "m",
	["ن"] = "n",
	["پ"] = "p",
	["ر"] = "r",
	["ص"] = "s",
	["ث"] = "sʹ",
	["س"] = "ṣʹ",
	["ڛ"] = "s̱ʹ",
	["ط"] = "t",
	["ت"] = "tʹ",
	["ف"] = "f",
	["ح"] = "x",
	["خ"] = "x̣",
	["ࢯ"] = "c",
	["چ"] = "č",
	["ش"] = "š",
	["ا"] = "",
	["ع"] = "ʽ",
	["ء"] = "ʼ",
	["ـ"] = "",
	["ڬ"] = "g̣",
	["ڭ"] = "nʹ",
	["ة"] = "ṭʹ",
}

-- Two-character sequences (vowel mark + following letter) that are
-- transliterated as a single long vowel.
local long_vowels = {
	[fatha.."ا"] = "a",
	[fatha.."و"] = "o",
	[damma.."و"] = "ū",
}
-- The letters ي and ى take identical values after each vowel mark, so both
-- sets of keys are filled in by the same loop.
for _, letter in ipairs({"ي", "ى"}) do
	long_vowels[fatha..letter] = "ā"
	long_vowels[kasra..letter] = "ī"
	long_vowels[sup_alif..letter] = "ā"..dot_above
	long_vowels[sub_alif..letter] = "ī"..dot_above
end

--- Transliterates Arabic-script text into Latin script.
-- @param text string: the text to transliterate.
-- @param lang not used by this function.
-- @param sc not used by this function.
-- @return string: the transliterated text.
function export.tr(text, lang, sc)
	-- Reorder the shadda before other harakat, so that it sits directly
	-- after the letter it doubles.
	text = gsub(text, "(["..vowels.."])"..shadda, shadda.."%1")
	-- The only three-character replacement, damma + waw + alif, is handled separately.
	text = gsub(text, damma.."وا", damma.."و"..dot_above)
	-- Convert long vowels.
	-- Because of the reordering above, a letter that carries shadda + vowel
	-- is immediately followed by the shadda, so the shadda has to count as
	-- a diacritic here; otherwise a geminated waw/ya would be read as a
	-- long vowel.
	local match_diacritic = "["..vowels..sukun..shadda.."]"
	text = gsub(text, "(["..vowels.."][اويى])()", function (from, pos)
		-- Only convert if the long vowel letter has no diacritic on top of it.
		-- `pos` indexes the pre-substitution `text`, which is what the
		-- closure still sees. Returning nil keeps the match unchanged.
		if not match(sub(text, pos, pos), match_diacritic) then
			return long_vowels[from]
		end
	end)
	-- Initial alif + fatha transcribed as [a].
	-- The frontier set also lists the Latin long-vowel output, so an alif
	-- that follows an already converted long vowel is not taken as initial.
	text = gsub(text, "%f["..range.."aoūāī"..dot_above.."]ا"..fatha, "a")

	-- A shadda doubles the complete transliteration of its letter, not just
	-- the last code point of it (which may be a modifier or combining mark).
	local function geminate(latin, mark)
		if mark ~= "" then
			return latin..latin
		end
		return latin
	end

	-- Preceding alif palatalises [l], [m] and [n].
	text = gsub(text, "ا([لمن])("..shadda.."?)", function (letter, mark)
		return geminate((replacements[letter] or letter).."ʹ", mark)
	end)
	-- Transliterate the remaining characters one by one, together with an
	-- optional following shadda. Characters without an entry are kept as is.
	text = gsub(text, "(.)("..shadda.."?)", function (char, mark)
		return geminate(replacements[char] or char, mark)
	end)
	return text
end

-- Return the module table so that export.tr is available to callers.
return export