Module:Mand-translit

From Linguifex
Jump to navigation Jump to search

This module will transliterate text in the Mandaic script. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:Mand-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and in the language specified by the code lang.
When the transliteration fails, returns nil.

-- Author: Saam-andar

local export = {}

local m_str_utils = require("Module:string utilities")

local gcodepoint = m_str_utils.gcodepoint
local rfind = m_str_utils.find
local rsubn = m_str_utils.gsub
local rmatch = m_str_utils.match
local rsplit = m_str_utils.split
local U = m_str_utils.char
local unpack = unpack or table.unpack -- Lua 5.2 compatibility

-- Single-result wrapper around rsubn(): performs the same substitution but
-- hands back only the resulting string, dropping any further return values.
local function rsub(term, foo, bar)
	-- The extra parentheses truncate the call to exactly one value.
	return (rsubn(term, foo, bar))
end

-- Script constants for the Mandaic Unicode block (U+0840–U+085F).
-- U is m_str_utils.char; it is called here with a numeric codepoint
-- (presumably returning that character as a string — confirm in
-- Module:string utilities).
local zwnj = U(0x200C) -- zero-width non-joiner; transliterated as "-" by the mapping table
-- Letters after which export.tr rewrites the gemination and affrication marks.
local consonants = "ࡁࡂࡃࡄࡆࡇࡈࡊࡋࡌࡍࡎࡐࡑࡒࡓࡔࡕࡖࡗࡘ"
-- Character class built from the consonant list.
-- NOTE(review): not referenced elsewhere in the visible code.
local consonant_pattern = "[" .. consonants .. "]"
-- Letters after which export.tr rewrites the vocalization mark.
local vowels1 = "ࡀࡅࡉ"
-- List of Mandaic letters.
-- NOTE(review): not referenced elsewhere in the visible code.
local letters = "ࡐࡇࡏࡅࡈࡕࡓࡖࡔࡒࡋࡊࡉࡄࡂࡗࡃࡎࡀࡌࡍࡁࡘࡑࡆ"
-- Combining marks of the Mandaic block.
local gemination_mark = U(0x085B) -- ◌࡛ MANDAIC GEMINATION MARK
local affriction_mark = U(0x0859) -- ◌࡙ MANDAIC AFFRICATION MARK
local vocalization_mark = U(0x085A) -- ◌࡚ MANDAIC VOCALIZATION MARK

-- Character-to-Latin mapping table.
-- export.tr looks up each character of the input here; a character with no
-- entry is left unchanged in the output.
local tt = {
	-- consonants
	["ࡁ"] = "b",  -- MANDAIC LETTER AB (beth)
	["ࡂ"] = "g",  -- MANDAIC LETTER AG (gimel)
	["ࡃ"] = "d",  -- MANDAIC LETTER AD (daleth)
	["ࡄ"] = "h",  -- MANDAIC LETTER AH (he)
	["ࡆ"] = "z",  -- MANDAIC LETTER AZ (zayin)
	["ࡈ"] = "ṭ",  -- MANDAIC LETTER ATT (teth)
	["ࡊ"] = "k",  -- MANDAIC LETTER AK (kaph)
	["ࡋ"] = "l",  -- MANDAIC LETTER AL (lamedh)
	["ࡌ"] = "m",  -- MANDAIC LETTER AM (mem)
	["ࡍ"] = "n",  -- MANDAIC LETTER AN (nun)
	["ࡎ"] = "s",  -- MANDAIC LETTER AS (semkath)
	["ࡐ"] = "p",  -- MANDAIC LETTER AP (pe)
	["ࡑ"] = "ṣ",  -- MANDAIC LETTER ASZ (sadhe)
	["ࡒ"] = "q",  -- MANDAIC LETTER AQ (qoph)
	["ࡓ"] = "r",  -- MANDAIC LETTER AR (resh)
	["ࡔ"] = "š",  -- MANDAIC LETTER ASH (shin)
	["ࡕ"] = "t",  -- MANDAIC LETTER AT (taw)
	["ࡖ"] = "ḏ-", -- MANDAIC LETTER DUSHENNA (output includes a trailing hyphen)
	["ࡗ"] = "kḏ", -- MANDAIC LETTER KAD (single letter, two-character output)
	["ࡘ"] = "ʕ", -- MANDAIC LETTER AIN
	
	-- Vowels
	["ࡀ"] = "a",  -- MANDAIC LETTER HALQA (aleph)
	["ࡅ"] = "u",  -- MANDAIC LETTER USHENNA (waw)
	["ࡉ"] = "i",  -- MANDAIC LETTER AKSA (yodh)
	["ࡏ"] = "ʿ",  -- MANDAIC LETTER IN (ayin)
	["ࡇ"] = "ẖ",  -- MANDAIC LETTER IT (heth)
	
	-- Punctuation
    ["࡞"] = ".",  -- MANDAIC PUNCTUATION
	["ـ"] = "-", -- tatweel/kashida
	["،"] = ",", -- Arabic comma
	["؛"] = ";", -- Arabic semicolon
	["؟"] = "?", -- Arabic question mark
	["«"] = '"', -- left guillemet
	["»"] = '"', -- right guillemet
	
	-- The zero-width non-joiner is rendered as a visible hyphen.
	[zwnj] = "-",
}

-- Main function
-- Transliterates Mandaic-script text into Latin characters.
--
-- May be called with plain arguments, or with a single table whose `args`
-- field holds the positional values (presumably a Scribunto frame — confirm
-- against callers). In the table form, empty-string arguments are treated as
-- absent.
--
-- Parameters:
--   text    - string to transliterate, or a table with an `args` field
--   lang    - language code (not consulted by this function)
--   sc      - script code; any value other than nil or "Mand" yields nil
--   options - optional table (not consulted by this function)
-- Returns:
--   the transliterated string, or nil when there is no text to process or
--   the script is not Mandaic.
function export.tr(text, lang, sc, options)
	if type(text) == "table" then
		-- Hold on to the argument table: `text` is overwritten with args[1]
		-- below, so reading `text.args` afterwards would index a string (or
		-- nil) and raise an error.
		local args = text.args
		local function f(x) return (x ~= "") and x or nil end
		text, lang, sc = f(args[1]), f(args[2]), f(args[3])
		options = args[4] and {} or nil
	end

	-- Checked after unpacking so that a table call with an empty or missing
	-- first argument is rejected as well.
	if not text or text == "" then
		return nil
	end

	-- Only process if script is Mandaic
	if sc and sc ~= "Mand" then
		return nil
	end

	-- Rewrite each Mandaic combining mark into a Latin combining diacritic.
	-- The mark stays right after its base letter, so it ends up attached to
	-- that letter's Latin equivalent once the letters are mapped below. A mark
	-- that does not follow one of the listed letters is left untouched.
	local preprocessing = {
		-- gemination -> U+0323 COMBINING DOT BELOW
		{"(" .. consonant_pattern .. ")" .. gemination_mark, "%1" .. U(0x0323)},

		-- affrication -> U+0324 COMBINING DIAERESIS BELOW
		{"(" .. consonant_pattern .. ")" .. affriction_mark, "%1" .. U(0x0324)},

		-- vocalization -> U+0331 COMBINING MACRON BELOW
		{"([" .. vowels1 .. "])" .. vocalization_mark, "%1" .. U(0x0331)},
	}

	-- Apply preprocessing
	for _, sub in ipairs(preprocessing) do
		text = rsub(text, sub[1], sub[2])
	end

	-- Map every character through the table; unmapped characters pass through.
	-- (Assumes m_str_utils.gsub matches "." per Unicode character rather than
	-- per byte — confirm in Module:string utilities.)
	return rsub(text, ".", function(char)
		return tt[char] or char
	end)
end

return export