Module:chg-translit
Jump to navigation
Jump to search
Documentation for this module may be created at Module:chg-translit/doc
-- Author: Saam-andar
local export = {}
local m_str_utils = require("Module:string utilities")
local gcodepoint = m_str_utils.gcodepoint
local rfind = m_str_utils.find
local rsubn = m_str_utils.gsub
local rmatch = m_str_utils.match
local rsplit = m_str_utils.split
local U = m_str_utils.char
local unpack = unpack or table.unpack -- Lua 5.2 compatibility
-- Single-result wrapper around rsubn(): performs the same substitution but
-- hands back only the first return value, dropping any further ones.
local function rsub(term, foo, bar)
	-- The extra parentheses truncate the call to exactly one value.
	return (rsubn(term, foo, bar))
end
-- Named characters used by this module. U is m_str_utils.char; the notes
-- below assume it turns a Unicode code point into the corresponding
-- character (TODO confirm against Module:string utilities).
-- Several of these names are not referenced in the visible part of the file
-- (e.g. alif_madda, alif_hamza_below, alif, laam, waaw, yaa, sukuun, lrm, rlm).
local zwnj = U(0x200C) -- zero-width non-joiner
local alif_madda = U(0x622) -- U+0622 alef with madda above
local alif_hamza_below = U(0x625) -- U+0625 alef with hamza below
local alif = U(0x627) -- U+0627 alef
local taa_marbuta = U(0x629) -- U+0629 teh marbuta
local laam = U(0x644) -- U+0644 lam
local waaw = U(0x648) -- U+0648 waw
local yaa = U(0x64A) -- U+064A yeh
local fatHataan = U(0x64B) -- U+064B fathatan
local Dammataan = U(0x64C) -- U+064C dammatan
local kasrataan = U(0x64D) -- U+064D kasratan
local shadda = U(0x651) -- U+0651 shadda
local sukuun = U(0x652) -- U+0652 sukun
local dagger_alif = U(0x670) -- U+0670 superscript (dagger) alef
local alif_wasl = U(0x671) -- U+0671 alef wasla
local fatha = U(0x64E) -- U+064E fatha
local kasra = U(0x650) -- U+0650 kasra
local zamma = U(0x64F) -- U+064F damma
local highhmz = U(0x654) -- U+0654 hamza above
-- Literal sukun mark; this is the variable that `diacritics` below actually
-- uses. NOTE(review): presumably the same character as `sukuun` (U+0652)
-- above, making one of the two redundant -- confirm before removing either.
local sukun = "ْ"
--local zwj = U(0x200D) -- zero-width joiner
local lrm = U(0x200E) -- left-to-right mark
local rlm = U(0x200F) -- right-to-left mark
-- Letters that count as consonants when export.tr decides whether a
-- following alif is rewritten to "ā". Used inside a "[...]" character class.
local consonants = "بپتثجچحخدذرزژسشصضطظعغفقکگلمنوؤهیئء"
-- All combining marks that export.tr removes before transliterating.
-- Used inside a "[...]" character class.
local diacritics = fatha .. kasra .. zamma .. highhmz .. sukun .. shadda .. dagger_alif .. fatHataan .. Dammataan .. kasrataan
-- Character mapping: source-script character -> Latin transliteration.
-- export.tr looks each matched character up in this table and leaves any
-- character without an entry unchanged.
local tt = {
["آ"] = "ʾā",
["ا"] = "ʾ",
["ب"] = "b",
["پ"] = "p",
["ت"] = "t",
["ث"] = "s̱",
["ج"] = "j",
["چ"] = "č",
["ح"] = "ḥ",
["خ"] = "x",
["د"] = "d",
["ذ"] = "ẕ",
["ر"] = "r",
["ز"] = "z",
["ژ"] = "ž",
["س"] = "s",
["ش"] = "š",
["ص"] = "ṣ",
["ض"] = "ż",
["ط"] = "ṭ",
["ظ"] = "ẓ",
["ع"] = "ʿ",
["غ"] = "ġ",
["ف"] = "f",
["ق"] = "q",
["ک"] = "k",
["گ"] = "g",
["ل"] = "l",
["م"] = "m",
["ن"] = "n",
["و"] = "w",
["ه"] = "h",
["ی"] = "y",
[taa_marbuta] = "t",
["ݣ"] = "ñ",
-- hamza and its carrier forms all map to the same symbol
["ء"] = "ʾ",
["ئ"] = "ʾ",
["ؤ"] = "ʾ",
["أ"] = "ʾ",
["إ"] = "ʾ",
[zwnj] = "-", -- ZWNJ (zero-width non-joiner)
-- ligatures
["ﻻ"] = "lʾ",
-- NOTE(review): this key is a multi-character sequence that includes
-- diacritics. export.tr strips diacritics first and then looks up one
-- matched "." at a time, so this entry looks unreachable from there --
-- confirm whether it is still needed.
["اللّٰه"] = "ʾllh",
-- kashida
["ـ"] = "-", -- kashida (tatweel); carries no sound, rendered as a hyphen
-- alif_wasla
[alif_wasl] = "ʾ̃",
-- numerals
["۱"] = "1",
["۲"] = "2",
["۳"] = "3",
["۴"] = "4",
["۵"] = "5",
["۶"] = "6",
["۷"] = "7",
["۸"] = "8",
["۹"] = "9",
["۰"] = "0",
-- punctuation (leave on separate lines)
["؟"] = "?", -- question mark
["،"] = ",", -- comma
["؛"] = ";", -- semicolon
["«"] = "“", -- opening quotation mark
["»"] = "”", -- closing quotation mark
["٪"] = "%", -- percent
["؉"] = "‰", -- per mille
["٫"] = ".", -- decimal separator
["٬"] = ",", -- thousands separator
}
-- Main function
-- Transliterates `text` into Latin script using the `tt` mapping.
--
-- Parameters:
--   text    - the string to transliterate, or a frame-like table whose
--             `args[1]`..`args[3]` supply text, lang and sc (template
--             invocation); empty-string args are treated as absent.
--   lang    - language code; accepted for interface compatibility, not used.
--   sc      - script code; anything other than nil or "Arab" yields nil.
--   options - accepted for interface compatibility, not used here; on the
--             frame path it is set to {} when `args[4]` is present.
--
-- Returns the transliterated string, or nil when there is no text or the
-- script is not "Arab".
function export.tr(text, lang, sc, options)
	if type(text) == "table" then
		-- Keep a handle on the argument table: `text` is about to be
		-- overwritten with args[1], after which `text.args` would no longer
		-- be the argument table (the original code indexed it at that point
		-- and raised an error on every frame invocation).
		local args = text.args
		local function f(x) return (x ~= "") and x or nil end
		options = args[4] and {} or nil
		text, lang, sc = f(args[1]), f(args[2]), f(args[3])
	end
	-- Checked after unpacking so that an empty or missing first template
	-- argument returns nil instead of reaching the substitutions as nil.
	if not text or text == "" then
		return nil
	end
	-- Only process if script is Arabic
	if sc and sc ~= "Arab" then
		return nil
	end
	-- Strip diacritics (short vowels and other marks are therefore not
	-- represented in the output)
	text = rsub(text, "[" .. diacritics .. "]", "")
	-- An alif directly after a run of consonants is rewritten to "ā"; any
	-- other alif falls through to the table lookup below.
	text = rsub(text, "([" .. consonants .. "]+)ا", "%1ā")
	-- Map every remaining character through tt; characters without an
	-- entry are kept as-is. (Assumes "." matches one whole character in
	-- m_str_utils.gsub -- TODO confirm.)
	text = rsub(text, ".", function(char)
		return tt[char] or char
	end)
	return text
end
return export