-- Module:ur-Arab-Deva
-- Documentation for this module may be created at Module:ur-Arab-Deva/doc
-- Helper loaded from Module:string/char; called below as U(0x...) with a
-- Unicode code point (presumably returns the corresponding character — confirm
-- against that module).
local U = require("Module:string/char")
-- Short alias for the Scribunto ustring gsub used by most rules in export.tr.
local gsub = mw.ustring.gsub
-- Table returned at the end of the module; export.tr is attached to it below.
local export = {}
-- Named characters built from their code points via U. The trailing comments
-- give the Unicode name of each code point.
-- Arabic-script vowel marks.
local fatHatan = U(0x64B) -- U+064B ARABIC FATHATAN (not referenced in the visible code)
local zabar = U(0x64E) -- U+064E ARABIC FATHA
local zer = U(0x650) -- U+0650 ARABIC KASRA
local pesh = U(0x64F) -- U+064F ARABIC DAMMA
-- Devanagari signs and letters. For each vowel, the plain name is the
-- dependent vowel sign and the name ending in "2" is the independent letter.
local halant = U(0x94D) -- U+094D DEVANAGARI SIGN VIRAMA
local hri = U(0x93F) -- U+093F DEVANAGARI VOWEL SIGN I
local hri2 = U(0x907) -- U+0907 DEVANAGARI LETTER I
local diri = U(0x940) -- U+0940 DEVANAGARI VOWEL SIGN II
local diri2 = U(0x908) -- U+0908 DEVANAGARI LETTER II
local hru = U(0x941) -- U+0941 DEVANAGARI VOWEL SIGN U
local hru2 = U(0x909) -- U+0909 DEVANAGARI LETTER U
local diru = U(0x942) -- U+0942 DEVANAGARI VOWEL SIGN UU
local diru2 = U(0x90A) -- U+090A DEVANAGARI LETTER UU
local E = U(0x947) -- U+0947 DEVANAGARI VOWEL SIGN E
local E2 = U(0x90F) -- U+090F DEVANAGARI LETTER E
local AI = U(0x948) -- U+0948 DEVANAGARI VOWEL SIGN AI
local AI2 = U(0x910) -- U+0910 DEVANAGARI LETTER AI
local O = U(0x94B) -- U+094B DEVANAGARI VOWEL SIGN O
local O2 = U(0x913) -- U+0913 DEVANAGARI LETTER O
local AU = U(0x94C) -- U+094C DEVANAGARI VOWEL SIGN AU
local AU2 = U(0x914) -- U+0914 DEVANAGARI LETTER AU
local A = U(0x905) -- U+0905 DEVANAGARI LETTER A (not referenced in the visible code)
local LA = U(0x93E) -- U+093E DEVANAGARI VOWEL SIGN AA
-- Remaining Arabic-script marks.
local ret = U(0x615) -- U+0615 ARABIC SMALL HIGH TAH (not referenced in the visible code)
local tashdid = U(0x651) -- U+0651 ARABIC SHADDA
-- Single characters referenced by name in the rules of export.tr.
local jazm = "ْ"
local he = "ہ"
-- Character sets. Each string is spliced between "[" and "]" to build a
-- pattern character class in export.tr, so only set membership matters
-- (repeated letters are harmless).
-- `consonants` is the general-purpose set; the consonantS* variants add or
-- drop a few letters and are each used only by specific rules.
local consonants = "ببپتثجچحخدذرزژسشصضطعظغفقکگلࣇمنݨوہھٹڈڑںشؕ"
local consonantS = "ببپتثجچحخدذرزژسشصضطظغفقکگلࣇمنݨہٹڈڑںشؕ"
local consonantS2 = "ببپتثجچحخدذرزژسشصضطعظغفقکگلࣇمنݨیہٹڈڑشؕ"
local consonantS3 = "یببپتثجچحخدذرزژسشصضطعظغفقکگلࣇمنݨوہھٹڈڑشؕں"
local consonantS4 = "ببپتثجچحخدذرزژسشصضطعظغفقکگلࣇمنݨوھٹڈڑںشؕ"
-- Letters treated as vowel carriers by the he-rules in export.tr.
local vowels = "ایئےۓوؤ"
-- Combining marks. `diacritics` is not referenced in the visible code.
local diacritics = "َُِّْ"
-- Short-vowel marks only (presumably zabar, zer and pesh, going by the name —
-- confirm); used wherever a rule must capture "any short vowel".
local ZZP = "َُِ"
-- Per-character fallback table. export.tr passes it as the replacement
-- argument of a gsub over the pattern '.', so every character still present at
-- that stage is looked up here; characters without an entry are left unchanged.
-- An empty-string value removes the character from the output.
local mapping = {
["آ"] = 'आ',
["ب"] = 'ब', ["پ"] = 'प', ["ت"] = 'त', ["ٹ"] = 'ट', ["ث"] = 'स',
["ج"] = 'ज', ["چ"] = 'च', ["ح"] = 'ह', ["خ"] = 'ख़',
["د"] = 'द', ["ڈ"] = 'ड', ["ذ"] = 'ज़', ["ر"] = 'र', ["ڑ"] = "ड़",
["ز"] = 'ज़',
["ژ"] = 'श़',
["س"] = 'स', ["ش"] = 'श', ["ݨ"] = 'ण', ["ص"] = 'स', ["ض"] = 'ज़',
["ط"] = 'त', ["ظ"] = 'ज़', ["غ"] = 'ग़', ["ف"] = 'फ़', ["ق"] = 'क़',
["ک"] = 'क', ["ك"] = 'क', ["گ"] = 'ग', ["ࣇ"] = 'ळ',
["ل"] = 'ल', ["م"] = 'म', ["ن"] = 'न', ["و"] = 'व', ["ہ"] = 'ह', ["ی"] = 'य',
["۔"] = "।",
["ں"] = 'ं',
["ع"] = 'अ',
-- hamza forms that produce no output of their own
["ء"] = '',
["ئ"] = '',
["ؤ"] = 'ओ',
["أ"] = '',
-- diacritics
-- zabar becomes a temporary marker character; the clean-up rules at the end of
-- export.tr combine it with neighbouring vowel signs and then strip it.
[zabar] = "॑",
[zer] = "" .. hri .. "",
[pesh] = "" .. hru .. "",
[jazm] = "" .. halant .. "", -- also sukun - no vowel
[U(0x200C)] = "-", -- ZWNJ (zero-width non-joiner)
-- ligatures
["ﻻ"] = "ला",
["ﷲ"] = "अल्लाह",
-- kashida
["ـ"] = "-", -- kashida, no sound
-- numerals
-- (two rows of Arabic-Indic digits, then two rows of Extended Arabic-Indic digits)
["١"] = "१", ["٢"] = "२", ["٣"] = "३", ["٤"] = "४", ["٥"] = "५",
["٦"] = "६", ["٧"] = "७", ["٨"] = "८", ["٩"] = "९", ["٠"] = "०",
["۱"] = "१", ["۲"] = "२", ["۳"] = "३", ["۴"] = "४", ["۵"] = "५",
["۶"] = "६", ["۷"] = "७", ["۸"] = "८", ["۹"] = "९", ["۰"] = "०",
-- punctuation (leave on separate lines)
["؟"] = "?", -- question mark
["،"] = ",", -- comma
["؛"] = ";", -- semicolon
["«"] = '“', -- quotation mark
["»"] = '”', -- quotation mark
["٪"] = "%", -- percent
["؉"] = "‰", -- per mille
["٫"] = ".", -- decimals
["٬"] = ",", -- thousand
-- export.tr handles these two letters itself when they are followed by a
-- space; the entries below cover the remaining occurrences.
["ۓ"] = "-ये",
["ۂ"] = "-ए" -- he ye (in ezâfe)
}
-- Individual Arabic-script letters and marks referenced by name in the rules of
-- export.tr. Several of them are also members of the character-set strings
-- defined earlier in the file.
local alif = 'ا'
local kzabar = 'ٰ'
local ain = 'ع'
-- ye and its variants
local ye = 'ی'
local ye2 = 'ئ'
local ye3 = "ے"
local ye4 = "ۓ" -- only mentioned in a commented-out rule in the visible code
local vao = "و"
local he2 = "ۂ"
-- do-chashmi he; the aspirate rules in export.tr fold it into the preceding consonant
local aspirate = 'ھ'
-- mark combined with noon in the nasalisation rules of export.tr
local gunDia = '٘'
local noon = 'ن'
--- Transliterates Urdu text written in the Arabic script into Devanagari.
--
-- The conversion runs in three stages:
--   1. an ordered list of context-sensitive substitutions on the Arabic-script
--      text (vowel carriers, ain, tashdid, aspirates, final he, ...);
--   2. a per-character lookup of everything that is left in `mapping`;
--   3. clean-up substitutions on the resulting Devanagari sequences.
-- Every rule operates on the output of the rules before it, so the order of
-- the statements is significant and must not be changed casually.
--
-- @param text   string to transliterate
-- @param script accepted but not used by this function
-- @return the transliterated string
function export.tr(text, script)
	-- whole-word and other early special cases
	text = gsub(text, 'وہ', "वह")
	text = gsub(text, alif .. noon .. gunDia .. "", "ाँ")
	text = gsub(text, '([' .. consonants .. '])' .. ye .. "ں", "%1ें")
	text = gsub(text, '([' .. consonants .. '])' .. zer .. " ", "%1-ए-")
	text = gsub(text, '([' .. consonants .. '])' .. ye .. '([' .. consonants .. '])', "%1" .. E .. "%2")
	text = gsub(text, "ؤ" .. pesh, "ऊ")
	text = gsub(text, alif .. ye2 .. '([' .. zabar .. ']?)' .. '([' .. consonants .. '])', "ाय%2")
	-- sheen carrying the small high mark is written as ष
	text = gsub(text, "ش([" .. ZZP .. "])(ؕ)", "ष%1")
	text = gsub(text, "شِؕی", "षी")
	text = gsub(text, "شُؕو", "षू")
	text = gsub(text, "شؕی", "षे")
	text = gsub(text, "شَؕی", "षै")
	text = gsub(text, "شؕو", "षो")
	text = gsub(text, "شَؕو", "षौ")
	-- medial/final consonants
	text = gsub(text, zabar .. he .. alif, "हा")
	text = gsub(text, zer .. he .. alif , "िहा")
	text = gsub(text, zabar .. he .. '([' .. consonants .. vowels .. '])', "ह%1")
	text = gsub(text, zabar .. he .. zer .. ye, "ही")
	text = gsub(text, zabar .. he .. alif, "हा")
	text = gsub(text, zabar .. he .. '([' .. consonants .. vowels .. '])', "ह%1")
	text = gsub(text, '([' .. consonants .. '])' .. alif, "%1" .. LA .. "")
	text = gsub(text, '([' .. consonants .. '])' .. kzabar, "%1" .. LA .. "")
	text = gsub(text, '([' .. vowels .. '])' .. kzabar, "" .. LA .. "")
	text = gsub(text, '([' .. consonants .. '])' .. tashdid .. alif, "%1" .. halant .. "%1" .. LA .. "")
	-- tanween diacritic
	text = gsub(text, '([' .. consonants .. '])' .. 'ً' .. alif, "%1न")
	text = gsub(text, alif .. 'ً', "न")
	text = gsub(text, '([' .. consonants .. '])' .. 'ً', "%1न")
	-- vao / ye after a consonant, and alif-initial vowels
	text = gsub(text, '([' .. consonants .. '])' .. vao, "%1" .. O .. "")
	text = gsub(text, '([' .. consonants .. '])' .. ye, "%1" .. diri .. "")
	text = gsub(text, '([' .. consonants .. '])' .. ye3, "%1" .. E .. "")
	text = gsub(text, '([' .. consonants .. '])' .. tashdid .. vao, "%1" .. halant .. "%1वो")
	text = gsub(text, alif .. '([' .. consonants .. '])', "अ%1")
	text = gsub(text, alif .. zabar .. '([' .. consonantS .. '])', "अ%1")
	text = gsub(text, alif .. zer .. '([' .. consonants .. '])', "" .. hri2 .. "%1")
	text = gsub(text, alif .. pesh .. '([' .. consonantS .. '])', "" .. hru2 .. "%1")
	text = gsub(text, alif .. zabar .. vao .. '([' .. consonants .. '])', "" .. AU2 .. "%1")
	text = gsub(text, alif .. zabar .. ye .. '([' .. consonants .. '])', "" .. AI2 .. "%1")
	text = gsub(text, alif .. ye .. '([' .. consonants .. '])', "" .. E2 .. "%1")
	text = gsub(text, alif .. vao .. '([' .. consonants .. '])', "" .. O2 .. "%1")
	text = gsub(text, zabar .. ye, "" .. AI .. "")
	text = gsub(text, alif .. zer .. ye .. '([' .. consonants .. '])', "" .. diri2 .. "%1")
	text = gsub(text, alif .. pesh .. vao .. '([' .. consonantS .. '])', "" .. diru2 .. "%1")
	-- ‘ain
	text = gsub(text, '([' .. consonants .. '])' .. zabar .. ain .. zabar, "%1ा")
	text = gsub(text, '([' .. consonants .. '])' .. ain .. zabar .. he, "%1" .. LA .. "")
	text = gsub(text, ain .. alif .. ain, "आ")
	text = gsub(text, alif .. ain .. '([' .. consonants .. '])', "" .. AI2 .. "%1")
	text = gsub(text, '([' .. consonants .. '])' .. ain .. he, "%1अ")
	text = gsub(text, '([' .. consonants .. '])' .. '([' .. zer .. pesh .. ']?)' .. ain, "%1%2")
	text = gsub(text, ain .. zabar .. vao .. '([' .. consonants .. '])', "औ%1")
	text = gsub(text, ain .. zabar .. ye .. '([' .. consonants .. '])', "ऐ%1")
	text = gsub(text, ain .. zer .. '([' .. consonants .. '])', "इ%1")
	text = gsub(text, ain .. pesh .. '([' .. consonants .. '])', "उ%1")
	text = gsub(text, ain .. zer .. ye .. '([' .. consonants .. '])', "ई%1")
	text = gsub(text, ain .. pesh .. vao .. '([' .. consonantS .. '])', "ऊ%1")
	text = gsub(text, ain .. jazm, "" .. LA .. "")
	-- Tashdeed
	-- a letter carrying tashdid is written twice, joined by a halant
	text = gsub(text, '([' .. consonantS3 .. '])' .. tashdid, "%1" .. halant .. "%1")
	text = gsub(text, '([' .. consonantS3 .. '])' .. tashdid .. '([' .. ZZP .. '])', "%1" .. halant .. "%1%2")
	text = gsub(text, '([' .. ZZP .. '])' .. ye .. '([' .. ZZP .. '])' .. tashdid, "%1य्य%2")
	text = gsub(text, '([' .. ZZP .. '])' .. vao .. '([' .. ZZP .. '])' .. tashdid, "%1व्व%2")
	-- For some reason the tashdeed gets pushed after the other diacritics, so this line is necessary for tashdeed to work with other diacritics
	text = gsub(text, '([' .. consonants .. '])' .. '([' .. ZZP .. '])' .. tashdid, "%1" .. halant .. "%1%2")
	text = gsub(text, '([' .. consonants .. '])' .. ye .. '([' .. consonants .. '])', "%1" .. E .. "%2")
	text = gsub(text, ye2 .. ye, "ई")
	text = gsub(text, ye2 .. 'ے', "ए")
	text = gsub(text,'([' .. consonants .. '])' .. ye .. ye3, "%1" .. diri .. "ए")
	text = gsub(text, alif .. ye2 .. '([' .. zabar .. ']?)' .. '([' .. consonants .. '])', "ाय%2")
	-- do-chashme-he zabar, zer, pesh
	-- (the short vowel written between the consonant and the aspirate mark is
	-- kept and re-emitted after the aspirated Devanagari letter)
	text = gsub(text, "(ک)" .. '([' .. jazm .. ']?)' .. "([" .. ZZP .. "])" .. aspirate, "ख%3")
	text = gsub(text, "(گ)" .. "([" .. ZZP .. "])" .. aspirate, "घ%2")
	text = gsub(text, "(چ)" .. "([" .. ZZP .. "])" .. aspirate, "छ%2")
	text = gsub(text, "(ج)" .. "([" .. ZZP .. "])" .. aspirate, "झ%2")
	text = gsub(text, "(ٹ)" .. "([" .. ZZP .. "])" .. aspirate, "ठ%2")
	text = gsub(text, "(ڈ)" .. "([" .. ZZP .. "])" .. aspirate, "ढ%2")
	text = gsub(text, "(ت)" .. "([" .. ZZP .. "])" .. aspirate, "थ%2")
	text = gsub(text, "(د)" .. "([" .. ZZP .. "])" .. aspirate, "ध%2")
	text = gsub(text, "(پ)" .. "([" .. ZZP .. "])" .. aspirate, "फ%2")
	text = gsub(text, "(ب)" .. "([" .. ZZP .. "])" .. aspirate, "भ%2")
	text = gsub(text, "(ڑ)" .. "([" .. ZZP .. "])" .. aspirate, "ढ़%2")
	text = gsub(text, "(م)" .. "([" .. ZZP .. "])" .. aspirate, "म्ह%2")
	text = gsub(text, "(ن)" .. "([" .. ZZP .. "])" .. aspirate, "न्ह%2")
	text = gsub(text, "(ل)" .. "([" .. ZZP .. "])" .. aspirate, "ल्ह%2")
	-- e, instead of i
	text = gsub(text, jazm .. '([' .. consonants .. '])' .. zer .. '([' .. consonantS4 .. '])', "्%1" .. E .. "%2")
	-- diacritics
	text = gsub(text, "([" .. consonants .. "])" .. zabar .. vao, "%1" .. AU .. "")
	text = gsub(text, "([" .. consonants .. "])" .. zabar .. ye, "%1" .. AI .. "")
	text = gsub(text, "([" .. consonants .. "])" .. zabar .. ye3, "%1" .. AI .. "")
	text = gsub(text, alif .. zabar .. ye3, "" .. AI2 .. "")
	text = gsub(text, alif .. ye3, "" .. E2 .. "")
	text = gsub(text, "([" .. consonants .. "])" .. zer .. ye, "%1" .. diri .. "")
	text = gsub(text, "([" .. consonantS2 .. "])" .. pesh .. vao, "%1" .. diru .. "")
	-- ye / vao used as consonants before another vowel letter
	--text = gsub(text, ye .. he , "ये")
	text = gsub(text, zer .. ye .. alif, "िया")
	text = gsub(text, ye .. ye3 , "ये")
	text = gsub(text, ye .. vao , "यो")
	text = gsub(text, ye .. ye , "यी")
	text = gsub(text, vao .. alif , "वा")
	text = gsub(text, vao .. vao , "वो")
	--VAO alone
	text = gsub(text, " و ", " ओ ")
	-- final he + short vowel disregards the he and transliterates the vowel
	text = gsub(text, "([" .. consonants .. "])" .. zabar .. he .. jazm , "%1ह")
	text = gsub(text, zabar .. he .. "([" .. ZZP .. "])" , "ह%1")
	text = gsub(text, '([' .. zabar .. '])' .. he, "ा")
	text = gsub(text, '([' .. zabar .. '])' .. he2 .. " ", "ा-ए-")
	---text = gsub(text, '([' .. zabar .. '])' .. ye4 .. " ", "ा-ये-")
	text = gsub(text, '([' .. zer .. '])' .. he, "ि")
	text = gsub(text, zabar .. he .. alif , "हा")
	text = gsub(text, he .. alif , "हा")
	text = gsub(text, zer .. he .. alif , "िहा")
	-- NOTE(review): `.. he$` is inside the string literal here, so the pattern is
	-- "<consonant> <any char><any char> he" anchored at the end of the input,
	-- not "<consonant> followed by the letter he". The apparent intent is
	-- `"([" .. consonants .. "])" .. he .. "$"`, but enabling that would change
	-- the output for existing input, so the rule is deliberately left untouched
	-- until the intended behaviour is confirmed.
	text = gsub(text, "([" .. consonants .. "]) .. he$", "%1े")
	-- Fatha Majhool --
	text = gsub(text, "([" .. consonants .. "])" .. zabar .. he .. jazm .. "([" .. ZZP .. "])" , "%1ह%2")
	text = gsub(text, "ڃ", "ञ")
	text = gsub(text, "ोا", "वा")
	text = gsub(text, "ौا", "वा")
	text = gsub(text, "ौा", "वा")
	text = gsub(text, "ोा", "वा")
	text = gsub(text, "ن٘", "ङ")
	text = gsub(text, "ے", "े")
	text = gsub(text, "ۂ ", "-ए-")
	text = gsub(text, "ۓ ", "-ये-")
	text = gsub(text, "ࣇ", "ऴ")
	-- consonant directly followed by do-chashmi he (no vowel mark in between)
	text = gsub(text, "کھ", "ख")
	text = gsub(text, "گھ", "घ")
	text = gsub(text, "چھ", "छ")
	text = gsub(text, "جھ", "झ")
	text = gsub(text, "ٹھ", "ठ")
	text = gsub(text, "ڈھ", "ढ")
	text = gsub(text, "تھ", "थ")
	text = gsub(text, 'دھ', "ध")
	text = gsub(text, "پھ", "फ")
	text = gsub(text, "بھ", "भ")
	text = gsub(text, "ڑھ", "ढ़")
	text = gsub(text, "مھ", "म्ह")
	text = gsub(text, "نھ", "न्ह")
	text = gsub(text, "لھ", "ल्ह")
	-- vowelless nasal before one of the listed consonants becomes anusvara
	text = gsub(text, "نْ([کگجچٹڈتدن])" , "ं%1")
	text = gsub(text, "ن٘([کگجچٹڈتدن])" , "ं%1")
	text = gsub(text, "مْ([بپمو])" , "ं%1")
	-- Stage 2: look up every remaining character in `mapping`; characters
	-- without an entry are kept as they are.
	text = gsub(text, '.', mapping)
	-- Stage 3: clean-up of the Devanagari output. '॑' is the temporary marker
	-- that `mapping` emits for zabar; it is combined with neighbouring vowel
	-- signs below and then removed.
	text = gsub(text, "ललह", "ल्लाह")
	text = gsub(text, "ا", "अ")
	text = gsub(text, 'ी॑ा', "िया")
	text = gsub(text, 'ी॑', "िय")
	--text = mw.ustring.gsub(text, 'अअ', "आ")
	text = gsub(text, 'अै', "ऐ")
	text = gsub(text, 'अा', "आ")
	text = gsub(text, 'अौ', "औ")
	text = gsub(text, 'ुो', "ू")
	text = gsub(text, "ाि", "ाइ")
	text = gsub(text, '॑ि', "इ")
	text = gsub(text, '॑े', "ै")
	text = gsub(text, '॑ो', "ौ")
	text = gsub(text, '॑', "")
	text = gsub(text, 'िे', "ी")
	text = gsub(text, 'ीا', "िया")
	text = gsub(text, 'ोا', "वा")
	text = gsub(text, 'ौا', "वा")
	text = gsub(text, 'ौा', "वा")
	text = gsub(text, 'ोा', "वा")
	text = gsub(text, "ौ([॑िु])", "व%1")
	text = gsub(text, "ुु॑", "ुुव")
	text = gsub(text, 'ीआ', "िया")
	text = gsub(text, 'ीअ', "िय")
	text = gsub(text, 'यअ', "या")
	text = gsub(text, 'ाً', "न")
	text = gsub(text, "शؕ", "ष")
	text = gsub(text, "श्(ؕ)", "ष्")
	-- Strip literal plus signs. '+' is a pattern quantifier, so it is escaped
	-- rather than relying on a bare leading '+' being read as a literal.
	text = gsub(text, '%+', "")
	return text
end
return export