Module:sd-Arab-translit
Jump to navigation
Jump to search
- The following documentation is generated by Module:documentation/functions/translit. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
This module will transliterate text in the Arabic script. It is also used to transliterate Kachchi.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{xlit}}.
Within a module, use Module:languages#Language:transliterate.
For testcases, see Module:sd-Arab-translit/testcases.
Functions
`tr(text, lang, sc)` - Transliterates a given piece of `text` written in the script specified by the code `sc`, and language specified by the code `lang`.
When the transliteration fails, returns `nil`.
--- Taken from [[Module:pa-Arab-translit]]
local m_str_utils = require("Module:string utilities")
local U = m_str_utils.char
local gsub = m_str_utils.gsub
local export = {}
-- Combining marks, created from their code points.
local zabar, zer, pesh = U(0x64E), U(0x650), U(0x64F)
-- Also called shadda.
-- NOTE(review): tashdid is declared here but not referenced anywhere else
-- in this file; confirm whether a gemination rule is missing.
local tashdid = U(0x651)
-- Also called sukun: marks "no vowel".
local jazm = "ْ"

-- Letters that export.tr handles with dedicated rules.
local hamza = "ء"
local alif = "ا"
local vav = "و"
local ye = "ي"
local he = "ه"
local nun = "ن"

-- Character class used when moving a diacritic past a following he.
-- The trailing Latin "n" is there because export.tr rewrites nun to "n"
-- before this class is used.
local aspirates = "[ڙمنڻجگلn]"

-- The four marks shared by every character class below.
local vowel_marks = zabar .. zer .. pesh .. jazm

local diacritics = "[" .. vowel_marks .. "]"
local diacritics_or_alif = "[" .. vowel_marks .. alif .. "]"
local diacritics_or_matres = "[" .. vowel_marks .. alif .. vav .. ye .. "]"
local not_diacritics = "[^" .. vowel_marks .. "]"
-- Per-character replacement table. export.tr passes it as the replacement
-- argument of `gsub(text, '.', mapping)`, so each key is a single character
-- and each value is the Latin text substituted for it. Alif is deliberately
-- absent: export.tr handles it with its own rules before and after this pass.
-- NOTE(review): characters with no entry are presumably left unchanged, as
-- with standard Lua gsub table replacement; confirm for m_str_utils.gsub.
local mapping = {
-- letters
["آ"] = 'ā', ["ب"] = 'b', ["ٻ"] = 'ḇ', ["ڀ"] = 'bh', ["ت"] = 't', ["ٿ"] = 'th',
["ٽ"] = 'ṭ', ["ٺ"] = 'ṭh', ["ث"] = 's̱', ["پ"] = 'p', ["ج"] = 'j', ["ڄ"] = 'j̄',
["ڃ"] = "ñ", ["چ"] = 'c', ["ڇ"] = 'ch', ["ح"] = 'ḥ', ["خ"] = 'x',
["د"] = 'd', ["ڌ"] = 'dh', ["ڏ"] = 'ḏ', ["ڊ"] = 'ḍ', ["ڍ"] = 'ḍh', ["ذ"] = 'ẕ',
["ر"] = 'r', ['ڙ'] = "ṛ", ["ز"] = 'z', ["ژ"] = 'ž', ["س"] = 's',
["ش"] = 'ś', ["ص"] = 'ṣ', ["ض"] = 'ẓ', ["ط"] = 't̤', ["ظ"] = 'z̤', ["ع"] = 'ʻ',
["غ"] = 'ġ', ["ف"] = 'f', ["ڦ"] = 'ph', ["ق"] = 'q', ["ڪ"] = 'k', ["ک"] = 'kh',
["گ"] = 'g', ["ڳ"] = 'g̠', ["ڱ"] = 'ṅ', ["ل"] = 'l',
["م"] = 'm', ["ن"] = 'n', ["ڻ"] = 'ṇ',
-- vav and ye entries only apply to characters export.tr's earlier rules left
-- untouched; "۔" is the full stop
["و"] = 'v', ["ہ"] = 'h', ["ي"] = 'y', ["۔"] = ".", ["ں"] = 'ṉ',
-- single-character signs that expand to whole words
-- (Unicode names: SINDHI POSTPOSITION MEN, SINDHI AMPERSAND)
["۾"] = 'mẽ', ["۽"] = 'a͠i',
-- remaining he variants, all rendered as "h"
["ھ"] = "h", ["ه"] = "h",
-- vav with hamza above
["ؤ"] = "'o",
-- diacritics
[zabar] = "a",
[zer] = "i",
[pesh] = "u",
[jazm] = "", -- also sukun - no vowel
[U(0x200C)] = "-", -- ZWNJ (zero-width non-joiner)
-- ligatures
["ﻻ"] = "lā",
["ﷲ"] = "allāh",
-- kashida
-- NOTE(review): the comment says "no sound" but the entry emits a hyphen,
-- not an empty string; confirm which is intended.
["ـ"] = "-", -- kashida, no sound
-- hamza
[hamza] = "", -- nothing
["ئ"] = "",
-- numerals
["۱"] = "1", ["۲"] = "2", ["۳"] = "3", ["۴"] = "4", ["۵"] = "5",
["۶"] = "6", ["۷"] = "7", ["۸"] = "8", ["۹"] = "9", ["۰"] = "0",
-- punctuation (leave on separate lines)
["؟"] = "?", -- question mark
["،"] = ",", -- comma
["؛"] = ";", -- semicolon
["«"] = '“', -- quotation mark
["»"] = '”', -- quotation mark
["٪"] = "%", -- percent
["؉"] = "‰", -- per mille
["٫"] = ".", -- decimals
["٬"] = ",", -- thousand
-- precomposed letters that expand to more than one Latin letter
["ۓ"] = "-ye",
["ۀ"] = "h-e" -- he ye (in izafat)
}
-- Ordered rewrite rules for export.tr. Each entry is { pattern, replacement }
-- and is handed to gsub in exactly this order; later rules rely on the output
-- of earlier ones, so the order must not be changed.
local tr_rules = {
	-- nun followed by a diacritic or mater is a consonant; any other nun
	-- becomes a combining tilde (nasalisation)
	{ nun .. '(' .. diacritics_or_matres .. ')', 'n%1' },
	{ nun, '̃' },

	-- alif + vav reads as "o", so that the alif is not later turned into ā.
	-- NOTE(review): originally described as handling *initial* alif + vav,
	-- but the pattern is not anchored and applies anywhere in the text.
	{ alif .. vav, 'o' },

	-- alif carrying a short vowel, and zabar followed by alif
	{ alif .. zabar, 'a' },
	{ alif .. zer, 'i' },
	{ alif .. pesh, 'u' },
	{ zabar .. alif, 'ā' },

	-- he: consonantal before a diacritic or alif; collapse doubled he;
	-- move a diacritic that precedes an aspirating he to after it
	{ he .. '(' .. diacritics_or_alif .. ')', 'h%1' },
	{ he .. he .. '(' .. not_diacritics .. ')', he .. '%1' },
	{ he .. he .. '$', he },
	{ '(' .. aspirates .. ')(' .. diacritics .. ')' .. he, '%1' .. he .. '%2' },

	-- long vowels and diphthongs written with vav / ye; these run before the
	-- consonantal vav / ye rules (needed for لَوْنگُ)
	{ zabar .. vav, 'au' },
	{ pesh .. vav, 'ū' },
	{ zabar .. ye, 'ai' },
	{ zer .. ye, 'ī' },

	-- heuristics: vav before ye is a consonant (وي... -> ve...), and
	-- ye + vav at the very end of the text gives ...ev
	{ vav .. ye, 'v' .. ye },
	{ ye .. vav .. '$', ye .. 'v' },

	-- vav / ye carrying their own mark (or followed by alif) are consonants
	{ vav .. '(' .. diacritics_or_alif .. ')', 'v%1' },
	{ ye .. '(' .. diacritics_or_alif .. ')', 'y%1' },

	-- any vav / ye still left is a plain vowel
	{ vav, 'o' },
	{ ye, 'e' },

	-- per-character lookup for everything else
	{ '.', mapping },

	-- any alif that survived all of the above
	{ alif, 'ā' },
}

--- Transliterates Arabic-script text into Latin script.
-- Applies every rule in `tr_rules`, in order, to the input.
-- @param text the string to transliterate
-- @param lang language code; accepted but not used by this function
-- @param sc script code; accepted but not used by this function
-- @return the transliterated string
function export.tr(text, lang, sc)
	for i = 1, #tr_rules do
		local rule = tr_rules[i]
		text = gsub(text, rule[1], rule[2])
	end
	return text
end
return export