Module:mr-Modi-translit
Jump to navigation
Jump to search
- The following documentation is generated by Module:documentation/functions/translit. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
This module will transliterate Marathi language text. It is also used to transliterate Varhadi.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{xlit}}.
Within a module, use Module:languages#Language:transliterate.
For testcases, see Module:mr-Modi-translit/testcases.
Functions
tr(text, lang, sc) - Transliterates a given piece of
text written in the script specified by the code sc, and language specified by the code lang. - When the transliteration fails, returns
nil.
local export = {}
local u = require("Module:string/char")
local gsub = mw.ustring.gsub
local find = mw.ustring.find
local ZWJ = u(0x200D)
-- Lookup table from Modi-script characters (and a few multi-character
-- sequences) to their Latin transliteration. export.tr applies it with
-- gsub(text, '.', conv), i.e. one character per lookup; characters that
-- have no entry here are left unchanged by that call.
-- NOTE(review): because the lookup pattern is a single '.', the
-- multi-character keys below ('𑘨𑘿'..ZWJ, '𑘁𑙀', '𑘀𑙀', '𑘊𑙀', '𑘌𑘦𑘿') look like
-- they can never be hit — confirm whether a separate pass was intended.
local conv = {
-- consonants
['𑘎']='k', ['𑘏']='kh', ['𑘐']='g', ['𑘑']='gh', ['𑘒']='ṅ',
['𑘓']='c', ['𑘔']='ch', ['𑘕']='j', ['𑘖']='jh', ['𑘗']='ñ',
['𑘘']='ṭ', ['𑘙']='ṭh', ['𑘚']='ḍ', ['𑘛']='ḍh', ['𑘜']='ṇ',
['𑘝']='t', ['𑘞']='th', ['𑘟']='d', ['𑘠']='dh', ['𑘡']='n',
['𑘢']='p', ['𑘣']='ph', ['𑘤']='b', ['𑘥']='bh', ['𑘦']='m',
['𑘧']='y', ['𑘨']='r', ['𑘩']='l', ['𑘪']='v', ['𑘯']='ḷ',
['𑘫']='ś', ['𑘬']='ṣ', ['𑘭']='s', ['𑘮']='h',
-- 𑘨 + virama + zero-width joiner (three-character key, see note at the top)
['𑘨𑘿'..ZWJ] = 'r',
-- ['𑘕𑘿𑘗'] = 'dny',
-- vowel diacritics
---- only in script charts: ['𑘱'] = 'i', ['𑘴'] ='ū',
['𑘳'] = 'u', ['𑘹'] = 'e', ['𑘻'] = 'o',
['𑘰'] = 'ā', ['𑘲'] = 'ī',
['𑘵'] = 'ru',
['𑘺'] = 'ai', ['𑘼'] = 'au',
-- ['𑘰𑙀'] = 'ŏ',
['𑙀'] = 'ĕ',
-- vowel signs
---- only in script charts: ['𑘂'] = 'i', ['𑘅'] ='ū',
['𑘀'] = 'a', ['𑘄'] = 'u', ['𑘊'] = 'e', ['𑘌'] = 'o',
['𑘁'] = 'ā', ['𑘃'] = 'ī',
['𑘆'] = 'ŕ',
['𑘋'] = 'ai', ['𑘍'] = 'au',
-- two- and three-character keys (see note at the top)
['𑘁𑙀'] = 'ŏ',
['𑘀𑙀'] = 'ĕ', ['𑘊𑙀'] = 'ĕ',
['𑘌𑘦𑘿'] = 'om',
-- chandrabindu
--- ['𑙀𑘽'] = '̃',
-- anusvara
['𑘽'] = 'ṁ',
-- visarga
['𑘾'] = 'ḥ',
-- virama (mapped to the empty string, so it disappears from the output)
['𑘿'] = '',
-- numerals
['𑙐'] = '0', ['𑙑'] = '1', ['𑙒'] = '2', ['𑙓'] = '3', ['𑙔'] = '4',
['𑙕'] = '5', ['𑙖'] = '6', ['𑙗'] = '7', ['𑙘'] = '8', ['𑙙'] = '9',
-- punctuation
['𑙁'] = '.', -- danda
['𑙂'] = '.', -- double danda
['+'] = '', -- compound separator (removed from the output)
-- abbreviation sign
['𑙃'] = '.',
}
-- Replacement for an anusvara (𑘽), chosen by a neighbouring consonant.
-- export.tr indexes this table with the character that precedes the anusvara
-- in the reversed word, i.e. the character that follows it in the original
-- spelling; when there is no entry it falls back to 'n'.
-- Values that are Modi letters are converted to Latin afterwards by the
-- conv lookup that export.tr runs over the whole text.
-- NOTE(review): the value for '𑘧' is a Latin 'i' while every other value is
-- a Modi letter — confirm this is intentional.
local nasal_assim = {
['𑘎'] = '𑘒', ['𑘏'] = '𑘒', ['𑘐'] = '𑘒', ['𑘑'] = '𑘒',
['𑘓'] = '𑘗', ['𑘔'] = '𑘗', ['𑘕'] = '𑘗', ['𑘖'] = '𑘗',
['𑘘'] = '𑘜', ['𑘙'] = '𑘜', ['𑘚'] = '𑘜', ['𑘛'] = '𑘜',
['𑘢'] = '𑘦', ['𑘣'] = '𑘦', ['𑘤'] = '𑘦', ['𑘥'] = '𑘦', ['𑘦'] = '𑘦',
['𑘧'] = 'i', ['𑘨'] = '𑘄', ['𑘩'] = '𑘩', ['𑘪'] = '𑘄',
['𑘫'] = '𑘄', ['𑘬'] = '𑘄', ['𑘭'] = '𑘄', ['𑘮'] = '𑘄',
}
-- Set (keys mapped to true) of three-character sequences: a consonant,
-- the virama 𑘿, and 𑘩.
-- Not referenced anywhere in the visible part of this module.
-- NOTE(review): presumably a list of permitted consonant clusters kept for a
-- rule that is not implemented here — confirm before removing.
local perm_cl = {
['𑘦𑘿𑘩'] = true, ['𑘪𑘿𑘩'] = true, ['𑘡𑘿𑘩'] = true,
}
-- Character inventories, written as bare strings so they can be spliced
-- into [...] character classes of mw.ustring patterns.
-- all_cons:     the Modi consonant letters.
-- special_cons: a subset of the consonants; not referenced in the visible
--               part of this module.
local all_cons, special_cons = '𑘎𑘏𑘐𑘑𑘒𑘓𑘔𑘕𑘖𑘗𑘘𑘙𑘚𑘛𑘝𑘞𑘟𑘠𑘢𑘣𑘤𑘥𑘫𑘬𑘭𑘧𑘨𑘩𑘪𑘮𑘜𑘡𑘦𑘯', '𑘟𑘝𑘧𑘨𑘩𑘪𑘮𑘡𑘦'
-- vowel:      a literal '*' (escaped as %*), the explicit 'a' that export.tr
--             inserts after bare consonants, and the characters listed under
--             "vowel diacritics" in conv.
-- vowel_sign: the characters listed under "vowel signs" in conv.
local vowel, vowel_sign = '%*a𑘱𑘳𑘵𑘹𑘻𑘰𑘲𑘴𑘺𑘼𑙀', '𑘀𑘂𑘄𑘊𑘌𑘁𑘃𑘅𑘆𑘋𑘍𑘀𑙀'
-- Applied by export.tr to a REVERSED word: vowel, consonant, 'a', consonant,
-- optional anusvara, vowel. The replacement keeps all four captures and
-- drops the 'a' between the two consonants.
-- The optional anusvara must be the Modi sign 𑘽: the words handed to this
-- pattern consist only of Modi characters and 'a', so the Devanagari
-- anusvara that used to stand here could never match.
local syncope_pattern = '([' .. vowel .. vowel_sign .. '])([' .. all_cons .. '])a([' .. all_cons .. '])([𑘽]?[' .. vowel .. vowel_sign .. '])'
--- Reverses a UTF-8 encoded string character by character.
-- The string is split with a byte pattern that matches one lead byte
-- (ASCII, NUL, or \194-\244) followed by any continuation bytes, so the
-- bytes of a multi-byte character stay together and in order.
-- The reversal is done in place here instead of loading a helper module
-- on every call.
-- @param text string to reverse
-- @return the string with its characters in reverse order
local function rev_string(text)
	local chars, count = {}, 0
	for ch in string.gmatch(text, "[%z\1-\127\194-\244][\128-\191]*") do -- UTF-8 character pattern
		count = count + 1
		chars[count] = ch
	end
	-- Swap from both ends towards the middle.
	local lo, hi = 1, count
	while lo < hi do
		chars[lo], chars[hi] = chars[hi], chars[lo]
		lo, hi = lo + 1, hi - 1
	end
	return table.concat(chars)
end
--- Transliterates Modi-script text into Latin script.
--
-- Steps, in order:
--   1. An anusvara (𑘽) that follows a character outside the `vowel` and
--      `vowel_sign` sets and is followed by a space, or ends the text, is
--      rewritten to 𑘀.
--   2. An explicit 'a' is inserted after every consonant that is not
--      followed by a character from `vowel` or by the virama 𑘿.
--   3. Every run of Modi characters (plus 'a') is reversed, has its rules
--      applied (leading-'a' deletion, syncope, anusvara resolution) and is
--      reversed back.
--   4. Each character is looked up in `conv`, followed by a few Latin-level
--      clean-ups, and the result is NFC-normalised.
--
-- Each run is rewritten in place through a gsub callback. Writing results
-- back with a second gsub keyed on the original word also rewrote that word
-- wherever it occurred inside longer, not yet processed words, which then
-- could no longer be found and stayed half-converted.
--
-- @param text the text to transliterate
-- @param lang language code (not used by this function)
-- @param sc   script code (not used by this function)
-- @return the transliterated, NFC-normalised string
function export.tr(text, lang, sc)
	-- Disabled rules containing Devanagari characters, kept for reference:
	-- text = gsub(text, 'ाँ', 'ॉ' .. 'ं')
	-- text = gsub(text, 'ँ', 'ॅ' .. 'ं')

	-- Step 1: anusvara after a non-vowel, before a space or at end of text.
	local non_vowel = '([^' .. vowel .. vowel_sign .. '])'
	text = gsub(text, non_vowel .. '𑘽 ', '%1𑘀 ')
	text = gsub(text, non_vowel .. '𑘽$', '%1𑘀')

	-- Step 2: make the vowel after a bare consonant explicit.
	text = gsub(text, '([' .. all_cons .. '])([' .. vowel .. '𑘿]?)', function(c, d)
		return c .. (d == "" and 'a' or d)
	end)

	-- Patterns for step 3; they do not depend on the word, so build them once.
	local leading_a = '^a([' .. all_cons .. '][' .. vowel .. vowel_sign .. '])'
	local vowel_class = '[' .. vowel .. ']'

	-- Resolves one anusvara in a reversed word. `succ` is the character in
	-- front of it in the reversed word (empty at the very start), `prev` the
	-- character behind it.
	local function resolve_anusvara(succ, prev)
		return succ .. (succ..prev == "a" and "𑘿𑘦" or
			(succ == "" and find(prev, vowel_class) and "̃" or nasal_assim[succ] or "n")) .. prev
	end

	-- Step 3: process every run exactly once, in place.
	text = gsub(text, "[𑘀-𑙙a]+", function(word)
		word = rev_string(word)
		-- Drop a leading 'a' (word-final before reversal) when a consonant and
		-- then a vowel character follow it.
		word = gsub(word, leading_a, '%1')
		-- Repeat until no match is left; each pass removes at least one 'a',
		-- so the loop terminates.
		while find(word, syncope_pattern) do
			word = gsub(word, syncope_pattern, '%1%2%3%4')
		end
		word = gsub(word, '(.?)𑘽(.)', resolve_anusvara)
		-- A function result is inserted literally, so no % escaping is needed.
		return rev_string(word)
	end)

	-- Step 4: per-character conversion and Latin-level clean-ups.
	text = gsub(text, '.', conv)
	text = gsub(text, 'a([iu])̃', 'a͠%1')
	text = gsub(text, 'aa', 'a')
	text = gsub(text, 'ñjñ', 'ndny')
	text = gsub(text, 'jñ', 'dny')
	return mw.ustring.toNFC(text)
end
return export