-- Module:sa-utilities/translit/SLP1-to-Deva
-- Documentation for this module may be created at Module:sa-utilities/translit/SLP1-to-Deva/doc
-- Module table; `tr` is attached to it below and it is returned at the end of the file.
local export = {}
-- Input letters treated as consonants. Each one has a Devanagari letter in `tt`.
local consonant_list = "kKgGNcCjJYwWqQRtTdDnpPbBmyrlLvSzsh"
-- The same letters as a pattern character class.
local consonant = "[" .. consonant_list .. "]"
-- Input letters treated as vowels. Each one has an entry in both `diacritics` and `tt`.
local vowel_list = "aAiIuUfFxXeEoO"
-- The same letters as a pattern character class.
local vowel = "[" .. vowel_list .. "]"
-- NOTE(review): presumably a codepoint-to-string function (like mw.ustring.char);
-- in this file it is only ever called with a single codepoint. Confirm against Module:string/char.
local U = require("Module:string/char")
-- Unicode-aware pattern functions; the data below is multi-byte UTF-8.
local gsub = mw.ustring.gsub
local gmatch = mw.ustring.gmatch
-- U+094D, inserted after a consonant that is not followed by a vowel.
local virAma = U(0x94D)
-- U+0951, the mark `tr` appends to syllables it classifies as "svarita".
local svar = U(0x951)
-- U+0952, the mark `tr` appends to syllables it classifies as "anudatta".
local anud = U(0x952)
-- Vowel letter -> dependent vowel sign, used when the vowel directly follows a consonant.
-- 'a' maps to the empty string, so the consonant letter is left bare.
local diacritics = {
['a'] = '', ['A'] = 'ा',
['i'] = 'ि', ['I'] = 'ी',
['u'] = 'ु', ['U'] = 'ू',
['f'] = 'ृ', ['F'] = 'ॄ',
['x'] = 'ॢ', ['X'] = 'ॣ',
['e'] = 'े', ['E'] = 'ै',
['o'] = 'ो', ['O'] = 'ौ',
}
-- Single-character replacement table applied to every character that is still in the
-- input alphabet after the consonant+vowel pass. Characters without an entry
-- (including the accent markers "/", "\" and "_") are left untouched by gsub.
local tt = {
-- consonants
['k'] = 'क', ['K'] = 'ख', ['g'] = 'ग', ['G'] = 'घ', ['N'] = 'ङ',
['c'] = 'च', ['C'] = 'छ', ['j'] = 'ज', ['J'] = 'झ', ['Y'] = 'ञ',
['w'] = 'ट', ['W'] = 'ठ', ['q'] = 'ड', ['Q'] = 'ढ', ['R'] = 'ण',
['t'] = 'त', ['T'] = 'थ', ['d'] = 'द', ['D'] = 'ध', ['n'] = 'न',
['p'] = 'प', ['P'] = 'फ', ['b'] = 'ब', ['B'] = 'भ', ['m'] = 'म',
['y'] = 'य', ['r'] = 'र', ['l'] = 'ल', ['v'] = 'व', ['L'] = 'ळ',
['S'] = 'श', ['z'] = 'ष', ['s'] = 'स', ['h'] = 'ह',
-- vowels (independent letters, used when the vowel does not follow a consonant)
['a'] = "अ", ['A'] = "आ",
['i'] = "इ", ['I'] = "ई",
['u'] = "उ", ['U'] = "ऊ",
['f'] = "ऋ", ['F'] = "ॠ",
['x'] = "ऌ", ['X'] = "ॡ",
['e'] = "ए", ['E'] = "ऐ",
['o'] = "ओ", ['O'] = "औ",
-- chandrabindu
['~'] = 'ँ',
-- anusvara
['M'] = 'ं',
-- visarga
['H'] = 'ः',
-- avagraha
["'"] = 'ऽ',
--numerals
['0'] = '०', ['1'] = '१', ['2'] = '२', ['3'] = '३', ['4'] = '४', ['5'] = '५', ['6'] = '६', ['7'] = '७', ['8'] = '८', ['9'] = '९',
--Vedic extensions
['Z'] = 'ᳵ',
['V'] = 'ᳶ',
-- The accent markers are deliberately not mapped here; `tr` consumes them itself.
-- ['/'] = '',
-- ['\\'] = '',
}
-- Lookup sets over the Devanagari output of `tt` / `diacritics`. They depend only on
-- module-level constants, so they are built once here rather than on every call.
local dev_consonant, dev_vowel, vowel_sign = {}, {}, {}
for c in gmatch(consonant_list, ".") do
	dev_consonant[tt[c]] = true
end
for v in gmatch(vowel_list, ".") do
	dev_vowel[tt[v]] = true
end
for _, sign in pairs(diacritics) do
	if sign ~= "" then
		vowel_sign[sign] = true
	end
end

-- Devanagari marks that may trail a syllable (candrabindu, anusvara, visarga and the
-- two Vedic-extension signs produced for 'Z' and 'V').
local trailing_mark = {
	["ँ"] = true, ["ं"] = true, ["ः"] = true,
	["ᳵ"] = true, ["ᳶ"] = true,
}

-- Explicit accent markers that can close a syllable, and the label used for each.
local accent_of_marker = { ['/'] = 'acute', ['\\'] = 'grave', ['_'] = 'underscore' }

-- "/ C* V" followed by "C* V /": an unaccented syllable squeezed between two
-- "/"-marked syllables. The gap class also admits space, avagraha, visarga,
-- anusvara and candrabindu input characters.
local accent_gap = "[" .. consonant_list .. " 'HM~]*"
local between_acutes = "(/" .. accent_gap .. vowel .. ")(" .. accent_gap .. vowel .. "/)"

-- Two adjacent consonants, and the replacement that puts a virama between them.
local consonant_pair = "(" .. consonant .. ")(" .. consonant .. ")"
local pair_with_virama = "%1" .. virAma .. "%2"

-- Split the already-transliterated character array into syllables.
-- Returns an array of records { last, resume, accent } where
--   last   = index of the last character of the syllable body (marker excluded),
--   resume = index of the first character after the syllable and its marker,
--   accent = "acute" / "grave" / "underscore" for an explicit marker, else "none".
-- Characters that do not start a syllable are skipped; the caller copies them verbatim.
local function scan_syllables(chars, n)
	local syllables = {}
	local i = 1
	while i <= n do
		local ch = chars[i]
		local j
		if dev_consonant[ch] then
			j = i + 1
			-- (virama + consonant)* for conjuncts
			while j <= n - 1 and chars[j] == virAma and dev_consonant[chars[j + 1]] do
				j = j + 2
			end
			-- optional final virama (dead consonant)
			if j <= n and chars[j] == virAma then
				j = j + 1
			end
			-- optional vowel sign
			if j <= n and vowel_sign[chars[j]] then
				j = j + 1
			end
		elseif dev_vowel[ch] then
			-- independent vowel
			j = i + 1
		end
		if j then
			-- trailing marks (anusvara, candrabindu, visarga, etc.)
			while j <= n and trailing_mark[chars[j]] do
				j = j + 1
			end
			local last = j - 1
			-- optional explicit accent marker at the very end of the syllable
			local accent = "none"
			local marked = j <= n and accent_of_marker[chars[j]]
			if marked then
				accent = marked
				j = j + 1
			end
			syllables[#syllables + 1] = { last = last, resume = j, accent = accent }
			i = j
		else
			i = i + 1
		end
	end
	return syllables
end

-- Turn the explicit markers into the final per-syllable labels, in place:
--   * "acute" stays "acute" (no glyph is emitted for it); every unlabelled syllable
--     since the previous labelled one becomes "anudatta", and the next unmarked
--     syllable becomes "svarita".
--   * "grave" becomes "svarita"; preceding unlabelled syllables become "anudatta".
--   * "underscore" becomes "anudatta".
-- NOTE(review): a dead consonant (consonant + virama) counts as a syllable here, so
-- directly after an acute it receives the "svarita" label. `tr` later strips the mark
-- after the virama, and the following vowel-bearing syllable gets none. Confirm this
-- is intended for multi-word input.
local function propagate_accents(syllables)
	local last_accent = 0
	local prev_acute = false
	for si, syl in ipairs(syllables) do
		local accent = syl.accent
		if accent == "acute" or accent == "grave" then
			for j = last_accent + 1, si - 1 do
				syllables[j].accent = "anudatta"
			end
			last_accent = si
			if accent == "grave" then
				syl.accent = "svarita"
			end
			prev_acute = (accent == "acute")
		elseif accent == "underscore" then
			last_accent = si
			syl.accent = "anudatta"
			prev_acute = false
		elseif prev_acute then
			syl.accent = "svarita"
			prev_acute = false
			last_accent = si
		end
	end
end

-- Reassemble the text: explicit "/" "\" "_" markers that closed a syllable are dropped
-- and the anudatta / svarita sign is appended after each syllable labelled as such.
local function rebuild_with_accent_signs(chars, n, syllables)
	local out = {}
	local offset = 1
	for _, syl in ipairs(syllables) do
		-- Non-syllable characters before the syllable and the syllable body are
		-- contiguous (offset never exceeds the syllable start), so one loop copies both.
		for k = offset, syl.last do
			out[#out + 1] = chars[k]
		end
		if syl.accent == "anudatta" then
			out[#out + 1] = anud
		elseif syl.accent == "svarita" then
			out[#out + 1] = svar
		end
		-- jump past syllable (+ explicit marker if it existed)
		offset = syl.resume
	end
	-- copy any trailing chars after the last syllable
	for k = offset, n do
		out[#out + 1] = chars[k]
	end
	return table.concat(out)
end

-- Convert `text` from the module's Latin input alphabet to Devanagari, rendering the
-- accent markers "/" (acute) and "\" (grave) as anudatta / svarita signs.
-- `lang` and `sc` are accepted but not used by this function.
-- Returns the converted string in NFC.
function export.tr(text, lang, sc)
	-- Introduce underscore as extra 'accent' for syllables where dependent svarita
	-- becomes anudatta (viz. between two udatta's). Run twice so that overlapping
	-- matches, which a single gsub pass cannot see, are also covered.
	text = gsub(text, between_acutes, "%1_%2")
	text = gsub(text, between_acutes, "%1_%2")
	-- Virama between adjacent consonants; twice so that all adjacent pairs are covered
	-- (one pass consumes both members of a pair and so skips every other boundary).
	text = gsub(text, consonant_pair, pair_with_virama)
	text = gsub(text, consonant_pair, pair_with_virama)
	-- Virama after a consonant standing before whitespace, a hyphen, or the end of
	-- the string (the latter via %z in the frontier set).
	text = gsub(text, "(" .. consonant .. ")%f[%s%z-]", "%1" .. virAma)
	-- all vowel diacritics: consonant + vowel letter -> consonant + dependent sign
	text = gsub(text, "(" .. consonant .. ")(" .. vowel .. ")", function(c, v) return c .. diacritics[v] end)
	-- everything else, one character at a time
	text = gsub(text, '.', tt)

	-- Vedic accent: work on an array of characters so syllables can be indexed.
	local chars = {}
	for ch in gmatch(text, ".") do
		chars[#chars + 1] = ch
	end
	local n = #chars
	local syllables = scan_syllables(chars, n)
	propagate_accents(syllables)
	text = rebuild_with_accent_signs(chars, n, syllables)

	-- fix case where accent mark is placed before diacritic (this happens when the
	-- marker sat between the vowel and a following anusvara/candrabindu/visarga)
	text = gsub(text, "([".. anud .. svar .. "])([ंँः]+)", "%2%1")
	-- a virAma followed by accent mark should not have the accent mark at all
	text = gsub(text, virAma .. "[".. anud .. svar .. "]", virAma)
	return mw.ustring.toNFC(text)
end
-- Hand the module table (carrying `tr`) to whoever require()s this module.
return export