Module:Shrd-translit

From Linguifex
Revision as of 16:09, 9 August 2024 by wikt>Svartava
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

Documentation for this module may be created at Module:Shrd-translit/doc

-- Module table; functions are attached to it below and it is returned at the
-- end of the file.
local export = {}

-- Shared string helper module (project module, not part of core Lua).
local m_str_utils = require("Module:string utilities")

-- Local aliases for helper functions. Of these, only `gsub` is referenced by
-- export.tr; `match`, `toNFC` and `u` are not used in the code visible in
-- this file.
local gsub = m_str_utils.gsub
local match = m_str_utils.match
local toNFC = mw.ustring.toNFC
local u = m_str_utils.char

-- Maintenance note: every Sharada key, every romanised value with a
-- diacritic, and every character class in export.tr must be stored as real
-- UTF-8. If these literals ever show up as runs of Thai-looking characters the
-- file has been mis-decoded: distinct keys then collapse into one another and
-- the patterns stop matching Sharada text.

-- Consonant letters (U+11191..U+111B2) mapped to their bare romanisation.
-- export.tr appends the inherent vowel "a" when no vowel sign follows.
local consonants = {
	['𑆑']='k', ['𑆒']='kh', ['𑆓']='g', ['𑆔']='gh', ['𑆕']='ṅ',
	['𑆖']='c', ['𑆗']='ch', ['𑆘']='j', ['𑆙']='jh', ['𑆚']='ñ',
	['𑆛']='ṭ', ['𑆜']='ṭh', ['𑆝']='ḍ', ['𑆞']='ḍh', ['𑆟']='ṇ',
	['𑆠']='t', ['𑆡']='th', ['𑆢']='d', ['𑆣']='dh', ['𑆤']='n',
	['𑆥']='p', ['𑆦']='ph', ['𑆧']='b', ['𑆨']='bh', ['𑆩']='m',
	['𑆪']='y', ['𑆫']='r', ['𑆬']='l', ['𑆮']='v', ['𑆭']='ḷ',
	['𑆯']='ś', ['𑆰']='ṣ', ['𑆱']='s', ['𑆲']='h',
}

-- Dependent vowel signs (U+111B3..U+111BF) mapped to the vowel that replaces
-- the inherent "a". The last entry, U+111C0, maps to the empty string so the
-- preceding consonant is emitted with no vowel at all.
local diacritics = {
	['𑆳']='ā', ['𑆴']='i', ['𑆵']='ī', ['𑆶']='u', ['𑆷']='ū', ['𑆸']='ṛ', ['𑆹']='ṝ',
	['𑆺']='ḷ', ['𑆻']='ḹ', ['𑆼']='e', ['𑆽']='ai', ['𑆾']='o', ['𑆿']='au', ['𑇀']='',
}

-- Independent i (U+11185) and u (U+11187) written with a diaeresis. export.tr
-- uses these when the letter directly follows an "a" (inherent or
-- independent), presumably so the pair is not read as the diphthong ai/au.
local diatrema = {
	['𑆅']='ï', ['𑆇']='ü',
}

-- Single-character fallback table applied to whatever is left after the
-- consonant pass: independent vowels, signs, digits and punctuation.
local tt = {
	-- vowels
	['𑆃']='a', ['𑆄']='ā', ['𑆅']='i', ['𑆆']='ī', ['𑆇']='u', ['𑆈']='ū', ['𑆉']='ṛ', ['𑆊']='ṝ',
	['𑆋']='ḷ', ['𑆌']='ḹ', ['𑆍']='e', ['𑆎']='ai', ['𑆏']='o', ['𑆐']='au',
	-- chandrabindu
	['𑆀']='m̐', --until a better method is found
	-- anusvara
	['𑆁']='ṃ', --until a better method is found
	-- visarga
	['𑆂']='ḥ',
	-- avagraha
	['𑇁']='’',
	--numerals
	['𑇐']='0', ['𑇑']='1', ['𑇒']='2', ['𑇓']='3', ['𑇔']='4', ['𑇕']='5', ['𑇖']='6', ['𑇗']='7', ['𑇘']='8', ['𑇙']='9',
	--punctuation
	['𑇆']='.', --double danda
	['𑇅']='.', --danda
	--Vedic extensions
	['𑇂']='x', ['𑇃']='f',
	--Om
	['𑇄']='oṃ',
	--reconstructed
	['*'] = '',
}

-- Transliterate Sharada-script text into Latin characters.
--
-- text: string to transliterate.
-- lang, sc: accepted but not used by this function.
-- Returns the transliterated string. Characters that have no entry in any of
-- the tables above are passed through unchanged.
function export.tr(text, lang, sc)
	-- Pass 1: each consonant, together with an optional vowel sign and an
	-- optional directly following independent i/u, is rewritten in one step.
	text = mw.ustring.gsub(
		text,
		'([𑆑𑆒𑆓𑆔𑆕𑆖𑆗𑆘𑆙𑆚𑆛𑆜𑆝𑆞𑆟𑆠𑆡𑆢𑆣𑆤𑆥𑆦𑆧𑆨𑆩𑆪𑆫𑆬𑆮𑆭𑆯𑆰𑆱𑆲])'..
		'([𑆳𑆴𑆵𑆶𑆷𑆸𑆹𑆺𑆻𑆼𑆽𑆾𑆿𑇀]?)'..
		'([𑆅𑆇]?)',
		function(c, d, e)
			local bare = d == ""
			-- No vowel sign means the consonant carries its inherent "a".
			local vowel = bare and 'a' or diacritics[d]
			local trailing = ""
			if e ~= "" then
				-- After an inherent "a" the following i/u takes a diaeresis;
				-- after any explicit sign it is romanised plainly.
				trailing = bare and diatrema[e] or tt[e]
			end
			return consonants[c] .. vowel .. trailing
		end)

	-- Adjacent vowel letters needing dieresis: independent a followed by
	-- independent i or u.
	text = gsub(text, '([𑆃])([𑆅𑆇])', function(a, b) return tt[a]..diatrema[b] end)

	-- Pass 2: map every remaining character through tt; characters without an
	-- entry are kept as they are.
	text = gsub(text, '.', tt)

	return text
end
 
-- Hand the module table (carrying `tr`) back to the code that loads this module.
return export