Module:pi-translit/testcases

From Linguifex
Revision as of 11:11, 22 June 2022 by wikt>RichardW57m (Testing Lao script -dm- for -sm-.)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

Documentation for this module may be created at Module:pi-translit/testcases/doc

local tests = require('Module:UnitTests')
local m_xlit = require('Module:pi-translit')
local m_Latn = require('Module:pi-Latn-translit')

-- Script labels driven through the tests.  The four pseudo-labels map onto
-- the real Thai and Lao scripts:
--   aThai / aLaoo: text uses explicit vowels, and the transliterator has to
--                  deduce that for itself.
--   eThai / eLaoo: text uses explicit vowels, and the transliterator is told
--                  that the words do.
local scripts = {
	"Beng", "Brah", "Deva", "Khmr", "Lana", "Laoo", "Mymr", "Shan", "Sinh",
	"Thai", "aThai", "aLaoo", "eThai", "eLaoo",
}

-- When true, the words meant for eThai and eLaoo are instead fed to aThai and
-- aLaoo.  Hack it to see if ESP now works.
local toohard = false

-- Round-trip word lists:
--   words[1] is fed to the abugidas and to aThai and aLaoo.
--   words[2] is fed to the abugidas and to eThai and eLaoo.
local words = {
	{
		"kukhīgaṅghā", "ṭaṭṭhoḍḍhaṇḍīḍhant", "jacchuṃsajjhejña",
		"ipaṇṭhambiphabho", "vyabboraḷhaḷinā", "puṃliṅga",
		"āsnoklatvāyhalo", "ugayho", "nigrodha", "pacavho", "agaṃchiṃ",
		"pāpo", "nibbāna", "ūkatabbo", "okāsamhi", "ehi",
	},
	{
		"ītreddhothūnim", "ḷhāsāḷūṃ",
	},
}

-- Per script tests.
-- Keyed by the labels used in `scripts`; a label with no key here simply has
-- no individual tests.  Each entry is a pair
--   { expected Roman transliteration, text in the script },
-- i.e. element 2 is what gets transliterated and element 1 is what the result
-- is compared against.
-- Declared local so that the table does not leak into the global environment.
local indi = {
	Beng = {
		{"vaṃśa", "ৱংশ"},
		{"baṃśa", "বংশ"},
		{"viṣṇu", "ৱিষ্ণু"},
		{"biṣṇu", "বিষ্ণু"},
		{"gauḥ", "গৌঃ"},
		{"jaina", "জৈন"},
		{"ṛṣḷmṝṇī", "ঋষৢমৄণী"},
		{"ṝkṣūpḹta", "ৠক্ষূপৣত"},
		{"ḷpṛ", "ঌপৃ"},
		{"ḹnā", "ৡনা"},
		{"aiśbarya", "ঐশ্বর্য"},
		{"aiśvarya", "ঐশ্ৱর্য"},
		{"aiśvarya", "ঐশ্ৰর্য"},
		{"auṣadha", "ঔষধ"},
	},
	Brah = {
		{"vaṃśa", "𑀯𑀁𑀰"},
		{"viṣṇu", "𑀯𑀺𑀱𑁆𑀡𑀼"},
		{"gauḥ", "𑀕𑁅𑀂"},
		{"jaina", "𑀚𑁃𑀦"},
		{"ṛṣḷmṝṇī", "𑀋𑀱𑁀𑀫𑀿𑀡𑀻"},
		{"ṝkṣūpḹta", "𑀌𑀓𑁆𑀱𑀽𑀧𑁁𑀢"},
		{"ḷpṛ", "𑀍𑀧𑀾"},
		{"ḹnā", "𑀎𑀦𑀸"},
		{"aiśvarya", "𑀐𑀰𑁆𑀯𑀭𑁆𑀬"},
		{"auṣadha", "𑀒𑀱𑀥"},
	},
	Deva = {
		{"gauḥ", "गौः"},
		{"ṛṣḷmṝṇī", "ऋषॢमॄणी"},
		{"ṝkṣūpḹta", "ॠक्षूपॣत"},
		{"ḷpṛ", "ऌपृ"},
		{"ḹnā", "ॡना"},
		{"rṛ", "रृ"},
--		{"rṛ", "र्अ"}, -- Microsoft Devanagari - disallow!
		{"aiśvarya", "ऐश्वर्य"} ,
		{"auṣadha", "औषध"},
	},
	Khmr = {
		{"vaṃśa", "វំឝ"},
		{"viṣṇu", "វិឞ្ណុ"},
		{"gauḥ", "គៅះ"},
		{"jaina", "ជៃន"},
		{"ṛṣḷmṝṇī", "ឫឞ្ឭម្ឬណី"},
		{"ṝkṣūpḹta", "ឬក្ឞូប្ឮត"},
		{"ḷpṛ", "ឭប្ឫ"},
		{"aiśvarya", "ឰឝ្វយ៌"},
		{"aiśvarya", "ឰឝ្វរ្យ"},
		{"auṣadha", "ឳឞធ"},
	},
	Lana = {
		{"vaṃśa", "ᩅᩴᩆ"},
		{"viṣṇu", "ᩅᩥᩇ᩠ᨱᩩ"},
		{"gauḥ", "ᨣᩮᩢᩣᩡ"},
		{"gauḥ", "ᨣᩮᩫᩢᩣᩡ"},
		{"gauḥ", "ᨣᩮᩫᩣᩡ"},
		{"gauḥ", "ᨣᩮᩢᩤᩡ"},
		{"gauḥ", "ᨣᩮᩫᩢᩤᩡ"},
		-- NOTE(review): the next entry looks identical to the third "gauḥ"
		-- entry above; possibly a different vowel sign was intended - confirm.
		{"gauḥ", "ᨣᩮᩫᩣᩡ"},
		{"jaina", "ᨩᩱᨶ"},
		{"ḷpṛ", "ᩄᨷᩂ"},
		{"nigrodha", "ᨶᩥᨣᩕᩮᩤᨵ"},
		{"nigrodha", "ᨶᩥᨣᩕᩮᩣᨵ"},
		{"nigrodha", "ᨶᩥᨣᩕᩰᨵ"},
		{"pacavho", "ᨷᨧᩅ᩠ᩉᩮᩤ"},
		{"pacavho", "ᨷᨧᩅ᩠ᩉᩮᩣ"},
		{"pāpo", "ᨷᩣᨷᩮᩣ"},
		{"pāpo", "ᨷᩤᨷᩮᩤ"},
		{"pāpo", "ᨸᩣᨸᩮᩣ"},
		{"nibbāna", "ᨶᩥᨻᩛᩣᨶ"},
		{"nibbāna", "ᨶᩥᨻᩛᩤᨶ"},
		{"nibbāna", "ᨶᩥᨻ᩠ᨻᩣᨶ"},
		{"nibbāna", "ᨶᩥᨻ᩠ᨻᩤᨶ"},
		{"katabbo", "ᨠᨲᨻᩛᩮᩣ"},
		{"katabbo", "ᨠᨲᨻᩛᩮᩤ"},
		{"okāsa", "ᩒᨠᩣᩈ"},
		{"okāsa", "ᩋᩰᨠᩣᩈ"},
		{"au", "ᩐᩣ"},
		{"karont", "ᨠᩁᩮᩣᨶ᩠ᨲ᩼"},
		{"ṛṣḷ", "ᩂᩇᩄ"},
--		{"ṛṣḷmṝṇī", "ၒၑၘမၗဏီ"},
--		{"ṝkṣūpḹta", "ၓက္ၑူပၙတ"},
		{"ḷpṛ", "ᩄᨷᩂ"},
--		{"ḹnā", "ၕနာ"},
--		{"aiśvarya", "ᩑᩮᩆ᩠ᩅᩁ᩠ᨿ"}, -- TBC
		{"aiśvarya", "ᩋᩱᩆ᩠ᩅᩁ᩠ᨿ"}, -- Dodgy
		{"auṣadha", "ᩐᩣᩇᨵ"},
	},
	Mymr = {
		{"vaṃśa", "ဝံၐ"},
		{"viṣṇu", "ဝိၑ္ဏု"},
		{"gauḥ", "ဂော်း"},
		{"jaina", "ဇဲန"},
		{"ṛṣḷmṝṇī", "ၒၑၘမၗဏီ"},
		{"ṝkṣūpḹta", "ၓက္ၑူပၙတ"},
		{"ḷpṛ", "ၔပၖ"},
		{"ḹnā", "ၕနာ"},
		{"aiśvarya", "အဲၐွရျ"},
		{"auṣadha", "ဪၑဓ"},
	},
	Sinh = {
		{"ṛṣḷmṝṇī", "ඍෂෟමෲණී"},
		{"ṝkṣūpḹta", "ඎක්‍ෂූපෳත"},
		{"ḷpṛ", "ඏපෘ"},
		{"ḹnā", "ඐනා"},
		{"aiśvarya", "ඓශ‍්වර්‍ය"},
		{"auṣadha", "ඖෂධ"},
	},
	Thai = {
		{"vaṃśa", "วํศ"},
		{"viṣṇu", "วิษฺณุ"},
		{"gauḥ", "เคาะ"},
		{"jaina", "ไชน"},
		{"nigrodha", "นิคฺโรธ"},
		{"nigrodha", "นิโคฺรธ"},
		{"gayho", "คยฺโห"},
		{"gayho", "คโยฺห"},
		{"ṛṣḷmṝṇī", "ฤษฺฦมฺฤๅณี"},
		{"ṝkṣūpḹta", "ฤๅกฺษูปฺฦๅต"},
		{"ḷpṛ", "ฦปฺฤ"},
		{"ḹnā", "ฦๅนา"},
		{"aiśvarya", "ไอศฺวรฺย"},
		{"auṣadha", "เอาษธ"},
		{"āment", "อาเมนฺต์"},
		{"sve", "เสฺว"},
	},
	aThai = {
		{"vaṃśa", "วังศะ"},
		{"gauḥ", "เคาะ"},
		{"jaina", "ไชนะ"},
		{"nigrodha", "นิคโรธะ"},
--		{"nigrodha", "นิโครธะ"},
		{"gayho", "คัยโห"},
--		{"gayho", "คัโยห"},
		{"ṝkṣūpḹta", "ฤๅกษูปฦๅตะ"},
		{"ḹnā", "ฦๅนา"},
		{"aiśvarya", "ไอศวัรยะ"},
		{"auṣadha", "เอาษะธะ"},
		{"sve", "เสฺว"},
	},
	eThai = {
		{"viṣṇu", "วิษณุ"},
		{"ṛṣḷmṝṇī", "ฤษฦมฤๅณี"},
		{"ḷpṛ", "ฦปฤ"},
		{"āment", "อาเมนต์"},
		{"sve", "สเว"},
	},
	Laoo = {
		{"vaṃśa", "ວໍຨ"},
		{"viṣṇu", "ວິຩ຺ຓຸ"},
		{"gauḥ", "ເຄົາະ"},
		{"jaina", "ໄຊນ"},
		{"nigrodha", "ນິຄ຺ໂຣຘ"},
		{"nigrodha", "ນິໂຄ຺ຣຘ"},
		{"gayho", "ຄຍ຺ໂຫ"},
		{"gayho", "ຄໂຍ຺ຫ"},
		{"aiśvarya", "ໄອຨ຺ວຣ຺ຍ"},
		{"auṣadha", "ເອົາຩຘ"},
		{"āment", "ອາເມນ຺ຕ໌"},
		{"sve", "ເສ຺ວ"},
	},
	aLaoo = {
		{"vaṃśa", "ວັງຨະ"},
--		{"gauḥ", "ເຄົາະ"},
		{"jaina", "ໄຊນະ"},
		{"nigrodha", "ນິຄໂຣຘະ"},
--		{"nigrodha", "ນິໂຄຣຘະ"},
		{"gayho", "ຄັຍໂຫ"},
--		{"gayho", "ຄັໂຍຫ"},
--		{"gayho", "ຄະໂຍຫ"},
		{"aiśvarya", "ໄອຨວັຣຍະ"},
		{"auṣadha", "ເອົາຩະຘະ"},
		{"dammasmā", "ທັມມັດມາ"},
-- Other Lao systems:
		{"gassati", "ຄັດສະຕິ"},
		{"ñāya", "ຍ຺າຢະ"},
		{"paccaya", "ປັຈຈະຍະ"},
		{"pāhuṇeyya", "ປາຫຸເນ຺ຢຢະ"},
		{"puñña", "ປຸຍ຺ຍ຺ະ"},
		{"budda", "ພຸດທະ"},
		{"buddha", "ພຸທທ຺ະ"},
		{"viññū", "ວິຍ຺ຍ຺ູ"},
		{"saṅgha", "ສັງຄ຺ະ"},
		{"sandiṭṭhika", "ສັນທິຕ຺ຖ຺ິກະ"},
		{"sambudda", "ສັມພຸດທະ"},
		{"sambuddha", "ສັມພຸທທ຺ະ"},
		{"sāmīcipaṭipanna", "ສາມີຈິປະຕ຺ິປັນນະ"},
		{"añjali", "ອັຍ຺ຊະລິ"},
		{"sāmīciṃ", "ສາມີຈິງ"},
		{"ujuṃ", "ອຸຊຸງ"},
		{"svākkhātaṃ", "ສ໌ວາກຂາຕັງ"},
		{"sve", "ສ໌ເວ"},
	},
	eLaoo = {
		{"viṣṇu", "ວິຩຓຸ"},
		{"budde", "ພຸດເທ"},
		{"āment", "ອາເມນຕ໌"},
		{"pāhuneyyo", "ປາຫຸເນຍໂຍ"},
--		{"sve", "ເສວ"}, -- Disbelieved and unattested.
-- Other Lao systems
--		{"sve", "ເສ໌ວ"}, -- Disbelieved and unattested.
	},
 }

-- Round-trip test: every word in `words` is converted from Roman into each
-- script with Module:pi-Latn-translit, transliterated back with
-- Module:pi-translit, and the result is compared with the original word.
-- Takes no arguments and returns nothing; outcomes are reported through
-- tests:equals.
function tests:test_big_bang()
	-- Options for the Roman-to-script conversion.  Local, so that the test
	-- does not create a global named `options`.
	local options = {}

	-- Transliterate script text back to Roman.  When `explicit` is true the
	-- transliterator is told (impl = "no") that the text writes its vowels
	-- explicitly; otherwise it is left to work that out.  A nil result is
	-- shown as "(nil)" so that it can be concatenated into the report.
	local function back_transliterate(sword, sc, explicit)
		if explicit then
			return m_xlit.trwo(sword, 'pi', sc, {impl="no"}) or "(nil)"
		end
		return m_xlit.tr(sword, 'pi', sc) or "(nil)"
	end

	-- Report one round trip: the actual "script = Roman" against the expected.
	local function check(scin, sword, bword, word)
		tests:equals(scin..':', sword..' = '..bword, sword..' = '..word,
			{show_difference = true})
	end

	for _, scin in ipairs(scripts) do
		local sc = scin		-- Real script code passed to the transliterators.
		local v1, v2 = 0, 0	-- Range of values tried for options.variation.
		local is_a = scin == "aThai" or scin == "aLaoo"
		local is_e = scin == "eThai" or scin == "eLaoo"
		options.impl = "yes"
		if scin == "aThai" or scin == "eThai" then
			sc = "Thai"
			options.impl = "no"
		elseif scin == "aLaoo" or scin == "eLaoo" then
			sc = "Laoo"
			options.impl = "no"
		elseif scin == "Mymr" then
			v2 = 1
		elseif scin == "Shan" then
			v1 = 2
			v2 = 3
			sc = "Mymr"
		end
		for iset = 1, 2 do
			-- All other scripts get both word sets.  The explicit-vowel
			-- pseudo-scripts get one each: set 1 goes to the a* labels, and
			-- set 2 to the e* labels unless `toohard` redirects it to a*.
			local doit = true
			if is_a or is_e then
				if iset == 1 or (iset == 2 and toohard) then
					doit = is_a
				else
					doit = is_e
				end
			end
			if doit then
				for _, word in ipairs(words[iset]) do
					-- Script form produced by the previous variation; used to
					-- avoid reporting the same form twice.
					local osword = " never done "
					for iv = v1, v2 do
						options.variation = tostring(iv)
						local sword = m_Latn.tr(word, sc, options)
						local bword = back_transliterate(sword, sc, is_e)
						if sword ~= osword then
							check(scin, sword, bword, word)
						end
						osword = sword
						-- For Lao words containing 'y', also test the form
						-- with every ຍ replaced by ຢ.
						if (scin == "aLaoo" or scin == "eLaoo") and string.match(word, 'y') then
							-- Parentheses drop gsub's second result (the count).
							local yword = (mw.ustring.gsub(sword, "ຍ", "ຢ"))
							check(scin, yword,
								back_transliterate(yword, sc, scin == "eLaoo"), word)
						end
					end
				end
			end
		end
	end
end

-- Run the individual per-script cases in `indi`: each pair's script text
-- (element 2) is transliterated to Roman and compared with the expected
-- transliteration (element 1).  Labels without an `indi` entry are skipped.
-- Takes no arguments and returns nothing; outcomes are reported through
-- tests:equals.
function tests:test_indi()
	for _, scin in ipairs(scripts) do
		-- Map the test label onto the real script code.
		local sc = scin
		if scin == "aThai" or scin == "eThai" then
			sc = "Thai"
		elseif scin == "aLaoo" or scin == "eLaoo" then
			sc = "Laoo"
		elseif scin == "Shan" then
			sc = "Mymr"
		end
		for _, tpair in ipairs(indi[scin] or {}) do
			local word = tpair[1]
			local sword = tpair[2]
			local bword
			-- The e* labels tell the transliterator that vowels are explicit,
			-- unless `toohard` asks for it to work that out unaided.  (This
			-- previously read the undefined global `too_hard`, which is always
			-- nil, so the flag was ignored.)
			if (scin == "eThai" or scin == "eLaoo") and not toohard then
				bword = m_xlit.trwo(sword, 'pi', sc, {impl="no"}) or '(nil)'
			else
				bword = m_xlit.tr(sword, 'pi', sc) or '(nil)'
			end
			tests:equals(scin..':', sword..' = '..bword, sword..' = '..word,
				{show_difference = true})
		end
	end
end

-- Module result: the test object with the test methods defined above attached.
return tests