Module:zm-pron

From Linguifex
Revision as of 05:16, 29 April 2022 by Aleisi Galan (talk | contribs) (Created page with "local export = {} local categoryKeywords = { common = "Utility", utilities = "Utility", headword = "Headword-line", translit = "Transliteration", decl = "Inflection", c...")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search


local export = {}

-- Maps the keyword found at the end of a module's page title to the name of
-- the module category it stands for. Several keywords may share a category.
local categoryKeywords = {
	-- general-purpose helpers
	["common"] = "Utility",
	["utilities"] = "Utility",

	-- entry formatting
	["headword"] = "Headword-line",
	["translit"] = "Transliteration",
	["sortkey"] = "Sortkey-generating",

	-- inflection
	["decl"] = "Inflection",
	["conj"] = "Inflection",

	-- pronunciation
	["pronun"] = "Pronunciation",
	["pronunc"] = "Pronunciation",
	["pronunciation"] = "Pronunciation",
	["IPA"] = "Pronunciation",
}

-- Builds the categories of a module page from its title. The title is
-- expected to end in "<code>-<keyword>" or "<code> <keyword>", where <keyword>
-- is a key of categoryKeywords.
--
-- Parameters:
--   frame        Scribunto frame. When frame.args[1] is set, it is used as the
--                page name (a leading "Module:" is removed) and the function
--                runs in testing mode. Otherwise the current page title is
--                used, and the page must be in the Module namespace.
--   returnTable  When true, the function returns a table of categories with
--                "[[Category:" and "]]" stripped away. It is used by
--                [[Module:documentation]].
--
-- Returns:
--   ""                       on a "documentation" subpage;
--   a table of category names when returnTable is true;
--   otherwise a string: the category links, or, in testing mode, the page
--   name followed by the links inside a syntaxhighlight tag (an error span
--   if no category was generated).
--
-- Raises an error when the page is outside the Module namespace (non-testing
-- mode), when the title does not have the expected shape, when the keyword is
-- unknown, or when the code is not a known language (or, for transliteration
-- modules, script) code.
function export.categorize(frame, returnTable)
	local title = mw.title.getCurrentTitle()
	local subpage = title.subpageText
	
	-- To ensure no categories are added on documentation pages.
	if subpage == "documentation" then
		return ""
	end
	
	local output, categories = {}, {}
	local namespace = title.nsText
	local pagename, mode
	
	if frame.args[1] then
		pagename = frame.args[1]
		pagename = pagename:gsub("^Module:", "")
		mode = "testing"
		mw.log("arg", pagename)
	else
		if namespace ~= "Module" then
			error("This template should only be used in the Module namespace.")
		end
		
		pagename = title.text
		
		-- On a subpage, categorize by the root page instead.
		if subpage ~= pagename then
			pagename = title.rootText
		end
	end
	
	local code, categoryKeyword = pagename:match("([-%a]+)[- ]([^/]+)$")
	
	if not code then
		error("Category name was not recognized.")
	end
	
	local lang, sc
	
	if subpage == "sandbox" then
		table.insert(categories, "Sandbox modules")
	else
		local category = categoryKeywords[categoryKeyword]
		
		-- If this is a transliteration module, parameter 1 of the parent
		-- frame is used as the code, rather than the code in the page title.
		if category == "Transliteration" then
			code = frame:getParent().args[1] or code
		end
		
		if code then
			if category then
				local getByCode = require("Module:languages").getByCode
				lang = getByCode(code) or getByCode(code .. "-pro")
				
				-- A transliteration module may be named after a script
				-- instead of a language.
				if category == "Transliteration" then
					if not lang then
						sc = require("Module:scripts").getByCode(code)
						
						if sc then
							table.insert(categories, "Transliteration modules by script|" .. sc:getCanonicalName())
						else
							error('The language or script code "' .. code .. '" in the page title is not recognized by [[Module:languages]] or [[Module:scripts]].')
						end
					end
				end
				
				if not ( sc or lang ) then
					error('The language code "' .. code .. '" in the page title is not recognized by Module:languages.')
				end
				
				local function languageCategory(lang, sortkey)
					return lang:getCanonicalName() .. " modules|" .. sortkey
				end
				
				local function generalCategory(category, sortkey)
					return category .. " modules|" .. sortkey
				end
				
				if category == "Transliteration" then
					local langs = require("Module:languages/byTranslitModule")(pagename)
					
					local sortkey = category
					
					if sc then
						sortkey = sortkey .. ", " .. sc:getCanonicalName()
					end
					
					if langs[1] then
						for i, lang in ipairs(langs) do
							table.insert(categories, languageCategory(lang, sortkey))
						end
					elseif lang then
						table.insert(categories, languageCategory(lang, sortkey))
					end
					
					if sc then
						table.insert(categories, generalCategory(category, sc:getCanonicalName()))
					else
						table.insert(categories, generalCategory(category, lang:getCanonicalName()))
					end
				else
					table.insert(categories, languageCategory(lang, category))
					table.insert(categories, generalCategory(category, lang:getCanonicalName()))
				end
			else
				error('The category keyword "' .. categoryKeyword .. '" was not recognized.')
			end
		end
	end
	
	if returnTable then
		return categories
	else
		categories = table.concat(
			require "Module:fun".map(
				function (category)
					return "[[Category:" .. category .. "]]"
				end,
				categories))
	end
	
	-- Testing mode is recorded in the local `mode`. The previous check read
	-- an undefined global `testing`, so this branch could never run.
	if mode == "testing" then
		table.insert(output, pagename)
		
		if categories == "" then
			categories = '<span class="error">failed to generate categories for ' .. pagename .. '</span>'
		else
			-- One category link per line, displayed as source text.
			categories = mw.ustring.gsub(categories, "%]%]%[%[", "]]\n[[")
			categories = frame:extensionTag{ name = "syntaxhighlight", content = categories }
		end
	end
	
	return table.concat(output) .. categories
end

-- NOTE(review): this second declaration shadows the `export` table declared
-- earlier in this file. Only this table is returned at the end of the file,
-- so functions attached to the earlier table (export.categorize) are not
-- part of the returned module. Confirm whether that is intended.
local export = {}

local m_params = require("Module:parameters")
local m_IPA = require("Module:IPA")

local m_template_link = require("Module:zm-IPA")


local lang = require("Module:languages").getByCode("zm")
local sc = require("Module:scripts").getByCode("Latn")

-- Delegates to tag_text of [[Module:script utilities]], supplying this
-- module's language and script objects; `face` is passed through unchanged.
function export.tag_text(text, face)
	local m_script_utilities = require("Module:script utilities")
	return m_script_utilities.tag_text(text, lang, sc, face)
end

-- Delegates to full_link of [[Module:links]] for `term`, using this module's
-- language and script objects; `face` is passed through unchanged.
function export.link(term, face)
	local link_data = { term = term, lang = lang, sc = sc }
	return require("Module:links").full_link(link_data, face)
end

local U = mw.ustring.char
local sub = mw.ustring.sub
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local gmatch = mw.ustring.gmatch
local find = mw.ustring.find

-- IPA symbols and combining diacritics used when building transcriptions.
local long = "ː"				-- IPA length mark
local nonsyllabic = U(0x32F)	-- inverted breve below
local syllabic = U(0x0329)		-- U+0329, combining vertical line below
local syllabic_below = U(0x030D)	-- U+030D, combining vertical line above
local raised = U(0x31D)			-- uptack below
-- NOTE: `voiceless` is redeclared further down in this file as a table of
-- consonants, so this diacritic is not reachable under that name afterwards.
local voiceless = U(0x30A)		-- ring above
local caron = U(0x30C)			-- combining caron
local tie = U(0x361)			-- combining double inverted breve
local primary_stress = "ˈ"
local secondary_stress = "ˌ"

-- Orthography-to-IPA lookup used by export.toIPA. Keys are one or two
-- lowercase characters of the input; values are the IPA text they produce.
local data = {
	-- vowels
	["e"] = "e",
	["è"] = "ɛ",
	["i"] = "i",
	["y"] = "ɪ",
	["ai"] = "ɛ" .. long,
	["au"] = "ɔ" .. long,

	-- consonants
	["g"] = "ɡ",
	["h"] = "x",
	["q"] = "k",
	["t"] = "t",
	["x"] = "ks",
	["nj"] = "ɲ",
	["š"] = "ʃ",
	["ž"] = "ʒ",

	-- affricates, written with a tie bar
	["c"] = "t" .. tie .. "s",
	["č"] = "t" .. tie .. "ʃ",
	["ć"] = "t" .. tie .. "ɕ",
	["dź"] = "d" .. tie .. "ʑ",

	-- respelling symbols
	['"'] = primary_stress,
	["%"] = secondary_stress,
	["?"] = "ʔ",
}

-- Letters (and the space) whose IPA value is the character itself:
-- data["a"] = "a", data["b"] = "b", etc.
do
	local identity_letters = "abdfjklmnoprstuvz "
	for position = 1, #identity_letters do
		local character = sub(identity_letters, position, position)
		data[character] = character
	end
end

-- Affricates spelled with a tie bar are swapped for single-character
-- ligatures while the phonological rules run, so that each sound is one
-- character and therefore easier to process.
local multiple_to_single = {
	-- voiceless affricates
	["t" .. tie .. "s"] = "ʦ",
	["t" .. tie .. "ʃ"] = "ʧ",
	["t" .. tie .. "ɕ"] = "ʨ",
	-- voiced affricates
	["d" .. tie .. "z"] = "ʣ",
	["d" .. tie .. "ʒ"] = "ʤ",
	["d" .. tie .. "ʑ"] = "ʥ",
}

-- "voiceless" and "voiced" are obstruents only; sonorants are not involved
-- in voicing assimilation.
--
-- The two lists are parallel: the entries at the same position differ only
-- in voicing. The assimilation and final-devoicing rules rely on this, since
-- they look up a consonant's position in its own list and take the entry at
-- that position in the other list. Consonants without a voiced counterpart
-- (x and the glottal stop) are therefore placed at the end of "voiceless".
--
-- ʦ, ʧ, ʨ stand for the voiceless affricates written with a tie bar, and
-- ʣ, ʤ, ʥ for the voiced ones.
local voiceless	= { "p", "t", "k", "f", "s", "ʃ", "ʦ", "ʧ", "ʨ", "x", "ʔ" }
local voiced	= { "b", "d", "ɡ", "v", "z", "ʒ", "ʣ", "ʤ", "ʥ" }
local sonorants = { "m", "n", "ɲ", "r", "l", "j", }
-- Pattern set matching any single consonant.
local consonant = "[" .. table.concat(sonorants) .. "ŋ"
	.. table.concat(voiceless) .. table.concat(voiced) .. "]"
-- Lookup of the two lists by voicing name. Declared local so that the module
-- does not create a global variable.
local assimil_consonants = {
	voiceless = voiceless,
	voiced = voiced,
}

-- features[c].voicing is "voiceless" or "voiced" for every obstruent c, and
-- indices[c] is the position of c within the list it was taken from.
local features = {}
local indices = {}
do
	local function register(list, voicing)
		for position, sound in pairs(list) do
			features[sound] = features[sound] or {}
			features[sound].voicing = voicing
			indices[sound] = position
		end
	end

	register(voiceless, "voiceless")
	register(voiced, "voiced")
end
	
-- Pattern sets over the IPA symbols used by the rules below.
local short_vowel = "[aeɛiɪou]"
-- One of these vowel qualities followed by the length mark.
local long_vowel = "[ɛɔ]" .. long
-- m, n, r or l followed by the syllabicity diacritic.
local syllabic_consonant = "[mnrl]" .. syllabic

-- Set of sounds that trigger assimilation of the obstruents before them:
-- every entry of the voiceless and voiced lists plus the glottal stop, with
-- the characters "v" and "ř" removed from the set.
local causing_assimilation =
	gsub(
		"[" .. table.concat(voiceless) .. table.concat(voiced) .. "ʔ]",
		"[vř]",
		""
	)

-- Set of sounds that can undergo assimilation: every entry of the voiceless
-- and voiced lists except the glottal stop.
local assimilable = "[" .. table.concat(voiceless):gsub("ʔ", "") .. table.concat(voiced) .. "]"

-- Regressive voicing assimilation: a run of assimilable obstruents takes on
-- the voicing of the assimilation-causing obstruent that follows it.
--
-- IPA: working transcription (affricates already collapsed to one character).
-- Returns the transcription with the affected obstruents replaced. A sound
-- that has no counterpart of the required voicing is left unchanged, because
-- the replacement function then returns nil.
local function regressively_assimilate(IPA)
	IPA = gsub(
		IPA,
		"(" .. assimilable .. "+)(" .. causing_assimilation .. ")",
		function (assimilated, assimilator)
			-- Report the offending sound itself; the message previously
			-- concatenated the file-level `consonant` pattern by mistake.
			local voicing = features[assimilator] and features[assimilator].voicing
				or error('The consonant "' .. assimilator
					.. '" is not recognized by the function "regressively_assimilate".')
			return gsub(
				assimilated,
				".",
				function (sound)
					-- Same position, list of the target voicing.
					return assimil_consonants[voicing][indices[sound]]
				end)
				.. assimilator
		end)
	
	-- Special case: the sequence "smus" is voiced to "zmus".
	IPA = gsub(IPA, "smus", "zmus")
	
	return IPA
end

-- Word-final devoicing: every obstruent in a run that directly precedes a
-- word boundary ("#") is replaced by the entry of the voiceless list found at
-- that obstruent's own list position.
local function devoice_finally(IPA)
	local obstruent_class = "[" .. table.concat(voiced) .. table.concat(voiceless) .. "]"

	local function to_voiceless(sound)
		return voiceless[indices[sound]]
	end

	local function devoice_cluster(cluster)
		return gsub(cluster, ".", to_voiceless) .. "#"
	end

	local devoiced = gsub(IPA, "(" .. obstruent_class .. "+)#", devoice_cluster)
	return devoiced
end

-- Replaces "ř" with "ṙ" when it stands directly after or directly before a
-- voiceless obstruent ("ř" represents r̝, "ṙ" represents r̝̊).
local function devoice_fricative_r(IPA)
	-- Set of voiceless obstruents, minus "ṙ", which this function introduces.
	local voiceless_class = gsub("[" .. table.concat(voiceless) .. "]", "ṙ", "")

	local after_voiceless = gsub(IPA, "(" .. voiceless_class .. ")" .. "ř", "%1ṙ")
	local before_voiceless = gsub(after_voiceless, "ř" .. "(" .. voiceless_class .. ")", "ṙ%1")

	return before_voiceless
end

-- Marks sonorants as syllabic where they have no adjacent vowel to lean on.
--
-- IPA: working transcription in which "#" marks word boundaries.
-- Returns the transcription with the syllabicity diacritic inserted.
local function syllabicize_sonorants(IPA)
	-- all sonorants except ɲ and j
	local sonorant = gsub("[" .. table.concat(sonorants) .. "]", "[ɲj]", "")
	local obstruent = "[" .. table.concat(voiced) .. table.concat(voiceless) .. "]"
	
	-- between consonants: one or more consonants, the sonorant, a consonant
	IPA = gsub(
		IPA,
		"(" .. consonant .. "+" .. sonorant .. ")(" .. consonant .. ")",
		"%1" .. syllabic .. "%2"
		)
	
	-- At the end of a word after an obstruent. The boundary marker is put
	-- back after the diacritic; dropping it would fuse this word with the
	-- next one in multi-word input, since "#" is what later becomes a space.
	IPA = gsub(IPA, "(" .. obstruent .. sonorant .. ")#", "%1" .. syllabic .. "#")
	
	return IPA
end

-- Place assimilation of the nasal: "n" directly before ɡ or k becomes "ŋ".
local function assimilate_nasal(IPA)
	local velar_class = "[ɡk]"
	local assimilated = gsub(IPA, "n(" .. velar_class .. ")", "ŋ%1")
	return assimilated
end

-- Prefixes the primary stress mark to the transcription.
--
-- IPA:      working transcription in which "#" marks word boundaries.
-- nostress: when truthy, no stress mark is added.
--
-- The transcription is returned unchanged when nostress is set, when it
-- contains a word boundary with a character on both sides (more than one
-- word), or when it already contains a primary stress mark.
--
-- This function used to call m_syllables.getVowels, but no m_syllables is
-- defined in this file, so every call raised an error; the result of that
-- call was never used. It also read `nostress` as a global rather than
-- receiving it from the caller.
local function add_stress(IPA, nostress)
	if nostress or find(IPA, ".#.") or find(IPA, primary_stress) then
		return IPA
	end
	
	return primary_stress .. IPA
end

-- Splits a transcription into syllables separated by ".".
--
-- IPA: transcription to syllabify.
-- Returns the syllabified transcription, or the input unchanged when it
-- contains a space, or when it contains a two-consonant cluster (other than
-- a doubled consonant) that has a character on either side.
-- Raises an error when the remaining text neither starts with a syllable nor
-- can be attached to the preceding syllable.
--
-- NOTE(review): this function is not called anywhere in this file.
--
-- Each syllable is zero or more consonants followed by a long vowel, a short
-- vowel or a syllabic consonant. An earlier alternative that concatenated an
-- undefined `diphthong` variable has been removed: the concatenation raised
-- an error on the first iteration.
local function syllabify(IPA)
	local syllables = {}
	
	local working_string = IPA
	
	local noninitial_cluster = match(working_string, ".(" .. consonant .. consonant .. ").")
	local has_cluster = noninitial_cluster and not find(noninitial_cluster, "(.)%1")
	
	if not ( has_cluster or find(working_string, " ") ) then
		while #working_string > 0 do
			local syllable = match(working_string, "^" .. consonant .. "*" .. long_vowel)
				or match(working_string, "^" .. consonant .. "*" .. short_vowel)
				or match(working_string, "^" .. consonant .. "*" .. syllabic_consonant)
			if syllable then
				table.insert(syllables, syllable)
				-- The match is anchored at the start, so cut it off by
				-- length instead of re-using it as a pattern.
				working_string = sub(working_string, mw.ustring.len(syllable) + 1)
			elseif find(working_string, "^" .. consonant .. "+$")
				or find(working_string, primary_stress)
				then
				-- Attach the remainder to the previous syllable, or keep it
				-- as the only syllable if none has been found yet.
				if #syllables > 0 then
					syllables[#syllables] = syllables[#syllables] .. working_string
				else
					syllables[1] = working_string
				end
				working_string = ""
			else
				error('The function "syllabify" could not find a syllable '
					.. 'in the IPA transcription "' .. working_string .. '".')
			end
		end
	end
	
	if #syllables > 0 then
		IPA = table.concat(syllables, ".")
	end
	
	return IPA
end

-- Applies the phonological rules to a raw letter-by-letter transcription.
--
-- IPA:      concatenated IPA values of the letters, words separated by spaces.
-- nostress: when truthy, no primary stress mark is added. It is now a
--           declared parameter; before, the caller's argument was discarded
--           and an undefined global of the same name was forwarded instead.
-- Returns the finished transcription, words separated by single spaces.
local function apply_rules(IPA, nostress)
	-- Adds # at word boundaries and in place of spaces, to unify treatment
	-- of initial and final conditions. # is commonly used in phonological
	-- rule notation to represent word boundaries.
	IPA = "#" .. IPA .. "#"
	IPA = gsub(IPA, "%s+", "#")
	
	-- Handle consonantal prepositions: v, z. The preposition is joined to
	-- the following word, with a glottal stop inserted before a short vowel.
	IPA = gsub(
		IPA,
		"(#[vz])#(.)",
		function (preposition, initial_sound)
			if find(initial_sound, short_vowel) then
				return preposition .. "ʔ" .. initial_sound
			else
				return preposition .. initial_sound
			end
		end)
	
	-- Collapse tie-bar affricates to single characters for the rules below.
	for sound, character in pairs(multiple_to_single) do
		IPA = gsub(IPA, sound, character)
	end
	
	IPA = regressively_assimilate(IPA)
	IPA = devoice_finally(IPA)
	IPA = devoice_fricative_r(IPA)
	IPA = syllabicize_sonorants(IPA)
	IPA = assimilate_nasal(IPA)
	IPA = add_stress(IPA, nostress)
	
	-- Expand the single characters back to tie-bar affricates.
	for sound, character in pairs(multiple_to_single) do
		IPA = gsub(IPA, character, sound)
	end
	
	-- This replaces double (geminate) consonants with single ones.
	IPA = gsub(IPA, "(" .. consonant .. ")%1", "%1")
	
	-- Replace # with space or remove it.
	IPA = gsub(IPA, "([^" .. primary_stress .. secondary_stress .. "])#(.)", "%1 %2")
	IPA = gsub(IPA, "#", "")
	
	return IPA
end

-- Converts a term (or respelling) to an IPA transcription.
--
-- term:     text to transcribe. It is lowercased and a leading or trailing
--           hyphen is removed before conversion.
-- nostress: passed on to apply_rules.
--
-- Returns two values: the IPA transcription and the normalized form of the
-- term that was actually transcribed.
-- Raises an error for a character that has no entry in the `data` table.
--
-- A loop over a `replacements` table used to run here, but no such table is
-- defined in this file, so pairs() raised an error on every call. The loop
-- has been removed.
function export.toIPA(term, nostress)
	local IPA = {}
	
	local transcription = mw.ustring.lower(term)
	transcription = gsub(transcription, "^%-", "")
	transcription = gsub(transcription, "%-?$", "")
	transcription = gsub(transcription, "nn", "n") -- apply_rules does a similar operation on the IPA
	
	-- Normalize to NFC so that letters entered with combining diacritics
	-- can match composed keys (assumes the keys of `data` are NFC).
	transcription = mw.ustring.toNFC(transcription)
	
	local working_string = transcription
	
	-- Consume the input left to right, preferring two-letter keys.
	while mw.ustring.len(working_string) > 0 do
		local IPA_letter
		
		local letter = sub(working_string, 1, 1)
		local twoletters = sub(working_string, 1, 2) or ""
		
		if data[twoletters] then
			IPA_letter = data[twoletters]
			working_string = sub(working_string, 3)
		else
			-- The message used to name the Czech alphabet, which is not
			-- the language this module is set up for.
			IPA_letter = data[letter]
				or error('The letter "' .. tostring(letter)
					.. '" is not a member of the alphabet handled by this module.')
			working_string = sub(working_string, 2)
		end
		
		table.insert(IPA, IPA_letter)
	end
	
	IPA = table.concat(IPA)
	IPA = apply_rules(IPA, nostress)
	
	return IPA, transcription
end

-- Template entry point: returns the formatted IPA transcription of a term.
--
-- Parent-frame parameters:
--   1         term to transcribe; defaults to a fixed example word in the
--             Template namespace and to the page title elsewhere.
--   nostress  boolean; forwarded to export.toIPA.
--
-- NOTE(review): the default example "příklad" contains letters that have no
-- key in the `data` table visible in this file, so the Template-namespace
-- default presumably makes export.toIPA raise an error. Confirm and replace.
function export.show(frame)
	local params = {
		[1] = {},
		["nostress"] = { type = "boolean" },
	}
	
	local args = m_params.process(frame:getParent().args, params)
	local title = mw.title.getCurrentTitle()
	local namespace = title.nsText
	local term = args[1] or namespace == "Template" and "příklad" or title.text
	
	-- Use the processed parameter; a bare `nostress` here was an undefined
	-- global, so the template parameter never had any effect.
	local IPA = export.toIPA(term, args.nostress)
	
	IPA = "[" .. IPA .. "]"
	IPA = m_IPA.format_IPA_full(lang, { { pron = IPA } } )
	
	return IPA
end

-- Template entry point: builds a wikitable of example transcriptions.
--
-- Parent-frame parameter 1 is a list of terms separated by ", ". Each item
-- is a respelling, optionally followed by the entry name in parentheses:
-- "respelling (entry)".
--
-- In the Template namespace the table has the columns headword, code and
-- result; elsewhere it has headword and result only.
--
-- NOTE(review): the template name "cs-IPA" and the fallback term "příklad"
-- are hard-coded here; confirm that they are right for this module.
function export.example(frame)
	local output = {
[[
{| class="wikitable"
]]
	}
	local row
	
	local namespace = mw.title.getCurrentTitle().nsText
	
	-- `row` is a template in which the words link, template_code and IPA
	-- are replaced with the content generated for each term.
	if namespace == "Template" then
		table.insert(
			output, 
[[
! headword !! code !! result
]]
		)
		row =
[[
|-
| link || template_code || IPA
]]
	else
		table.insert(
			output, 
[[
! headword !! result
]]
		)
		row =
[[
|-
| link || IPA
]]
	end
	
	local params = {
		[1] = { required = true },
	}
	
	local args = m_params.process(frame:getParent().args, params)
	local terms = mw.text.split(args[1] or "příklad", ", ")
	
	for _, term in ipairs(terms) do
		local template_parameter
		local respelling_regex = "[%a\"%?%% ]+"
		local respelling = match(term, "(" .. respelling_regex .. ") %(")
			or match(term, respelling_regex)
		local entry = match(term, "%(([%a ]+)%)") or respelling
		local link = export.link(entry)
		
		local IPA, transcribable = export.toIPA(respelling)
		IPA = m_IPA.format_IPA_full(lang, { { pron = "[" .. IPA .. "]" } } )
		
		-- Show the respelling as a template parameter only when it differs
		-- from the item as written.
		if term ~= respelling then
			template_parameter = respelling
		end
		
		if term ~= transcribable then
			link = link .. " (" .. export.tag_text(transcribable) .. ")"
		end
		
		-- Declared local: this was assigned without `local` before and so
		-- created a global variable.
		local template_code = m_template_link.format_link{ "cs-IPA", template_parameter }
		
		local content = {
			link = link,
			template_code = template_code,
			IPA = IPA
		}
		
		local function add_content(name)
			if content[name] then
				return content[name]
			else
				error('No content for "' .. name .. '".')
			end
		end
		
		local current_row = gsub(row, "[%a_]+", add_content)
		
		table.insert(output, current_row)
	end
	
	table.insert(output, "|}")
	
	return table.concat(output)
end

return export