-- Module:pine-pron
--
-- From Linguifex
-- Revision as of 23:09, 25 November 2025 by Sware (talk | contribs)
-- (wiki page header kept as comments so the file remains valid Lua)


local sub = mw.ustring.sub
local find = mw.ustring.find
local gmatch = mw.ustring.gmatch
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local u = mw.ustring.char
local split = mw.text.split
local gsplit = mw.text.gsplit

local lang = require("Module:languages").getByCode("pine")
local c = require("Module:languages/data").chars
local m_IPA = require("Module:IPA")

-- Vowel letters of the spelling, as a bare character list (no brackets) so it
-- can be embedded in either a positive or a negated character class.
local vowels_spelling = "aeiouyůảẻỉỏủỷ"

-- Character class of vowel symbols as they appear in the phonetic output.
local vowels = "[aɑæɔoʊuʏyɛœøiɪe]"

-- Negated class: any character that is not a spelling vowel.
local lazy_consonants = "[^" .. vowels_spelling .. "]"

-- Explicit class of consonant letters; ʧ and ʦ are the single-character
-- stand-ins that syllabify_from_spelling substitutes for "tṡ" and "ts".
local consonants_spelling = "[rṛtįpsṡdḍgġhḥkḳlḷƛvbnṇmṃʧʦ]"

-- Plain vowel letter -> default (short, lax) vowel symbol.
local laxen = {
	y = "ʏ",
	i = "ɪ",
	e = "ɛ",
	u = "ʊ",
	o = "ɔ",
	a = "ɑ",
}

-- Vowel letter -> long vowel symbol. Both the bare letter and its
-- hook-above form are keyed, mapping to the same value.
local long_vowels = {
	a = "æː",
	e = "eː",
	i = "iː",
	o = "oː",
	u = "uː",
	y = "yː",
	["ả"] = "æː",
	["ẻ"] = "eː",
	["ỉ"] = "iː",
	["ỏ"] = "oː",
	["ủ"] = "uː",
	["ỷ"] = "yː",
}

-- Wrapper around gsub() whose second return value is a boolean: true when at
-- least one substitution took place, false otherwise.
local function gsubb(term, foo, bar)
	local result, count = gsub(term, foo, bar)
	local changed = count > 0
	return result, changed
end

-- Keep applying the same gsub() until a pass leaves the string unchanged,
-- then return that fixed point.
local function gsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		-- single-target assignment drops gsub()'s substitution count
		term = gsub(previous, foo, bar)
	until term == previous
	return term
end


-- Table of functions returned to callers of this module.
local export = {}

-- Lookup set of cluster strings, each written with "·" between its two parts.
-- Built with Module:table's listToSet (presumably list -> set-like table;
-- confirm against that module).
local word_internal_clusters = require("Module:table").listToSet({
	-- m + labial stop
	"m·pp", "m·p", "m·b",
	-- n + t/d
	"n·tt", "n·t", "n·d", "n·dv",
	-- n + ts
	"n·ts", "n·tsv", "n·tsġ", "n·tsġv",
	-- n + tṡ
	"n·ttṡ", "n·tṡ", "n·tṡv",
	-- n + ƛ
	"n·ƛƛ", "n·ƛ", "n·ƛv",
	-- n + k
	"n·kk", "n·kkv", "n·k", "n·kv",
	-- p + syllabic m
	"p·ṃ",
})

--[[m ṃ p b v
t d ḍ s ṡ ts tṡ r ṛ l ḷ ƛ lį ḷį n ṇ nį ṇį dn ng kn
k g ġ h hį ḥ kṇ tġ]]

--- Insert syllable-boundary dots into a spelled term.
--
-- The affricate digraphs "ts" and "tṡ" are first collapsed to the single
-- characters "ʦ" and "ʧ". The term is then split on whitespace and, in each
-- word, every run of vowel letters gets a "." written directly after it
-- (before the consonants that follow). A dot left at the very end of a word
-- is removed. Words are re-joined with single spaces.
--
-- All pattern work goes through the Unicode-aware gsub (mw.ustring.gsub).
-- The byte-oriented string.gsub treats a bracket class such as "[sṡ]" as a
-- set of individual bytes, which fails to match "tṡ" as a unit and can place
-- the dot in the middle of a multi-byte letter.
--
-- @param term string: the spelled term (callers in this module pass NFC text)
-- @return string: the term with "." marking syllable boundaries
function export.syllabify_from_spelling(term)
	local vowel_class = "[" .. vowels_spelling .. "]"
	local consonant_class = consonants_spelling --lazy_consonants

	-- Single-target assignment keeps only the string, not gsub's match count.
	term = gsub(term, "(t[sṡ])", {["ts"] = "ʦ", ["tṡ"] = "ʧ"})
	--term = term:gsub("([ṛḍḥḳḷṇṃ])", function(c) return mw.ustring.toNFC(mw.ustring.toNFD(c)) end)
	--term = term:gsub("([ṡ])", function(c) return mw.ustring.toNFC(mw.ustring.toNFD(c)) end)

	-- (consonants)(vowels)(consonants): the dot goes right after the vowels.
	local syllable_pattern = "(" .. consonant_class .. "*)(" .. vowel_class .. "+)(" .. consonant_class .. "*)"

	local words = split(term, "%s")
	for i, word in ipairs(words) do
		word = gsub(word, syllable_pattern, "%1%2.%3")
		-- drop the dot produced when the word ends in a vowel
		word = gsub(word, "%.$", "") --word = word:gsub("%.(" .. consonants .. "?)$", "%1")
		--word = word:gsub("%.(" .. consonants .. ")(" .. consonants .. "+)", "%1.%2")

		words[i] = word
	end

	--[[ substitutions for easier processing
	local digraphs = {
		["tṡ"] = "ʧ", ["ng"] = "ŋ", ["ts"] = "ʦ",
		["gį"] = "ɟ", ["nį"] = "ɲ", ["ṇį"] = "ɳ",
		["kį"] = "c", ["ḥį"] = "ʔ", ["ḍį"] = "θ",
		["hį"] = "ɕ", ["lį"] = "ʎ", ["ḷį"] = "ʟ",
	}
	
	term = term:gsub("(.)%1į", "%1įː") -- e.g. nnį > nį:
	for digraph, repl in pairs(digraphs) do
        term = term:gsub(digraph, repl)
    end
	
	-- Split between spaces if term is multiword
	local words = split(term, "%s")
	for i, word in ipairs(words) do
		word = word:gsub("(" .. consonants .. "ː?)(" .. consonants .. "+)", "%1·%2")
		word = word:gsub("^(" .. consonants .. ")·", "%1"); word = word:gsub("·(" .. consonants .. ")$", "%1")
		
		words[i] = word
	end]]

	return table.concat(words, " ")
end

--- Core conversion from a spelled term to its pronunciation string.
--
-- The term is normalised (NFD then NFC) and syllabified. While the local
-- `debug_syllables` flag is true the function stops there and returns the
-- syllabified spelling; the vowel and consonant conversion below only runs
-- once that flag is turned off.
--
-- Every substitution uses the Unicode-aware gsub (mw.ustring.gsub). With the
-- byte-oriented string.gsub, bracket classes holding multi-byte letters
-- (e.g. "[ʧʦgƛṃṇṛ]") match single bytes, so the table lookup only ever hits
-- "g", and "(.)%1" compares bytes rather than characters.
--
-- @param term string: spelled term (show() lower-cases it before calling)
-- @return string: currently the syllabified spelling; the phonetic string
--   once `debug_syllables` is false
function export.crux(term)
	-- Round-trip so that letters with diacritics are in composed form.
	term = mw.ustring.toNFC(mw.ustring.toNFD(term))

	term = export.syllabify_from_spelling(term)
	-- NOTE: debugging switch left on; everything after this block is skipped.
	local debug_syllables = true
	if debug_syllables then
		return term
	end

	-- default to short lax vowels
	term = gsub(term, "[aeiouy]", laxen)
	term = gsub(term, "ů", "œ")

	-- long vowels: decompose so a hook-above letter becomes base + c.hook,
	-- replace the pair, then recompose
	local nfd_term = mw.ustring.toNFD(term)
	nfd_term = gsub(nfd_term, "([aeiouy])" .. c.hook, function(v)
		return long_vowels[v] or long_vowels[v .. c.hook]
	end)
	term = mw.ustring.toNFC(nfd_term)
	term = gsub(term, "ɔɑ", "ɑː")
	term = gsub(term, "ɛœ", "øː")

	-- diphthongized vowels
	term = gsub(term, "ʊɔ", "ʊu")
	term = gsub(term, "œɑ", "øy")
	term = gsub(term, "œɛ", "ʏy")

	-- labiodiphthongized vowels
	term = gsub(term, "vʏy", "ᶣy")
	term = gsub(term, "vøy", "ᶣø")
	term = gsub(term, "vʊu", "ʷu") -- different in eastern
	term = gsub(term, "^ᶣ", "ɥ")
	term = gsub(term, "^ʷ", "w")

	-- diphthongs
	term = gsub(term, "([ɛɑ])ʏ", "%1y")
	term = gsub(term, "ɑy", "æy")
	-- NOTE(review): the capture includes ʊ, so this yields e.g. "ɛʊu"; the
	-- parallel rule above suggests "([ɛɑɪ])ʊ" may have been meant - confirm.
	term = gsub(term, "([ɛɑɪ]ʊ)", "%1u")
	term = gsub(term, "ɪœ", "ɪy")

	-- simple consonant substitutions
	term = gsub(term, "ṡ", "ʃ")
	term = gsub(term, "ḷ", "ɬ")
	term = gsub(term, "ḥ", "ʔ")

	-- geminate consonants: a doubled character becomes character + length mark
	term = gsub(term, "(.)%1", "%1ː")
	term = gsub(term, "ng", "ŋː")

	-- final substitutions
	local final_subs = {
		["ʧ"] = "t͡ʃ",
		["ʦ"] = "t͡s",
		["g"] = "ɡ",
		["ƛ"] = "t͡ɬ",
		["ṃ"] = "m" .. c.ringbelow,
		["ṇ"] = "n" .. c.ringbelow,
		["ṛ"] = "r" .. c.ringbelow,
	}

	term = gsub(term, "[ʧʦgƛṃṇṛ]", final_subs)

	return term
end

--- Template entry point: render the pronunciation line for a term.
--
-- Reads positional parameter 1 from the parent (template) frame. When it is
-- absent the default is the current page title, or the fixed sample
-- "Niṡkullit" when the current page is in the Template namespace. The term is
-- lower-cased, run through export.crux, wrapped in square brackets and handed
-- to Module:IPA's format_IPA_full.
--
-- @param frame the Scribunto frame object of the #invoke call
-- @return string: a "* "-prefixed formatted pronunciation line
function export.show(frame)
	local template_args = frame:getParent().args

	local title = mw.title.getCurrentTitle()
	local default_term
	if title.nsText == 'Template' then
		default_term = "Niṡkullit"
	else
		default_term = title.text
	end

	local param_spec = {
		[1] = { default = default_term },
	}
	local args = require("Module:parameters").process(template_args, param_spec)

	local term = mw.ustring.lower(args[1])
	local bracketed = "[" .. export.crux(term) .. "]"

	local formatted = m_IPA.format_IPA_full({
		lang = lang,
		items = { { pron = bracketed } },
	})
	return "* " .. formatted
end

return export