Module:string/char: Difference between revisions

From Linguifex
Jump to navigation Jump to search
(Created page with "local char = string.char local concat = table.concat local tonumber = tonumber local function err(cp) error("Codepoint " .. cp .. " is out of range: codepoints must be between 0x0 and 0x10FFFF.", 2) end local function utf8_char(cp) cp = tonumber(cp) if cp < 0 then err("-0x" .. ("%X"):format(-cp + 1)) elseif cp < 0x80 then return char(cp) elseif cp < 0x800 then return char( 0xC0 + cp / 0x40, 0x80 + cp % 0x40 ) elseif cp < 0x10000 then if cp >= 0xD8...")
 
No edit summary
 
Line 1: Line 1:
local math_module = "Module:math"
local char = string.char
local char = string.char
local concat = table.concat
local error = error
local format = string.format
local pcall = pcall
local select = select
local tonumber = tonumber
local tonumber = tonumber
local type = type
local function to_hex(...)
to_hex = require(math_module).to_hex
return to_hex(...)
end


local function err(cp)
local function codepoint_err(cp, i)
error("Codepoint " .. cp .. " is out of range: codepoints must be between 0x0 and 0x10FFFF.", 2)
-- Throw error: to_hex can only return integers, so only show the bad value
-- if it can be converted into something that looks like a codepoint.
local success, result = pcall(to_hex, cp, true)
error(format(
"bad argument #%d to 'string/char' (codepoint between 0x0 and 0x10FFFF expected%s)",
i, success and "; got " .. result or ""),
i + 3)
end
end


local function utf8_char(cp)
local function utf8_char(n, i, v, ...)
cp = tonumber(cp)
local cp = tonumber(v)
if cp < 0 then
if cp == nil then
err("-0x" .. ("%X"):format(-cp + 1))
error(format("bad argument #%d to 'char' (number expected; got %s)", i, type(v)), i + 2)
elseif cp < 0 then
codepoint_err(cp, i)
elseif cp < 0x80 then
elseif cp < 0x80 then
return char(cp)
if i == n then
return cp
end
return cp, utf8_char(n, i + 1, ...)
elseif cp < 0x800 then
elseif cp < 0x800 then
return char(
if i == n then
0xC0 + cp / 0x40,
return 0xC0 + cp / 0x40,
0x80 + cp % 0x40
0x80 + cp % 0x40
)
end
return 0xC0 + cp / 0x40,
0x80 + cp % 0x40,
utf8_char(n, i + 1, ...)
elseif cp < 0x10000 then
elseif cp < 0x10000 then
if cp >= 0xD800 and cp < 0xE000 then
-- Don't return "?" for surrogates, like mw.ustring.char does, as they
return "?" -- mw.ustring.char returns "?" for surrogates.
-- have legitimate uses (e.g. in JSON).
if i == n then
return 0xE0 + cp / 0x1000,
0x80 + cp / 0x40 % 0x40,
0x80 + cp % 0x40
end
end
return char(
return 0xE0 + cp / 0x1000,
0xE0 + cp / 0x1000,
0x80 + cp / 0x40 % 0x40,
0x80 + cp / 0x40 % 0x40,
0x80 + cp % 0x40
0x80 + cp % 0x40,
)
utf8_char(n, i + 1, ...)
elseif cp < 0x110000 then
elseif cp < 0x110000 then
return char(
if i == n then
0xF0 + cp / 0x40000,
return 0xF0 + cp / 0x40000,
0x80 + cp / 0x1000 % 0x40,
0x80 + cp / 0x40 % 0x40,
0x80 + cp % 0x40
end
return 0xF0 + cp / 0x40000,
0x80 + cp / 0x1000 % 0x40,
0x80 + cp / 0x1000 % 0x40,
0x80 + cp / 0x40 % 0x40,
0x80 + cp / 0x40 % 0x40,
0x80 + cp % 0x40
0x80 + cp % 0x40,
)
utf8_char(n, i + 1, ...)
end
end
err("0x" .. ("%X"):format(cp))
codepoint_err(cp, i)
end
end


return function(cp, ...)
return function(...)
if ... == nil then
local n = select("#", ...)
return utf8_char(cp)
if n ~= 0 then
end
return char(utf8_char(n, 1, ...))
local ret = {cp, ...}
for i = 1, #ret do
ret[i] = utf8_char(ret[i])
end
end
return concat(ret)
end
end

Latest revision as of 21:33, 16 November 2025

Documentation for this module may be created at Module:string/char/doc

local math_module = "Module:math"

local char = string.char
local error = error
local format = string.format
local pcall = pcall
local select = select
local tonumber = tonumber
local type = type

-- Lazy loader for the `to_hex` function of the module named by `math_module`.
-- The first call requires that module, rebinds this local to the real
-- function, and forwards the arguments to it; the module is therefore only
-- loaded if an out-of-range codepoint actually has to be reported.
local function to_hex(...)
	local loaded = require(math_module).to_hex
	to_hex = loaded
	return loaded(...)
end

-- Raise the "codepoint out of range" error for argument number `i`.
--
-- cp: the offending numeric value.
-- i:  1-based position of the argument; it is used both in the message and
--     to compute the error level (i + 3), which skips this function and the
--     frames above it so the error is attributed further up the call stack.
--
-- Never returns.
local function codepoint_err(cp, i)
	-- to_hex can only return integers, so the bad value is appended to the
	-- message only when the conversion succeeds, i.e. when it can be shown as
	-- something that looks like a codepoint.
	local ok, hex = pcall(to_hex, cp, true)
	local got = ""
	if ok then
		got = "; got " .. hex
	end
	local message = format(
		"bad argument #%d to 'string/char' (codepoint between 0x0 and 0x10FFFF expected%s)",
		i, got
	)
	error(message, i + 3)
end

-- Convert codepoint arguments into UTF-8 byte values, one argument per call.
--
-- Handles argument number `i` (held in `v`) and recurses over the remaining
-- varargs, returning every byte as a separate value so that the caller can
-- pass them all to string.char in a single call without building a table.
--
-- n:   total number of codepoint arguments.
-- i:   1-based index of the argument held in `v`.
-- v:   the codepoint; a number or anything tonumber() can convert.
-- ...: the arguments that follow `v`.
--
-- Returns the UTF-8 bytes for arguments i..n as integral numbers (nothing at
-- all when i > n).
--
-- Raises "bad argument #i to 'string/char'" when `v` is not convertible to a
-- number, and delegates to codepoint_err when it is outside 0x0..0x10FFFF.
-- Surrogates (0xD800..0xDFFF) are deliberately encoded rather than replaced
-- with "?", like mw.ustring.char does, as they have legitimate uses (e.g. in
-- JSON).
local function utf8_char(n, i, v, ...)
	if i > n then
		-- Past the last argument: end the recursion without inspecting `v`.
		return
	end
	local cp = tonumber(v)
	if cp == nil then
		-- The recursion below is not a tail call, so there is one frame of this
		-- function per argument processed so far; i + 2 skips those frames and
		-- the exported wrapper.
		error(format("bad argument #%d to 'string/char' (number expected; got %s)", i, type(v)), i + 2)
	elseif not (cp >= 0 and cp < 0x110000) then
		-- Written as a negated range test so that NaN, for which every
		-- comparison is false, is rejected as well.
		codepoint_err(cp, i)
	end
	-- Drop any fractional part up front so that every byte computed below is an
	-- exact integer, instead of relying on string.char to truncate fractional
	-- values (which Lua 5.3+ rejects).
	cp = cp - cp % 1
	if cp < 0x80 then
		return cp, utf8_char(n, i + 1, ...)
	end
	-- Peel off six payload bits at a time, lowest first.
	local b4 = cp % 0x40
	local rest = (cp - b4) / 0x40
	if cp < 0x800 then
		return 0xC0 + rest,
			0x80 + b4,
			utf8_char(n, i + 1, ...)
	end
	local b3 = rest % 0x40
	rest = (rest - b3) / 0x40
	if cp < 0x10000 then
		return 0xE0 + rest,
			0x80 + b3,
			0x80 + b4,
			utf8_char(n, i + 1, ...)
	end
	local b2 = rest % 0x40
	return 0xF0 + (rest - b2) / 0x40,
		0x80 + b2,
		0x80 + b3,
		0x80 + b4,
		utf8_char(n, i + 1, ...)
end

-- Module export: encode any number of codepoints as a single UTF-8 string.
--
-- ...: codepoints, each a number or a value tonumber() can convert, in the
--      range 0x0..0x10FFFF.
--
-- Returns the concatenated UTF-8 encoding of all arguments. With no arguments
-- it returns the empty string, matching string.char, so the result can always
-- be concatenated safely.
--
-- Errors raised for bad arguments come from utf8_char.
return function(...)
	-- select("#", ...) counts trailing nils too, so an explicit nil argument is
	-- still validated (and rejected) instead of being silently dropped.
	local n = select("#", ...)
	if n == 0 then
		return ""
	end
	return char(utf8_char(n, 1, ...))
end