Module:string/char: Difference between revisions
Jump to navigation
Jump to search
(Created page with "local char = string.char local concat = table.concat local tonumber = tonumber local function err(cp) error("Codepoint " .. cp .. " is out of range: codepoints must be between 0x0 and 0x10FFFF.", 2) end local function utf8_char(cp) cp = tonumber(cp) if cp < 0 then err("-0x" .. ("%X"):format(-cp + 1)) elseif cp < 0x80 then return char(cp) elseif cp < 0x800 then return char( 0xC0 + cp / 0x40, 0x80 + cp % 0x40 ) elseif cp < 0x10000 then if cp >= 0xD8...") |
No edit summary |
||
| Line 1: | Line 1: | ||
local math_module = "Module:math" | |||
local char = string.char | local char = string.char | ||
local | local error = error | ||
local format = string.format | |||
local pcall = pcall | |||
local select = select | |||
local tonumber = tonumber | local tonumber = tonumber | ||
local type = type | |||
local function to_hex(...) | |||
to_hex = require(math_module).to_hex | |||
return to_hex(...) | |||
end | |||
local function | local function codepoint_err(cp, i) | ||
error( | -- Throw error: to_hex can only return integers, so only show the bad value | ||
-- if it can be converted into something that looks like a codepoint. | |||
local success, result = pcall(to_hex, cp, true) | |||
error(format( | |||
"bad argument #%d to 'string/char' (codepoint between 0x0 and 0x10FFFF expected%s)", | |||
i, success and "; got " .. result or ""), | |||
i + 3) | |||
end | end | ||
local function utf8_char( | local function utf8_char(n, i, v, ...) | ||
cp = tonumber( | local cp = tonumber(v) | ||
if cp | if cp == nil then | ||
error(format("bad argument #%d to 'char' (number expected; got %s)", i, type(v)), i + 2) | |||
elseif cp < 0 then | |||
codepoint_err(cp, i) | |||
elseif cp < 0x80 then | elseif cp < 0x80 then | ||
return | if i == n then | ||
return cp | |||
end | |||
return cp, utf8_char(n, i + 1, ...) | |||
elseif cp < 0x800 then | elseif cp < 0x800 then | ||
return | if i == n then | ||
return 0xC0 + cp / 0x40, | |||
0x80 + cp % 0x40 | 0x80 + cp % 0x40 | ||
end | |||
return 0xC0 + cp / 0x40, | |||
0x80 + cp % 0x40, | |||
utf8_char(n, i + 1, ...) | |||
elseif cp < 0x10000 then | elseif cp < 0x10000 then | ||
-- Don't return "?" for surrogates, like mw.ustring.char does, as they | |||
-- have legitimate uses (e.g. in JSON). | |||
if i == n then | |||
return 0xE0 + cp / 0x1000, | |||
0x80 + cp / 0x40 % 0x40, | |||
0x80 + cp % 0x40 | |||
end | end | ||
return | return 0xE0 + cp / 0x1000, | ||
0x80 + cp / 0x40 % 0x40, | 0x80 + cp / 0x40 % 0x40, | ||
0x80 + cp % 0x40 | 0x80 + cp % 0x40, | ||
utf8_char(n, i + 1, ...) | |||
elseif cp < 0x110000 then | elseif cp < 0x110000 then | ||
return | if i == n then | ||
return 0xF0 + cp / 0x40000, | |||
0x80 + cp / 0x1000 % 0x40, | |||
0x80 + cp / 0x40 % 0x40, | |||
0x80 + cp % 0x40 | |||
end | |||
return 0xF0 + cp / 0x40000, | |||
0x80 + cp / 0x1000 % 0x40, | 0x80 + cp / 0x1000 % 0x40, | ||
0x80 + cp / 0x40 % 0x40, | 0x80 + cp / 0x40 % 0x40, | ||
0x80 + cp % 0x40 | 0x80 + cp % 0x40, | ||
utf8_char(n, i + 1, ...) | |||
end | end | ||
codepoint_err(cp, i) | |||
end | end | ||
return function( | return function(...) | ||
local n = select("#", ...) | |||
return utf8_char( | if n ~= 0 then | ||
return char(utf8_char(n, 1, ...)) | |||
end | end | ||
end | end | ||
Latest revision as of 21:33, 16 November 2025
Documentation for this module may be created at Module:string/char/doc
local math_module = "Module:math"
local char = string.char
local error = error
local floor = math.floor
local format = string.format
local pcall = pcall
local select = select
local tonumber = tonumber
local type = type
--- Lazy loader for the `to_hex` function exported by the module named in
-- `math_module`.
--
-- The first call requires that module, overwrites this local with the real
-- function (so later uses bypass the loader), and forwards the arguments to
-- it. The module is therefore only loaded if a hex conversion is needed.
local function to_hex(...)
	local math_lib = require(math_module)
	to_hex = math_lib.to_hex
	return to_hex(...)
end
--- Raise a "bad argument" error for an out-of-range codepoint.
--
-- @param cp  the offending numeric value
-- @param i   1-based position of the offending argument; it is also used to
--            compute the error level (i + 3). Presumably this skips the `i`
--            nested utf8_char frames and the module's entry function so the
--            error is attributed to the external caller -- the level is only
--            right when called (non-tail) from utf8_char at recursion depth i.
-- This function never returns.
local function codepoint_err(cp, i)
	-- to_hex can only return integers, so the bad value is only shown when
	-- the conversion succeeds, i.e. when it looks like a codepoint.
	local ok, hex = pcall(to_hex, cp, true)
	local got = ""
	if ok then
		got = "; got " .. hex
	end
	local msg = format(
		"bad argument #%d to 'string/char' (codepoint between 0x0 and 0x10FFFF expected%s)",
		i, got)
	error(msg, i + 3)
end
--- Recursively convert codepoints into the UTF-8 byte values encoding them.
--
-- Handles `v`, the `i`-th of `n` codepoints, then recurses on the remaining
-- varargs, so the bytes of every codepoint come back as one flat list of
-- return values suitable for passing straight to string.char.
--
-- @param n    total number of codepoints being converted
-- @param i    1-based position of `v` among them
-- @param v    the codepoint: a number, or anything tonumber() accepts
-- @param ...  the codepoints after `v`
-- @return     the integer byte values (1 to 4 of them) for `v`, followed by
--             the byte values for the remaining codepoints
--
-- Raises an error if `v` is not convertible to a number, or is outside
-- 0x0-0x10FFFF (NaN included). Surrogates are deliberately encoded rather
-- than replaced by "?", like mw.ustring.char does, as they have legitimate
-- uses (e.g. in JSON).
--
-- Every byte value is floored explicitly. Returning fractional values and
-- relying on string.char to truncate them is not portable: some Lua 5.1
-- builds round to nearest, and Lua 5.3+ rejects non-integral floats.
local function utf8_char(n, i, v, ...)
	local cp = tonumber(v)
	if cp == nil then
		-- The recursion below is not a tail call, so `i` frames of this
		-- function are on the stack; level i + 2 therefore points past them
		-- and past the function that started the recursion at i = 1.
		-- NOTE(review): this message says 'char' while codepoint_err says
		-- 'string/char' -- confirm which name is intended.
		error(format("bad argument #%d to 'char' (number expected; got %s)", i, type(v)), i + 2)
	elseif not (cp >= 0 and cp < 0x110000) then
		-- Written as a negated range test so that NaN, for which every
		-- comparison is false, is rejected too. Must stay a plain call (not
		-- `return codepoint_err(...)`): a tail call would drop this frame and
		-- shift the error level.
		codepoint_err(cp, i)
	end
	-- Drop any fractional part so all the arithmetic below is integral.
	cp = floor(cp)
	if cp < 0x80 then
		-- 1 byte: 0xxxxxxx
		if i == n then
			return cp
		end
		return cp, utf8_char(n, i + 1, ...)
	elseif cp < 0x800 then
		-- 2 bytes: 110xxxxx 10xxxxxx
		local b1 = 0xC0 + floor(cp / 0x40)
		local b2 = 0x80 + cp % 0x40
		if i == n then
			return b1, b2
		end
		return b1, b2, utf8_char(n, i + 1, ...)
	elseif cp < 0x10000 then
		-- 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
		local b1 = 0xE0 + floor(cp / 0x1000)
		local b2 = 0x80 + floor(cp / 0x40) % 0x40
		local b3 = 0x80 + cp % 0x40
		if i == n then
			return b1, b2, b3
		end
		return b1, b2, b3, utf8_char(n, i + 1, ...)
	end
	-- 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	local b1 = 0xF0 + floor(cp / 0x40000)
	local b2 = 0x80 + floor(cp / 0x1000) % 0x40
	local b3 = 0x80 + floor(cp / 0x40) % 0x40
	local b4 = 0x80 + cp % 0x40
	if i == n then
		return b1, b2, b3, b4
	end
	return b1, b2, b3, b4, utf8_char(n, i + 1, ...)
end
--- Module entry point: build a UTF-8 string from any number of codepoints.
--
-- @param ...  codepoints: numbers, or values tonumber() accepts
-- @return     the string formed by encoding each codepoint as UTF-8, in
--             order; the empty string when called with no arguments
--
-- Errors raised by utf8_char for non-numeric or out-of-range arguments
-- propagate to the caller.
return function(...)
	-- select("#", ...) counts explicit nil arguments as well, so those are
	-- still passed on and rejected by utf8_char instead of being dropped.
	local n = select("#", ...)
	if n == 0 then
		-- Mirror string.char(), which returns "" for zero arguments, instead
		-- of returning no value at all. utf8_char must not be entered with
		-- nothing to convert, so this case is handled here.
		return ""
	end
	return char(utf8_char(n, 1, ...))
end