|
|
Line 2: |
Line 2: |
|
| |
|
| local ustring = mw.ustring | | local ustring = mw.ustring |
| | local is_combining = require "Module:Unicode data".is_combining |
| local UTF8_char = '[\1-\127\194-\244][\128-\191]*' | | local UTF8_char = '[\1-\127\194-\244][\128-\191]*' |
|
| |
-- Builds a memoizing lookup function over the data table found at
-- `loader[data_module_subpage]`.
--
-- Parameters:
--   data_module_subpage  key used to index `loader`; the table found there
--                        must provide the fields `singles` and `ranges`.
--   match_func           called as match_func(codepoint, ...) with the values
--                        found for the code point; whatever it returns is
--                        returned to the caller of the lookup function.
--   ...                  default values passed to match_func when the code
--                        point is found in neither `singles` nor `ranges`.
--
-- Returns: function (codepoint) performing the lookup.
local function memo_lookup(data_module_subpage, match_func, ...)
	local dots = { ... }
	-- Ranges that have already been hit, kept sorted with compare_ranges so
	-- that binary_range_search can be run on them.
	local cache = {}
	local singles, ranges

	return function (codepoint)
		-- The data table is fetched on the first call only.
		if not singles then
			local data_module = loader[data_module_subpage]
			singles, ranges = data_module.singles, data_module.ranges
		end

		if singles[codepoint] then
			return match_func(codepoint, singles[codepoint])
		end

		-- Try the previously matched ranges first.
		local range = binary_range_search(codepoint, cache)
		if range then
			return match_func(codepoint, manual_unpack(range, 3))
		end

		local index
		range, index = binary_range_search(codepoint, ranges)
		if range then
			table.insert(cache, range)
			table.sort(cache, compare_ranges)
			return match_func(codepoint, manual_unpack(range, 3))
		end

		-- Miss: remember the gap around the code point together with the
		-- default values, so later lookups in the same gap hit the cache.
		-- NOTE(review): this relies on `index` being the entry adjacent to
		-- the code point when binary_range_search fails, as the branch
		-- comment below already assumes -- confirm against that helper.
		if ranges[index] then
			local dots_range
			if codepoint > ranges[index][2] then
				dots_range = {
					ranges[index][2] + 1,
					ranges[index + 1] and ranges[index + 1][1] - 1 or 0x10FFFF,
					unpack(dots)
				}
			else -- codepoint < ranges[index][1]
				dots_range = {
					ranges[index - 1] and ranges[index - 1][2] + 1 or 0,
					ranges[index][1] - 1,
					unpack(dots)
				}
			end
			-- Previously the gap range was built and then discarded, so the
			-- sort below operated on an unchanged cache.
			table.insert(cache, dots_range)
			table.sort(cache, compare_ranges)
		end

		return match_func(codepoint, unpack(dots))
	end
end
| |
|
| |
-- Lookup function built by memo_lookup over the "combining" data, with 0 as
-- the default value for code points that have no entry.
-- Returns true when the value found for the code point is truthy and not 0,
-- false otherwise.
local function has_nonzero_class(codepoint, combining_class)
	if not combining_class then
		return false
	end
	return combining_class ~= 0
end

local is_combining = memo_lookup("combining", has_nonzero_class, 0)
| |
|
| |
|
| |
|
| local sorted_pairs = require('Module:table').sortedPairs | | local sorted_pairs = require('Module:table').sortedPairs |