-- Module:template parser/data
--
-- From Linguifex
-- Jump to navigation | Jump to search
--
-- Documentation for this module may be created at Module:template parser/data/doc

local string = string

local gsub = string.gsub
local load_data = mw.loadData
local pairs = pairs
local upper = string.upper

-- Table returned at the end of this module; the sections below fill in its fields.
local data = {}

do
	-- Lookup tables derived from "Module:data/magic words". Parser functions
	-- and transclusion modifiers are keyed with a trailing ":"; parser
	-- variables are keyed by the bare magic word.
	local functions_sensitive, functions_insensitive = {}, {}
	local variables_sensitive, variables_insensitive = {}, {}
	local modifiers = {}
	
	for word, info in pairs(load_data("Module:data/magic words")) do
		local canonical, sensitive = info.name, info.case_sensitive
		
		-- A single magic word may carry several of these flags, so each one
		-- is checked independently.
		if info.parser_function then
			local target = functions_insensitive
			if sensitive then
				target = functions_sensitive
			end
			target[word .. ":"] = canonical .. ":"
		end
		
		if info.parser_variable then
			local target = variables_insensitive
			if sensitive then
				target = variables_sensitive
			end
			target[word] = canonical
		end
		
		if info.transclusion_modifier then
			-- Modifiers map to their priority rather than to a canonical name.
			modifiers[word .. ":"] = info.priority
		end
	end
	
	data.parser_functions_case_sensitive = functions_sensitive
	data.parser_functions_case_insensitive = functions_insensitive
	data.parser_variables_case_sensitive = variables_sensitive
	data.parser_variables_case_insensitive = variables_insensitive
	data.transclusion_modifiers = modifiers
end

do
	local tags = load_data("Module:data/parser extension tags")
	local lower = string.lower
	
	-- Generates the string pattern for the end tag of `tag`: "</", then the
	-- tag name matched case-insensitively, then optional whitespace and ">".
	--
	-- The name is walked one UTF-8 character at a time (a lead byte plus any
	-- continuation bytes \128-\191):
	--   * a character with distinct upper/lower forms becomes a two-member
	--     set "[Xx]", whichever case it was written in;
	--   * any other character is emitted literally, with ASCII punctuation
	--     "%"-escaped so that pattern magic characters stay inert (e.g. a raw
	--     "^" placed first in a set would negate it).
	-- For names consisting only of lower-case ASCII letters the result is
	-- identical to wrapping every letter as "[Xx]".
	local function end_tag_pattern(tag)
		local name_pattern = gsub(tag, ".[\128-\191]*", function(ch)
			local uc, lc = upper(ch), lower(ch)
			if uc ~= lc then
				return "[" .. uc .. lc .. "]"
			end
			-- Parentheses drop gsub's second return value (the match count).
			return (gsub(ch, "^%p$", "%%%0"))
		end)
		return "</" .. name_pattern .. "%s*>"
	end
	
	local data_tags = {}
	
	for tag in pairs(tags) do
		data_tags[tag] = end_tag_pattern(tag)
	end
	-- These two are always registered here, whether or not they appear in the
	-- loaded tag data.
	data_tags["includeonly"] = end_tag_pattern("includeonly")
	data_tags["noinclude"] = end_tag_pattern("noinclude")
	
	data.tags = data_tags
end

-- Negated character set: matches any character that is NOT "-", ".", ":",
-- "_", a %w character, or one of the code points listed below.
-- %w is equivalent to \p{L}\p{Nd}, but the native regex uses \p{L}\p{N}, so
-- the extra entries are spelled out explicitly as UTF-8 byte escapes (the
-- trailing comment on each line gives the code point or range).
-- NOTE(review): the multi-byte ranges presumably require a UTF-8-aware
-- matcher (e.g. mw.ustring) at the call site - confirm against callers.
data.invalid_tag_attribute_name_char = table.concat({
	"[^%-%.:_%w",
	"\194\178", -- U+00B2
	"\194\179", -- U+00B3
	"\194\185", -- U+00B9
	"\194\188-\194\190", -- U+00BC-U+00BE
	"\224\167\180-\224\167\185", -- U+09F4-U+09F9
	"\224\173\178-\224\173\183", -- U+0B72-U+0B77
	"\224\175\176-\224\175\178", -- U+0BF0-U+0BF2
	"\224\177\184-\224\177\190", -- U+0C78-U+0C7E
	"\224\181\152-\224\181\158", -- U+0D58-U+0D5E
	"\224\181\176-\224\181\184", -- U+0D70-U+0D78
	"\224\188\170-\224\188\179", -- U+0F2A-U+0F33
	"\225\141\169-\225\141\188", -- U+1369-U+137C
	"\225\155\174-\225\155\176", -- U+16EE-U+16F0
	"\225\159\176-\225\159\185", -- U+17F0-U+17F9
	"\225\167\154", -- U+19DA
	"\226\129\176", -- U+2070
	"\226\129\180-\226\129\185", -- U+2074-U+2079
	"\226\130\128-\226\130\137", -- U+2080-U+2089
	"\226\133\144-\226\134\130", -- U+2150-U+2182
	"\226\134\133-\226\134\137", -- U+2185-U+2189
	"\226\145\160-\226\146\155", -- U+2460-U+249B
	"\226\147\170-\226\147\191", -- U+24EA-U+24FF
	"\226\157\182-\226\158\147", -- U+2776-U+2793
	"\226\179\189", -- U+2CFD
	"\227\128\135", -- U+3007
	"\227\128\161-\227\128\169", -- U+3021-U+3029
	"\227\128\184-\227\128\186", -- U+3038-U+303A
	"\227\134\146-\227\134\149", -- U+3192-U+3195
	"\227\136\160-\227\136\169", -- U+3220-U+3229
	"\227\137\136-\227\137\143", -- U+3248-U+324F
	"\227\137\145-\227\137\159", -- U+3251-U+325F
	"\227\138\128-\227\138\137", -- U+3280-U+3289
	"\227\138\177-\227\138\191", -- U+32B1-U+32BF
	"\234\155\166-\234\155\175", -- U+A6E6-U+A6EF
	"\234\160\176-\234\160\181", -- U+A830-U+A835
	"\240\144\132\135-\240\144\132\179", -- U+10107-U+10133
	"\240\144\133\128-\240\144\133\184", -- U+10140-U+10178
	"\240\144\134\138", -- U+1018A
	"\240\144\134\139", -- U+1018B
	"\240\144\139\161-\240\144\139\187", -- U+102E1-U+102FB
	"\240\144\140\160-\240\144\140\163", -- U+10320-U+10323
	"\240\144\141\129", -- U+10341
	"\240\144\141\138", -- U+1034A
	"\240\144\143\145-\240\144\143\149", -- U+103D1-U+103D5
	"\240\144\161\152-\240\144\161\159", -- U+10858-U+1085F
	"\240\144\161\185-\240\144\161\191", -- U+10879-U+1087F
	"\240\144\162\167-\240\144\162\175", -- U+108A7-U+108AF
	"\240\144\163\187-\240\144\163\191", -- U+108FB-U+108FF
	"\240\144\164\150-\240\144\164\155", -- U+10916-U+1091B
	"\240\144\166\188", -- U+109BC
	"\240\144\166\189", -- U+109BD
	"\240\144\167\128-\240\144\167\143", -- U+109C0-U+109CF
	"\240\144\167\146-\240\144\167\191", -- U+109D2-U+109FF
	"\240\144\169\128-\240\144\169\136", -- U+10A40-U+10A48
	"\240\144\169\189", -- U+10A7D
	"\240\144\169\190", -- U+10A7E
	"\240\144\170\157-\240\144\170\159", -- U+10A9D-U+10A9F
	"\240\144\171\171-\240\144\171\175", -- U+10AEB-U+10AEF
	"\240\144\173\152-\240\144\173\159", -- U+10B58-U+10B5F
	"\240\144\173\184-\240\144\173\191", -- U+10B78-U+10B7F
	"\240\144\174\169-\240\144\174\175", -- U+10BA9-U+10BAF
	"\240\144\179\186-\240\144\179\191", -- U+10CFA-U+10CFF
	"\240\144\185\160-\240\144\185\190", -- U+10E60-U+10E7E
	"\240\144\188\157-\240\144\188\166", -- U+10F1D-U+10F26
	"\240\144\189\145-\240\144\189\148", -- U+10F51-U+10F54
	"\240\144\191\133-\240\144\191\139", -- U+10FC5-U+10FCB
	"\240\145\129\146-\240\145\129\165", -- U+11052-U+11065
	"\240\145\135\161-\240\145\135\180", -- U+111E1-U+111F4
	"\240\145\156\186", -- U+1173A
	"\240\145\156\187", -- U+1173B
	"\240\145\163\170-\240\145\163\178", -- U+118EA-U+118F2
	"\240\145\177\154-\240\145\177\172", -- U+11C5A-U+11C6C
	"\240\145\191\128-\240\145\191\148", -- U+11FC0-U+11FD4
	"\240\146\144\128-\240\146\145\174", -- U+12400-U+1246E
	"\240\150\173\155-\240\150\173\161", -- U+16B5B-U+16B61
	"\240\150\186\128-\240\150\186\150", -- U+16E80-U+16E96
	"\240\157\139\128-\240\157\139\147", -- U+1D2C0-U+1D2D3
	"\240\157\139\160-\240\157\139\179", -- U+1D2E0-U+1D2F3
	"\240\157\141\160-\240\157\141\184", -- U+1D360-U+1D378
	"\240\158\163\135-\240\158\163\143", -- U+1E8C7-U+1E8CF
	"\240\158\177\177-\240\158\178\171", -- U+1EC71-U+1ECAB
	"\240\158\178\173-\240\158\178\175", -- U+1ECAD-U+1ECAF
	"\240\158\178\177-\240\158\178\180", -- U+1ECB1-U+1ECB4
	"\240\158\180\129-\240\158\180\173", -- U+1ED01-U+1ED2D
	"\240\158\180\175-\240\158\180\189", -- U+1ED2F-U+1ED3D
	"\240\159\132\128-\240\159\132\140", -- U+1F100-U+1F10C
	"]",
})

return data