-- Table that the module's public functions (p.show, p.scripts_in_block, ...)
-- are attached to.
-- NOTE(review): the initializer was missing in this copy of the file; an
-- empty table is the only value that lets the `function p.name` definitions
-- below work. Confirm against the upstream revision.
local p = {}
local Unicode_data = require "Module:Unicode data/sandbox"
local function errorf(level, ...) if type(level)
-- printf-style logging: formats the arguments with string.format and hands
-- the resulting message to mw.log.
function mw.logf(...)
	local message = string.format(...)
	return mw.log(message)
end
-- Simple string buffer: pieces are appended with insert/insert_format and
-- glued together with join (table.concat).
-- NOTE(review): this copy of the file had lost its line breaks, empty table
-- constructors and empty call parentheses, so the block did not parse. It has
-- been restored from what the methods require; confirm against upstream.
local output_mt = {}

-- Appends str to the buffer and bumps the element counter self.n.
function output_mt:insert(str)
	self.n = self.n + 1
	self[self.n] = str
end

-- NOTE(review): the original comment on this line read "also in" followed by
-- a reference that is missing from this copy of the file.
-- Appends string.format(...) to the buffer.
function output_mt:insert_format(...)
	self:insert(string.format(...))
end

-- join(sep) is table.concat(self, sep).
output_mt.join = table.concat
output_mt.__index = output_mt

-- Creates an empty buffer. n starts at 0 because insert increments it before
-- storing the first element.
local function Output()
	return setmetatable({ n = 0 }, output_mt)
end
local Latn_pattern = table.concat ;
local get_codepoint = mw.ustring.codepoint

-- Expands a character range into the string of every character it covers.
-- start and ending are passed to mw.ustring.codepoint to obtain the first and
-- last code points of the range (inclusive).
-- Returns nil when the range is reversed (ending sorts before start);
-- otherwise returns the concatenation of mw.ustring.char(codepoint) for each
-- code point from start to ending.
local function expand_range(start, ending)
	local lower, higher = get_codepoint(start), get_codepoint(ending)
	if higher < lower then
		return nil
	end

	local chars = {}
	local i = 0
	for codepoint = lower, higher do
		i = i + 1
		chars[i] = mw.ustring.char(codepoint)
	end
	return table.concat(chars)
end
local fun = require "Module:Fun"local m_table = require "Module:TableTools"
-- Metatable for script -> count tables; it is also its own metatable so that
-- calling it produces a new counter table.
-- NOTE(review): the table constructor was missing from this copy of the file.
-- The two metamethods below are reconstructed from how show_scripts uses the
-- value (it reads a count before ever writing it, and needs a fresh table per
-- call); confirm against the upstream revision.
local script_to_count_mt = {
	-- A script that has not been counted yet reads as 0.
	__index = function ()
		return 0
	end,
	-- Calling the metatable returns an empty table that uses it.
	__call = function (self)
		return setmetatable({}, self)
	end,
}
setmetatable(script_to_count_mt, script_to_count_mt)

-- Uses an iterator (such as mw.ustring.gcodepoint) that generates a codepoint
-- each time it is called with an optional state and another value.
-- Looks up the script of every generated codepoint with
-- Unicode_data.lookup_script, counts codepoints per script, and returns the
-- counts as "script (count)" items joined by ", ", highest count first.
local function show_scripts(iterator, state, value)
	local script_to_count = script_to_count_mt()
	for codepoint in iterator, state, value do
		local script = Unicode_data.lookup_script(codepoint)
		script_to_count[script] = script_to_count[script] + 1
	end

	return table.concat(
		fun.mapIter(
			function (count, script)
				return ("%s (%d)"):format(script, count)
			end,
			m_table.sortedPairs(
				script_to_count,
				function (script1, script2)
					return script_to_count[script1] > script_to_count[script2]
				end)),
		", ")
end
-- Groups the codepoints produced by an iterator by script.
-- iterator, state, value: a generic-for iterator triple that yields one
-- codepoint per step.
-- Returns a table mapping each script (as returned by
-- Unicode_data.lookup_script) to a set of codepoints: a table whose keys are
-- the codepoints and whose values are true.
local function get_chars_in_scripts(iterator, state, value)
	local script_to_char_set = {}
	for codepoint in iterator, state, value do
		local script = Unicode_data.lookup_script(codepoint)
		-- Create the set for a script the first time it is seen.
		script_to_char_set[script] = script_to_char_set[script] or {}
		script_to_char_set[script][codepoint] = true
	end
	return script_to_char_set
end
-- Renders a script -> codepoint-set map as text.
-- script_to_char_set: table mapping a script to a table keyed by codepoint.
-- format: format string taking the script and its characters
--   (default "%s: %s").
-- separator: string placed between the entries (default "\n").
-- Entries and the characters inside each entry follow the order produced by
-- m_table.sortedPairs; the characters are passed through mw.text.nowiki.
local function print_char_set_map(script_to_char_set, format, separator)
	if not format then
		format = "%s: %s"
	end
	if not separator then
		separator = "\n"
	end

	-- Turns one codepoint key into its character.
	local function to_char(_, codepoint)
		return mw.ustring.char(codepoint)
	end

	-- Produces the formatted text for one script and its character set.
	local function render_entry(char_set, script)
		local char_list = fun.mapIter(to_char, m_table.sortedPairs(char_set))
		return (format):format(script, mw.text.nowiki(table.concat(char_list)))
	end

	local entries = fun.mapIter(render_entry, m_table.sortedPairs(script_to_char_set))
	return table.concat(entries, separator)
end
-- Expands the character ranges in Latn_pattern and returns the expanded text
-- followed by a per-script tally of its codepoints (see show_scripts).
-- frame is accepted but not read.
-- NOTE(review): this copy of the file had lost its line breaks, so the inline
-- comment swallowed the rest of the statement and the quoted format string
-- spanned raw newlines (invalid in a quoted Lua string). The string below
-- keeps exactly the characters that were visible ("*", newline, "%s",
-- newline, "%s"); any wiki/HTML markup it may have contained upstream is not
-- recoverable from here. Confirm against the upstream revision.
function p.show(frame)
	local expanded_pattern = Latn_pattern
		-- Drop the square brackets around character classes.
		:gsub("%[(.-)%]", "%1")
		:gsub(
			-- Find two UTF-8-encoded characters separated by hyphen-minus.
			"([%z\1-\127\194-\244][\128-\191]*)%-([%z\1-\127\194-\244][\128-\191]*)",
			function (char1, char2)
				-- A nil result (reversed range) leaves the match unchanged.
				return expand_range(char1, char2)
			end)

	return ('*\n%s\n%s')
		:format(
			-- Remove initial "\n " to avoid creating unwanted pre element.
			expanded_pattern:gsub("^%s*", ""),
			show_scripts(mw.ustring.gcodepoint(expanded_pattern)))
end
-- Fetches the block info for the block named in args[arg].
-- args: argument table (callers pass frame.args).
-- arg: key of the parameter that holds the block name.
-- Returns whatever Unicode_data.get_block_info returns for that name.
-- Calls errorf when the parameter is missing or the lookup returns a falsy
-- value.
local function get_block_info_from_arg(args, arg)
	-- Read the parameter that was asked for. The previous code always read
	-- args[1], which only matched because every caller passes 1.
	local block_name = args[arg]
		or errorf("Parameter %s is required", tostring(arg))
	local block_info = Unicode_data.get_block_info(block_name)
		or errorf("The block '%s' could not be found", block_name)
	return block_info
end
-- Interprets args[arg] through Module:Yesno.
-- A missing (nil or false) argument is returned as is, without loading
-- Module:Yesno; otherwise the single result of calling that module on the
-- argument is returned.
local function get_boolean_from_arg(args, arg)
	local raw = args[arg]
	if not raw then
		return raw
	end
	local yesno = require "Module:Yesno"
	-- Parentheses keep this to exactly one return value.
	return (yesno(raw))
end
-- Lists the scripts found in one Unicode block.
-- frame.args[1]: block name, resolved by get_block_info_from_arg.
-- frame.args[2]: optional flag read by get_boolean_from_arg; when truthy the
--   result is prefixed with block_info[3] and ": ".
-- Returns the text produced by show_scripts for the codepoints from
-- block_info[1] to block_info[2].
function p.scripts_in_block(frame)
	local block_info = get_block_info_from_arg(frame.args, 1)
	local show_block_name = get_boolean_from_arg(frame.args, 2)
	local script_list = show_scripts(fun.range(block_info[1], block_info[2]))
	if show_block_name then
		return ("%s: %s"):format(block_info[3], script_list)
	else
		return script_list
	end
end
-- Formats a block name for display in the table built by p.scripts_in_blocks.
-- NOTE(review): in this copy of the file both format strings are just "%s",
-- so both branches return block_name unchanged. The second branch passes
-- block_name twice, which suggests the strings originally contained link
-- markup that has been lost; restore it from the upstream revision. The
-- strings are kept exactly as they appear here.
local function link_block_name(block_name)
	if block_name:find " " then
		return ("%s"):format(block_name)
	else
		return ("%s"):format(block_name, block_name)
	end
end
-- Builds a table of the scripts found in each Unicode block and returns the
-- joined output buffer.
-- frame.args[1] and frame.args[2] are parsed as hexadecimal numbers
-- (tonumber(..., 16)) and bound the block start codepoints that are
-- processed; they default to 0 and 0x4000.
-- Script data is read from "Module:Unicode data/scripts" (its singles, ranges
-- and aliases fields) and blocks from "Module:Unicode data/blocks", both via
-- mw.loadData. Codepoints with no entry in either singles or ranges are
-- counted under "Zzzz".
-- NOTE(review): this copy of the function is damaged and does not parse as
-- written: line breaks are gone (so each "--" comment swallows the code after
-- it), the tables passed to setmetatable for counts and codepoints_per_script
-- are missing, empty call parentheses are missing (Output, counts:clear,
-- output:join), and the long-string delimiters and markup of the table
-- header, row format and per-script format are missing. Restore it from the
-- upstream revision rather than from this text.
-- NOTE(review): the block loop uses pairs together with "break" once a block
-- starts above ending; pairs does not promise any order, so this presumably
-- relies on the blocks being visited in ascending order -- verify.
function p.scripts_in_blocks(frame) local output = Output local start = frame.args[1] and tonumber(frame.args[1], 16) or 0 local ending = frame.args[2] and tonumber(frame.args[2], 16) or 0x4000 local script_data = mw.loadData "Module:Unicode data/scripts" local singles = script_data.singles local ranges = script_data.ranges local function clear (self) for _, key in ipairs(m_table.keysToList(self, false)) do self[key] = nil end end local counts = setmetatable(counts,) local codepoints_per_script = setmetatable(codepoints_per_script,) output:insert class="wikitable"|+ Scripts in each Unicode block! block !! codepoints !! scripts]] for _, block in pairs(mw.loadData "Module:Unicode data/blocks") do local codepoint = block[1] if codepoint > ending then break end if codepoint >= start then while codepoint <= block[2] do local script = singles[codepoint] local count if script then -- Codepoint is in "singles" map. counts:increment(script) codepoints_per_script:add(script, codepoint) codepoint = codepoint + 1 count = 1 -- for potential future use else local range, index = Unicode_data.binary_range_search(codepoint, ranges) if range then -- Codepoint is in "ranges" array. count = 0 script = range[3] while codepoint <= range[2] and codepoint <= block[2] do count = count + 1 codepoints_per_script:add(script, codepoint) codepoint = codepoint + 1 end counts:increment(script, count) else -- Codepoint doesn't have data; it's Zzzz. -- Get range immediately above codepoint. 
while ranges[index][2] < codepoint do index = index + 1 end count = 0 script = "Zzzz" local range = ranges[index] while codepoint < range[1] and codepoint <= block[2] and not singles[codepoint] do count = count + 1 codepoint = codepoint + 1 end counts:increment(script, count) end end end output:insert_format(-| %s| U+%04X - U+%04X| %s, link_block_name(block[3]), block[1], block[2], table.concat(fun.map(function (count, script) return ('%s (%d)') :format(script_data.aliases[script], script, codepoints_per_script[script] and mw.text.nowiki(mw.ustring.char(unpack(codepoints_per_script[script]))) or "", count) end, m_table.sortedPairs(counts, function (script1, script2) return counts[script1] > counts[script2] end)), ", ")) end -- mw.logObject(codepoints_per_script, block[3]) counts:clear codepoints_per_script:clear end output:insert "|}" return output:joinend
-- Lists the characters of one Unicode block grouped by script.
-- frame.args[1]: block name, resolved by get_block_info_from_arg.
-- frame.args[2]: optional flag read by get_boolean_from_arg; when truthy the
--   result is prefixed with block_info[3] and ": ".
-- Returns the text produced by print_char_set_map (default format and
-- separator) for the codepoints from block_info[1] to block_info[2].
function p.chars_in_scripts_in_block(frame)
	local block_info = get_block_info_from_arg(frame.args, 1)
	local show_block_name = get_boolean_from_arg(frame.args, 2)
	local script_char_set_map = print_char_set_map(
		get_chars_in_scripts(fun.range(block_info[1], block_info[2])))
	if show_block_name then
		return ("%s: %s"):format(block_info[3], script_char_set_map)
	else
		return script_char_set_map
	end
end
function p.search_for_language_codes(frame) local page_name = frame.args[1] or "English language" local success, title_object = pcall(mw.title.new, page_name) if not (success and title_object) then mw.logf("Could not make title object for '%s'.", page_name) return end local content = title_object:getContent local language_codes = for lang_template in content:gmatch "