-- Module:Sandbox/Erutuon/UTF-8 explained

-- Table of functions exported by this module.
local p = {}

-- Bitwise helpers used to pick individual bits out of a byte.
-- Each declaration sits on its own line: when fused together, `bit.bandlocal`
-- lexes as a single (nonexistent) field and `rshift` leaks into a global.
local bit = require("bit32")
local band = bit.band
local rshift = bit.rshift

-- Calls func once for every item in the sequence part of t, in order.
-- Only the item is passed to func; its return value is ignored.
function table.forEach(t, func)
	for _, item in ipairs(t) do
		func(item)
	end
end

-- Gives arr a metatable whose __index is the table library, so that callers
-- can write arr:insert(x) and arr:concat(sep). Returns arr itself.
local function setMt(arr)
	return setmetatable(arr, { __index = table })
end

-- Converts a string representing a number in binary base to a Lua number.
-- Returns nil when the string is not a valid base-2 numeral.
local function binary(stringBinary)
	return tonumber(stringBinary, 2)
end

-- Find the digit at a certain position in a byte.
-- number: the byte, either as a Lua number or as a string of binary digits.
-- index: bit position, where 1 is the highest bit and 8 the lowest.
-- Returns 0 or 1.
local function digitAt(number, index)
	if type(number) == "string" then
		number = binary(number)
	end
	-- Shift the wanted bit down to the lowest position, then mask it off.
	return band(rshift(number, 8 - index), 1)
end

-- Returns a table containing bits in a byte, from highest to lowest.
-- The result always has eight entries, each 0 or 1.
local function getBits(byte)
	local bits = {}
	-- Fill from the end: each pass peels the lowest remaining bit off byte.
	-- The loop variable is deliberately not called `bit`, which would shadow
	-- the bit library local.
	for position = 8, 1, -1 do
		bits[position] = band(byte, 1)
		byte = rshift(byte, 1)
	end
	return bits
end

-- mw.log(table.concat(getBits(rshift(binary("11100001"), 8 - 3))))

-- Do something to each byte in a string; put the result in a table.
-- str: the string whose bytes are visited in order.
-- func: called with each byte value (a number); its result is collected.
-- Returns a sequence of the collected results (empty for an empty string).
local function iterBytes(str, func)
	local out = {}
	for i = 1, #str do
		out[#out + 1] = func(string.byte(str, i))
	end
	return out
end

-- Builds the pair of markup strings used to colour a run of digits.
-- Returns a two-element table: [1] is the opening tag, [2] the closing tag.
-- Callers index the result as tag[1] / tag[2], so returning nothing (as the
-- damaged original did) raises "attempt to index a nil value".
-- NOTE(review): the original tag markup was lost; a colour-styled <span> is
-- assumed here - confirm against the intended output.
local function makeTag(color)
	return {
		'<span style="color: ' .. color .. ';">',
		"</span>",
	}
end

-- Find leading digits marking ASCII, leading bytes, or continuation bytes,
-- else tag byte as red.
-- byteTable: sequence of eight bits (0 or 1), highest first. It is modified in
-- place: an opening tag is inserted at the front and a closing tag after the
-- marked digits. The same table is returned.
local function markDigits(byteTable)
	local onesCount = 0
	setMt(byteTable)
	for i, digit in ipairs(byteTable) do
		if digit == 1 then
			onesCount = onesCount + 1
			if onesCount > 4 then
				-- More than four leading ones: colour the whole byte red.
				-- Return at once: inserting at the front shifts every
				-- element, so continuing the ipairs loop would revisit the
				-- same digit forever.
				local tag = makeTag("red")
				byteTable:insert(tag[2])
				byteTable:insert(1, tag[1])
				return byteTable
			end
		else
			local tag
			if onesCount == 0 then
				-- ASCII (0x00 - 0x7F)
				tag = makeTag("darkgray")
			elseif onesCount == 1 then
				-- continuation bytes
				tag = makeTag("chocolate")
			else
				-- leading bytes
				tag = makeTag("deeppink")
			end
			-- Close after the terminating zero first, while index i is still
			-- valid, then open at the front.
			byteTable:insert(i + 1, tag[2])
			byteTable:insert(1, tag[1])
			return byteTable
		end
	end
	return byteTable
end

-- Renders every byte of str as its marked-up bit string and joins the
-- per-byte results with a single space.
local function printBytes(str)
	local function renderByte(byte)
		local marked = markDigits(getBits(byte))
		return table.concat(marked)
	end

	local rendered = iterBytes(str, renderByte)
	return table.concat(rendered, " ")
end

-- Splits str into characters with mw.ustring.gmatch and builds two parallel
-- sequences: the characters themselves, and the marked-up bytes of each
-- character as produced by printBytes.
-- Returns chars, bytes.
local function makeCharByteTables(str)
	-- Each list needs its own fresh table; assigning bare `setMt` would make
	-- both variables the function itself and break chars:insert below.
	local chars = setMt({})
	local bytes = setMt({})
	for char in mw.ustring.gmatch(str, ".") do
		chars:insert(char)
		bytes:insert(printBytes(char))
	end
	return chars, bytes
end

-- Lays the characters and their marked-up bytes out as text, one output line
-- per table row, joined with newlines.
-- chars, bytes: parallel sequences as returned by makeCharByteTables.
-- NOTE(review): the original body was damaged (unterminated string, table
-- contents missing). A two-row wikitable - characters above, bytes below - is
-- assumed; confirm the intended layout. Characters that are wikitext syntax
-- (such as "|") are not escaped here.
local function print(chars, bytes)
	setMt(chars)
	setMt(bytes)
	local output = setMt({
		'{| class="wikitable"',
		"| " .. chars:concat(" || "),
		"|-",
		"| " .. bytes:concat(" || "),
		"|}",
	})
	return output:concat("\n")
end

-- Module entry point: takes the text from the first frame argument, or a
-- built-in sample when that argument is absent, and returns the rendered
-- character/byte breakdown.
function p.show(frame)
	local text = frame.args[1]
	if not text then
		text = "abc πρᾶγμᾰ"
	end
	local chars, bytes = makeCharByteTables(text)
	return print(chars, bytes)
end

-- Hand the module table (carrying the `show` entry point) back to whoever loads this file.
return p