-- todo split consist Char and Args-- todo cwith double dotcircle 230/239, 233, 234-- thought: option "speccial notes", listing: "whitesace, control, combining, NaC, .."require('strict')local p = local getArgs = require('Module:Arguments').getArgslocal uChar_data = mw.loadData('Module:Sandbox/DePiep/uchar/data')local uData = require('Module:Unicode data')local uData_helper = require('Module:Sandbox/DePiep/uchar-helper')local uBaseConvert = require('Module:BaseConvert')local yesno = require('Module:Yesno')local str = require('Module:String')local plaintext = require('Module:Plain text')--- local tabletools = require('Module:TableTools')local ERRstatus = local tUchar =
-- Module-level constants.
-- Special characters are spelled as UTF-8 byte escapes so that invisible
-- characters stay readable and cannot be lost when the source is copied.
-- NOTE(review): assumes the original literals were the raw characters (not
-- HTML entities such as "&nbsp;") -- confirm against the live module.
local DOTTED_CIRCLE = '\226\151\140' -- U+25CC DOTTED CIRCLE
local NBSP = '\194\160' -- U+00A0 NO-BREAK SPACE
local LEFT_TO_RIGHT_MARK = '\226\128\142' -- U+200E LEFT-TO-RIGHT MARK
local DEFAULT_IMAGE_SIZE = '21px'
local WS_BLUE = 'lightblue'
-- Test helper: renders s inside <span id="testH"><big>...</big></span>,
-- with the WS_BLUE background applied to the inner <big> element.
-- Returns the serialized HTML string of the outer span.
local function testH(s)
	local wrapper = mw.html.create('span')
	wrapper:attr('id', 'testH')

	-- tag() returns the new child node, so styling and text go on <big>
	local big = wrapper:tag('big')
	big:css('background', WS_BLUE)
	big:wikitext(s)

	return tostring(wrapper)
end
local function addStyles(tChar) local h = mw.html.create('span')
h :attr('id', 'testH') :css('font-size', '150%') :wikitext(tChar.uChar) if tChar.uIsWhitespace
return tostring(h)end
-- Entry point for testing testH: passes the first positional argument
-- (as processed by getArgs) to testH and returns its result.
function p.testH(frame)
	local firstArg = getArgs(frame)[1]
	return testH(firstArg)
end
-- Demo of the mw.html builder: a full-width <div id="testdiv"> holding the
-- text "Some text" followed by an <hr> child.
-- The frame argument is not used.
-- Returns the serialized HTML string of the div.
function p.testFromDoc(frame)
	local div = mw.html.create('div')
	div
		:attr('id', 'testdiv')
		:css('width', '100%')
		:wikitext('Some text')
		:tag('hr')
	-- Expected output (per the mw.html example in the Scribunto manual):
	-- <div id="testdiv" style="width:100%;">Some text<hr /></div>
	return tostring(div)
end
-- FORMATTERS
local function inTag(s, arg, val, divspan)local objlocal rprt = if divspan
'span' then else return nil -- ERR end
return s, rprtend
local function decodeString(s) if s
-- Format a string inside <code>...</code> tags.
-- Each run of whitespace is replaced by a single no-break space, so that
-- untrimmed (leading/trailing) whitespace stays visible.
-- s: string or nil
-- Returns '' when s is nil, otherwise the wrapped string.
local function inCode(s)
	if s == nil then
		return ''
	end
	-- '\194\160' is U+00A0 NO-BREAK SPACE in UTF-8.
	-- The outer parentheses drop gsub's second return value (the match count).
	local visible = (string.gsub(s, '%s+', '\194\160'))
	return '<code>' .. visible .. '</code>'
end
-- Use mono font-family (from: Template:Mono)local function inMono(s) if s
local function inSmallcaps(s) if (s
) then return end -- '
-- Build an external-link wikitext string to fileformat.info.
-- Depending on the parameters used, links to one of two pages:
--   uHexBare0x ~= nil: the character data page, for example
--     https://www.fileformat.info/info/unicode/char/00ad/index.htm
--     (hex value without "0x", lowercase); the link label uses uHexFormat.
--   otherwise: the General Category list, for example gencat "Nd":
--     https://www.fileformat.info/info/unicode/category/Nd/list.htm
-- NOTE(review): the "[url label]" bracket wrapper is reconstructed from
-- standard external-link wikitext -- confirm against the live module.
local function xlLinkFileFormat(uHexBare0x, uHexFormat, sGenCat)
	if uHexBare0x ~= nil then
		-- Character data page
		return '[https://www.fileformat.info/info/unicode/char/'
			.. string.lower(uHexBare0x)
			.. '/index.htm ff.info ' .. uHexFormat .. ']'
	else
		-- GenCat list
		return '[https://www.fileformat.info/info/unicode/category/'
			.. sGenCat
			.. '/list.htm ff.info ' .. sGenCat .. ']'
	end
end
-- UHEX HANDLERS & FORMATTERS ----- ----- ----- ----- ----- ----- ----- ----- -----

-- Format a hex code point string into the normal form "U+00A9".
-- uHex0x: hex string, optionally prefixed with "0x" (e.g. "0x00A9")
-- uLink:  when not nil, a todo marker for the not-yet-implemented link
--         format is appended to the result
-- Returns "U+" followed by the hex digits, left-padded with zeros to at
-- least four digits; longer values keep all their digits.
local function formatUhex(uHex0x, uLink)
	-- strip the "0x" prefix, then all leading zeros
	local digits = string.gsub(uHex0x, '^0x', '')
	digits = string.gsub(digits, '^0*', '')

	-- take max(#digits, 4) characters from the right of the padded string
	local uHexFmt = 'U+' .. string.sub('0000' .. digits, -math.max(#digits, 4))

	if uLink ~= nil then
		return uHexFmt .. '_[todo: fmt Uhex_link_U+]'
	end
	return uHexFmt
end
local function formatGenCat(sGenCat, fmt)local tCat tCat = uChar_data.tGenCat[sGenCat] if tCat
-- Formats table (array) using concat-- replace space by nbsp (keep untrimmed sp)-- in monospace font-familylocal function formatTablelist(t) -- unused?local s = if t
local function formatCombiningChar(is_combining, cWith)local addPrefixlocal uCombWith -- working, cWith logiclocal rprt-- todo need 4-way logic for cwith cWith = decodeString(cWith) rprt = 'is_combi: ' .. tostring(is_combining) .. '; cwith: ' .. tostring(cWith)
-- strip wikicode; but save NBSP -- todo improve, test if cWith ~= nil then cWith = string.gsub(cWith, NBSP, 'NBSP') cWith = plaintext._main(cWith, false) cWith = string.gsub(cWith, 'NBSP', NBSP) end uCombWith = yesno(cWith) -- y/n/nil (3-way logic; 'foo'
nil) or (uCombWith
true then addPrefix = DOTTED_CIRCLE rprt = rprt .. '_dflt' end elseif uCombWith
-- READ & PROCESS
if (uHexAnyform
) then ERRstatus ='ERR convertHexInToHex0x: no uHex input' return nil end uHexBare0x = decodeString(uHexAnyform) uHexBare0x = string.gsub(uHexBare0x, '%s', ) uHexBare0x = string.gsub(uHexBare0x, '^U%+', ) uHexBare0x = string.gsub(uHexBare0x, '^0x', ) uHexBare0x = string.upper(uHexBare0x) uHex0x = '0x' .. uHexBare0x -- number check uHexNum = tonumber(uHex0x) -- kills NaN, todo: test this if uHexNum
return uHex0x, uHexNum, uHexBare0x, uHexFormatend
local function convertHexToDec(uHex0x)local xVal if uHex0x
local function convertDecToHex(uDec)-- todo: dec input is NaN, err, edge if uDec
-- GET DATA
-- Look up the Unicode block for a code point.
-- uHexNum: the code point as a number
-- Returns the value of uData.lookup_block(uHexNum). The former placeholder
-- 'blck' is kept as the fallback when the lookup yields nil, so callers can
-- always concatenate the result.
local function getBlock(uHexNum)
	local sBlock = uData.lookup_block(uHexNum)
	if sBlock == nil then
		return 'blck'
	end
	return sBlock
end
-- Describe the Unicode plane of a code point.
-- uHexNum: the code point as a number
-- Returns "<plane number>: <plane name>", with the name read from
-- uChar_data.tPlanes. A plane without an entry in that table is reported
-- as '?' instead of raising a nil-concatenation error.
local function getPlane(uHexNum)
	-- each plane spans 0x10000 code points
	local i = math.floor(uHexNum / 0x10000)
	local sPlaneName = uChar_data.tPlanes[i] or '?'
	return i .. ': ' .. sPlaneName
end
local function getCombiningClass(uHex0x)-- CCC-- todo: 239 (230), 233, 234 = between spacing chars.local ccc
ccc = uData_helper.lookup_combiningclass(uHex0x) or -- new -helper function
return cccend
local function getNamedEntities(uDec, fmt)-- returns from datalist, by decimal val:-- formatted into concat.table list-- demo: [168]='¨, ¨, ¨, ¨'local tNamedEntitiesData = mw.loadData('Module:Numcr2namecr')local sNameList local tNames= ---- uDec=169-- fmt = report -- id = decimal input sNameList = tNamedEntitiesData[tonumber(uDec)] if sNameList
local patstring = '%f[^&][^%;]+%f[%;]' local hitCount = 0 local hitWord = while hitCount <= 20 do hitCount = hitCount + 1 hitWord = str._match(sNameList, patstring, 1, hitCount, false, ) hitWord = mw.text.trim(hitWord) if hitWord ~= then table.insert(tNames, inMono('&' .. hitWord .. ';')) elseif hitWord
local function getAliases(uHex)-- returns t5 = 5 alias tables named by reason-- demo 0x002118 = weierlocal tAllAliases = mw.loadData('Module:Unicode data/aliases')local tCPalias =
tCPalias = tAllAliases[uHex] if tCPalias
-- for 2-deep 5-subtable (Aliases)local tAlias5 = local abbreviation = local alternate = local correction = local control = local figment =
tAlias5["abbreviation"] = abbreviationtAlias5["alternate"] = alternatetAlias5["control"] = controltAlias5["correction"] = correctiontAlias5["figment"] = figment
for i, v in ipairs(tCPalias) do -- i = counter, v[i] = table (1/5), v[2] = tablename (alias, 1/5) if type(v)
local function getScriptName(sScriptISO)local sNamelocal UDscripts = mw.loadData('Module:Unicode data/scripts') if sScriptISO
sName = UDscripts.aliases[sScriptISO] or nil if sName
local function formatAlias5(t5Alias, fmt)local sReport if t5Alias
-- 1. PARSE INCOMING ARGS-- 2. READ PROPERTIESlocal function getArgsAndProps(origArgs)local tNewArgs =
local inHex, inDec, inChar = 1, 2, 3 -- 'inHex', 'inDec', 'inChar'local tOrigIn = local uHexIn = -1 -- the base inputlocal uHex0x, uHexNum -- local working val--xx-- PART 1 READ & NORMALISE ORIG ARGUMENTS -- HEX DEC CHARlocal rprt = 'R-t0:' .. #tOrigIn tOrigIn[inHex] = (origArgs[1] or origArgs['hex']) or nil -- todo: split for check? tOrigIn[inDec] = origArgs['dec'] or nil tOrigIn[inChar] = decodeString(origArgs['char']) or nil
rprt = rprt .. ' R-t2:' .. #tOrigInfor n, v in pairs(tOrigIn) do if v ~= nil then rprt = rprt .. ' ' .. tostring(v) .. ';;' endend
if tOrigIn[inDec] ~= nil then uHexIn = convertDecToHex(tOrigIn[inDec]) rprt = rprt .. ' dec;' end if tOrigIn[inChar] ~= nil then uHexIn = convertDecToHex(mw.ustring.codepoint(tOrigIn[inChar])) rprt = rprt .. ' char;' end if tOrigIn[inHex] ~= nil then uHexIn = tOrigIn[inHex] rprt = rprt .. ' hex;' end
-- REPORT todo: what if >1 input?: err msg, prio, conflictcheck -- 2023-02-04: removed "\|" "invalid escape sequence" ??? tNewArgs['rprtOrigIDs'] = ' |ID in: #t4=' .. #tOrigIn .. ':>' .. rprt .. tostring(uHexIn) .. '<| '
-- returns: uHex0x, uHexNum, uHexBare0x, uHexFormat tNewArgs['uHex0x'], tNewArgs['uHexNum'], tNewArgs['uHexBare0x'], tNewArgs['uHexFormat'] = convertHexInToHex0x(uHexIn) if tNewArgs['uHex0x']
-- DEC tNewArgs['uDec'] = convertHexToDec(uHex0x) -- OTHER ORIG ARGS tNewArgs['uNameLink'] = origArgs['link'] or origArgs['nlink'] -- old nlink = depr paramname tNewArgs['format'] = origArgs['format'] or tNewArgs['cwith'] = decodeString(origArgs['cwith'])
tNewArgs['uSize'] = origArgs['size'] tNewArgs['uImage'] = origArgs['image']
tNewArgs['html'] = origArgs['html'] -- depr? tNewArgs['ulink'] = origArgs['ulink'] -- old ulink = depr?
-- test notice tNewArgs['test'] = origArgs['test'] or
-- PART 2 READ & USE PROPERTIES
-- ASSIGNED, GenCat, Control, Char tNewArgs['uIsAssigned'] = uData.is_assigned(uHexNum)
if tNewArgs['uIsAssigned']
if tNewArgs['uGenCat']
--NAME, ALIASES tNewArgs['uName'] = uData.lookup_name(uHexNum) tNewArgs['Aliases'] = getAliases(uHexNum) -- table5
--PROPS Script, Latin, WS tNewArgs['uIsLatin'] = uData.is_Latin(tostring(tNewArgs['uChar'])) tNewArgs['uScript'] = uData.lookup_script(uHexNum) tNewArgs['uScriptName'] = getScriptName(tNewArgs['uScript']) tNewArgs['uIsWhitespace'] = uData.is_whitespace(uHexNum)
--PROPS rtl tNewArgs['uIsRtl'] = uData.is_rtl(tostring(tNewArgs['uChar']))
--PROPS2 COMBINING PREFIX Combining/cwith/dottedcircle, CCC tNewArgs['uIsCombining'] = uData.is_combining(uHexNum) or false if yesno(tNewArgs['uIsCombining'], false)
-- CHAR SUFFFIX; rtl if tNewArgs['uIsRtl']
--PROPS3: NamedEntities tNewArgs['NamedEntities'] = getNamedEntities(convertHexToDec(uHex0x))
return tNewArgsend
-- Entry point for calls from other Lua modules.
-- Not implemented yet: args is ignored and a fixed placeholder is returned.
function p._main(args)
	local placeholder = '_todo _main'
	return placeholder
end
function p.main (frame)local origArgs = getArgs(frame,)local tArgs = local s =
tUchar = getArgsAndProps(origArgs) if tUchar['uHex0x']
-- REPORT RPRT s = formatUhex(tUchar['uHex0x'])
--string together & css format tUchar.uChar = tUchar['uCharPrefix'] .. tUchar.uChar .. tUchar['uCharSuffix'] -- cwith, rtl, --- tUchar['styledChar'] = addStyles(tUchar) local cssChar cssChar = addStyles(tUchar) if tUchar['uImage'] ~= nil then s = s .. ' ' .. (tUchar['uSize'] or DEFAULT_IMAGE_SIZE) .. ' ' else --s = s .. ' ' .. tUchar['uCharPrefix'] .. tUchar.uChar .. tUchar['uCharSuffix'] .. ' ' s = s .. ' ' .. cssChar .. ' ' end
s = s .. inSmallcaps(tUchar['uName']) s = s .. '
[testing: ' .. tUchar['test'] .. ']' .. (tUchar['rprtOrigIDs'] or '?') .. '→ ' .. tUchar['uHex0x'] .. ' [' .. tUchar['uDec'] .. 'dec]'.. '; (' .. xlLinkFileFormat(tUchar['uHexBare0x'], tUchar['uHexFormat']) .. ') ' .. 'GC: ' .. formatGenCat(tUchar['uGenCat']) .. ' (' .. xlLinkFileFormat(nil, nil, tUchar['uGenCat']) .. ')' .. '
ASSIG: ' .. tostring(tUchar['uIsAssigned']) .. '; ' .. 'WS: '.. tostring(tUchar['uIsWhitespace']) .. '
BLK: ' .. tUchar['uBlock'] .. '; PLANE: ' .. tUchar['uPlane'] .. '; ' .. '
SC: ' .. tUchar['uScript'] .. '=' .. tUchar['uScriptName'] .. '; RTLsuffix:' .. tostring(tUchar['uIsRtl']) .. '; ' s = s .. '
COMBI PREFIX: >' .. tUchar['uCharPrefix'] .. '<; ' .. tUchar['uCwithReport'] .. '; CCC class:' .. (tUchar['uCombiningClass'] or '-')
if tUchar['NamedEntities'] ~= nil then s = s .. '
NAMED ENTITIES: ' .. tUchar['NamedEntities'] end
if tUchar['Aliases'] ~= nil then s = s .. formatAlias5(tUchar['Aliases'], 'report') end return send
-- Test entry point: reads the 'char' argument directly from frame.args and
-- returns the results of mw.ustring.codepoint for positions 1 to 2.
function p.test(frame)
	local input = frame.args.char
	return mw.ustring.codepoint(input, 1, 2)
end
-- Test entry point: passes the first positional argument of frame.args
-- (a script ISO id) to getScriptName and returns its result.
function p.testScriptName(frame)
	return getScriptName(frame.args[1])
end
return p