Module:User:Cscott/llpeg explained

return (functionlocal builders = local function register(name, f) builders[name] = fendregister('advent.compat', function return require end)

register('llpeg.types', function(myrequire)myrequire('strict')local compat = myrequire('advent.compat')local CHARMAX = 0x7F -- maximum codepoint for charsets

-- metatable for pattern objects; will be filled in laterlocal metareg =

local enum = function(keys) local Enum = Enum.__index = Enum function Enum:__tostring return self.name end function Enum:pairs return keys end function Enum:type return Enum end

for name, value in pairs(keys) do Enum[name] = setmetatable(Enum) end return Enumend

local CapKind = enum

local TTag = enum

local PE = enum

-- virtual machine instructionslocal Opcode = enum

-- helper for visitor pattern definitionsfunction define(dispatch, which, f) for _,v in pairs(which) do assert(v ~= nil) -- catch typos dispatch[v] = f endend

local numsiblings = define(numsiblings,, 0)define(numsiblings,, 1)define(numsiblings,, 2)

-- more help for visitor functions

local function_name_registry = function register_fname(name, f) assert(type(name)

"string") assert(type(f)

"function") function_name_registry[f] = nameend

function report_ferror(f, msg) local fname = function_name_registry[f] if fname ~= nil then msg = fname .. ": " .. msg end error(msg)end

function define_type_visitor(tbl) local dispatch = for keys,func in pairs(tbl) do if type(keys) ~= "table" then keys = end define(dispatch, keys, func) end local visit visit = function(val, ...) local a = dispatch["assert"] if a ~= nil then a(val, ...) end -- assert preconditions local ty = type(val) if ty

'table' and getmetatable(val)

metareg then ty = 'pattern' end local f = dispatch[ty] if f ~= nil then return f(val, ...) end f = dispatch.default if f

nil then report_ferror(visit, "no default for " .. ty) end return f(val, ...) end return visitend

function define_tree_visitor(tbl, opt_name) local dispatch = for keys,func in pairs(tbl) do if type(keys) ~= "table" or getmetatable(keys)

TTag then keys = end define(dispatch, keys, func) end local visit visit = function(tree, ...) if tree

nil then report_ferror(visit, "nil tree") end local a = dispatch["assert"] if a ~= nil then a(tree, ...) end -- assert preconditions local f = dispatch[tree.tag] if f ~= nil then return f(tree, ...) end f = dispatch.default if f

nil then report_ferror(visit, "no default for " .. tree.tag) end return f(tree, ...) end return visitend

function define_opcode_visitor(tbl) local dispatch = for keys,func in pairs(tbl) do if type(keys) ~= "table" or getmetatable(keys)

Opcode then keys = end define(dispatch, keys, func) end local visit visit = function(op, ...) if op

nil then report_ferror(visit, "nil op") end local a = dispatch["assert"] if a ~= nil then a(op, ...) end -- assert preconditions local f = dispatch[op.code] if f ~= nil then return f(op, ...) end f = dispatch.default if f

nil then report_ferror(visit, "no default for " .. op.code) end return f(op, ...) end return visitend

-- helper for module importsfunction from(mod, list) local result = for _,v in ipairs(list) do table.insert(result, mod[v]) end return compat.unpack(result)end

function newcharset return setmetatable(metareg)end

local fullset = newcharsetfor i = 0,CHARMAX do fullset.set[i] = trueendfullset.rest = true -- make sure non-ascii unicode chars are included!assert(fullset.tag

TTag.Set)

return

end)

register('llpeg.print', function(myrequire)myrequire('strict')local compat = myrequire('advent.compat')local from = myrequire('llpeg.types').fromlocal CHARMAX, CapKind, Opcode, TTag, define, define_tree_visitor, numsiblings, _ = from(myrequire('llpeg.types'),)

function printcharset(tree) local result = "[" local i = 0 while i <= CHARMAX do local first = i while tree.set[i] and i <= CHARMAX do i = i + 1 end if first

(i - 1) then -- unary range result = result .. string.format("(%02x)", first) elseif first < (i-1) then -- non-empty range result = result .. string.format("(%02x-%02x)", first, i - 1) end i = i + 1 end if tree.rest then result = result .. "(80-FFFF)" end return result .. "]"end

function printjmp(op, pc) return "-> " .. op.targetend

local printinst_helper = define_opcode_visitorfunction printinst(pc, op, accum) table.insert(accum, string.format("%02d: %s ", pc, op.code.value)) table.insert(accum, printinst_helper(op, pc)) table.insert(accum, "\n") return accumend

function printpatt(code, accum) for pc,op in ipairs(code) do printinst(pc, op, accum) end return accumend

function printcap(cap, indent) print(string.format("%s%s", string.rep(' ', indent), cap))end

function printcap2close(captures, ncaptures, i, indent) local head = captures[i] i = i + 1 printcap(head, indent) -- print head capture while i <= ncaptures and head:inside(captures[i]) do i = printcap2close(captures, ncaptures, i, indent + 2) -- print nested captures end if i <= ncaptures and head:isopencap then assert(captures[i]:isclosecap) printcap(captures[i], indent) -- print and skip close capture i = i + 1 end return iend

function printcaplist(captures, ncaptures) -- for debugging, first print a raw list of captures if ncaptures

nil then ncaptures = #captures end for i=1,ncaptures do printcap(captures[i], 0) end print(">

="); local i=1 while i <= ncaptures and not captures[i]:isclosecap do i = printcap2close(captures, ncaptures, i, 0) end if i > ncaptures then print("") end print("

");end

local printtree_helper = define_tree_visitorfunction printtree(tree, indent, accum) local sibs = numsiblings[tree.tag]

table.insert(accum, string.rep(' ', indent)) table.insert(accum, tree.tag.value)

table.insert(accum, printtree_helper(tree)) table.insert(accum, "\n")

if tree.tag

TTag.Rule then sibs = 1 -- don't print sib2 elseif tree.tag

TTag.Grammar then local rule = tree.sib1 for i=1,tree.n do printtree(rule, indent + 2, accum) rule = rule.sib2 end sibs = 0 -- siblings already handled end if sibs >= 1 then printtree(tree.sib1, indent + 2, accum) if sibs >= 2 then printtree(tree.sib2, indent + 2, accum) end end return accumend

local PREFIX = "" -- could also be "l." or "lpeg." etclocal printrepl_helperprintrepl_helper = define_tree_visitorfunction printrepl(tree) local buf = printrepl_helper(tree, buf) return table.concat(buf)end

return

end)

register('llpeg.code', function(myrequire)myrequire('strict')local compat = myrequire('advent.compat')local from = myrequire('llpeg.types').fromlocal CHARMAX, CapKind, Opcode, PE, TTag, define, define_tree_visitor, fullset, newcharset, numsiblings, register_fname, _ = from(myrequire('llpeg.types'),)local printinst = myrequire('llpeg.print').printinst

local TRACE_INSTRUCTIONS = false

-- signals a "no-instruction"local NOINST = nil

-- don't optimize captures longer than thislocal MAXOFF = 15

-- forward declarationslocal codegen

local CompileState = CompileState.__index = CompileState

--e2" if (e1 % 2)

1 or (e2 % 2)

1 then ret = ret + 1 end e1,e2 = compat.rshift(e1, 1), compat.rshift(e2, 1) if (e1 % 2)

1 or (e2 % 2)

1 then ret = ret + 2 end return ret, firstset end, [TTag.Seq] = function(t, follow, cs) if not nullable(t.sib1, cs) then -- when p1 is not nullable, p2 has nothing to contribute return getfirst(t.sib1, fullset, cs) -- tail call else -- FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl)) local e2,csaux = getfirst(t.sib2, follow, cs) local e1,firstset = getfirst(t.sib1, csaux, cs) if e1

0 then return 0, firstset -- 'e1' ensures that first can be used elseif compat.rshift(e1, 1) % 2

1 or compat.rshift(e2, 1) % 2

1 then -- one of the children has a matchtime? return 2, firstset -- pattern has a matchtime capture else return e2, firstset -- else depends on e2 end end end, [TTag.Rep] = function(t, follow, cs) local _,firstset = getfirst(t.sib1, follow, cs) return 1, cs_union(firstset, follow, cs) -- accepts the empty string end, [{ TTag.Capture,TTag.Rule,TTag.XInfo }] = function(t, follow, cs) return getfirst(t.sib1, follow, cs) -- tail call end, [TTag.Grammar] = function(t, follow, cs) return cs:withGrammar(t, getfirst, t.sib1, follow, cs) end, [TTag.RunTime] = function(t, follow, cs) -- function invalidates any follow info local e,firstset = getfirst(t.sib1, fullset, cs) if e ~= 0 then -- function is not "protected"? return 2,firstset else -- pattern inside capture ensures first can be used return 0,firstset end end, [TTag.Call] = function(t, follow, cs) local rule = cs.grammar.ruletab[t.key] return getfirst(rule, follow, cs) -- tail call end, [TTag.And] = function(t, follow, cs) local e,firstset = getfirst(t.sib1, follow, cs) return e, cs_intersection(firstset, follow, cs) end, [{ TTag.Not, TTag.Behind }] = function(t, follow, cs) if t.tag

TTag.Not then local firstset = tocharset(t.sib1) if firstset ~= nil then return 1,cs_complement(firstset) -- could match empty input end end -- the TNot or TBehind gives no new information -- call getfirst only to check for math-time captures local e,_ = getfirst(t.sib1, follow, cs) if e%2

0 then e = e + 1 end -- set the lsb; could match empty input return e, follow -- uses follow end,}function CompileState:getfirst(t, follow) return getfirst(t, follow, self) endregister_fname("getfirst", getfirst)

--

-- ie, a single character of lookahead is enough to evaluate the pattern -- rooted at this node

--local headfailheadfail = define_tree_visitorfunction CompileState:headfail(t) return headfail(t, self) endregister_fname("headfail", headfail)

--

--local needfollowneedfollow = define_tree_visitorregister_fname("needfollow", needfollow)

--

--

local Instruction = Instruction.__index = Instruction

function Instruction:new(arg) local opcode = arg[1] if opcode

nil then error("no opcode") end -- target is rule # for open calls before correction, and absolute pc after local instr = setmetatable(instr, self) instr:setCode(opcode) -- opportunity to add tracing logic! return instrend

function Instruction:setCode(opcode) self.code = opcode local exec = opcode.exec if TRACE_INSTRUCTIONS then local str self.exec = function(self, state, ...) if str

nil then str = table.concat(printinst(0, self,)):gsub("\n","") end print(state.bytePos, state.codepoint, str) return exec(self, state, ...) end else self.exec = exec endend

-- state for the compiler

function CompileState:new(p) local cs = setmetatable(cs, self) return csend

function CompileState:withGrammar(g, f, ...) local lastGrammar = self.grammar self.grammar = g local result = compat.pack(f(...)) self.grammar = lastGrammar return compat.unpack(result)end

function CompileState:codegen(tree, opt, tt, fl) assert(fl.tag

TTag.Set) -- just a little helper return codegen(tree, self, opt, tt, fl)end

function CompileState:getinstr(i) return self.p.code[i]end

function CompileState:addinstruction(arg) local code = self.p.code table.insert(code, Instruction:new(arg)) return #codeend

function CompileState:gethere local code = self.p.code return 1 + #codeend

function CompileState:jumptothere(pc, where) if pc ~= NOINST then local code = self.p.code code[pc].target = where endend

function CompileState:jumptohere(pc) self:jumptothere(pc, self:gethere)end

function codethrow(cs, throw) local rule = nil if cs.grammar ~= nil then -- we only lookup/match *string* rule names, not numeric indices rule = cs.grammar.ruletab[tostring(throw.key)] end if rule ~= nil then return cs:addinstruction else return cs:addinstruction endend

function codeutfr(cs, tree) return cs:addinstructionend

--

--function codechar(cs, codepoint, tt) if tt ~= NOINST and cs:getinstr(tt).code

Opcode.TestChar and cs:getinstr(tt).aux

codepoint then cs:addinstruction else cs:addinstruction endend

--

--function addcharset(cs, codepoint) --