From 296443137f4288cb030e92859ccfbe3204bc1088 Mon Sep 17 00:00:00 2001 From: rahulp13 Date: Tue, 17 Mar 2020 14:55:41 +0530 Subject: initial commit --- lib/python2.7/compiler/pyassem.py | 763 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 763 insertions(+) create mode 100644 lib/python2.7/compiler/pyassem.py (limited to 'lib/python2.7/compiler/pyassem.py') diff --git a/lib/python2.7/compiler/pyassem.py b/lib/python2.7/compiler/pyassem.py new file mode 100644 index 0000000..f52f7d0 --- /dev/null +++ b/lib/python2.7/compiler/pyassem.py @@ -0,0 +1,763 @@ +"""A flow graph representation for Python bytecode""" + +import dis +import types +import sys + +from compiler import misc +from compiler.consts \ + import CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, CO_VARKEYWORDS + +class FlowGraph: + def __init__(self): + self.current = self.entry = Block() + self.exit = Block("exit") + self.blocks = misc.Set() + self.blocks.add(self.entry) + self.blocks.add(self.exit) + + def startBlock(self, block): + if self._debug: + if self.current: + print "end", repr(self.current) + print " next", self.current.next + print " prev", self.current.prev + print " ", self.current.get_children() + print repr(block) + self.current = block + + def nextBlock(self, block=None): + # XXX think we need to specify when there is implicit transfer + # from one block to the next. might be better to represent this + # with explicit JUMP_ABSOLUTE instructions that are optimized + # out when they are unnecessary. + # + # I think this strategy works: each block has a child + # designated as "next" which is returned as the last of the + # children. because the nodes in a graph are emitted in + # reverse post order, the "next" block will always be emitted + # immediately after its parent. + # Worry: maintaining this invariant could be tricky + if block is None: + block = self.newBlock() + + # Note: If the current block ends with an unconditional control + # transfer, then it is techically incorrect to add an implicit + # transfer to the block graph. Doing so results in code generation + # for unreachable blocks. That doesn't appear to be very common + # with Python code and since the built-in compiler doesn't optimize + # it out we don't either. + self.current.addNext(block) + self.startBlock(block) + + def newBlock(self): + b = Block() + self.blocks.add(b) + return b + + def startExitBlock(self): + self.startBlock(self.exit) + + _debug = 0 + + def _enable_debug(self): + self._debug = 1 + + def _disable_debug(self): + self._debug = 0 + + def emit(self, *inst): + if self._debug: + print "\t", inst + if len(inst) == 2 and isinstance(inst[1], Block): + self.current.addOutEdge(inst[1]) + self.current.emit(inst) + + def getBlocksInOrder(self): + """Return the blocks in reverse postorder + + i.e. each node appears before all of its successors + """ + order = order_blocks(self.entry, self.exit) + return order + + def getBlocks(self): + return self.blocks.elements() + + def getRoot(self): + """Return nodes appropriate for use with dominator""" + return self.entry + + def getContainedGraphs(self): + l = [] + for b in self.getBlocks(): + l.extend(b.getContainedGraphs()) + return l + + +def order_blocks(start_block, exit_block): + """Order blocks so that they are emitted in the right order""" + # Rules: + # - when a block has a next block, the next block must be emitted just after + # - when a block has followers (relative jumps), it must be emitted before + # them + # - all reachable blocks must be emitted + order = [] + + # Find all the blocks to be emitted. + remaining = set() + todo = [start_block] + while todo: + b = todo.pop() + if b in remaining: + continue + remaining.add(b) + for c in b.get_children(): + if c not in remaining: + todo.append(c) + + # A block is dominated by another block if that block must be emitted + # before it. + dominators = {} + for b in remaining: + if __debug__ and b.next: + assert b is b.next[0].prev[0], (b, b.next) + # Make sure every block appears in dominators, even if no + # other block must precede it. + dominators.setdefault(b, set()) + # preceding blocks dominate following blocks + for c in b.get_followers(): + while 1: + dominators.setdefault(c, set()).add(b) + # Any block that has a next pointer leading to c is also + # dominated because the whole chain will be emitted at once. + # Walk backwards and add them all. + if c.prev and c.prev[0] is not b: + c = c.prev[0] + else: + break + + def find_next(): + # Find a block that can be emitted next. + for b in remaining: + for c in dominators[b]: + if c in remaining: + break # can't emit yet, dominated by a remaining block + else: + return b + assert 0, 'circular dependency, cannot find next block' + + b = start_block + while 1: + order.append(b) + remaining.discard(b) + if b.next: + b = b.next[0] + continue + elif b is not exit_block and not b.has_unconditional_transfer(): + order.append(exit_block) + if not remaining: + break + b = find_next() + return order + + +class Block: + _count = 0 + + def __init__(self, label=''): + self.insts = [] + self.outEdges = set() + self.label = label + self.bid = Block._count + self.next = [] + self.prev = [] + Block._count = Block._count + 1 + + def __repr__(self): + if self.label: + return "" % (self.label, self.bid) + else: + return "" % (self.bid) + + def __str__(self): + insts = map(str, self.insts) + return "" % (self.label, self.bid, + '\n'.join(insts)) + + def emit(self, inst): + op = inst[0] + self.insts.append(inst) + + def getInstructions(self): + return self.insts + + def addOutEdge(self, block): + self.outEdges.add(block) + + def addNext(self, block): + self.next.append(block) + assert len(self.next) == 1, map(str, self.next) + block.prev.append(self) + assert len(block.prev) == 1, map(str, block.prev) + + _uncond_transfer = ('RETURN_VALUE', 'RAISE_VARARGS', + 'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'CONTINUE_LOOP', + ) + + def has_unconditional_transfer(self): + """Returns True if there is an unconditional transfer to an other block + at the end of this block. This means there is no risk for the bytecode + executer to go past this block's bytecode.""" + try: + op, arg = self.insts[-1] + except (IndexError, ValueError): + return + return op in self._uncond_transfer + + def get_children(self): + return list(self.outEdges) + self.next + + def get_followers(self): + """Get the whole list of followers, including the next block.""" + followers = set(self.next) + # Blocks that must be emitted *after* this one, because of + # bytecode offsets (e.g. relative jumps) pointing to them. + for inst in self.insts: + if inst[0] in PyFlowGraph.hasjrel: + followers.add(inst[1]) + return followers + + def getContainedGraphs(self): + """Return all graphs contained within this block. + + For example, a MAKE_FUNCTION block will contain a reference to + the graph for the function body. + """ + contained = [] + for inst in self.insts: + if len(inst) == 1: + continue + op = inst[1] + if hasattr(op, 'graph'): + contained.append(op.graph) + return contained + +# flags for code objects + +# the FlowGraph is transformed in place; it exists in one of these states +RAW = "RAW" +FLAT = "FLAT" +CONV = "CONV" +DONE = "DONE" + +class PyFlowGraph(FlowGraph): + super_init = FlowGraph.__init__ + + def __init__(self, name, filename, args=(), optimized=0, klass=None): + self.super_init() + self.name = name + self.filename = filename + self.docstring = None + self.args = args # XXX + self.argcount = getArgCount(args) + self.klass = klass + if optimized: + self.flags = CO_OPTIMIZED | CO_NEWLOCALS + else: + self.flags = 0 + self.consts = [] + self.names = [] + # Free variables found by the symbol table scan, including + # variables used only in nested scopes, are included here. + self.freevars = [] + self.cellvars = [] + # The closure list is used to track the order of cell + # variables and free variables in the resulting code object. + # The offsets used by LOAD_CLOSURE/LOAD_DEREF refer to both + # kinds of variables. + self.closure = [] + self.varnames = list(args) or [] + for i in range(len(self.varnames)): + var = self.varnames[i] + if isinstance(var, TupleArg): + self.varnames[i] = var.getName() + self.stage = RAW + + def setDocstring(self, doc): + self.docstring = doc + + def setFlag(self, flag): + self.flags = self.flags | flag + if flag == CO_VARARGS: + self.argcount = self.argcount - 1 + + def checkFlag(self, flag): + if self.flags & flag: + return 1 + + def setFreeVars(self, names): + self.freevars = list(names) + + def setCellVars(self, names): + self.cellvars = names + + def getCode(self): + """Get a Python code object""" + assert self.stage == RAW + self.computeStackDepth() + self.flattenGraph() + assert self.stage == FLAT + self.convertArgs() + assert self.stage == CONV + self.makeByteCode() + assert self.stage == DONE + return self.newCodeObject() + + def dump(self, io=None): + if io: + save = sys.stdout + sys.stdout = io + pc = 0 + for t in self.insts: + opname = t[0] + if opname == "SET_LINENO": + print + if len(t) == 1: + print "\t", "%3d" % pc, opname + pc = pc + 1 + else: + print "\t", "%3d" % pc, opname, t[1] + pc = pc + 3 + if io: + sys.stdout = save + + def computeStackDepth(self): + """Compute the max stack depth. + + Approach is to compute the stack effect of each basic block. + Then find the path through the code with the largest total + effect. + """ + depth = {} + exit = None + for b in self.getBlocks(): + depth[b] = findDepth(b.getInstructions()) + + seen = {} + + def max_depth(b, d): + if b in seen: + return d + seen[b] = 1 + d = d + depth[b] + children = b.get_children() + if children: + return max([max_depth(c, d) for c in children]) + else: + if not b.label == "exit": + return max_depth(self.exit, d) + else: + return d + + self.stacksize = max_depth(self.entry, 0) + + def flattenGraph(self): + """Arrange the blocks in order and resolve jumps""" + assert self.stage == RAW + self.insts = insts = [] + pc = 0 + begin = {} + end = {} + for b in self.getBlocksInOrder(): + begin[b] = pc + for inst in b.getInstructions(): + insts.append(inst) + if len(inst) == 1: + pc = pc + 1 + elif inst[0] != "SET_LINENO": + # arg takes 2 bytes + pc = pc + 3 + end[b] = pc + pc = 0 + for i in range(len(insts)): + inst = insts[i] + if len(inst) == 1: + pc = pc + 1 + elif inst[0] != "SET_LINENO": + pc = pc + 3 + opname = inst[0] + if opname in self.hasjrel: + oparg = inst[1] + offset = begin[oparg] - pc + insts[i] = opname, offset + elif opname in self.hasjabs: + insts[i] = opname, begin[inst[1]] + self.stage = FLAT + + hasjrel = set() + for i in dis.hasjrel: + hasjrel.add(dis.opname[i]) + hasjabs = set() + for i in dis.hasjabs: + hasjabs.add(dis.opname[i]) + + def convertArgs(self): + """Convert arguments from symbolic to concrete form""" + assert self.stage == FLAT + self.consts.insert(0, self.docstring) + self.sort_cellvars() + for i in range(len(self.insts)): + t = self.insts[i] + if len(t) == 2: + opname, oparg = t + conv = self._converters.get(opname, None) + if conv: + self.insts[i] = opname, conv(self, oparg) + self.stage = CONV + + def sort_cellvars(self): + """Sort cellvars in the order of varnames and prune from freevars. + """ + cells = {} + for name in self.cellvars: + cells[name] = 1 + self.cellvars = [name for name in self.varnames + if name in cells] + for name in self.cellvars: + del cells[name] + self.cellvars = self.cellvars + cells.keys() + self.closure = self.cellvars + self.freevars + + def _lookupName(self, name, list): + """Return index of name in list, appending if necessary + + This routine uses a list instead of a dictionary, because a + dictionary can't store two different keys if the keys have the + same value but different types, e.g. 2 and 2L. The compiler + must treat these two separately, so it does an explicit type + comparison before comparing the values. + """ + t = type(name) + for i in range(len(list)): + if t == type(list[i]) and list[i] == name: + return i + end = len(list) + list.append(name) + return end + + _converters = {} + def _convert_LOAD_CONST(self, arg): + if hasattr(arg, 'getCode'): + arg = arg.getCode() + return self._lookupName(arg, self.consts) + + def _convert_LOAD_FAST(self, arg): + self._lookupName(arg, self.names) + return self._lookupName(arg, self.varnames) + _convert_STORE_FAST = _convert_LOAD_FAST + _convert_DELETE_FAST = _convert_LOAD_FAST + + def _convert_LOAD_NAME(self, arg): + if self.klass is None: + self._lookupName(arg, self.varnames) + return self._lookupName(arg, self.names) + + def _convert_NAME(self, arg): + if self.klass is None: + self._lookupName(arg, self.varnames) + return self._lookupName(arg, self.names) + _convert_STORE_NAME = _convert_NAME + _convert_DELETE_NAME = _convert_NAME + _convert_IMPORT_NAME = _convert_NAME + _convert_IMPORT_FROM = _convert_NAME + _convert_STORE_ATTR = _convert_NAME + _convert_LOAD_ATTR = _convert_NAME + _convert_DELETE_ATTR = _convert_NAME + _convert_LOAD_GLOBAL = _convert_NAME + _convert_STORE_GLOBAL = _convert_NAME + _convert_DELETE_GLOBAL = _convert_NAME + + def _convert_DEREF(self, arg): + self._lookupName(arg, self.names) + self._lookupName(arg, self.varnames) + return self._lookupName(arg, self.closure) + _convert_LOAD_DEREF = _convert_DEREF + _convert_STORE_DEREF = _convert_DEREF + + def _convert_LOAD_CLOSURE(self, arg): + self._lookupName(arg, self.varnames) + return self._lookupName(arg, self.closure) + + _cmp = list(dis.cmp_op) + def _convert_COMPARE_OP(self, arg): + return self._cmp.index(arg) + + # similarly for other opcodes... + + for name, obj in locals().items(): + if name[:9] == "_convert_": + opname = name[9:] + _converters[opname] = obj + del name, obj, opname + + def makeByteCode(self): + assert self.stage == CONV + self.lnotab = lnotab = LineAddrTable() + for t in self.insts: + opname = t[0] + if len(t) == 1: + lnotab.addCode(self.opnum[opname]) + else: + oparg = t[1] + if opname == "SET_LINENO": + lnotab.nextLine(oparg) + continue + hi, lo = twobyte(oparg) + try: + lnotab.addCode(self.opnum[opname], lo, hi) + except ValueError: + print opname, oparg + print self.opnum[opname], lo, hi + raise + self.stage = DONE + + opnum = {} + for num in range(len(dis.opname)): + opnum[dis.opname[num]] = num + del num + + def newCodeObject(self): + assert self.stage == DONE + if (self.flags & CO_NEWLOCALS) == 0: + nlocals = 0 + else: + nlocals = len(self.varnames) + argcount = self.argcount + if self.flags & CO_VARKEYWORDS: + argcount = argcount - 1 + return types.CodeType(argcount, nlocals, self.stacksize, self.flags, + self.lnotab.getCode(), self.getConsts(), + tuple(self.names), tuple(self.varnames), + self.filename, self.name, self.lnotab.firstline, + self.lnotab.getTable(), tuple(self.freevars), + tuple(self.cellvars)) + + def getConsts(self): + """Return a tuple for the const slot of the code object + + Must convert references to code (MAKE_FUNCTION) to code + objects recursively. + """ + l = [] + for elt in self.consts: + if isinstance(elt, PyFlowGraph): + elt = elt.getCode() + l.append(elt) + return tuple(l) + +def isJump(opname): + if opname[:4] == 'JUMP': + return 1 + +class TupleArg: + """Helper for marking func defs with nested tuples in arglist""" + def __init__(self, count, names): + self.count = count + self.names = names + def __repr__(self): + return "TupleArg(%s, %s)" % (self.count, self.names) + def getName(self): + return ".%d" % self.count + +def getArgCount(args): + argcount = len(args) + if args: + for arg in args: + if isinstance(arg, TupleArg): + numNames = len(misc.flatten(arg.names)) + argcount = argcount - numNames + return argcount + +def twobyte(val): + """Convert an int argument into high and low bytes""" + assert isinstance(val, int) + return divmod(val, 256) + +class LineAddrTable: + """lnotab + + This class builds the lnotab, which is documented in compile.c. + Here's a brief recap: + + For each SET_LINENO instruction after the first one, two bytes are + added to lnotab. (In some cases, multiple two-byte entries are + added.) The first byte is the distance in bytes between the + instruction for the last SET_LINENO and the current SET_LINENO. + The second byte is offset in line numbers. If either offset is + greater than 255, multiple two-byte entries are added -- see + compile.c for the delicate details. + """ + + def __init__(self): + self.code = [] + self.codeOffset = 0 + self.firstline = 0 + self.lastline = 0 + self.lastoff = 0 + self.lnotab = [] + + def addCode(self, *args): + for arg in args: + self.code.append(chr(arg)) + self.codeOffset = self.codeOffset + len(args) + + def nextLine(self, lineno): + if self.firstline == 0: + self.firstline = lineno + self.lastline = lineno + else: + # compute deltas + addr = self.codeOffset - self.lastoff + line = lineno - self.lastline + # Python assumes that lineno always increases with + # increasing bytecode address (lnotab is unsigned char). + # Depending on when SET_LINENO instructions are emitted + # this is not always true. Consider the code: + # a = (1, + # b) + # In the bytecode stream, the assignment to "a" occurs + # after the loading of "b". This works with the C Python + # compiler because it only generates a SET_LINENO instruction + # for the assignment. + if line >= 0: + push = self.lnotab.append + while addr > 255: + push(255); push(0) + addr -= 255 + while line > 255: + push(addr); push(255) + line -= 255 + addr = 0 + if addr > 0 or line > 0: + push(addr); push(line) + self.lastline = lineno + self.lastoff = self.codeOffset + + def getCode(self): + return ''.join(self.code) + + def getTable(self): + return ''.join(map(chr, self.lnotab)) + +class StackDepthTracker: + # XXX 1. need to keep track of stack depth on jumps + # XXX 2. at least partly as a result, this code is broken + + def findDepth(self, insts, debug=0): + depth = 0 + maxDepth = 0 + for i in insts: + opname = i[0] + if debug: + print i, + delta = self.effect.get(opname, None) + if delta is not None: + depth = depth + delta + else: + # now check patterns + for pat, pat_delta in self.patterns: + if opname[:len(pat)] == pat: + delta = pat_delta + depth = depth + delta + break + # if we still haven't found a match + if delta is None: + meth = getattr(self, opname, None) + if meth is not None: + depth = depth + meth(i[1]) + if depth > maxDepth: + maxDepth = depth + if debug: + print depth, maxDepth + return maxDepth + + effect = { + 'POP_TOP': -1, + 'DUP_TOP': 1, + 'LIST_APPEND': -1, + 'SET_ADD': -1, + 'MAP_ADD': -2, + 'SLICE+1': -1, + 'SLICE+2': -1, + 'SLICE+3': -2, + 'STORE_SLICE+0': -1, + 'STORE_SLICE+1': -2, + 'STORE_SLICE+2': -2, + 'STORE_SLICE+3': -3, + 'DELETE_SLICE+0': -1, + 'DELETE_SLICE+1': -2, + 'DELETE_SLICE+2': -2, + 'DELETE_SLICE+3': -3, + 'STORE_SUBSCR': -3, + 'DELETE_SUBSCR': -2, + # PRINT_EXPR? + 'PRINT_ITEM': -1, + 'RETURN_VALUE': -1, + 'YIELD_VALUE': -1, + 'EXEC_STMT': -3, + 'BUILD_CLASS': -2, + 'STORE_NAME': -1, + 'STORE_ATTR': -2, + 'DELETE_ATTR': -1, + 'STORE_GLOBAL': -1, + 'BUILD_MAP': 1, + 'COMPARE_OP': -1, + 'STORE_FAST': -1, + 'IMPORT_STAR': -1, + 'IMPORT_NAME': -1, + 'IMPORT_FROM': 1, + 'LOAD_ATTR': 0, # unlike other loads + # close enough... + 'SETUP_EXCEPT': 3, + 'SETUP_FINALLY': 3, + 'FOR_ITER': 1, + 'WITH_CLEANUP': -1, + } + # use pattern match + patterns = [ + ('BINARY_', -1), + ('LOAD_', 1), + ] + + def UNPACK_SEQUENCE(self, count): + return count-1 + def BUILD_TUPLE(self, count): + return -count+1 + def BUILD_LIST(self, count): + return -count+1 + def BUILD_SET(self, count): + return -count+1 + def CALL_FUNCTION(self, argc): + hi, lo = divmod(argc, 256) + return -(lo + hi * 2) + def CALL_FUNCTION_VAR(self, argc): + return self.CALL_FUNCTION(argc)-1 + def CALL_FUNCTION_KW(self, argc): + return self.CALL_FUNCTION(argc)-1 + def CALL_FUNCTION_VAR_KW(self, argc): + return self.CALL_FUNCTION(argc)-2 + def MAKE_FUNCTION(self, argc): + return -argc + def MAKE_CLOSURE(self, argc): + # XXX need to account for free variables too! + return -argc + def BUILD_SLICE(self, argc): + if argc == 2: + return -1 + elif argc == 3: + return -2 + def DUP_TOPX(self, argc): + return argc + +findDepth = StackDepthTracker().findDepth -- cgit