diff -r aa480ecc8d3a Include/opcode.h --- a/Include/opcode.h Sat Mar 22 14:07:06 2008 +0100 +++ b/Include/opcode.h Sun Mar 23 16:48:26 2008 +0100 @@ -89,6 +89,7 @@ extern "C" { #define DELETE_NAME 91 /* "" */ #define UNPACK_SEQUENCE 92 /* Number of sequence items */ #define FOR_ITER 93 +#define FOR_ITER2 94 #define STORE_ATTR 95 /* Index in name list */ #define DELETE_ATTR 96 /* "" */ @@ -109,6 +110,7 @@ extern "C" { #define JUMP_IF_FALSE 111 /* "" */ #define JUMP_IF_TRUE 112 /* "" */ #define JUMP_ABSOLUTE 113 /* Target byte offset from beginning of code */ +#define JUMP_ABS_IF_TRUE 114 /* "" */ #define LOAD_GLOBAL 116 /* Index in name list */ diff -r aa480ecc8d3a Lib/compiler/pyassem.py --- a/Lib/compiler/pyassem.py Sat Mar 22 14:07:06 2008 +0100 +++ b/Lib/compiler/pyassem.py Sun Mar 23 16:48:26 2008 +0100 @@ -69,10 +69,13 @@ class FlowGraph: def emit(self, *inst): if self._debug: print "\t", inst - if inst[0] in ['RETURN_VALUE', 'YIELD_VALUE']: - self.current.addOutEdge(self.exit) + #if inst[0] in ['RETURN_VALUE', 'YIELD_VALUE']: + #self.current.addOutEdge(self.exit) if len(inst) == 2 and isinstance(inst[1], Block): self.current.addOutEdge(inst[1]) + # Record block emission order dependencies + if self.hasjrel.has_elt(inst[0]): + self.current.followers.add(inst[1]) self.current.emit(inst) def getBlocksInOrder(self): @@ -80,19 +83,23 @@ class FlowGraph: i.e. 
each node appears before all of its successors """ + # XXX The old hacks here have been disabled because order_blocks + # should be sufficient on its own + # XXX make sure every node that doesn't have an explicit next # is set so that next points to exit + #for b in self.blocks.elements(): + #if b is self.exit: + #continue + #if not b.next: + #b.addNext(self.exit, False) for b in self.blocks.elements(): - if b is self.exit: - continue - if not b.next: - b.addNext(self.exit) - order = dfs_postorder(self.entry, {}) - order.reverse() - self.fixupOrder(order, self.exit) + b.pruneNext() + order = order_blocks(self.entry, self.exit) + #self.fixupOrder(order, self.exit) # hack alert - if not self.exit in order: - order.append(self.exit) + #if not self.exit in order: + #order.append(self.exit) return order @@ -205,16 +212,60 @@ class FlowGraph: l.extend(b.getContainedGraphs()) return l -def dfs_postorder(b, seen): - """Depth-first search of tree rooted at b, return in postorder""" + +def order_blocks(start_block, exit_block): + """Order blocks so that they are emitted in the right order""" + # Rules: + # - when a block has a next block, the next block must be emitted just after + # - when a block has followers (relative jumps), it must be emitted before + # them + # - all reachable blocks must be emitted order = [] - seen[b] = b - for c in b.get_children(): - if seen.has_key(c): - continue - order = order + dfs_postorder(c, seen) - order.append(b) + + reachable = set() + blocks = [start_block] + while blocks: + reachable.update(blocks) + blocks = [c for b in blocks for c in b.get_children() + if not c in reachable] + + remaining = set(reachable) + + def fill_todo(): + todo = set(remaining) + if not todo: + return todo + for b in remaining: + # Followers must be processed after their predecessors + for c in b.followers: + while 1: + todo.discard(c) + # We must also discard any other block whose next block + # is the block we just discarded! 
+ if c.prev and c.prev[0] is not b: + c = c.prev[0] + else: + break + # If todo is empty, then we have a circular dependency! + assert todo + return todo + + def fill_order(b): + remaining.discard(b) + order.append(b) + if b.next: + fill_order(b.next[0]) + # Append the default exit block if necessary + elif b is not exit_block and not b.has_unconditional_transfer(): + order.append(exit_block) + + fill_order(start_block) + while remaining: + todo = fill_todo() + for b in todo: + fill_order(b) return order + class Block: _count = 0 @@ -222,10 +273,14 @@ class Block: def __init__(self, label=''): self.insts = [] self.inEdges = misc.Set() - self.outEdges = misc.Set() + self.outEdges = set() self.label = label self.bid = Block._count self.next = [] + self.prev = [] + # Blocks that must be emitted *after* this one, because of + # bytecode offsets (e.g. relative jumps) pointing to them! + self.followers = set() Block._count = Block._count + 1 def __repr__(self): @@ -241,8 +296,6 @@ class Block: def emit(self, inst): op = inst[0] - if op[:4] == 'JUMP': - self.outEdges.add(inst[1]) self.insts.append(inst) def getInstructions(self): @@ -254,12 +307,27 @@ class Block: def addOutEdge(self, block): self.outEdges.add(block) - def addNext(self, block): + def addNext(self, block, add_prev=True): self.next.append(block) + self.followers.add(block) assert len(self.next) == 1, map(str, self.next) + if add_prev: + block.prev.append(self) + assert len(block.prev) == 1, map(str, block.prev) - _uncond_transfer = ('RETURN_VALUE', 'RAISE_VARARGS', 'YIELD_VALUE', - 'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'CONTINUE_LOOP') + _uncond_transfer = ('RETURN_VALUE', 'RAISE_VARARGS', #'YIELD_VALUE', + 'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'CONTINUE_LOOP', + ) + + def has_unconditional_transfer(self): + """Returns True if there is an unconditional transfer to another block + at the end of this block. 
This means there is no risk for the bytecode + executor to go past this block's bytecode.""" + try: + op, arg = self.insts[-1] + except (IndexError, ValueError): + return + return op in self._uncond_transfer def pruneNext(self): """Remove bogus edge for unconditional transfers @@ -275,17 +343,14 @@ class Block: remove the next edge when it follows an unconditional control transfer. """ - try: - op, arg = self.insts[-1] - except (IndexError, ValueError): - return - if op in self._uncond_transfer: + if self.next and self.has_unconditional_transfer(): + self.outEdges.add(self.next[0]) + self.followers.remove(self.next[0]) + self.next[0].prev = [] self.next = [] def get_children(self): - if self.next and self.next[0] in self.outEdges: - self.outEdges.remove(self.next[0]) - return self.outEdges.elements() + self.next + return list(self.outEdges) + self.next def getContainedGraphs(self): """Return all graphs contained within this block. @@ -779,6 +844,7 @@ class StackDepthTracker: 'SETUP_EXCEPT': 3, 'SETUP_FINALLY': 3, 'FOR_ITER': 1, + 'FOR_ITER2': 1, 'WITH_CLEANUP': -1, } # use pattern match diff -r aa480ecc8d3a Lib/compiler/pycodegen.py --- a/Lib/compiler/pycodegen.py Sat Mar 22 14:07:06 2008 +0100 +++ b/Lib/compiler/pycodegen.py Sun Mar 23 16:48:26 2008 +0100 @@ -433,25 +433,27 @@ class CodeGenerator: self.set_lineno(node) loop = self.newBlock() + body = self.newBlock() + tail = self.newBlock() else_ = self.newBlock() + after = self.newBlock() - after = self.newBlock() self.emit('SETUP_LOOP', after) self.nextBlock(loop) self.setups.push((LOOP, loop)) + self.emit('JUMP_ABSOLUTE', tail) + self.startBlock(body) self.set_lineno(node, force=True) + + self.visit(node.body) + + self.nextBlock(tail) self.visit(node.test) - self.emit('JUMP_IF_FALSE', else_ or after) + self.emit('JUMP_ABS_IF_TRUE', body) - self.nextBlock() - self.emit('POP_TOP') - self.visit(node.body) - self.emit('JUMP_ABSOLUTE', loop) - - self.startBlock(else_) # or just the POPs if not else clause - 
self.emit('POP_TOP') + self.nextBlock(else_) # or just the POPs if not else clause self.emit('POP_BLOCK') self.setups.pop() if node.else_: @@ -460,6 +462,8 @@ class CodeGenerator: def visitFor(self, node): start = self.newBlock() + body = self.newBlock() + tail = self.newBlock() anchor = self.newBlock() after = self.newBlock() self.setups.push((LOOP, start)) @@ -471,10 +475,12 @@ class CodeGenerator: self.nextBlock(start) self.set_lineno(node, force=1) - self.emit('FOR_ITER', anchor) + self.emit('JUMP_ABSOLUTE', tail) + self.startBlock(body) self.visit(node.assign) self.visit(node.body) - self.emit('JUMP_ABSOLUTE', start) + self.nextBlock(tail) + self.emit('FOR_ITER2', body) self.nextBlock(anchor) self.emit('POP_BLOCK') self.setups.pop() @@ -497,7 +503,7 @@ class CodeGenerator: if kind == LOOP: self.set_lineno(node) self.emit('JUMP_ABSOLUTE', block) - self.nextBlock() + self.startBlock(self.newBlock()) elif kind == EXCEPT or kind == TRY_FINALLY: self.set_lineno(node) # find the block that starts the loop @@ -511,7 +517,7 @@ class CodeGenerator: raise SyntaxError, "'continue' outside loop (%s, %d)" % \ (node.filename, node.lineno) self.emit('CONTINUE_LOOP', loop_block) - self.nextBlock() + self.startBlock(self.newBlock()) elif kind == END_FINALLY: msg = "'continue' not allowed inside 'finally' clause (%s, %d)" raise SyntaxError, msg % (node.filename, node.lineno) @@ -540,7 +546,7 @@ class CodeGenerator: self.emit('POP_TOP') self.visit(node.then) self.emit('JUMP_FORWARD', endblock) - self.nextBlock(elseblock) + self.startBlock(elseblock) self.emit('POP_TOP') self.visit(node.else_) self.nextBlock(endblock) @@ -681,7 +687,7 @@ class CodeGenerator: self.startBlock(anchor) self.emit('POP_BLOCK') self.setups.pop() - self.startBlock(end) + self.nextBlock(end) self.emit('LOAD_CONST', None) @@ -791,9 +797,9 @@ class CodeGenerator: self.visit(body) self.emit('JUMP_FORWARD', end) if expr: - self.nextBlock(next) + self.startBlock(next) else: - self.nextBlock() + 
self.startBlock(self.newBlock()) if expr: # XXX self.emit('POP_TOP') self.emit('END_FINALLY') diff -r aa480ecc8d3a Lib/opcode.py --- a/Lib/opcode.py Sat Mar 22 14:07:06 2008 +0100 +++ b/Lib/opcode.py Sun Mar 23 16:48:26 2008 +0100 @@ -128,6 +128,7 @@ name_op('DELETE_NAME', 91) # "" name_op('DELETE_NAME', 91) # "" def_op('UNPACK_SEQUENCE', 92) # Number of tuple items jrel_op('FOR_ITER', 93) +jabs_op('FOR_ITER2', 94) name_op('STORE_ATTR', 95) # Index in name list name_op('DELETE_ATTR', 96) # "" @@ -150,6 +151,7 @@ jrel_op('JUMP_IF_FALSE', 111) # "" jrel_op('JUMP_IF_FALSE', 111) # "" jrel_op('JUMP_IF_TRUE', 112) # "" jabs_op('JUMP_ABSOLUTE', 113) # Target byte offset from beginning of code +jabs_op('JUMP_ABS_IF_TRUE', 114) # "" name_op('LOAD_GLOBAL', 116) # Index in name list diff -r aa480ecc8d3a Python/ceval.c --- a/Python/ceval.c Sat Mar 22 14:07:06 2008 +0100 +++ b/Python/ceval.c Sun Mar 23 16:48:26 2008 +0100 @@ -2175,6 +2175,26 @@ PyEval_EvalFrameEx(PyFrameObject *f, int break; continue; + case JUMP_ABS_IF_TRUE: + w = POP(); + if (w == Py_False) { + goto fast_next_opcode; + } + if (w == Py_True) { + JUMPTO(oparg); + goto fast_next_opcode; + } + err = PyObject_IsTrue(w); + if (err > 0) { + err = 0; + JUMPTO(oparg); + } + else if (err == 0) + ; + else + break; + continue; + PREDICTED_WITH_ARG(JUMP_ABSOLUTE); case JUMP_ABSOLUTE: JUMPTO(oparg); @@ -2225,6 +2245,28 @@ PyEval_EvalFrameEx(PyFrameObject *f, int x = v = POP(); Py_DECREF(v); JUMPBY(oparg); + continue; + + case FOR_ITER2: + /* before: [iter]; after: [iter, iter()] *or* [] */ + v = TOP(); + x = (*v->ob_type->tp_iternext)(v); + if (x != NULL) { + PUSH(x); + JUMPTO(oparg); + PREDICT(STORE_FAST); + PREDICT(UNPACK_SEQUENCE); + continue; + } + if (PyErr_Occurred()) { + if (!PyErr_ExceptionMatches( + PyExc_StopIteration)) + break; + PyErr_Clear(); + } + /* iterator ended normally */ + x = v = POP(); + Py_DECREF(v); continue; case BREAK_LOOP: diff -r aa480ecc8d3a Python/compile.c --- a/Python/compile.c Sat Mar 22 
14:07:06 2008 +0100 +++ b/Python/compile.c Sun Mar 23 16:48:26 2008 +0100 @@ -796,6 +796,8 @@ opcode_stack_effect(int opcode, int opar return oparg-1; case FOR_ITER: return 1; + case FOR_ITER2: + return 1; case STORE_ATTR: return -2; @@ -830,6 +832,8 @@ opcode_stack_effect(int opcode, int opar case JUMP_IF_TRUE: case JUMP_ABSOLUTE: return 0; + case JUMP_ABS_IF_TRUE: + return -1; case LOAD_GLOBAL: return 1; @@ -1612,12 +1616,15 @@ static int static int compiler_for(struct compiler *c, stmt_ty s) { - basicblock *start, *cleanup, *end; + basicblock *start, *body, *tail, *cleanup, *end; start = compiler_new_block(c); + body = compiler_new_block(c); + tail = compiler_new_block(c); cleanup = compiler_new_block(c); end = compiler_new_block(c); - if (start == NULL || end == NULL || cleanup == NULL) + if (start == NULL || body == NULL || tail == NULL + || end == NULL || cleanup == NULL) return 0; ADDOP_JREL(c, SETUP_LOOP, end); if (!compiler_push_fblock(c, LOOP, start)) @@ -1628,10 +1635,12 @@ compiler_for(struct compiler *c, stmt_ty /* for expressions must be traced on each iteration, so we need to set an extra line number. 
*/ c->u->u_lineno_set = false; - ADDOP_JREL(c, FOR_ITER, cleanup); + ADDOP_JABS(c, JUMP_ABSOLUTE, tail); + compiler_use_next_block(c, body); VISIT(c, expr, s->v.For.target); VISIT_SEQ(c, stmt, s->v.For.body); - ADDOP_JABS(c, JUMP_ABSOLUTE, start); + compiler_use_next_block(c, tail); + ADDOP_JABS(c, FOR_ITER2, body); compiler_use_next_block(c, cleanup); ADDOP(c, POP_BLOCK); compiler_pop_fblock(c, LOOP, start); @@ -1643,7 +1652,7 @@ static int static int compiler_while(struct compiler *c, stmt_ty s) { - basicblock *loop, *orelse, *end, *anchor = NULL; + basicblock *loop, *body, *tail, *orelse, *end, *anchor = NULL; int constant = expr_constant(s->v.While.test); if (constant == 0) { @@ -1654,8 +1663,10 @@ compiler_while(struct compiler *c, stmt_ loop = compiler_new_block(c); end = compiler_new_block(c); if (constant == -1) { + body = compiler_new_block(c); + tail = compiler_new_block(c); anchor = compiler_new_block(c); - if (anchor == NULL) + if (body == NULL || tail == NULL || anchor == NULL) return 0; } if (loop == NULL || end == NULL) @@ -1676,12 +1687,18 @@ compiler_while(struct compiler *c, stmt_ /* while expressions must be traced on each iteration, so we need to set an extra line number. */ c->u->u_lineno_set = false; - VISIT(c, expr, s->v.While.test); - ADDOP_JREL(c, JUMP_IF_FALSE, anchor); - ADDOP(c, POP_TOP); + ADDOP_JABS(c, JUMP_ABSOLUTE, tail); + compiler_use_next_block(c, body); } VISIT_SEQ(c, stmt, s->v.While.body); - ADDOP_JABS(c, JUMP_ABSOLUTE, loop); + if (constant == -1) { + compiler_use_next_block(c, tail); + VISIT(c, expr, s->v.While.test); + ADDOP_JABS(c, JUMP_ABS_IF_TRUE, body); + } + else { + ADDOP_JABS(c, JUMP_ABSOLUTE, loop); + } /* XXX should the two POP instructions be in a separate block if there is no else clause ? 
@@ -1689,7 +1706,6 @@ compiler_while(struct compiler *c, stmt_ if (constant == -1) { compiler_use_next_block(c, anchor); - ADDOP(c, POP_TOP); ADDOP(c, POP_BLOCK); } compiler_pop_fblock(c, LOOP, loop); diff -r aa480ecc8d3a Python/import.c --- a/Python/import.c Sat Mar 22 14:07:06 2008 +0100 +++ b/Python/import.c Sun Mar 23 16:48:26 2008 +0100 @@ -73,9 +73,10 @@ extern time_t PyOS_GetLastModificationTi Python 2.5c2: 62131 (fix wrong code: for x, in ... in listcomp/genexp) Python 2.6a0: 62151 (peephole optimizations and STORE_MAP opcode) Python 2.6a1: 62161 (WITH_CLEANUP optimization) + Python 2.6a1: 62171 (FOR_ITER2, JUMP_ABS_IF_TRUE) . */ -#define MAGIC (62161 | ((long)'\r'<<16) | ((long)'\n'<<24)) +#define MAGIC (62171 | ((long)'\r'<<16) | ((long)'\n'<<24)) /* Magic word as global; note that _PyImport_Init() can change the value of this global to accommodate for alterations of how the diff -r aa480ecc8d3a Python/peephole.c --- a/Python/peephole.c Sat Mar 22 14:07:06 2008 +0100 +++ b/Python/peephole.c Sun Mar 23 16:48:26 2008 +0100 @@ -13,7 +13,8 @@ #define GETARG(arr, i) ((int)((arr[i+2]<<8) + arr[i+1])) #define UNCONDITIONAL_JUMP(op) (op==JUMP_ABSOLUTE || op==JUMP_FORWARD) -#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP) +#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP || \ + op==FOR_ITER2 || op==JUMP_ABS_IF_TRUE) #define GETJUMPTGT(arr, i) (GETARG(arr,i) + (ABSOLUTE_JUMP(arr[i]) ? 0 : i+3)) #define SETARG(arr, i, val) arr[i+2] = val>>8; arr[i+1] = val & 255 #define CODESIZE(op) (HAS_ARG(op) ? 
3 : 1) @@ -244,10 +245,12 @@ markblocks(unsigned char *code, int len) opcode = code[i]; switch (opcode) { case FOR_ITER: + case FOR_ITER2: case JUMP_FORWARD: case JUMP_IF_FALSE: case JUMP_IF_TRUE: case JUMP_ABSOLUTE: + case JUMP_ABS_IF_TRUE: case CONTINUE_LOOP: case SETUP_LOOP: case SETUP_EXCEPT: @@ -590,7 +593,9 @@ PyCode_Optimize(PyObject *code, PyObject continue; case JUMP_ABSOLUTE: + case JUMP_ABS_IF_TRUE: case CONTINUE_LOOP: + case FOR_ITER2: j = addrmap[GETARG(codestr, i)]; SETARG(codestr, i, j); break;