diff -r 0f9dcecd9d88 Include/opcode.h --- a/Include/opcode.h Wed Apr 30 18:19:55 2008 +0200 +++ b/Include/opcode.h Wed Apr 30 23:39:31 2008 +0200 @@ -88,7 +88,7 @@ #define STORE_NAME 90 /* Index in name list */ #define DELETE_NAME 91 /* "" */ #define UNPACK_SEQUENCE 92 /* Number of sequence items */ -#define FOR_ITER 93 +#define FOR_ITER 93 /* Byte index of beginning of loop body */ #define STORE_ATTR 95 /* Index in name list */ #define DELETE_ATTR 96 /* "" */ @@ -109,6 +109,8 @@ #define JUMP_IF_FALSE 111 /* "" */ #define JUMP_IF_TRUE 112 /* "" */ #define JUMP_ABSOLUTE 113 /* Target byte offset from beginning of code */ +#define JUMP_ABS_IF_TRUE 114 /* "" */ +#define JUMP_ABS_IF_FALSE 115 /* "" */ #define LOAD_GLOBAL 116 /* Index in name list */ diff -r 0f9dcecd9d88 Lib/compiler/pyassem.py --- a/Lib/compiler/pyassem.py Wed Apr 30 18:19:55 2008 +0200 +++ b/Lib/compiler/pyassem.py Wed Apr 30 23:39:31 2008 +0200 @@ -42,10 +42,8 @@ # Note: If the current block ends with an unconditional # control transfer, then it is incorrect to add an implicit - # transfer to the block graph. The current code requires - # these edges to get the blocks emitted in the right order, - # however. :-( If a client needs to remove these edges, call - # pruneEdges(). + # transfer to the block graph. These edges are removed later by + # calling pruneEdges() (see getBlocksInOrder()). self.current.addNext(block) self.startBlock(block) @@ -69,10 +67,12 @@ def emit(self, *inst): if self._debug: print "\t", inst - if inst[0] in ['RETURN_VALUE', 'YIELD_VALUE']: - self.current.addOutEdge(self.exit) if len(inst) == 2 and isinstance(inst[1], Block): self.current.addOutEdge(inst[1]) + # Record block emission order dependencies for relative jumps + # (which can only have positive offsets). + if self.hasjrel.has_elt(inst[0]): + self.current.followers.add(inst[1]) self.current.emit(inst) def getBlocksInOrder(self): @@ -80,117 +80,11 @@ i.e. 
each node appears before all of its successors """ - # XXX make sure every node that doesn't have an explicit next - # is set so that next points to exit + # Remove false edges. for b in self.blocks.elements(): - if b is self.exit: - continue - if not b.next: - b.addNext(self.exit) - order = dfs_postorder(self.entry, {}) - order.reverse() - self.fixupOrder(order, self.exit) - # hack alert - if not self.exit in order: - order.append(self.exit) - + b.pruneNext() + order = order_blocks(self.entry, self.exit) return order - - def fixupOrder(self, blocks, default_next): - """Fixup bad order introduced by DFS.""" - - # XXX This is a total mess. There must be a better way to get - # the code blocks in the right order. - - self.fixupOrderHonorNext(blocks, default_next) - self.fixupOrderForward(blocks, default_next) - - def fixupOrderHonorNext(self, blocks, default_next): - """Fix one problem with DFS. - - The DFS uses child block, but doesn't know about the special - "next" block. As a result, the DFS can order blocks so that a - block isn't next to the right block for implicit control - transfers. - """ - index = {} - for i in range(len(blocks)): - index[blocks[i]] = i - - for i in range(0, len(blocks) - 1): - b = blocks[i] - n = blocks[i + 1] - if not b.next or b.next[0] == default_next or b.next[0] == n: - continue - # The blocks are in the wrong order. Find the chain of - # blocks to insert where they belong. - cur = b - chain = [] - elt = cur - while elt.next and elt.next[0] != default_next: - chain.append(elt.next[0]) - elt = elt.next[0] - # Now remove the blocks in the chain from the current - # block list, so that they can be re-inserted. 
- l = [] - for b in chain: - assert index[b] > i - l.append((index[b], b)) - l.sort() - l.reverse() - for j, b in l: - del blocks[index[b]] - # Insert the chain in the proper location - blocks[i:i + 1] = [cur] + chain - # Finally, re-compute the block indexes - for i in range(len(blocks)): - index[blocks[i]] = i - - def fixupOrderForward(self, blocks, default_next): - """Make sure all JUMP_FORWARDs jump forward""" - index = {} - chains = [] - cur = [] - for b in blocks: - index[b] = len(chains) - cur.append(b) - if b.next and b.next[0] == default_next: - chains.append(cur) - cur = [] - chains.append(cur) - - while 1: - constraints = [] - - for i in range(len(chains)): - l = chains[i] - for b in l: - for c in b.get_children(): - if index[c] < i: - forward_p = 0 - for inst in b.insts: - if inst[0] == 'JUMP_FORWARD': - if inst[1] == c: - forward_p = 1 - if not forward_p: - continue - constraints.append((index[c], i)) - - if not constraints: - break - - # XXX just do one for now - # do swaps to get things in the right order - goes_before, a_chain = constraints[0] - assert a_chain > goes_before - c = chains[a_chain] - chains.remove(c) - chains.insert(goes_before, c) - - del blocks[:] - for c in chains: - for b in c: - blocks.append(b) def getBlocks(self): return self.blocks.elements() @@ -205,16 +99,60 @@ l.extend(b.getContainedGraphs()) return l -def dfs_postorder(b, seen): - """Depth-first search of tree rooted at b, return in postorder""" + +def order_blocks(start_block, exit_block): + """Order blocks so that they are emitted in the right order""" + # Rules: + # - when a block has a next block, the next block must be emitted just after + # - when a block has followers (relative jumps), it must be emitted before + # them + # - all reachable blocks must be emitted order = [] - seen[b] = b - for c in b.get_children(): - if seen.has_key(c): - continue - order = order + dfs_postorder(c, seen) - order.append(b) + + reachable = set() + blocks = [start_block] + while blocks: 
+ reachable.update(blocks) + blocks = [c for b in blocks for c in b.get_children() + if not c in reachable] + + remaining = set(reachable) + + def fill_todo(): + todo = set(remaining) + if not todo: + return todo + for b in remaining: + # Followers must be processed after their predecessors + for c in b.get_followers(): + while 1: + todo.discard(c) + # We must also discard any other block whose next block + # is the block we just discarded! + if c.prev and c.prev[0] is not b: + c = c.prev[0] + else: + break + # If todo is empty, then we have a circular dependency! + assert todo + return todo + + def fill_order(b): + remaining.discard(b) + order.append(b) + if b.next: + fill_order(b.next[0]) + # Append the default exit block if necessary + elif b is not exit_block and not b.has_unconditional_transfer(): + order.append(exit_block) + + fill_order(start_block) + while remaining: + todo = fill_todo() + for b in todo: + fill_order(b) return order + class Block: _count = 0 @@ -222,10 +160,18 @@ def __init__(self, label=''): self.insts = [] self.inEdges = misc.Set() - self.outEdges = misc.Set() + self.outEdges = set() self.label = label self.bid = Block._count self.next = [] + self.prev = [] + # Blocks that must be emitted *after* this one, because of + # bytecode offsets (e.g. relative jumps) pointing to them. + # NOTE: this might also include the next block if it is explicitly + # referenced by an instruction inside the block. Yet, the next block + # is treated separately from self.followers so that pruneNext() + # can work properly. 
+ self.followers = set() Block._count = Block._count + 1 def __repr__(self): @@ -241,8 +187,6 @@ def emit(self, inst): op = inst[0] - if op[:4] == 'JUMP': - self.outEdges.add(inst[1]) self.insts.append(inst) def getInstructions(self): @@ -254,12 +198,26 @@ def addOutEdge(self, block): self.outEdges.add(block) - def addNext(self, block): + def addNext(self, block, add_prev=True): self.next.append(block) assert len(self.next) == 1, map(str, self.next) + if add_prev: + block.prev.append(self) + assert len(block.prev) == 1, map(str, block.prev) - _uncond_transfer = ('RETURN_VALUE', 'RAISE_VARARGS', 'YIELD_VALUE', - 'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'CONTINUE_LOOP') + _uncond_transfer = ('RETURN_VALUE', 'RAISE_VARARGS', + 'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'CONTINUE_LOOP', + ) + + def has_unconditional_transfer(self): + """Returns True if there is an unconditional transfer to another block + at the end of this block. This means there is no risk of the bytecode + interpreter running past this block's bytecode.""" + try: + op, arg = self.insts[-1] + except (IndexError, ValueError): + return + return op in self._uncond_transfer def pruneNext(self): """Remove bogus edge for unconditional transfers @@ -275,17 +233,17 @@ remove the next edge when it follows an unconditional control transfer. """ - try: - op, arg = self.insts[-1] - except (IndexError, ValueError): - return - if op in self._uncond_transfer: + if self.next and self.has_unconditional_transfer(): + self.outEdges.add(self.next[0]) + self.next[0].prev = [] self.next = [] def get_children(self): - if self.next and self.next[0] in self.outEdges: - self.outEdges.remove(self.next[0]) - return self.outEdges.elements() + self.next + return list(self.outEdges) + self.next + + def get_followers(self): + """Get the whole set of followers, including the next block.""" + return self.followers | set(self.next) def getContainedGraphs(self): """Return all graphs contained within this block. 
@@ -679,6 +637,11 @@ # compute deltas addr = self.codeOffset - self.lastoff line = lineno - self.lastline + abs_line = abs(line) + if line >= 0: + sign_line = lambda l: l + else: + sign_line = lambda l: 256 - l # Python assumes that lineno always increases with # increasing bytecode address (lnotab is unsigned char). # Depending on when SET_LINENO instructions are emitted @@ -689,19 +652,18 @@ # after the loading of "b". This works with the C Python # compiler because it only generates a SET_LINENO instruction # for the assignment. - if line >= 0: - push = self.lnotab.append - while addr > 255: - push(255); push(0) - addr -= 255 - while line > 255: - push(addr); push(255) - line -= 255 - addr = 0 - if addr > 0 or line > 0: - push(addr); push(line) - self.lastline = lineno - self.lastoff = self.codeOffset + push = self.lnotab.append + while addr > 255: + push(255); push(0) + addr -= 255 + while abs_line > 127: + push(addr); push(sign_line(127)) + abs_line -= 127 + addr = 0 + if addr > 0 or abs_line > 0: + push(addr); push(sign_line(abs_line)) + self.lastline = lineno + self.lastoff = self.codeOffset def getCode(self): return ''.join(self.code) diff -r 0f9dcecd9d88 Lib/compiler/pycodegen.py --- a/Lib/compiler/pycodegen.py Wed Apr 30 18:19:55 2008 +0200 +++ b/Lib/compiler/pycodegen.py Wed Apr 30 23:39:31 2008 +0200 @@ -410,7 +410,7 @@ def visitIf(self, node): end = self.newBlock() numtests = len(node.tests) - for i in range(numtests): + for i in range(numtests - 1): test, suite = node.tests[i] if is_constant_false(test): # XXX will need to check generator stuff here @@ -418,40 +418,54 @@ self.set_lineno(test) self.visit(test) nextTest = self.newBlock() - self.emit('JUMP_IF_FALSE', nextTest) + self.emit('JUMP_ABS_IF_FALSE', nextTest) self.nextBlock() - self.emit('POP_TOP') self.visit(suite) self.emit('JUMP_FORWARD', end) self.startBlock(nextTest) - self.emit('POP_TOP') - if node.else_: - self.visit(node.else_) + # The last test can optimize a few things + test, suite = 
node.tests[numtests - 1] + if is_constant_false(test): + if node.else_: + self.visit(node.else_) + else: + self.set_lineno(test) + self.visit(test) + else_block = self.newBlock() if node.else_ else end + self.emit('JUMP_ABS_IF_FALSE', else_block) + self.nextBlock() + self.visit(suite) + if node.else_: + self.emit('JUMP_FORWARD', end) + self.startBlock(else_block) + self.visit(node.else_) self.nextBlock(end) def visitWhile(self, node): self.set_lineno(node) loop = self.newBlock() + body = self.newBlock() + tail = self.newBlock() else_ = self.newBlock() + after = self.newBlock() - after = self.newBlock() self.emit('SETUP_LOOP', after) self.nextBlock(loop) self.setups.push((LOOP, loop)) + self.emit('JUMP_FORWARD', tail) + self.startBlock(body) self.set_lineno(node, force=True) + + self.visit(node.body) + + self.nextBlock(tail) self.visit(node.test) - self.emit('JUMP_IF_FALSE', else_ or after) + self.emit('JUMP_ABS_IF_TRUE', body) - self.nextBlock() - self.emit('POP_TOP') - self.visit(node.body) - self.emit('JUMP_ABSOLUTE', loop) - - self.startBlock(else_) # or just the POPs if not else clause - self.emit('POP_TOP') + self.nextBlock(else_) # or just the POPs if not else clause self.emit('POP_BLOCK') self.setups.pop() if node.else_: @@ -460,6 +474,8 @@ def visitFor(self, node): start = self.newBlock() + body = self.newBlock() + tail = self.newBlock() anchor = self.newBlock() after = self.newBlock() self.setups.push((LOOP, start)) @@ -471,10 +487,12 @@ self.nextBlock(start) self.set_lineno(node, force=1) - self.emit('FOR_ITER', anchor) + self.emit('JUMP_FORWARD', tail) + self.startBlock(body) self.visit(node.assign) self.visit(node.body) - self.emit('JUMP_ABSOLUTE', start) + self.nextBlock(tail) + self.emit('FOR_ITER', body) self.nextBlock(anchor) self.emit('POP_BLOCK') self.setups.pop() @@ -536,12 +554,10 @@ endblock = self.newBlock() elseblock = self.newBlock() self.visit(node.test) - self.emit('JUMP_IF_FALSE', elseblock) - self.emit('POP_TOP') + 
self.emit('JUMP_ABS_IF_FALSE', elseblock) self.visit(node.then) self.emit('JUMP_FORWARD', endblock) self.nextBlock(elseblock) - self.emit('POP_TOP') self.visit(node.else_) self.nextBlock(endblock) @@ -583,50 +599,42 @@ stack = [] for i, for_ in zip(range(len(node.quals)), node.quals): - start, anchor = self.visit(for_) + start, body, tail = self.visit(for_) cont = None for if_ in for_.ifs: - if cont is None: - cont = self.newBlock() - self.visit(if_, cont) - stack.insert(0, (start, cont, anchor)) + self.visit(if_, tail) + stack.insert(0, (start, body, cont, tail)) self._implicitNameOp('LOAD', tmpname) self.visit(node.expr) self.emit('LIST_APPEND') - for start, cont, anchor in stack: - if cont: - skip_one = self.newBlock() - self.emit('JUMP_FORWARD', skip_one) - self.startBlock(cont) - self.emit('POP_TOP') - self.nextBlock(skip_one) - self.emit('JUMP_ABSOLUTE', start) - self.startBlock(anchor) + for start, body, cont, tail in stack: + self.nextBlock(tail) + self.emit('FOR_ITER', body) + self.nextBlock() self._implicitNameOp('DELETE', tmpname) self.__list_count = self.__list_count - 1 def visitListCompFor(self, node): start = self.newBlock() - anchor = self.newBlock() + body = self.newBlock() + tail = self.newBlock() self.visit(node.list) self.emit('GET_ITER') self.nextBlock(start) self.set_lineno(node, force=True) - self.emit('FOR_ITER', anchor) - self.nextBlock() + self.emit('JUMP_FORWARD', tail) + self.nextBlock(body) self.visit(node.assign) - return start, anchor + return start, body, tail def visitListCompIf(self, node, branch): self.set_lineno(node, force=True) self.visit(node.test) - self.emit('JUMP_IF_FALSE', branch) - self.newBlock() - self.emit('POP_TOP') + self.emit('JUMP_ABS_IF_FALSE', branch) def _makeClosure(self, gen, args): frees = gen.scope.get_free_vars() @@ -658,36 +666,30 @@ stack = [] for i, for_ in zip(range(len(node.quals)), node.quals): - start, anchor, end = self.visit(for_) + start, body, tail, end = self.visit(for_) cont = None for if_ in 
for_.ifs: - if cont is None: - cont = self.newBlock() - self.visit(if_, cont) - stack.insert(0, (start, cont, anchor, end)) + self.visit(if_, tail) + stack.insert(0, (start, body, tail, end)) self.visit(node.expr) self.emit('YIELD_VALUE') self.emit('POP_TOP') - for start, cont, anchor, end in stack: - if cont: - skip_one = self.newBlock() - self.emit('JUMP_FORWARD', skip_one) - self.startBlock(cont) - self.emit('POP_TOP') - self.nextBlock(skip_one) - self.emit('JUMP_ABSOLUTE', start) - self.startBlock(anchor) + for start, body, tail, end in stack: + self.nextBlock(tail) + self.emit('FOR_ITER', body) + self.nextBlock() self.emit('POP_BLOCK') self.setups.pop() - self.startBlock(end) + self.nextBlock(end) self.emit('LOAD_CONST', None) def visitGenExprFor(self, node): start = self.newBlock() - anchor = self.newBlock() + body = self.newBlock() + tail = self.newBlock() end = self.newBlock() self.setups.push((LOOP, start)) @@ -701,17 +703,16 @@ self.nextBlock(start) self.set_lineno(node, force=True) - self.emit('FOR_ITER', anchor) - self.nextBlock() + self.emit('JUMP_FORWARD', tail) + self.nextBlock(body) self.visit(node.assign) - return start, anchor, end + return start, body, tail, end def visitGenExprIf(self, node, branch): self.set_lineno(node, force=True) self.visit(node.test) - self.emit('JUMP_IF_FALSE', branch) + self.emit('JUMP_ABS_IF_FALSE', branch) self.newBlock() - self.emit('POP_TOP') # exception related diff -r 0f9dcecd9d88 Lib/dis.py --- a/Lib/dis.py Wed Apr 30 18:19:55 2008 +0200 +++ b/Lib/dis.py Wed Apr 30 23:39:31 2008 +0200 @@ -181,7 +181,8 @@ """ byte_increments = [ord(c) for c in code.co_lnotab[0::2]] - line_increments = [ord(c) for c in code.co_lnotab[1::2]] + line_increments = [c if c < 128 else c - 256 + for c in map(ord, code.co_lnotab[1::2])] lastlineno = None lineno = code.co_firstlineno diff -r 0f9dcecd9d88 Lib/opcode.py --- a/Lib/opcode.py Wed Apr 30 18:19:55 2008 +0200 +++ b/Lib/opcode.py Wed Apr 30 23:39:31 2008 +0200 @@ -127,7 +127,7 @@ 
name_op('STORE_NAME', 90) # Index in name list name_op('DELETE_NAME', 91) # "" def_op('UNPACK_SEQUENCE', 92) # Number of tuple items -jrel_op('FOR_ITER', 93) +jabs_op('FOR_ITER', 93) name_op('STORE_ATTR', 95) # Index in name list name_op('DELETE_ATTR', 96) # "" @@ -150,6 +150,8 @@ jrel_op('JUMP_IF_FALSE', 111) # "" jrel_op('JUMP_IF_TRUE', 112) # "" jabs_op('JUMP_ABSOLUTE', 113) # Target byte offset from beginning of code +jabs_op('JUMP_ABS_IF_TRUE', 114) # "" +jabs_op('JUMP_ABS_IF_FALSE', 115) # "" name_op('LOAD_GLOBAL', 116) # Index in name list diff -r 0f9dcecd9d88 Lib/test/test_dis.py --- a/Lib/test/test_dis.py Wed Apr 30 18:19:55 2008 +0200 +++ b/Lib/test/test_dis.py Wed Apr 30 23:39:31 2008 +0200 @@ -35,16 +35,14 @@ %-4d 9 LOAD_CONST 2 (10) 12 CALL_FUNCTION 2 15 GET_ITER - >> 16 FOR_ITER 6 (to 25) - 19 STORE_FAST 0 (res) - - %-4d 22 JUMP_ABSOLUTE 16 - >> 25 POP_BLOCK + 16 JUMP_FORWARD 3 (to 22) + >> 19 STORE_FAST 0 (res) + >> 22 FOR_ITER 19 + 25 POP_BLOCK >> 26 LOAD_CONST 0 (None) 29 RETURN_VALUE """%(bug708901.func_code.co_firstlineno + 1, - bug708901.func_code.co_firstlineno + 2, - bug708901.func_code.co_firstlineno + 3) + bug708901.func_code.co_firstlineno + 2) def bug1333982(x=[]): @@ -62,13 +60,13 @@ 14 STORE_FAST 1 (_[1]) 17 LOAD_FAST 0 (x) 20 GET_ITER - >> 21 FOR_ITER 13 (to 37) - 24 STORE_FAST 2 (s) + 21 JUMP_FORWARD 10 (to 34) + >> 24 STORE_FAST 2 (s) 27 LOAD_FAST 1 (_[1]) 30 LOAD_FAST 2 (s) 33 LIST_APPEND - 34 JUMP_ABSOLUTE 21 - >> 37 DELETE_FAST 1 (_[1]) + >> 34 FOR_ITER 24 + 37 DELETE_FAST 1 (_[1]) %-4d 40 LOAD_CONST 2 (1) 43 BINARY_ADD diff -r 0f9dcecd9d88 Lib/test/test_peepholer.py --- a/Lib/test/test_peepholer.py Wed Apr 30 18:19:55 2008 +0200 +++ b/Lib/test/test_peepholer.py Wed Apr 30 23:39:31 2008 +0200 @@ -19,14 +19,14 @@ class TestTranforms(unittest.TestCase): def test_unot(self): - # UNARY_NOT JUMP_IF_FALSE POP_TOP --> JUMP_IF_TRUE POP_TOP' + # UNARY_NOT JUMP_ABS_IF_FALSE --> JUMP_ABS_IF_TRUE def unot(x): if not x == 2: del x asm = 
disassemble(unot) - for elem in ('UNARY_NOT', 'JUMP_IF_FALSE'): + for elem in ('UNARY_NOT', 'JUMP_ABS_IF_FALSE'): self.assert_(elem not in asm) - for elem in ('JUMP_IF_TRUE', 'POP_TOP'): + for elem in ('JUMP_ABS_IF_TRUE'): self.assert_(elem in asm) def test_elim_inversion_of_is_or_in(self): diff -r 0f9dcecd9d88 Lib/test/test_trace.py --- a/Lib/test/test_trace.py Wed Apr 30 18:19:55 2008 +0200 +++ b/Lib/test/test_trace.py Wed Apr 30 23:39:31 2008 +0200 @@ -58,6 +58,7 @@ no_pop_tops.events = [(0, 'call'), (1, 'line'), + (2, 'line'), (2, 'line'), (3, 'line'), (6, 'line'), @@ -229,6 +230,7 @@ (-4, 'exception'), (-1, 'line'), (-1, 'return')] + + [(5, 'line')] + [(5, 'line'), (6, 'line')] * 10 + [(5, 'line'), (5, 'return')]) @@ -356,9 +358,8 @@ for_example, [(0, 'call'), (1, 'line'), - (2, 'line'), (1, 'line'), - (2, 'line'), + (1, 'line'), (1, 'line'), (1, 'return')]) @@ -372,9 +373,7 @@ [(0, 'call'), (2, 'line'), (3, 'line'), - (4, 'line'), (3, 'line'), - (4, 'line'), (3, 'line'), (3, 'return')]) diff -r 0f9dcecd9d88 Lib/trace.py --- a/Lib/trace.py Wed Apr 30 18:19:55 2008 +0200 +++ b/Lib/trace.py Wed Apr 30 23:39:31 2008 +0200 @@ -377,7 +377,8 @@ """Return dict where keys are lines in the line number table.""" linenos = {} - line_increments = [ord(c) for c in code.co_lnotab[1::2]] + line_increments = [c if c < 128 else c - 256 + for c in map(ord, code.co_lnotab[1::2])] table_length = len(line_increments) docstring = False diff -r 0f9dcecd9d88 Objects/codeobject.c --- a/Objects/codeobject.c Wed Apr 30 18:19:55 2008 +0200 +++ b/Objects/codeobject.c Wed Apr 30 23:39:31 2008 +0200 @@ -629,7 +629,7 @@ addr += *p++; if (*p) bounds->ap_lower = addr; - line += *p++; + line += * (signed char *) p++; --size; } diff -r 0f9dcecd9d88 Python/ceval.c --- a/Python/ceval.c Wed Apr 30 18:19:55 2008 +0200 +++ b/Python/ceval.c Wed Apr 30 23:39:31 2008 +0200 @@ -631,8 +631,11 @@ #ifdef DYNAMIC_EXECUTION_PROFILE #define PREDICT(op) if (0) goto PRED_##op +#define PREDICT_WITH_SIGNALS(op) if 
(0) goto PRED_##op #else #define PREDICT(op) if (*next_instr == op) goto PRED_##op +#define PREDICT_WITH_SIGNALS(op) \ + if (*next_instr == op && --_Py_Ticker > 0) goto PRED_##op #endif #define PREDICTED(op) PRED_##op: next_instr++ @@ -754,8 +757,8 @@ prediction effectively links the two codes together as if they were a single new opcode; accordingly,f->f_lasti will point to the first code in the pair (for instance, GET_ITER followed by - FOR_ITER is effectively a single opcode and f->f_lasti will point - at to the beginning of the combined pair.) + JUMP_FORWARD is effectively a single opcode and f->f_lasti will + point to the beginning of the combined pair.) */ next_instr = first_instr + f->f_lasti + 1; stack_pointer = f->f_stacktop; @@ -1268,7 +1271,7 @@ Py_DECREF(v); Py_DECREF(w); if (err == 0) { - PREDICT(JUMP_ABSOLUTE); + PREDICT(FOR_ITER); continue; } break; @@ -2055,6 +2058,7 @@ Py_DECREF(w); SET_TOP(x); if (x == NULL) break; + PREDICT(JUMP_ABS_IF_FALSE); PREDICT(JUMP_IF_FALSE); PREDICT(JUMP_IF_TRUE); continue; @@ -2129,6 +2133,7 @@ if (x != NULL) continue; break; + PREDICTED_WITH_ARG(JUMP_FORWARD); case JUMP_FORWARD: JUMPBY(oparg); goto fast_next_opcode; @@ -2175,7 +2180,66 @@ break; continue; - PREDICTED_WITH_ARG(JUMP_ABSOLUTE); + PREDICTED_WITH_ARG(JUMP_ABS_IF_FALSE); + case JUMP_ABS_IF_FALSE: + w = POP(); + if (w == Py_True) { + Py_DECREF(w); + goto fast_next_opcode; + } + if (w == Py_False) { + Py_DECREF(w); + JUMPTO(oparg); +#if FAST_LOOPS + goto fast_next_opcode; +#else + continue; +#endif + } + err = PyObject_IsTrue(w); + Py_DECREF(w); + if (err > 0) + err = 0; + else if (err == 0) { + JUMPTO(oparg); +#if FAST_LOOPS + goto fast_next_opcode; +#endif + } + else + break; + continue; + + case JUMP_ABS_IF_TRUE: + w = POP(); + if (w == Py_False) { + Py_DECREF(w); + goto fast_next_opcode; + } + if (w == Py_True) { + Py_DECREF(w); + JUMPTO(oparg); +#if FAST_LOOPS + goto fast_next_opcode; +#else + continue; +#endif + } + err = PyObject_IsTrue(w); + 
Py_DECREF(w); + if (err > 0) { + err = 0; + JUMPTO(oparg); +#if FAST_LOOPS + goto fast_next_opcode; +#endif + } + else if (err == 0) + ; + else + break; + continue; + case JUMP_ABSOLUTE: JUMPTO(oparg); #if FAST_LOOPS @@ -2198,7 +2262,7 @@ Py_DECREF(v); if (x != NULL) { SET_TOP(x); - PREDICT(FOR_ITER); + PREDICT(JUMP_FORWARD); continue; } STACKADJ(-1); @@ -2211,7 +2275,10 @@ x = (*v->ob_type->tp_iternext)(v); if (x != NULL) { PUSH(x); - PREDICT(STORE_FAST); + JUMPTO(oparg); + /* STORE_FAST is a fast_next_opcode so we must be careful + about not blocking signals in an empty loop. */ + PREDICT_WITH_SIGNALS(STORE_FAST); PREDICT(UNPACK_SEQUENCE); continue; } @@ -2222,9 +2289,8 @@ PyErr_Clear(); } /* iterator ended normally */ - x = v = POP(); - Py_DECREF(v); - JUMPBY(oparg); + x = v = POP(); + Py_DECREF(v); continue; case BREAK_LOOP: diff -r 0f9dcecd9d88 Python/compile.c --- a/Python/compile.c Wed Apr 30 18:19:55 2008 +0200 +++ b/Python/compile.c Wed Apr 30 23:39:31 2008 +0200 @@ -830,6 +830,9 @@ case JUMP_IF_TRUE: case JUMP_ABSOLUTE: return 0; + case JUMP_ABS_IF_TRUE: + case JUMP_ABS_IF_FALSE: + return -1; case LOAD_GLOBAL: return 1; @@ -1489,12 +1492,10 @@ if (next == NULL) return 0; VISIT(c, expr, e->v.IfExp.test); - ADDOP_JREL(c, JUMP_IF_FALSE, next); - ADDOP(c, POP_TOP); + ADDOP_JABS(c, JUMP_ABS_IF_FALSE, next); VISIT(c, expr, e->v.IfExp.body); ADDOP_JREL(c, JUMP_FORWARD, end); compiler_use_next_block(c, next); - ADDOP(c, POP_TOP); VISIT(c, expr, e->v.IfExp.orelse); compiler_use_next_block(c, end); return 1; @@ -1582,29 +1583,32 @@ end = compiler_new_block(c); if (end == NULL) return 0; - next = compiler_new_block(c); - if (next == NULL) - return 0; - + constant = expr_constant(s->v.If.test); /* constant = 0: "if 0" * constant = 1: "if 1", "if 2", ... 
* constant = -1: rest */ if (constant == 0) { - if (s->v.If.orelse) + if (asdl_seq_LEN(s->v.If.orelse)) VISIT_SEQ(c, stmt, s->v.If.orelse); } else if (constant == 1) { VISIT_SEQ(c, stmt, s->v.If.body); } else { VISIT(c, expr, s->v.If.test); - ADDOP_JREL(c, JUMP_IF_FALSE, next); - ADDOP(c, POP_TOP); + if (asdl_seq_LEN(s->v.If.orelse)) { + next = compiler_new_block(c); + if (next == NULL) + return 0; + } + else + next = end; + ADDOP_JABS(c, JUMP_ABS_IF_FALSE, next); VISIT_SEQ(c, stmt, s->v.If.body); - ADDOP_JREL(c, JUMP_FORWARD, end); - compiler_use_next_block(c, next); - ADDOP(c, POP_TOP); - if (s->v.If.orelse) + if (asdl_seq_LEN(s->v.If.orelse)) { + ADDOP_JREL(c, JUMP_FORWARD, end); + compiler_use_next_block(c, next); VISIT_SEQ(c, stmt, s->v.If.orelse); + } } compiler_use_next_block(c, end); return 1; @@ -1613,26 +1617,34 @@ static int compiler_for(struct compiler *c, stmt_ty s) { - basicblock *start, *cleanup, *end; + basicblock *start, *body, *tail, *cleanup, *end; + int for_lineno; start = compiler_new_block(c); + body = compiler_new_block(c); + tail = compiler_new_block(c); cleanup = compiler_new_block(c); end = compiler_new_block(c); - if (start == NULL || end == NULL || cleanup == NULL) + if (start == NULL || body == NULL || tail == NULL + || end == NULL || cleanup == NULL) return 0; ADDOP_JREL(c, SETUP_LOOP, end); if (!compiler_push_fblock(c, LOOP, start)) return 0; VISIT(c, expr, s->v.For.iter); + for_lineno = c->u->u_lineno; ADDOP(c, GET_ITER); compiler_use_next_block(c, start); - /* for expressions must be traced on each iteration, - so we need to set an extra line number. */ - c->u->u_lineno_set = false; - ADDOP_JREL(c, FOR_ITER, cleanup); + ADDOP_JREL(c, JUMP_FORWARD, tail); + compiler_use_next_block(c, body); VISIT(c, expr, s->v.For.target); VISIT_SEQ(c, stmt, s->v.For.body); - ADDOP_JABS(c, JUMP_ABSOLUTE, start); + compiler_use_next_block(c, tail); + /* for expressions must be traced on each iteration, + so we need to set an extra line number. 
*/ + c->u->u_lineno_set = false; + c->u->u_lineno = for_lineno; + ADDOP_JABS(c, FOR_ITER, body); compiler_use_next_block(c, cleanup); ADDOP(c, POP_BLOCK); compiler_pop_fblock(c, LOOP, start); @@ -1644,7 +1656,9 @@ static int compiler_while(struct compiler *c, stmt_ty s) { - basicblock *loop, *orelse, *end, *anchor = NULL; + basicblock *loop, *orelse, *end; + /* These are unused if the condition is optimized into a constant */ + basicblock *body = NULL, *tail = NULL, *anchor = NULL; int constant = expr_constant(s->v.While.test); if (constant == 0) { @@ -1655,13 +1669,15 @@ loop = compiler_new_block(c); end = compiler_new_block(c); if (constant == -1) { + body = compiler_new_block(c); + tail = compiler_new_block(c); anchor = compiler_new_block(c); - if (anchor == NULL) + if (body == NULL || tail == NULL || anchor == NULL) return 0; } if (loop == NULL || end == NULL) return 0; - if (s->v.While.orelse) { + if (asdl_seq_LEN(s->v.While.orelse)) { orelse = compiler_new_block(c); if (orelse == NULL) return 0; @@ -1677,12 +1693,18 @@ /* while expressions must be traced on each iteration, so we need to set an extra line number. */ c->u->u_lineno_set = false; - VISIT(c, expr, s->v.While.test); - ADDOP_JREL(c, JUMP_IF_FALSE, anchor); - ADDOP(c, POP_TOP); + ADDOP_JREL(c, JUMP_FORWARD, tail); + compiler_use_next_block(c, body); } VISIT_SEQ(c, stmt, s->v.While.body); - ADDOP_JABS(c, JUMP_ABSOLUTE, loop); + if (constant == -1) { + compiler_use_next_block(c, tail); + VISIT(c, expr, s->v.While.test); + ADDOP_JABS(c, JUMP_ABS_IF_TRUE, body); + } + else { + ADDOP_JABS(c, JUMP_ABSOLUTE, loop); + } /* XXX should the two POP instructions be in a separate block if there is no else clause ? 
@@ -1690,7 +1712,6 @@ if (constant == -1) { compiler_use_next_block(c, anchor); - ADDOP(c, POP_TOP); ADDOP(c, POP_BLOCK); } compiler_pop_fblock(c, LOOP, loop); @@ -2603,24 +2624,23 @@ and then write to the element */ comprehension_ty l; - basicblock *start, *anchor, *skip, *if_cleanup; + basicblock *start, *body, *tail, *anchor; int i, n; start = compiler_new_block(c); - skip = compiler_new_block(c); - if_cleanup = compiler_new_block(c); + body = compiler_new_block(c); + tail = compiler_new_block(c); anchor = compiler_new_block(c); - if (start == NULL || skip == NULL || if_cleanup == NULL || - anchor == NULL) + if (start == NULL || body == NULL || tail == NULL || anchor == NULL) return 0; l = (comprehension_ty)asdl_seq_GET(generators, gen_index); VISIT(c, expr, l->iter); ADDOP(c, GET_ITER); compiler_use_next_block(c, start); - ADDOP_JREL(c, FOR_ITER, anchor); - NEXT_BLOCK(c); + ADDOP_JREL(c, JUMP_FORWARD, tail); + compiler_use_next_block(c, body); VISIT(c, expr, l->target); /* XXX this needs to be cleaned up...a lot! 
*/ @@ -2628,10 +2648,8 @@ for (i = 0; i < n; i++) { expr_ty e = (expr_ty)asdl_seq_GET(l->ifs, i); VISIT(c, expr, e); - ADDOP_JREL(c, JUMP_IF_FALSE, if_cleanup); - NEXT_BLOCK(c); - ADDOP(c, POP_TOP); - } + ADDOP_JABS(c, JUMP_ABS_IF_FALSE, tail); + } if (++gen_index < asdl_seq_LEN(generators)) if (!compiler_listcomp_generator(c, tmpname, @@ -2644,16 +2662,9 @@ return 0; VISIT(c, expr, elt); ADDOP(c, LIST_APPEND); - - compiler_use_next_block(c, skip); } - for (i = 0; i < n; i++) { - ADDOP_I(c, JUMP_FORWARD, 1); - if (i == 0) - compiler_use_next_block(c, if_cleanup); - ADDOP(c, POP_TOP); - } - ADDOP_JABS(c, JUMP_ABSOLUTE, start); + compiler_use_next_block(c, tail); + ADDOP_JABS(c, FOR_ITER, body); compiler_use_next_block(c, anchor); /* delete the temporary list name added to locals */ if (gen_index == 1) @@ -2692,17 +2703,15 @@ and then write to the element */ comprehension_ty ge; - basicblock *start, *anchor, *skip, *if_cleanup, *end; + basicblock *start, *body, *tail, *end; int i, n; start = compiler_new_block(c); - skip = compiler_new_block(c); - if_cleanup = compiler_new_block(c); - anchor = compiler_new_block(c); + body = compiler_new_block(c); + tail = compiler_new_block(c); end = compiler_new_block(c); - if (start == NULL || skip == NULL || if_cleanup == NULL || - anchor == NULL || end == NULL) + if (start == NULL || body == NULL || tail == NULL || end == NULL) return 0; ge = (comprehension_ty)asdl_seq_GET(generators, gen_index); @@ -2721,8 +2730,8 @@ ADDOP(c, GET_ITER); } compiler_use_next_block(c, start); - ADDOP_JREL(c, FOR_ITER, anchor); - NEXT_BLOCK(c); + ADDOP_JREL(c, JUMP_FORWARD, tail); + compiler_use_next_block(c, body); VISIT(c, expr, ge->target); /* XXX this needs to be cleaned up...a lot! 
*/ @@ -2730,10 +2739,9 @@ for (i = 0; i < n; i++) { expr_ty e = (expr_ty)asdl_seq_GET(ge->ifs, i); VISIT(c, expr, e); - ADDOP_JREL(c, JUMP_IF_FALSE, if_cleanup); + ADDOP_JABS(c, JUMP_ABS_IF_FALSE, tail); NEXT_BLOCK(c); - ADDOP(c, POP_TOP); - } + } if (++gen_index < asdl_seq_LEN(generators)) if (!compiler_genexp_generator(c, generators, gen_index, elt)) @@ -2744,18 +2752,9 @@ VISIT(c, expr, elt); ADDOP(c, YIELD_VALUE); ADDOP(c, POP_TOP); - - compiler_use_next_block(c, skip); } - for (i = 0; i < n; i++) { - ADDOP_I(c, JUMP_FORWARD, 1); - if (i == 0) - compiler_use_next_block(c, if_cleanup); - - ADDOP(c, POP_TOP); - } - ADDOP_JABS(c, JUMP_ABSOLUTE, start); - compiler_use_next_block(c, anchor); + compiler_use_next_block(c, tail); + ADDOP_JABS(c, FOR_ITER, body); ADDOP(c, POP_BLOCK); compiler_pop_fblock(c, LOOP, start); compiler_use_next_block(c, end); @@ -3521,12 +3520,11 @@ /* All about a_lnotab. -c_lnotab is an array of unsigned bytes disguised as a Python string. -It is used to map bytecode offsets to source code line #s (when needed -for tracebacks). +c_lnotab is an array of bytes disguised as a Python string. It is used +to map bytecode offsets to source code line #s (when needed for tracebacks). The array is conceptually a list of - (bytecode offset increment, line number increment) + (unsigned bytecode offset increment, signed line number increment) pairs. The details are important and delicate, best illustrated by example: byte code offset source code line number @@ -3541,14 +3539,17 @@ 0, 1, 6, 1, 44, 5, 300, 300, 11, 1 -The second trick is that an unsigned byte can't hold negative values, or -values larger than 255, so (a) there's a deep assumption that byte code -offsets and their corresponding line #s both increase monotonically, and (b) -if at least one column jumps by more than 255 from one row to the next, more -than one pair is written to the table. In case #b, there's no way to know -from looking at the table later how many were written. 
That's the delicate -part. A user of c_lnotab desiring to find the source line number -corresponding to a bytecode address A should do something like this +The second trick is that while bytecode offset increments are unsigned +bytes (byte code offsets are assumed to increase monotonically), line +number increments can be negative as well as positive (this is necessary +to support moving evaluation of a loop condition to the loop's end, saving an +unconditional jump). Thus, if either the bytecode increment jumps by more +than 255, or the line number increment jumps by more than 127 or less than +-127, more than one pair is written to the table. In this case there's no +way to know from looking at the table later how many were written. +That's the delicate part. A user of c_lnotab desiring to find the source +line number corresponding to a bytecode address A should do something like +this: lineno = addr = 0 for addr_incr, line_incr in c_lnotab: @@ -3561,28 +3562,33 @@ the line # increment in each pair generated must be 0 until the remaining addr increment is < 256. So, in the example above, assemble_lnotab (it used to be called com_set_lineno) should not (as was actually done until 2.2) -expand 300, 300 to 255, 255, 45, 45, - but to 255, 0, 45, 255, 0, 45. +expand 300, 300 to 255, 255, 45, 45, + but to 255, 0, 45, 127, 0, 127, 0, 46. */ + +#define MAX_LINENO 127 +#define MAX_BYTECODE 255 static int assemble_lnotab(struct assembler *a, struct instr *i) { - int d_bytecode, d_lineno; + int d_bytecode, d_lineno, d_abs_lineno, d_lineno_sign; int len; unsigned char *lnotab; d_bytecode = a->a_offset - a->a_lineno_off; d_lineno = i->i_lineno - a->a_lineno; + d_abs_lineno = abs(d_lineno); + d_lineno_sign = (d_lineno >= 0) ? 
1 : -1; assert(d_bytecode >= 0); - assert(d_lineno >= 0); + assert(d_abs_lineno >= 0); if(d_bytecode == 0 && d_lineno == 0) return 1; - if (d_bytecode > 255) { - int j, nbytes, ncodes = d_bytecode / 255; + if (d_bytecode > MAX_BYTECODE) { + int j, nbytes, ncodes = d_bytecode / MAX_BYTECODE; nbytes = a->a_lnotab_off + 2 * ncodes; len = PyString_GET_SIZE(a->a_lnotab); if (nbytes >= len) { @@ -3596,15 +3602,15 @@ lnotab = (unsigned char *) PyString_AS_STRING(a->a_lnotab) + a->a_lnotab_off; for (j = 0; j < ncodes; j++) { - *lnotab++ = 255; + *lnotab++ = MAX_BYTECODE; *lnotab++ = 0; } - d_bytecode -= ncodes * 255; + d_bytecode -= ncodes * MAX_BYTECODE; a->a_lnotab_off += ncodes * 2; } - assert(d_bytecode <= 255); - if (d_lineno > 255) { - int j, nbytes, ncodes = d_lineno / 255; + assert(d_bytecode <= MAX_BYTECODE); + if (d_abs_lineno > MAX_LINENO) { + int j, nbytes, ncodes = d_abs_lineno / MAX_LINENO; nbytes = a->a_lnotab_off + 2 * ncodes; len = PyString_GET_SIZE(a->a_lnotab); if (nbytes >= len) { @@ -3618,13 +3624,13 @@ lnotab = (unsigned char *) PyString_AS_STRING(a->a_lnotab) + a->a_lnotab_off; *lnotab++ = d_bytecode; - *lnotab++ = 255; + *lnotab++ = MAX_LINENO * d_lineno_sign; d_bytecode = 0; for (j = 1; j < ncodes; j++) { *lnotab++ = 0; - *lnotab++ = 255; + *lnotab++ = MAX_LINENO * d_lineno_sign; } - d_lineno -= ncodes * 255; + d_abs_lineno -= ncodes * MAX_LINENO; a->a_lnotab_off += ncodes * 2; } @@ -3639,11 +3645,11 @@ a->a_lnotab_off += 2; if (d_bytecode) { *lnotab++ = d_bytecode; - *lnotab++ = d_lineno; + *lnotab++ = d_abs_lineno * d_lineno_sign; } else { /* First line of a block; def stmt, etc. */ *lnotab++ = 0; - *lnotab++ = d_lineno; + *lnotab++ = d_abs_lineno * d_lineno_sign; } a->a_lineno = i->i_lineno; a->a_lineno_off = a->a_offset; diff -r 0f9dcecd9d88 Python/import.c --- a/Python/import.c Wed Apr 30 18:19:55 2008 +0200 +++ b/Python/import.c Wed Apr 30 23:39:31 2008 +0200 @@ -73,9 +73,12 @@ Python 2.5c2: 62131 (fix wrong code: for x, in ... 
in listcomp/genexp) Python 2.6a0: 62151 (peephole optimizations and STORE_MAP opcode) Python 2.6a1: 62161 (WITH_CLEANUP optimization) + Python 2.6a2: 62171 (FOR_ITER optimization, + introduction of JUMP_ABS_IF_TRUE, JUMP_ABS_IF_FALSE, + lnotab format allowing negative line increments) . */ -#define MAGIC (62161 | ((long)'\r'<<16) | ((long)'\n'<<24)) +#define MAGIC (62171 | ((long)'\r'<<16) | ((long)'\n'<<24)) /* Magic word as global; note that _PyImport_Init() can change the value of this global to accommodate for alterations of how the diff -r 0f9dcecd9d88 Python/peephole.c --- a/Python/peephole.c Wed Apr 30 18:19:55 2008 +0200 +++ b/Python/peephole.c Wed Apr 30 23:39:31 2008 +0200 @@ -13,7 +13,8 @@ #define GETARG(arr, i) ((int)((arr[i+2]<<8) + arr[i+1])) #define UNCONDITIONAL_JUMP(op) (op==JUMP_ABSOLUTE || op==JUMP_FORWARD) -#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP) +#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP || \ + op==FOR_ITER || op==JUMP_ABS_IF_TRUE || op==JUMP_ABS_IF_FALSE) #define GETJUMPTGT(arr, i) (GETARG(arr,i) + (ABSOLUTE_JUMP(arr[i]) ? 0 : i+3)) #define SETARG(arr, i, val) arr[i+2] = val>>8; arr[i+1] = val & 255 #define CODESIZE(op) (HAS_ARG(op) ? 
3 : 1) @@ -248,6 +249,8 @@ case JUMP_IF_FALSE: case JUMP_IF_TRUE: case JUMP_ABSOLUTE: + case JUMP_ABS_IF_TRUE: + case JUMP_ABS_IF_FALSE: case CONTINUE_LOOP: case SETUP_LOOP: case SETUP_EXCEPT: @@ -345,21 +348,31 @@ switch (opcode) { - /* Replace UNARY_NOT JUMP_IF_FALSE POP_TOP with - with JUMP_IF_TRUE POP_TOP */ + /* Replace UNARY_NOT JUMP_IF_FALSE POP_TOP + with JUMP_IF_TRUE POP_TOP + and UNARY_NOT JUMP_ABS_IF_FALSE + with JUMP_ABS_IF_TRUE + */ case UNARY_NOT: - if (codestr[i+1] != JUMP_IF_FALSE || - codestr[i+4] != POP_TOP || - !ISBASICBLOCK(blocks,i,5)) - continue; - tgt = GETJUMPTGT(codestr, (i+1)); - if (codestr[tgt] != POP_TOP) - continue; - j = GETARG(codestr, i+1) + 1; - codestr[i] = JUMP_IF_TRUE; - SETARG(codestr, i, j); - codestr[i+3] = POP_TOP; - codestr[i+4] = NOP; + if (codestr[i+1] == JUMP_IF_FALSE && + codestr[i+4] == POP_TOP && + ISBASICBLOCK(blocks,i,5)) { + tgt = GETJUMPTGT(codestr, (i+1)); + if (codestr[tgt] != POP_TOP) + continue; + j = GETARG(codestr, i+1) + 1; + codestr[i] = JUMP_IF_TRUE; + SETARG(codestr, i, j); + codestr[i+3] = POP_TOP; + codestr[i+4] = NOP; + } + else if (codestr[i+1] == JUMP_ABS_IF_FALSE && + ISBASICBLOCK(blocks,i,4)) { + j = GETARG(codestr, i+1); + codestr[i] = JUMP_ABS_IF_TRUE; + SETARG(codestr, i, j); + codestr[i+3] = NOP; + } break; /* not a is b --> a is not b @@ -590,12 +603,14 @@ continue; case JUMP_ABSOLUTE: + case JUMP_ABS_IF_TRUE: + case JUMP_ABS_IF_FALSE: case CONTINUE_LOOP: + case FOR_ITER: j = addrmap[GETARG(codestr, i)]; SETARG(codestr, i, j); break; - case FOR_ITER: case JUMP_FORWARD: case JUMP_IF_FALSE: case JUMP_IF_TRUE: