diff -r 336137a359ae Python/compile.c --- a/Python/compile.c Fri Mar 11 10:27:14 2011 -0500 +++ b/Python/compile.c Mon Mar 14 23:22:44 2011 -0400 @@ -287,6 +287,9 @@ c.c_optimize = (optimize == -1) ? Py_OptimizeFlag : optimize; c.c_nestlevel = 0; + if (!_PyAST_Optimize(mod, arena)) + goto finally; + c.c_st = PySymtable_Build(mod, filename, c.c_future); if (c.c_st == NULL) { if (!PyErr_Occurred()) @@ -996,15 +999,19 @@ static int compiler_addop_i(struct compiler *c, int opcode, int oparg) { + basicblock *b; struct instr *i; int off; off = compiler_next_instr(c, c->u->u_curblock); if (off < 0) return 0; - i = &c->u->u_curblock->b_instr[off]; + b = c->u->u_curblock; + i = &b->b_instr[off]; i->i_opcode = opcode; i->i_oparg = oparg; i->i_hasarg = 1; + if (opcode == RAISE_VARARGS) + b->b_return = 1; compiler_set_lineno(c, off); return 1; } @@ -1132,33 +1139,23 @@ } \ } +/* Compile a sequence of statements, setting a docstring. */ + static int -compiler_isdocstring(stmt_ty s) +compiler_body(struct compiler *c, asdl_seq *stmts, string docstring) { - if (s->kind != Expr_kind) - return 0; - return s->v.Expr.value->kind == Str_kind; -} - -/* Compile a sequence of statements, checking for a docstring. */ - -static int -compiler_body(struct compiler *c, asdl_seq *stmts) -{ - int i = 0; + int i; stmt_ty st; + /* if not -OO mode, set docstring */ + if (c->c_optimize < 2 && docstring) { + ADDOP_O(c, LOAD_CONST, docstring, consts); + ADDOP_NAME(c, STORE_NAME, __doc__, names); + } if (!asdl_seq_LEN(stmts)) return 1; st = (stmt_ty)asdl_seq_GET(stmts, 0); - if (compiler_isdocstring(st) && c->c_optimize < 2) { - /* don't generate docstrings if -OO */ - i = 1; - VISIT(c, expr, st->v.Expr.value); - if (!compiler_nameop(c, __doc__, Store)) - return 0; - } - for (; i < asdl_seq_LEN(stmts); i++) + for (i = 0; i < asdl_seq_LEN(stmts); i++) VISIT(c, stmt, (stmt_ty)asdl_seq_GET(stmts, i)); return 1; } @@ -1179,7 +1176,7 @@ return NULL; switch (mod->kind) { case Module_kind: - if (!compiler_body(c, mod->v.Module.body)) { + if (!compiler_body(c, mod->v.Module.body, mod->v.Module.docstring)) { compiler_exit_scope(c); return 0; } @@ -1433,12 +1430,12 @@ compiler_function(struct compiler *c, stmt_ty s) { PyCodeObject *co; - PyObject *first_const = Py_None; + PyObject *docstring = Py_None; arguments_ty args = s->v.FunctionDef.args; expr_ty returns = s->v.FunctionDef.returns; asdl_seq* decos = s->v.FunctionDef.decorator_list; stmt_ty st; - int i, n, docstring, kw_default_count = 0, arglength; + int i, n, kw_default_count = 0, arglength; int num_annotations; assert(s->kind == FunctionDef_kind); @@ -1464,10 +1461,10 @@ return 0; st = (stmt_ty)asdl_seq_GET(s->v.FunctionDef.body, 0); - docstring = compiler_isdocstring(st); - if (docstring && c->c_optimize < 2) - first_const = st->v.Expr.value->v.Str.s; - if (compiler_add_o(c, c->u->u_consts, first_const) < 0) { + /* if not -OO mode, add docstring */ + if (c->c_optimize < 2 && s->v.FunctionDef.docstring) + docstring = s->v.FunctionDef.docstring; + if (compiler_add_o(c, c->u->u_consts, docstring) < 0) { compiler_exit_scope(c); return 0; } @@ -1475,8 +1472,7 @@ c->u->u_argcount = asdl_seq_LEN(args->args); c->u->u_kwonlyargcount = asdl_seq_LEN(args->kwonlyargs); n = asdl_seq_LEN(s->v.FunctionDef.body); - /* if there was a docstring, we need to skip the first statement */ - for (i = docstring; i < n; i++) { + for (i = 0; i < n; i++) { st = (stmt_ty)asdl_seq_GET(s->v.FunctionDef.body, i); VISIT_IN_SCOPE(c, stmt, st); } @@ -1554,7 +1550,7 @@ } Py_DECREF(str); /* compile the body proper */ - if (!compiler_body(c, s->v.ClassDef.body)) { + if (!compiler_body(c, s->v.ClassDef.body, s->v.ClassDef.docstring)) { compiler_exit_scope(c); return 0; } @@ -1614,20 +1610,104 @@ return 1; } +/* Last jump needs special handling, since we're going to fall through + into 'next' block. If that's where we were going to jump, reverse + condition and jump to alternative instead. */ +static int +compiler_cond_jump(struct compiler *c, int if_true, + basicblock *target, basicblock *alt_target, + basicblock *next) +{ + int opcode; + + if (target == next) { + if_true = !if_true; + target = alt_target; + } + opcode = if_true ? POP_JUMP_IF_TRUE : POP_JUMP_IF_FALSE; + ADDOP_JABS(c, opcode, target); + return 1; +} + +static int +compiler_condition(struct compiler *c, expr_ty cond, + basicblock *if_true, basicblock *if_false, + basicblock *next) +{ + int i, op, len; + asdl_seq *vals; + basicblock *alt; + + /* Optimize logical operations inside condition. */ + switch (cond->kind) { + case UnaryOp_kind: + if (cond->v.UnaryOp.op == Not) + return compiler_condition(c, cond->v.UnaryOp.operand, + if_false, if_true, next); + break; + + case BoolOp_kind: + alt = NULL; + op = cond->v.BoolOp.op == And; + vals = cond->v.BoolOp.values; + len = asdl_seq_LEN(vals); + for (i = 0; i < len; i++) { + expr_ty e; + basicblock *t, *f; + + if (alt) + compiler_use_next_block(c, alt); + /* Make new 'next' block, unless we're at the last expr. */ + if (i < len-1) { + alt = compiler_new_block(c); + if (alt == NULL) + return 0; + } + else alt = next; + /* AND: if true try next expr, if false go to parent's false + OR: if false try next expr, if true go to parent's true. */ + if (op) { + t = (alt == next) ? if_true : alt; + f = if_false; + } + else { + t = if_true; + f = (alt == next) ? if_false : alt; + } + e = (expr_ty)asdl_seq_GET(vals, i); + if (!compiler_condition(c, e, t, f, alt)) + return 0; + } + return 1; + + default: + break; + } + + /* Fall back to simple handling. */ + VISIT(c, expr, cond); + return compiler_cond_jump(c, 0, if_false, if_true, next); +} + + static int compiler_ifexp(struct compiler *c, expr_ty e) { - basicblock *end, *next; + basicblock *end, *then, *next; assert(e->kind == IfExp_kind); end = compiler_new_block(c); if (end == NULL) return 0; + then = compiler_new_block(c); + if (then == NULL) + return 0; next = compiler_new_block(c); if (next == NULL) return 0; - VISIT(c, expr, e->v.IfExp.test); - ADDOP_JABS(c, POP_JUMP_IF_FALSE, next); + if (!compiler_condition(c, e->v.IfExp.test, then, next, then)) + return 0; + compiler_use_next_block(c, then); VISIT(c, expr, e->v.IfExp.body); ADDOP_JREL(c, JUMP_FORWARD, end); compiler_use_next_block(c, next); @@ -1692,7 +1772,7 @@ static int compiler_if(struct compiler *c, stmt_ty s) { - basicblock *end, *next; + basicblock *then, *end, *next; int constant; assert(s->kind == If_kind); end = compiler_new_block(c); @@ -1709,6 +1789,9 @@ } else if (constant == 1) { VISIT_SEQ(c, stmt, s->v.If.body); } else { + then = compiler_new_block(c); + if (then == NULL) + return 0; if (s->v.If.orelse) { next = compiler_new_block(c); if (next == NULL) @@ -1716,11 +1799,12 @@ } else next = end; - VISIT(c, expr, s->v.If.test); - ADDOP_JABS(c, POP_JUMP_IF_FALSE, next); + if (!compiler_condition(c, s->v.If.test, then, next, then)) + return 0; + compiler_use_next_block(c, then); VISIT_SEQ(c, stmt, s->v.If.body); - ADDOP_JREL(c, JUMP_FORWARD, end); if (s->v.If.orelse) { + ADDOP_JREL(c, JUMP_FORWARD, end); compiler_use_next_block(c, next); VISIT_SEQ(c, stmt, s->v.If.orelse); } @@ -1760,7 +1844,7 @@ static int compiler_while(struct compiler *c, stmt_ty s) { - basicblock *loop, *orelse, *end, *anchor = NULL; + basicblock *loop, *orelse, *end, *body = NULL, *anchor = NULL; int constant = expr_constant(c, s->v.While.test); if (constant == 0) { @@ -1771,8 +1855,9 @@ loop = compiler_new_block(c); end = compiler_new_block(c); if (constant == -1) { + body = compiler_new_block(c); anchor = compiler_new_block(c); - if (anchor == NULL) + if (body == NULL || anchor == NULL) return 0; } if (loop == NULL || end == NULL) @@ -1790,8 +1875,9 @@ if (!compiler_push_fblock(c, LOOP, loop)) return 0; if (constant == -1) { - VISIT(c, expr, s->v.While.test); - ADDOP_JABS(c, POP_JUMP_IF_FALSE, anchor); + if (!compiler_condition(c, s->v.While.test, body, anchor, body)) + return 0; + compiler_use_next_block(c, body); } VISIT_SEQ(c, stmt, s->v.While.body); ADDOP_JABS(c, JUMP_ABSOLUTE, loop); @@ -2319,8 +2405,7 @@ VISIT(c, expr, s->v.Expr.value); ADDOP(c, PRINT_EXPR); } - else if (s->v.Expr.value->kind != Str_kind && - s->v.Expr.value->kind != Num_kind) { + else if (s->v.Expr.value->kind != Lit_kind) { VISIT(c, expr, s->v.Expr.value); ADDOP(c, POP_TOP); } @@ -2604,6 +2689,41 @@ return 1; } +#define LAST_INSTR(C) \ + ((C)->u->u_curblock->b_iused > 0 ? \ + &(C)->u->u_curblock->b_instr[(C)->u->u_curblock->b_iused-1] : \ + NULL) + +static int +compiler_skip_unpack(struct compiler *c, int n) +{ + /* Skip over BUILD_SEQN 1 UNPACK_SEQN 1. + Replace BUILD_SEQN 2 UNPACK_SEQN 2 with ROT2. + Replace BUILD_SEQN 3 UNPACK_SEQN 3 with ROT3 ROT2. */ + struct instr *in; + + if (n < 1 || n > 3 || + (in = LAST_INSTR(c)) == NULL) + return 0; + + /* NOTE: do NOT do this for SET, set deletes duplicate + values, while replacement code doesn't. */ + if ((in->i_opcode != BUILD_TUPLE && + in->i_opcode != BUILD_LIST) || + in->i_oparg != n) + return 0; + + if (n == 1) + c->u->u_curblock->b_iused -= 1; + else { + in->i_opcode = n == 2 ? ROT_TWO : ROT_THREE; + in->i_hasarg = 0; + if (n == 3) + ADDOP(c, ROT_TWO); + } + return 1; +} + static int compiler_list(struct compiler *c, expr_ty e) { @@ -2627,7 +2747,8 @@ } } if (!seen_star) { - ADDOP_I(c, UNPACK_SEQUENCE, n); + if (!compiler_skip_unpack(c, n)) + ADDOP_I(c, UNPACK_SEQUENCE, n); } } VISIT_SEQ(c, expr, e->v.List.elts); @@ -2660,7 +2781,8 @@ } } if (!seen_star) { - ADDOP_I(c, UNPACK_SEQUENCE, n); + if (!compiler_skip_unpack(c, n)) + ADDOP_I(c, UNPACK_SEQUENCE, n); } } VISIT_SEQ(c, expr, e->v.Tuple.elts); @@ -2825,10 +2947,16 @@ /* XXX this needs to be cleaned up...a lot! */ n = asdl_seq_LEN(gen->ifs); for (i = 0; i < n; i++) { - expr_ty e = (expr_ty)asdl_seq_GET(gen->ifs, i); - VISIT(c, expr, e); - ADDOP_JABS(c, POP_JUMP_IF_FALSE, if_cleanup); - NEXT_BLOCK(c); + expr_ty e; + basicblock *next; + + next = compiler_new_block(c); + if (next == NULL) + return 0; + e = (expr_ty)asdl_seq_GET(gen->ifs, i); + if (!compiler_condition(c, e, next, if_cleanup, next)) + return 0; + compiler_use_next_block(c, next); } if (++gen_index < asdl_seq_LEN(generators)) @@ -3015,27 +3143,7 @@ static int expr_constant(struct compiler *c, expr_ty e) { - char *id; - switch (e->kind) { - case Ellipsis_kind: - return 1; - case Num_kind: - return PyObject_IsTrue(e->v.Num.n); - case Str_kind: - return PyObject_IsTrue(e->v.Str.s); - case Name_kind: - /* optimize away names that can't be reassigned */ - id = PyBytes_AS_STRING( - _PyUnicode_AsDefaultEncodedString(e->v.Name.id)); - if (strcmp(id, "True") == 0) return 1; - if (strcmp(id, "False") == 0) return 0; - if (strcmp(id, "None") == 0) return 0; - if (strcmp(id, "__debug__") == 0) - return ! c->c_optimize; - /* fall through */ - default: - return -1; - } + return e->kind == Lit_kind ? PyObject_IsTrue(e->v.Lit.v) : -1; } /* @@ -3183,17 +3291,8 @@ return compiler_compare(c, e); case Call_kind: return compiler_call(c, e); - case Num_kind: - ADDOP_O(c, LOAD_CONST, e->v.Num.n, consts); - break; - case Str_kind: - ADDOP_O(c, LOAD_CONST, e->v.Str.s, consts); - break; - case Bytes_kind: - ADDOP_O(c, LOAD_CONST, e->v.Bytes.s, consts); - break; - case Ellipsis_kind: - ADDOP_O(c, LOAD_CONST, Py_Ellipsis, consts); + case Lit_kind: + ADDOP_O(c, LOAD_CONST, e->v.Lit.v, consts); break; /* The following exprs can be assignment targets. */ case Attribute_kind: @@ -3661,6 +3760,62 @@ return size; } +static void +optimize_jumps(struct compiler *c) +{ + basicblock *b = c->u->u_blocks; + while (b) { + int i; + for (i = 0; i < b->b_iused; i++) { + struct instr *instr = &b->b_instr[i]; + /* Turn unconditional jumps to RETURN into RETURNs. */ + if ((instr->i_opcode == JUMP_FORWARD || + instr->i_opcode == JUMP_ABSOLUTE) && + instr->i_target->b_iused > 0 && + instr->i_target->b_instr[0].i_opcode == RETURN_VALUE) { + instr->i_opcode = RETURN_VALUE; + instr->i_target = 0; + instr->i_hasarg = instr->i_jabs = instr->i_jrel = 0; + b->b_return = 1; + } + /* Retarget jumps to unconditional jumps. */ + if (instr->i_opcode == JUMP_FORWARD || + instr->i_opcode == JUMP_ABSOLUTE || + instr->i_opcode == POP_JUMP_IF_FALSE || + instr->i_opcode == POP_JUMP_IF_TRUE || + instr->i_opcode == JUMP_IF_FALSE_OR_POP || + instr->i_opcode == JUMP_IF_TRUE_OR_POP) { + for (;;) { + struct instr *target; + if (instr->i_target->b_iused == 0) + break; + target = &instr->i_target->b_instr[0]; + if (instr == target || + (target->i_opcode != JUMP_FORWARD && + target->i_opcode != JUMP_ABSOLUTE)) + break; + instr->i_target = target->i_target; + /* Change relative jump to absolute. */ + if (instr->i_opcode == JUMP_FORWARD) { + instr->i_opcode = JUMP_ABSOLUTE; + instr->i_jabs = 1; + instr->i_jrel = 0; + } + } + } + /* Code after unconditional jumps is dead. */ + if (instr->i_opcode == JUMP_FORWARD || + instr->i_opcode == JUMP_ABSOLUTE || + instr->i_opcode == RETURN_VALUE || + instr->i_opcode == RAISE_VARARGS) { + b->b_iused = i + 1; + break; + } + } + b = b->b_list; + } +} + /* Appends a pair to the end of the line number table, a_lnotab, representing the instruction's bytecode offset and line number. See Objects/lnotab_notes.txt for the description of the line number table. */ @@ -3931,13 +4086,12 @@ PyObject *name = NULL; PyObject *freevars = NULL; PyObject *cellvars = NULL; - PyObject *bytecode = NULL; int nlocals, flags; tmp = dict_keys_inorder(c->u->u_consts, 0); if (!tmp) goto error; - consts = PySequence_List(tmp); /* optimize_code requires a list */ + consts = PySequence_Tuple(tmp); Py_DECREF(tmp); names = dict_keys_inorder(c->u->u_names, 0); @@ -3960,19 +4114,9 @@ if (flags < 0) goto error; - bytecode = PyCode_Optimize(a->a_bytecode, consts, names, a->a_lnotab); - if (!bytecode) - goto error; - - tmp = PyList_AsTuple(consts); /* PyCode_New requires a tuple */ - if (!tmp) - goto error; - Py_DECREF(consts); - consts = tmp; - co = PyCode_New(c->u->u_argcount, c->u->u_kwonlyargcount, nlocals, stackdepth(c), flags, - bytecode, consts, names, varnames, + a->a_bytecode, consts, names, varnames, freevars, cellvars, filename, c->u->u_name, c->u->u_firstlineno, @@ -3985,7 +4129,6 @@ Py_XDECREF(name); Py_XDECREF(freevars); Py_XDECREF(cellvars); - Py_XDECREF(bytecode); return co; } @@ -4032,6 +4175,8 @@ int i, j, nblocks; PyCodeObject *co = NULL; + optimize_jumps(c); + /* Make sure every block that falls off the end returns None. XXX NEXT_BLOCK() isn't quite right, because if the last block ends with a jump or return b_next shouldn't set.