diff -r 69c0aa8a8185 -r 031d992bdcea Include/code.h
--- a/Include/code.h	Thu Jun 09 16:30:29 2016 +0300
+++ b/Include/code.h	Fri Jun 10 21:50:50 2016 +0300
@@ -7,6 +7,19 @@
 extern "C" {
 #endif
 
+#if SIZEOF_SHORT == 2
+typedef unsigned short _Py_CODEUNIT;
+#else
+#error "Could not find a proper typedef for _Py_CODEUNIT"
+#endif
+#ifdef WORDS_BIGENDIAN
+#define _Py_OPCODE(word) ((word) >> 8)
+#define _Py_OPARG(word) ((word) & 255)
+#else
+#define _Py_OPCODE(word) ((word) & 255)
+#define _Py_OPARG(word) ((word) >> 8)
+#endif
+
 /* Bytecode object */
 typedef struct {
     PyObject_HEAD
diff -r 69c0aa8a8185 -r 031d992bdcea Objects/frameobject.c
--- a/Objects/frameobject.c	Thu Jun 09 16:30:29 2016 +0300
+++ b/Objects/frameobject.c	Fri Jun 10 21:50:50 2016 +0300
@@ -189,7 +189,7 @@ frame_setlineno(PyFrameObject *f, PyObje
     memset(blockstack, '\0', sizeof(blockstack));
     memset(in_finally, '\0', sizeof(in_finally));
     blockstack_top = 0;
-    for (addr = 0; addr < code_len; addr += 2) {
+    for (addr = 0; addr < code_len; addr += sizeof(_Py_CODEUNIT)) {
         unsigned char op = code[addr];
         switch (op) {
         case SETUP_LOOP:
@@ -273,7 +273,7 @@ frame_setlineno(PyFrameObject *f, PyObje
      * can tell whether the jump goes into any blocks without coming out
      * again - in that case we raise an exception below. */
     delta_iblock = 0;
-    for (addr = min_addr; addr < max_addr; addr += 2) {
+    for (addr = min_addr; addr < max_addr; addr += sizeof(_Py_CODEUNIT)) {
         unsigned char op = code[addr];
         switch (op) {
         case SETUP_LOOP:
diff -r 69c0aa8a8185 -r 031d992bdcea Objects/genobject.c
--- a/Objects/genobject.c	Thu Jun 09 16:30:29 2016 +0300
+++ b/Objects/genobject.c	Fri Jun 10 21:50:50 2016 +0300
@@ -277,7 +277,7 @@ PyObject *
     PyObject *bytecode = f->f_code->co_code;
     unsigned char *code = (unsigned char *)PyBytes_AS_STRING(bytecode);
 
-    if (code[f->f_lasti + 2] != YIELD_FROM)
+    if (code[f->f_lasti + sizeof(_Py_CODEUNIT)] != YIELD_FROM)
         return NULL;
     yf = f->f_stacktop[-1];
     Py_INCREF(yf);
@@ -376,7 +376,7 @@ gen_throw(PyGenObject *gen, PyObject *ar
             assert(ret == yf);
             Py_DECREF(ret);
             /* Termination repetition of YIELD_FROM */
-            gen->gi_frame->f_lasti += 2;
+            gen->gi_frame->f_lasti += sizeof(_Py_CODEUNIT);
             if (_PyGen_FetchStopIterationValue(&val) == 0) {
                 ret = gen_send_ex(gen, val, 0, 0);
                 Py_DECREF(val);
diff -r 69c0aa8a8185 -r 031d992bdcea Python/ceval.c
--- a/Python/ceval.c	Thu Jun 09 16:30:29 2016 +0300
+++ b/Python/ceval.c	Fri Jun 10 21:50:50 2016 +0300
@@ -144,7 +144,7 @@ static int import_all_from(PyObject *, P
 static void format_exc_check_arg(PyObject *, const char *, PyObject *);
 static void format_exc_unbound(PyCodeObject *co, int oparg);
 static PyObject * unicode_concatenate(PyObject *, PyObject *,
-                                      PyFrameObject *, const unsigned short *);
+                                      PyFrameObject *, const _Py_CODEUNIT *);
 static PyObject * special_lookup(PyObject *, _Py_Identifier *);
 
 #define NAME_ERROR_MSG \
@@ -800,7 +800,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int
     int lastopcode = 0;
 #endif
     PyObject **stack_pointer;  /* Next free slot in value stack */
-    const unsigned short *next_instr;
+    const _Py_CODEUNIT *next_instr;
     int opcode;        /* Current opcode */
     int oparg;         /* Current opcode argument, if any */
     enum why_code why; /* Reason for block stack unwind */
@@ -818,7 +818,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int
        initial values are such as to make this false the first
        time it is tested. */
     int instr_ub = -1, instr_lb = 0, instr_prev = -1;
-    const unsigned short *first_instr;
+    const _Py_CODEUNIT *first_instr;
     PyObject *names;
     PyObject *consts;
 
@@ -979,23 +979,16 @@ PyEval_EvalFrameEx(PyFrameObject *f, int
 
 /* Code access macros */
 
-#ifdef WORDS_BIGENDIAN
-    #define OPCODE(word) ((word) >> 8)
-    #define OPARG(word) ((word) & 255)
-#else
-    #define OPCODE(word) ((word) & 255)
-    #define OPARG(word) ((word) >> 8)
-#endif
 /* The integer overflow is checked by an assertion below. */
-#define INSTR_OFFSET() (2*(int)(next_instr - first_instr))
+#define INSTR_OFFSET() (sizeof(_Py_CODEUNIT) * (int)(next_instr - first_instr))
 #define NEXTOPARG() do { \
-        unsigned short word = *next_instr; \
-        opcode = OPCODE(word); \
-        oparg = OPARG(word); \
+        _Py_CODEUNIT word = *next_instr; \
+        opcode = _Py_OPCODE(word); \
+        oparg = _Py_OPARG(word); \
         next_instr++; \
     } while (0)
-#define JUMPTO(x) (next_instr = first_instr + (x)/2)
-#define JUMPBY(x) (next_instr += (x)/2)
+#define JUMPTO(x) (next_instr = first_instr + (x) / sizeof(_Py_CODEUNIT))
+#define JUMPBY(x) (next_instr += (x) / sizeof(_Py_CODEUNIT))
 
 /* OpCode prediction macros
     Some opcodes tend to come in pairs thus making it possible to
@@ -1029,10 +1022,10 @@ PyEval_EvalFrameEx(PyFrameObject *f, int
 #else
 #define PREDICT(op) \
     do{ \
-        unsigned short word = *next_instr; \
-        opcode = OPCODE(word); \
+        _Py_CODEUNIT word = *next_instr; \
+        opcode = _Py_OPCODE(word); \
         if (opcode == op){ \
-            oparg = OPARG(word); \
+            oparg = _Py_OPARG(word); \
             next_instr++; \
             goto PRED_##op; \
         } \
@@ -1169,9 +1162,9 @@ PyEval_EvalFrameEx(PyFrameObject *f, int
     freevars = f->f_localsplus + co->co_nlocals;
     assert(PyBytes_Check(co->co_code));
     assert(PyBytes_GET_SIZE(co->co_code) <= INT_MAX);
-    assert(PyBytes_GET_SIZE(co->co_code) % 2 == 0);
-    assert(_Py_IS_ALIGNED(PyBytes_AS_STRING(co->co_code), 2));
-    first_instr = (unsigned short*) PyBytes_AS_STRING(co->co_code);
+    assert(PyBytes_GET_SIZE(co->co_code) % sizeof(_Py_CODEUNIT) == 0);
+    assert(_Py_IS_ALIGNED(PyBytes_AS_STRING(co->co_code), sizeof(_Py_CODEUNIT)));
+    first_instr = (_Py_CODEUNIT *) PyBytes_AS_STRING(co->co_code);
     /* f->f_lasti refers to the index of the last instruction,
        unless it's -1 in which case next_instr should be first_instr.
 
@@ -1187,10 +1180,11 @@ PyEval_EvalFrameEx(PyFrameObject *f, int
        FOR_ITER is effectively a single opcode and f->f_lasti will point
        to the beginning of the combined pair.)
     */
+    assert(f->f_lasti >= -1);
     next_instr = first_instr;
     if (f->f_lasti >= 0) {
-        assert(f->f_lasti % 2 == 0);
-        next_instr += f->f_lasti/2 + 1;
+        assert(f->f_lasti % sizeof(_Py_CODEUNIT) == 0);
+        next_instr += f->f_lasti / sizeof(_Py_CODEUNIT) + 1;
     }
     stack_pointer = f->f_stacktop;
     assert(stack_pointer != NULL);
@@ -1255,7 +1249,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int
                    Py_MakePendingCalls() above. */
 
                 if (_Py_atomic_load_relaxed(&eval_breaker)) {
-                    if (OPCODE(*next_instr) == SETUP_FINALLY) {
+                    if (_Py_OPCODE(*next_instr) == SETUP_FINALLY) {
                         /* Make the last opcode before
                            a try: finally: block uninterruptible. */
                         goto fast_next_opcode;
@@ -2113,7 +2107,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int
             f->f_stacktop = stack_pointer;
             why = WHY_YIELD;
             /* and repeat... */
-            f->f_lasti -= 2;
+            f->f_lasti -= sizeof(_Py_CODEUNIT);
             goto fast_yield;
         }
 
@@ -5339,7 +5333,7 @@ format_exc_unbound(PyCodeObject *co, int
 
 static PyObject *
 unicode_concatenate(PyObject *v, PyObject *w,
-                    PyFrameObject *f, const unsigned short *next_instr)
+                    PyFrameObject *f, const _Py_CODEUNIT *next_instr)
 {
     PyObject *res;
     if (Py_REFCNT(v) == 2) {
diff -r 69c0aa8a8185 -r 031d992bdcea Python/compile.c
--- a/Python/compile.c	Thu Jun 09 16:30:29 2016 +0300
+++ b/Python/compile.c	Fri Jun 10 21:50:50 2016 +0300
@@ -4420,7 +4420,7 @@ assemble_lnotab(struct assembler *a, str
     Py_ssize_t len;
     unsigned char *lnotab;
 
-    d_bytecode = a->a_offset - a->a_lineno_off;
+    d_bytecode = (a->a_offset - a->a_lineno_off) * sizeof(_Py_CODEUNIT);
     d_lineno = i->i_lineno - a->a_lineno;
 
     assert(d_bytecode >= 0);
@@ -4527,21 +4527,21 @@ assemble_emit(struct assembler *a, struc
 {
     int size, arg = 0;
     Py_ssize_t len = PyBytes_GET_SIZE(a->a_bytecode);
-    char *code;
+    _Py_CODEUNIT *code;
 
     arg = i->i_oparg;
     size = instrsize(arg);
     if (i->i_lineno && !assemble_lnotab(a, i))
         return 0;
-    if (a->a_offset + size >= len) {
+    if (a->a_offset + size >= len / (int)sizeof(_Py_CODEUNIT)) {
         if (len > PY_SSIZE_T_MAX / 2)
             return 0;
         if (_PyBytes_Resize(&a->a_bytecode, len * 2) < 0)
             return 0;
     }
-    code = PyBytes_AS_STRING(a->a_bytecode) + a->a_offset;
+    code = (_Py_CODEUNIT *)PyBytes_AS_STRING(a->a_bytecode) + a->a_offset;
     a->a_offset += size;
-    write_op_arg((unsigned char*)code, i->i_opcode, arg, size);
+    write_op_arg(code, i->i_opcode, arg, size);
     return 1;
 }
 
@@ -4578,6 +4578,7 @@ assemble_jump_offsets(struct assembler *
                 if (instr->i_jrel) {
                     instr->i_oparg -= bsize;
                 }
+                instr->i_oparg *= sizeof(_Py_CODEUNIT);
                 if (instrsize(instr->i_oparg) != isize) {
                     extended_arg_recompile = 1;
                 }
@@ -4819,7 +4820,7 @@ assemble(struct compiler *c, int addNone
 
     if (_PyBytes_Resize(&a.a_lnotab, a.a_lnotab_off) < 0)
         goto error;
-    if (_PyBytes_Resize(&a.a_bytecode, a.a_offset) < 0)
+    if (_PyBytes_Resize(&a.a_bytecode, a.a_offset * sizeof(_Py_CODEUNIT)) < 0)
         goto error;
 
     co = makecode(c, &a);
diff -r 69c0aa8a8185 -r 031d992bdcea Python/peephole.c
--- a/Python/peephole.c	Thu Jun 09 16:30:29 2016 +0300
+++ b/Python/peephole.c	Fri Jun 10 21:50:50 2016 +0300
@@ -17,7 +17,8 @@
                 || op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \
                 || op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP)
 #define JUMPS_ON_TRUE(op) (op==POP_JUMP_IF_TRUE || op==JUMP_IF_TRUE_OR_POP)
-#define GETJUMPTGT(arr, i) (get_arg(arr, i) + (ABSOLUTE_JUMP(arr[i]) ? 0 : i+2))
+#define GETJUMPTGT(arr, i) (get_arg(arr, i) / sizeof(_Py_CODEUNIT) + \
+        (ABSOLUTE_JUMP(_Py_OPCODE(arr[i])) ? 0 : i+1))
 #define ISBASICBLOCK(blocks, start, end) \
     (blocks[start]==blocks[end])
 
@@ -40,7 +41,7 @@
 
 #define CONST_STACK_PUSH_OP(i) do { \
     PyObject *_x; \
-    assert(codestr[i] == LOAD_CONST); \
+    assert(_Py_OPCODE(codestr[i]) == LOAD_CONST); \
    assert(PyList_GET_SIZE(consts) > (Py_ssize_t)get_arg(codestr, i)); \
    _x = PyList_GET_ITEM(consts, get_arg(codestr, i)); \
    if (++const_stack_top >= const_stack_size) { \
@@ -72,33 +73,33 @@
    Callers are responsible to check CONST_STACK_LEN beforehand.
 */
 static Py_ssize_t
-lastn_const_start(unsigned char *codestr, Py_ssize_t i, Py_ssize_t n)
+lastn_const_start(const _Py_CODEUNIT *codestr, Py_ssize_t i, Py_ssize_t n)
 {
-    assert(n > 0 && (i&1) == 0);
+    assert(n > 0);
     for (;;) {
-        i -= 2;
+        i--;
         assert(i >= 0);
-        if (codestr[i] == LOAD_CONST) {
+        if (_Py_OPCODE(codestr[i]) == LOAD_CONST) {
             if (!--n) {
-                while (i > 0 && codestr[i-2] == EXTENDED_ARG) {
-                    i -= 2;
+                while (i > 0 && _Py_OPCODE(codestr[i-1]) == EXTENDED_ARG) {
+                    i--;
                 }
                 return i;
             }
         }
         else {
-            assert(codestr[i] == NOP || codestr[i] == EXTENDED_ARG);
+            assert(_Py_OPCODE(codestr[i]) == NOP ||
+                   _Py_OPCODE(codestr[i]) == EXTENDED_ARG);
         }
     }
 }
 
 /* Scans through EXTENDED ARGs, seeking the index of the effective opcode */
 static Py_ssize_t
-find_op(unsigned char *codestr, Py_ssize_t i)
+find_op(const _Py_CODEUNIT *codestr, Py_ssize_t i)
 {
-    assert((i&1) == 0);
-    while (codestr[i] == EXTENDED_ARG) {
-        i += 2;
+    while (_Py_OPCODE(codestr[i]) == EXTENDED_ARG) {
+        i++;
     }
     return i;
 }
@@ -106,27 +107,34 @@ find_op(unsigned char *codestr, Py_ssize
 /* Given the index of the effective opcode,
    scan back to construct the oparg with EXTENDED_ARG */
 static unsigned int
-get_arg(unsigned char *codestr, Py_ssize_t i)
+get_arg(const _Py_CODEUNIT *codestr, Py_ssize_t i)
 {
-    unsigned int oparg = codestr[i+1];
-    assert((i&1) == 0);
-    if (i >= 2 && codestr[i-2] == EXTENDED_ARG) {
-        oparg |= codestr[i-1] << 8;
-        if (i >= 4 && codestr[i-4] == EXTENDED_ARG) {
-            oparg |= codestr[i-3] << 16;
-            if (i >= 6 && codestr[i-6] == EXTENDED_ARG) {
-                oparg |= codestr[i-5] << 24;
+    _Py_CODEUNIT word;
+    unsigned int oparg = _Py_OPARG(codestr[i]);
+    if (i >= 1 && _Py_OPCODE(word = codestr[i-1]) == EXTENDED_ARG) {
+        oparg |= _Py_OPARG(word) << 8;
+        if (i >= 2 && _Py_OPCODE(word = codestr[i-2]) == EXTENDED_ARG) {
+            oparg |= _Py_OPARG(word) << 16;
+            if (i >= 3 && _Py_OPCODE(word = codestr[i-3]) == EXTENDED_ARG) {
+                oparg |= _Py_OPARG(word) << 24;
             }
         }
     }
     return oparg;
 }
 
+/* Fill the region with NOPs. */
+static void
+fill_nops(_Py_CODEUNIT *codestr, Py_ssize_t start, Py_ssize_t end)
+{
+    memset(codestr + start, NOP, (end - start) * sizeof(_Py_CODEUNIT));
+}
+
 /* Given the index of the effective opcode,
    attempt to replace the argument, taking into account EXTENDED_ARG.
    Returns -1 on failure, or the new op index on success */
 static Py_ssize_t
-set_arg(unsigned char *codestr, Py_ssize_t i, unsigned int oparg)
+set_arg(_Py_CODEUNIT *codestr, Py_ssize_t i, unsigned int oparg)
 {
     unsigned int curarg = get_arg(codestr, i);
     int curilen, newilen;
@@ -138,8 +146,8 @@ set_arg(unsigned char *codestr, Py_ssize
         return -1;
     }
 
-    write_op_arg(codestr + i + 2 - curilen, codestr[i], oparg, newilen);
-    memset(codestr + i + 2 - curilen + newilen, NOP, curilen - newilen);
+    write_op_arg(codestr + i + 1 - curilen, _Py_OPCODE(codestr[i]), oparg, newilen);
+    fill_nops(codestr, i + 1 - curilen + newilen, i + 1);
     return i-curilen+newilen;
 }
 
@@ -147,17 +155,16 @@ set_arg(unsigned char *codestr, Py_ssize
    Preceding memory in the region is overwritten with NOPs.
    Returns -1 on failure, op index on success */
 static Py_ssize_t
-copy_op_arg(unsigned char *codestr, Py_ssize_t i, unsigned char op,
+copy_op_arg(_Py_CODEUNIT *codestr, Py_ssize_t i, unsigned char op,
             unsigned int oparg, Py_ssize_t maxi)
 {
     int ilen = instrsize(oparg);
-    assert((i&1) == 0);
     if (i + ilen > maxi) {
         return -1;
     }
     write_op_arg(codestr + maxi - ilen, op, oparg, ilen);
-    memset(codestr + i, NOP, maxi - i - ilen);
-    return maxi - 2;
+    fill_nops(codestr, i, maxi - ilen);
+    return maxi - 1;
 }
 
 /* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
@@ -170,7 +177,7 @@ copy_op_arg(unsigned char *codestr, Py_s
    test; for BUILD_SET it assembles a frozenset rather than a tuple. */
 static Py_ssize_t
-fold_tuple_on_constants(unsigned char *codestr, Py_ssize_t c_start,
+fold_tuple_on_constants(_Py_CODEUNIT *codestr, Py_ssize_t c_start,
                         Py_ssize_t opcode_end, unsigned char opcode,
                         PyObject *consts, PyObject **objs, int n)
 {
@@ -222,7 +229,7 @@ fold_tuple_on_constants(unsigned char *c
    becoming large in the presence of code like:  (None,)*1000.
 */
 static Py_ssize_t
-fold_binops_on_constants(unsigned char *codestr, Py_ssize_t c_start,
+fold_binops_on_constants(_Py_CODEUNIT *codestr, Py_ssize_t c_start,
                          Py_ssize_t opcode_end, unsigned char opcode,
                          PyObject *consts, PyObject **objs)
 {
@@ -311,7 +318,7 @@ fold_binops_on_constants(unsigned char *
 }
 
 static Py_ssize_t
-fold_unaryops_on_constants(unsigned char *codestr, Py_ssize_t c_start,
+fold_unaryops_on_constants(_Py_CODEUNIT *codestr, Py_ssize_t c_start,
                            Py_ssize_t opcode_end, unsigned char opcode,
                            PyObject *consts, PyObject *v)
 {
@@ -359,7 +366,7 @@ fold_unaryops_on_constants(unsigned char
 }
 
 static unsigned int *
-markblocks(unsigned char *code, Py_ssize_t len)
+markblocks(_Py_CODEUNIT *code, Py_ssize_t len)
 {
     unsigned int *blocks = PyMem_New(unsigned int, len);
     int i, j, opcode, blockcnt = 0;
@@ -371,8 +378,8 @@ markblocks(unsigned char *code, Py_ssize
     memset(blocks, 0, len*sizeof(int));
 
     /* Mark labels in the first pass */
-    for (i=0 ; i<len ; i+=2) {
-        opcode = code[i];
+    for (i=0 ; i<len ; i++) {
+        opcode = _Py_OPCODE(code[i]);
         switch (opcode) {
             case FOR_ITER:
             case JUMP_FORWARD:
@@ -465,14 +472,14 @@ PyCode_Optimize(PyObject *code, PyObject
     for (i=find_op(codestr, 0) ; i<codelen ; i=nexti) {
-        opcode = codestr[i];
+        opcode = _Py_OPCODE(codestr[i]);
         op_start = i;
-        while (op_start >= 2 && codestr[op_start-2] == EXTENDED_ARG) {
-            op_start -= 2;
+        while (op_start >= 1 && _Py_OPCODE(codestr[op_start-1]) == EXTENDED_ARG) {
+            op_start--;
         }
 
-        nexti = i + 2;
-        while (nexti < codelen && codestr[nexti] == EXTENDED_ARG)
-            nexti += 2;
-        nextop = nexti < codelen ? codestr[nexti] : 0;
+        nexti = i + 1;
+        while (nexti < codelen && _Py_OPCODE(codestr[nexti]) == EXTENDED_ARG)
+            nexti++;
+        nextop = nexti < codelen ? _Py_OPCODE(codestr[nexti]) : 0;
 
         if (!in_consts) {
             CONST_STACK_RESET();
@@ -488,10 +496,10 @@ PyCode_Optimize(PyObject *code, PyObject
                    with POP_JUMP_IF_TRUE */
             case UNARY_NOT:
                 if (nextop != POP_JUMP_IF_FALSE
-                    || !ISBASICBLOCK(blocks, op_start, i+2))
+                    || !ISBASICBLOCK(blocks, op_start, i + 1))
                     break;
-                memset(codestr + op_start, NOP, i - op_start + 2);
-                codestr[nexti] = POP_JUMP_IF_TRUE;
+                fill_nops(codestr, op_start, i + 1);
+                codestr[nexti] = PACKOPARG(POP_JUMP_IF_TRUE, _Py_OPARG(codestr[nexti]));
                 break;
 
                 /* not a is b -->  a is not b
@@ -503,10 +511,10 @@ PyCode_Optimize(PyObject *code, PyObject
                 j = get_arg(codestr, i);
                 if (j < 6 || j > 9 ||
                     nextop != UNARY_NOT ||
-                    !ISBASICBLOCK(blocks, op_start, i + 2))
+                    !ISBASICBLOCK(blocks, op_start, i + 1))
                     break;
-                codestr[i+1] = (j^1);
-                memset(codestr + i + 2, NOP, nexti - i);
+                codestr[i] = PACKOPARG(opcode, j^1);
+                fill_nops(codestr, i + 1, nexti + 1);
                 break;
 
                 /* Skip over LOAD_CONST trueconst
@@ -515,10 +523,10 @@ PyCode_Optimize(PyObject *code, PyObject
             case LOAD_CONST:
                 CONST_STACK_PUSH_OP(i);
                 if (nextop != POP_JUMP_IF_FALSE ||
-                    !ISBASICBLOCK(blocks, op_start, i + 2)  ||
+                    !ISBASICBLOCK(blocks, op_start, i + 1)  ||
                     !PyObject_IsTrue(PyList_GET_ITEM(consts, get_arg(codestr, i))))
                     break;
-                memset(codestr + op_start, NOP, nexti - op_start + 2);
+                fill_nops(codestr, op_start, nexti + 1);
                 CONST_STACK_POP(1);
                 break;
 
@@ -537,10 +545,10 @@ PyCode_Optimize(PyObject *code, PyObject
                          ISBASICBLOCK(blocks, h, op_start)) ||
                         ((opcode == BUILD_LIST || opcode == BUILD_SET) &&
                          ((nextop==COMPARE_OP &&
-                          (codestr[nexti+1]==6 ||
-                           codestr[nexti+1]==7)) ||
-                          nextop == GET_ITER) && ISBASICBLOCK(blocks, h, i + 2))) {
-                        h = fold_tuple_on_constants(codestr, h, i+2, opcode,
+                          (_Py_OPARG(codestr[nexti]) == PyCmp_IN ||
+                           _Py_OPARG(codestr[nexti]) == PyCmp_NOT_IN)) ||
+                          nextop == GET_ITER) && ISBASICBLOCK(blocks, h, i + 1))) {
+                        h = fold_tuple_on_constants(codestr, h, i + 1, opcode,
                                                     consts, CONST_STACK_LASTN(j), j);
                         if (h >= 0) {
                             CONST_STACK_POP(j);
@@ -550,23 +558,20 @@ PyCode_Optimize(PyObject *code, PyObject
                     }
                 }
                 if (nextop != UNPACK_SEQUENCE ||
-                    !ISBASICBLOCK(blocks, op_start, i + 2) ||
+                    !ISBASICBLOCK(blocks, op_start, i + 1) ||
                     j != get_arg(codestr, nexti) ||
                     opcode == BUILD_SET)
                     break;
                 if (j < 2) {
-                    memset(codestr+op_start, NOP, nexti - op_start + 2);
+                    fill_nops(codestr, op_start, nexti + 1);
                 } else if (j == 2) {
-                    codestr[op_start] = ROT_TWO;
-                    codestr[op_start + 1] = 0;
-                    memset(codestr + op_start + 2, NOP, nexti - op_start);
+                    codestr[op_start] = PACKOPARG(ROT_TWO, 0);
+                    fill_nops(codestr, op_start + 1, nexti + 1);
                     CONST_STACK_RESET();
                 } else if (j == 3) {
-                    codestr[op_start] = ROT_THREE;
-                    codestr[op_start + 1] = 0;
-                    codestr[op_start + 2] = ROT_TWO;
-                    codestr[op_start + 3] = 0;
-                    memset(codestr + op_start + 4, NOP, nexti - op_start - 2);
+                    codestr[op_start] = PACKOPARG(ROT_THREE, 0);
+                    codestr[op_start + 1] = PACKOPARG(ROT_TWO, 0);
+                    fill_nops(codestr, op_start + 2, nexti + 1);
                     CONST_STACK_RESET();
                 }
                 break;
@@ -590,7 +595,7 @@ PyCode_Optimize(PyObject *code, PyObject
                     break;
                 h = lastn_const_start(codestr, op_start, 2);
                 if (ISBASICBLOCK(blocks, h, op_start)) {
-                    h = fold_binops_on_constants(codestr, h, i+2, opcode,
+                    h = fold_binops_on_constants(codestr, h, i + 1, opcode,
                                                  consts, CONST_STACK_LASTN(2));
                     if (h >= 0) {
                         CONST_STACK_POP(2);
@@ -608,7 +613,7 @@ PyCode_Optimize(PyObject *code, PyObject
                     break;
                 h = lastn_const_start(codestr, op_start, 1);
                 if (ISBASICBLOCK(blocks, h, op_start)) {
-                    h = fold_unaryops_on_constants(codestr, h, i+2, opcode,
+                    h = fold_unaryops_on_constants(codestr, h, i + 1, opcode,
                                                    consts, *CONST_STACK_LASTN(1));
                     if (h >= 0) {
                         CONST_STACK_POP(1);
@@ -628,15 +633,15 @@ PyCode_Optimize(PyObject *code, PyObject
                    x:JUMP_IF_FALSE_OR_POP y   y:JUMP_IF_FALSE_OR_POP z
                       -->  x:JUMP_IF_FALSE_OR_POP z
                    x:JUMP_IF_FALSE_OR_POP y   y:JUMP_IF_TRUE_OR_POP z
-                      -->  x:POP_JUMP_IF_FALSE y+2
-                   where y+2 is the instruction following the second test.
+                      -->  x:POP_JUMP_IF_FALSE y+1
+                   where y+1 is the instruction following the second test.
                 */
             case JUMP_IF_FALSE_OR_POP:
             case JUMP_IF_TRUE_OR_POP:
-                h = get_arg(codestr, i);
+                h = get_arg(codestr, i) / sizeof(_Py_CODEUNIT);
                 tgt = find_op(codestr, h);
 
-                j = codestr[tgt];
+                j = _Py_OPCODE(codestr[tgt]);
                 if (CONDITIONAL_JUMP(j)) {
                     /* NOTE: all possible jumps here are absolute! */
@@ -654,14 +659,14 @@ PyCode_Optimize(PyObject *code, PyObject
                            they're not taken (so change the
                            first jump to pop its argument when
                            it's taken). */
-                        h = set_arg(codestr, i, tgt + 2);
+                        h = set_arg(codestr, i, (tgt + 1) * sizeof(_Py_CODEUNIT));
                         j = opcode == JUMP_IF_TRUE_OR_POP ?
                             POP_JUMP_IF_TRUE : POP_JUMP_IF_FALSE;
                     }
 
                     if (h >= 0) {
                         nexti = h;
-                        codestr[nexti] = j;
+                        codestr[nexti] = PACKOPARG(j, _Py_OPARG(codestr[nexti]));
                         break;
                     }
                 }
@@ -683,32 +688,32 @@ PyCode_Optimize(PyObject *code, PyObject
                 tgt = find_op(codestr, h);
                 /* Replace JUMP_* to a RETURN into just a RETURN */
                 if (UNCONDITIONAL_JUMP(opcode) &&
-                    codestr[tgt] == RETURN_VALUE) {
-                    codestr[op_start] = RETURN_VALUE;
-                    codestr[op_start + 1] = 0;
-                    memset(codestr + op_start + 2, NOP, i - op_start);
-                } else if (UNCONDITIONAL_JUMP(codestr[tgt])) {
+                    _Py_OPCODE(codestr[tgt]) == RETURN_VALUE) {
+                    codestr[op_start] = PACKOPARG(RETURN_VALUE, 0);
+                    fill_nops(codestr, op_start + 1, i + 1);
+                } else if (UNCONDITIONAL_JUMP(_Py_OPCODE(codestr[tgt]))) {
                     j = GETJUMPTGT(codestr, tgt);
                     if (opcode == JUMP_FORWARD) { /* JMP_ABS can go backwards */
                         opcode = JUMP_ABSOLUTE;
                     } else if (!ABSOLUTE_JUMP(opcode)) {
-                        if ((Py_ssize_t)j < i + 2) {
+                        if ((Py_ssize_t)j < i + 1) {
                             break;           /* No backward relative jumps */
                         }
-                        j -= i + 2;          /* Calc relative jump addr */
+                        j -= i + 1;          /* Calc relative jump addr */
                     }
-                    copy_op_arg(codestr, op_start, opcode, j, i+2);
+                    j *= sizeof(_Py_CODEUNIT);
+                    copy_op_arg(codestr, op_start, opcode, j, i + 1);
                 }
                 break;
 
                 /* Remove unreachable ops after RETURN */
             case RETURN_VALUE:
-                h = i + 2;
-                while (h + 2 < codelen && ISBASICBLOCK(blocks, i, h + 2)) {
-                    h += 2;
+                h = i + 1;
+                while (h + 1 < codelen && ISBASICBLOCK(blocks, i, h + 1)) {
+                    h++;
                 }
-                if (h > i + 2) {
-                    memset(codestr + i + 2, NOP, h - i);
+                if (h > i + 1) {
+                    fill_nops(codestr, i + 1, h + 1);
                     nexti = find_op(codestr, h);
                 }
                 break;
@@ -716,20 +721,21 @@ PyCode_Optimize(PyObject *code, PyObject
     }
 
     /* Fixup lnotab */
-    for (i=0, nops=0 ; i<codelen ; i += 2) {
+    for (i=0, nops=0 ; i<codelen ; i++) {
         assert(i - nops <= INT_MAX);
         /* original code offset => new code offset */
         blocks[i] = i - nops;
-        if (codestr[i] == NOP)
-            nops += 2;
+        if (_Py_OPCODE(codestr[i]) == NOP)
+            nops++;
     }
     cum_orig_offset = 0;
     last_offset = 0;
     for (i=0 ; i < tabsiz ; i+=2) {
         unsigned int offset_delta, new_offset;
         cum_orig_offset += lnotab[i];
-        assert((cum_orig_offset & 1) == 0);
-        new_offset = blocks[cum_orig_offset];
+        assert(cum_orig_offset % sizeof(_Py_CODEUNIT) == 0);
+        new_offset = blocks[cum_orig_offset / sizeof(_Py_CODEUNIT)] *
+                sizeof(_Py_CODEUNIT);
         offset_delta = new_offset - last_offset;
         assert(offset_delta <= 255);
         lnotab[i] = (unsigned char)offset_delta;
@@ -737,13 +743,13 @@ PyCode_Optimize(PyObject *code, PyObject
     }
 
     /* Remove NOPs and fixup jump targets */
-    for (op_start=0, i=0, h=0 ; i<codelen ; i+=2, op_start=i) {
-        j = codestr[i];
+    for (op_start=0, i=0, h=0 ; i<codelen ; i++, op_start=i) {
+        j = _Py_OPCODE(codestr[i]);
         while (j == EXTENDED_ARG) {
-            i += 2;
-            j = codestr[i];
+            i++;
+            j = _Py_OPCODE(codestr[i]);
         }
         opcode = j;
         switch (opcode) {
             case NOP:continue;
@@ -765,7 +771,7 @@ PyCode_Optimize(PyObject *code, PyObject
                 break;
         }
 
-        nexti = i - op_start + 2;
+        nexti = i - op_start + 1;
         if (instrsize(j) > nexti)
             goto exitUnchanged;
         /* If instrsize(j) < nexti, we'll emit EXTENDED_ARG 0 */
@@ -777,7 +784,7 @@ PyCode_Optimize(PyObject *code, PyObject
 
     CONST_STACK_DELETE();
     PyMem_Free(blocks);
-    code = PyBytes_FromStringAndSize((char *)codestr, h);
+    code = PyBytes_FromStringAndSize((char *)codestr, h * sizeof(_Py_CODEUNIT));
     PyMem_Free(codestr);
     return code;
diff -r 69c0aa8a8185 -r 031d992bdcea Python/wordcode_helpers.h
--- a/Python/wordcode_helpers.h	Thu Jun 09 16:30:29 2016 +0300
+++ b/Python/wordcode_helpers.h	Fri Jun 10 21:50:50 2016 +0300
@@ -2,35 +2,38 @@
    optimizer.
 */
 
-/* Minimum number of bytes necessary to encode instruction with EXTENDED_ARGs */
+#ifdef WORDS_BIGENDIAN
+#define PACKOPARG(opcode, oparg) ((_Py_CODEUNIT)(((opcode) << 8) | (oparg)))
+#else
+#define PACKOPARG(opcode, oparg) ((_Py_CODEUNIT)(((oparg) << 8) | (opcode)))
+#endif
+
+/* Minimum number of code units necessary to encode instruction with
+   EXTENDED_ARGs */
 static int
 instrsize(unsigned int oparg)
 {
-    return oparg <= 0xff ? 2 :
-           oparg <= 0xffff ? 4 :
-           oparg <= 0xffffff ? 6 :
-           8;
+    return oparg <= 0xff ? 1 :
+           oparg <= 0xffff ? 2 :
+           oparg <= 0xffffff ? 3 :
+           4;
}
 
 /* Spits out op/oparg pair using ilen bytes. codestr should be pointed at the
    desired location of the first EXTENDED_ARG */
 static void
-write_op_arg(unsigned char *codestr, unsigned char opcode,
+write_op_arg(_Py_CODEUNIT *codestr, unsigned char opcode,
              unsigned int oparg, int ilen)
 {
     switch (ilen) {
-        case 8:
-            *codestr++ = EXTENDED_ARG;
-            *codestr++ = (oparg >> 24) & 0xff;
-        case 6:
-            *codestr++ = EXTENDED_ARG;
-            *codestr++ = (oparg >> 16) & 0xff;
         case 4:
-            *codestr++ = EXTENDED_ARG;
-            *codestr++ = (oparg >> 8) & 0xff;
+            *codestr++ = PACKOPARG(EXTENDED_ARG, (oparg >> 24) & 0xff);
+        case 3:
+            *codestr++ = PACKOPARG(EXTENDED_ARG, (oparg >> 16) & 0xff);
         case 2:
-            *codestr++ = opcode;
-            *codestr++ = oparg & 0xff;
+            *codestr++ = PACKOPARG(EXTENDED_ARG, (oparg >> 8) & 0xff);
+        case 1:
+            *codestr++ = PACKOPARG(opcode, oparg & 0xff);
             break;
         default:
             assert(0);
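
The following standalone sketch (not part of the patch) shows how the pieces above fit together: one side packs code units the way write_op_arg() does, the other unpacks them the way NEXTOPARG() and get_arg() do, including EXTENDED_ARG accumulation. It assumes a little-endian host (so the #else branches of the macros apply) and uses the CPython 3.6-era opcode numbers LOAD_CONST == 100 and EXTENDED_ARG == 144; treat those two constants as illustrative.

/* sketch.c -- round-trip an oparg through 16-bit code units.
 * Standalone illustration only; macro bodies are copied from the
 * patch, opcode numbers assumed from the 3.6-era opcode.h. */
#include <assert.h>
#include <stdio.h>

typedef unsigned short _Py_CODEUNIT;   /* one instruction word, as in code.h */
#define _Py_OPCODE(word) ((word) & 255)          /* little-endian variants */
#define _Py_OPARG(word)  ((word) >> 8)
#define PACKOPARG(opcode, oparg) ((_Py_CODEUNIT)(((oparg) << 8) | (opcode)))

#define LOAD_CONST   100                         /* assumed opcode values */
#define EXTENDED_ARG 144

/* Code units needed for an oparg; mirrors instrsize() in the patch. */
static int instrsize(unsigned int oparg)
{
    return oparg <= 0xff ? 1 :
           oparg <= 0xffff ? 2 :
           oparg <= 0xffffff ? 3 :
           4;
}

int main(void)
{
    _Py_CODEUNIT codestr[4];
    unsigned int oparg = 0x1234;       /* too big for one byte */
    assert(instrsize(oparg) == 2);     /* one EXTENDED_ARG + the real op */

    /* Encode as write_op_arg() does: EXTENDED_ARG words first,
       most significant oparg byte first. */
    codestr[0] = PACKOPARG(EXTENDED_ARG, (oparg >> 8) & 0xff);
    codestr[1] = PACKOPARG(LOAD_CONST, oparg & 0xff);

    /* Decode as the eval loop and get_arg() do: accumulate
       EXTENDED_ARG bytes, then read the effective opcode. */
    unsigned int arg = 0;
    int i = 0;
    while (_Py_OPCODE(codestr[i]) == EXTENDED_ARG) {
        arg = (arg | _Py_OPARG(codestr[i])) << 8;
        i++;
    }
    arg |= _Py_OPARG(codestr[i]);

    printf("opcode=%d oparg=0x%x\n", _Py_OPCODE(codestr[i]), arg);
    assert(_Py_OPCODE(codestr[i]) == LOAD_CONST && arg == oparg);
    return 0;
}

On a big-endian build only the three macro bodies change; everything else is byte-order independent, which is the point of funnelling all packing and unpacking through _Py_OPCODE, _Py_OPARG, and PACKOPARG.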