Index: Python/ceval.c =================================================================== --- Python/ceval.c (revision 57473) +++ Python/ceval.c (working copy) @@ -738,7 +738,7 @@ consts = co->co_consts; fastlocals = f->f_localsplus; freevars = f->f_localsplus + co->co_nlocals; - first_instr = (unsigned char*) PyString_AS_STRING(co->co_code); + first_instr = (unsigned char*) PyBytes_AS_STRING(co->co_code); /* An explanation is in order for the next line. f->f_lasti now refers to the index of the last instruction Index: Python/peephole.c =================================================================== --- Python/peephole.c (revision 57473) +++ Python/peephole.c (working copy) @@ -325,15 +325,15 @@ goto exitUnchanged; /* Bypass optimization when the lineno table is too complex */ - assert(PyString_Check(lineno_obj)); - lineno = (unsigned char*)PyString_AS_STRING(lineno_obj); - tabsiz = PyString_GET_SIZE(lineno_obj); + assert(PyBytes_Check(lineno_obj)); + lineno = (unsigned char*)PyBytes_AS_STRING(lineno_obj); + tabsiz = PyBytes_GET_SIZE(lineno_obj); if (memchr(lineno, 255, tabsiz) != NULL) goto exitUnchanged; /* Avoid situations where jump retargeting could overflow */ - assert(PyString_Check(code)); - codelen = PyString_GET_SIZE(code); + assert(PyBytes_Check(code)); + codelen = PyBytes_GET_SIZE(code); if (codelen > 32700) goto exitUnchanged; @@ -342,7 +342,7 @@ if (codestr == NULL) goto exitUnchanged; codestr = (unsigned char *)memcpy(codestr, - PyString_AS_STRING(code), codelen); + PyBytes_AS_STRING(code), codelen); /* Verify that RETURN_VALUE terminates the codestring. This allows the various transformation patterns to look ahead several @@ -407,7 +407,7 @@ case LOAD_NAME: case LOAD_GLOBAL: j = GETARG(codestr, i); - name = PyString_AsString(PyTuple_GET_ITEM(names, j)); + name = PyUnicode_AsString(PyTuple_GET_ITEM(names, j)); h = load_global(codestr, i, name, consts); if (h < 0) goto exitUnchanged; @@ -632,7 +632,7 @@ } assert(h + nops == codelen); - code = PyString_FromStringAndSize((char *)codestr, h); + code = PyBytes_FromStringAndSize((char *)codestr, h); PyMem_Free(addrmap); PyMem_Free(codestr); PyMem_Free(blocks); Index: Python/import.c =================================================================== --- Python/import.c (revision 57473) +++ Python/import.c (working copy) @@ -74,9 +74,10 @@ 3040 (added signature annotations) 3050 (print becomes a function) 3060 (PEP 3115 metaclass syntax) + 3070 (change to use bytes for code objects) . */ -#define MAGIC (3060 | ((long)'\r'<<16) | ((long)'\n'<<24)) +#define MAGIC (3070 | ((long)'\r'<<16) | ((long)'\n'<<24)) /* Magic word as global; note that _PyImport_Init() can change the value of this global to accommodate for alterations of how the Index: Python/marshal.c =================================================================== --- Python/marshal.c (revision 57473) +++ Python/marshal.c (working copy) @@ -891,6 +891,7 @@ int stacksize; int flags; PyObject *code = NULL; + PyObject *code_str = NULL; PyObject *consts = NULL; PyObject *names = NULL; PyObject *varnames = NULL; @@ -900,6 +901,7 @@ PyObject *name = NULL; int firstlineno; PyObject *lnotab = NULL; + PyObject *lnotab_str = NULL; v = NULL; @@ -909,7 +911,16 @@ nlocals = (int)r_long(p); stacksize = (int)r_long(p); flags = (int)r_long(p); - code = r_object(p); + code_str = r_object(p); + if (code_str == NULL) + goto code_error; + /* XXX(nnorwitz): hack to handle bytes which are + marshalled as strings. */ + assert(PyString_Check(code_str)); + code = PyBytes_FromStringAndSize( + PyString_AS_STRING(code_str), + PyString_GET_SIZE(code_str)); + Py_DECREF(code_str); if (code == NULL) goto code_error; consts = r_object(p); @@ -934,7 +945,16 @@ if (name == NULL) goto code_error; firstlineno = (int)r_long(p); - lnotab = r_object(p); + lnotab_str = r_object(p); + if (lnotab_str == NULL) + goto code_error; + /* XXX(nnorwitz): hack to handle bytes which are + marshalled as strings. */ + assert(PyString_Check(lnotab_str)); + lnotab = PyBytes_FromStringAndSize( + PyString_AS_STRING(lnotab_str), + PyString_GET_SIZE(lnotab_str)); + Py_DECREF(lnotab_str); if (lnotab == NULL) goto code_error; Index: Python/compile.c =================================================================== --- Python/compile.c (revision 57473) +++ Python/compile.c (working copy) @@ -3627,11 +3627,11 @@ */ struct assembler { - PyObject *a_bytecode; /* string containing bytecode */ + PyObject *a_bytecode; /* bytes containing bytecode */ int a_offset; /* offset into bytecode */ int a_nblocks; /* number of reachable blocks */ basicblock **a_postorder; /* list of blocks in dfs postorder */ - PyObject *a_lnotab; /* string containing lnotab */ + PyObject *a_lnotab; /* bytes containing lnotab */ int a_lnotab_off; /* offset into lnotab */ int a_lineno; /* last lineno of emitted instruction */ int a_lineno_off; /* bytecode offset of last lineno */ @@ -3710,10 +3710,10 @@ { memset(a, 0, sizeof(struct assembler)); a->a_lineno = firstlineno; - a->a_bytecode = PyString_FromStringAndSize(NULL, DEFAULT_CODE_SIZE); + a->a_bytecode = PyBytes_FromStringAndSize(NULL, DEFAULT_CODE_SIZE); if (!a->a_bytecode) return 0; - a->a_lnotab = PyString_FromStringAndSize(NULL, DEFAULT_LNOTAB_SIZE); + a->a_lnotab = PyBytes_FromStringAndSize(NULL, DEFAULT_LNOTAB_SIZE); if (!a->a_lnotab) return 0; a->a_postorder = (basicblock **)PyObject_Malloc( @@ -3759,7 +3759,7 @@ /* All about a_lnotab. -c_lnotab is an array of unsigned bytes disguised as a Python string. +c_lnotab is an array of unsigned bytes (a Python bytes object). It is used to map bytecode offsets to source code line #s (when needed for tracebacks). @@ -3825,17 +3825,17 @@ if (d_bytecode > 255) { int j, nbytes, ncodes = d_bytecode / 255; nbytes = a->a_lnotab_off + 2 * ncodes; - len = PyString_GET_SIZE(a->a_lnotab); + len = PyBytes_GET_SIZE(a->a_lnotab); if (nbytes >= len) { if (len * 2 < nbytes) len = nbytes; else len *= 2; - if (_PyString_Resize(&a->a_lnotab, len) < 0) + if (PyBytes_Resize(a->a_lnotab, len) < 0) return 0; } lnotab = (unsigned char *) - PyString_AS_STRING(a->a_lnotab) + a->a_lnotab_off; + PyBytes_AS_STRING(a->a_lnotab) + a->a_lnotab_off; for (j = 0; j < ncodes; j++) { *lnotab++ = 255; *lnotab++ = 0; @@ -3847,17 +3847,17 @@ if (d_lineno > 255) { int j, nbytes, ncodes = d_lineno / 255; nbytes = a->a_lnotab_off + 2 * ncodes; - len = PyString_GET_SIZE(a->a_lnotab); + len = PyBytes_GET_SIZE(a->a_lnotab); if (nbytes >= len) { if (len * 2 < nbytes) len = nbytes; else len *= 2; - if (_PyString_Resize(&a->a_lnotab, len) < 0) + if (PyBytes_Resize(a->a_lnotab, len) < 0) return 0; } lnotab = (unsigned char *) - PyString_AS_STRING(a->a_lnotab) + a->a_lnotab_off; + PyBytes_AS_STRING(a->a_lnotab) + a->a_lnotab_off; *lnotab++ = d_bytecode; *lnotab++ = 255; d_bytecode = 0; @@ -3869,13 +3869,13 @@ a->a_lnotab_off += ncodes * 2; } - len = PyString_GET_SIZE(a->a_lnotab); + len = PyBytes_GET_SIZE(a->a_lnotab); if (a->a_lnotab_off + 2 >= len) { - if (_PyString_Resize(&a->a_lnotab, len * 2) < 0) + if (PyBytes_Resize(a->a_lnotab, len * 2) < 0) return 0; } lnotab = (unsigned char *) - PyString_AS_STRING(a->a_lnotab) + a->a_lnotab_off; + PyBytes_AS_STRING(a->a_lnotab) + a->a_lnotab_off; a->a_lnotab_off += 2; if (d_bytecode) { @@ -3900,7 +3900,7 @@ assemble_emit(struct assembler *a, struct instr *i) { int size, arg = 0, ext = 0; - Py_ssize_t len = PyString_GET_SIZE(a->a_bytecode); + Py_ssize_t len = PyBytes_GET_SIZE(a->a_bytecode); char *code; size = instrsize(i); @@ -3911,10 +3911,10 @@ if (i->i_lineno && !assemble_lnotab(a, i)) return 0; if (a->a_offset + size >= len) { - if (_PyString_Resize(&a->a_bytecode, len * 2) < 0) + if (PyBytes_Resize(a->a_bytecode, len * 2) < 0) return 0; } - code = PyString_AS_STRING(a->a_bytecode) + a->a_offset; + code = PyBytes_AS_STRING(a->a_bytecode) + a->a_offset; a->a_offset += size; if (size == 6) { assert(i->i_hasarg); @@ -4208,9 +4208,9 @@ goto error; } - if (_PyString_Resize(&a.a_lnotab, a.a_lnotab_off) < 0) + if (PyBytes_Resize(a.a_lnotab, a.a_lnotab_off) < 0) goto error; - if (_PyString_Resize(&a.a_bytecode, a.a_offset) < 0) + if (PyBytes_Resize(a.a_bytecode, a.a_offset) < 0) goto error; co = makecode(c, &a); Index: Objects/codeobject.c =================================================================== --- Objects/codeobject.c (revision 57473) +++ Objects/codeobject.c (working copy) @@ -52,7 +52,7 @@ Py_ssize_t i; /* Check argument types */ if (argcount < 0 || nlocals < 0 || - code == NULL || + code == NULL || !PyBytes_Check(code) || consts == NULL || !PyTuple_Check(consts) || names == NULL || !PyTuple_Check(names) || varnames == NULL || !PyTuple_Check(varnames) || @@ -60,8 +60,7 @@ cellvars == NULL || !PyTuple_Check(cellvars) || name == NULL || (!PyString_Check(name) && !PyUnicode_Check(name)) || filename == NULL || !PyString_Check(filename) || - lnotab == NULL || !PyString_Check(lnotab) || - !PyObject_CheckReadBuffer(code)) { + lnotab == NULL || !PyBytes_Check(lnotab)) { PyErr_BadInternalCall(); return NULL; } @@ -209,15 +208,16 @@ int firstlineno; PyObject *lnotab; - if (!PyArg_ParseTuple(args, "iiiiiSO!O!O!SSiS|O!O!:code", + if (!PyArg_ParseTuple(args, "iiiiiO!O!O!O!SSiO!|O!O!:code", &argcount, &kwonlyargcount, &nlocals, &stacksize, &flags, - &code, + &PyBytes_Type, &code, &PyTuple_Type, &consts, &PyTuple_Type, &names, &PyTuple_Type, &varnames, &filename, &name, - &firstlineno, &lnotab, + &firstlineno, + &PyBytes_Type, &lnotab, &PyTuple_Type, &freevars, &PyTuple_Type, &cellvars)) return NULL; @@ -373,7 +373,7 @@ long h, h0, h1, h2, h3, h4, h5, h6; h0 = PyObject_Hash(co->co_name); if (h0 == -1) return -1; - h1 = PyObject_Hash(co->co_code); + h1 = 0; // PyObject_Hash(co->co_code); if (h1 == -1) return -1; h2 = PyObject_Hash(co->co_consts); if (h2 == -1) return -1; @@ -437,7 +437,7 @@ /* All about c_lnotab. -c_lnotab is an array of unsigned bytes disguised as a Python string. In -O +c_lnotab is an array of unsigned bytes (a Python bytes object). In -O mode, SET_LINENO opcodes aren't generated, and bytecode offsets are mapped to source code line #s (when needed for tracebacks) via c_lnotab instead. The array is conceptually a list of @@ -482,8 +482,8 @@ int PyCode_Addr2Line(PyCodeObject *co, int addrq) { - int size = PyString_Size(co->co_lnotab) / 2; - unsigned char *p = (unsigned char*)PyString_AsString(co->co_lnotab); + int size = PyBytes_Size(co->co_lnotab) / 2; + unsigned char *p = (unsigned char*)PyBytes_AsString(co->co_lnotab); int line = co->co_firstlineno; int addr = 0; while (--size >= 0) { @@ -578,8 +578,8 @@ int size, addr, line; unsigned char* p; - p = (unsigned char*)PyString_AS_STRING(co->co_lnotab); - size = PyString_GET_SIZE(co->co_lnotab) / 2; + p = (unsigned char*)PyBytes_AS_STRING(co->co_lnotab); + size = PyBytes_GET_SIZE(co->co_lnotab) / 2; addr = 0; line = co->co_firstlineno; Index: Objects/frameobject.c =================================================================== --- Objects/frameobject.c (revision 57473) +++ Objects/frameobject.c (working copy) @@ -114,7 +114,9 @@ /* Find the bytecode offset for the start of the given line, or the * first code-owning line after it. */ - PyString_AsStringAndSize(f->f_code->co_lnotab, &lnotab, &lnotab_len); + assert(PyBytes_Check(f->f_code->co_lnotab)); + lnotab = PyBytes_AS_STRING(f->f_code->co_lnotab); + lnotab_len = PyBytes_GET_SIZE(f->f_code->co_lnotab); addr = 0; line = f->f_code->co_firstlineno; new_lasti = -1; @@ -137,7 +139,9 @@ } /* We're now ready to look at the bytecode. */ - PyString_AsStringAndSize(f->f_code->co_code, (char **)&code, &code_len); + assert(PyBytes_Check(f->f_code->co_code)); + code = (unsigned char*) PyBytes_AS_STRING(f->f_code->co_code); + code_len = PyBytes_GET_SIZE(f->f_code->co_code); min_addr = MIN(new_lasti, f->f_lasti); max_addr = MAX(new_lasti, f->f_lasti); Index: Lib/modulefinder.py =================================================================== --- Lib/modulefinder.py (revision 57473) +++ Lib/modulefinder.py (working copy) @@ -16,12 +16,12 @@ # remain compatible with Python < 2.3 READ_MODE = "r" -LOAD_CONST = chr(dis.opname.index('LOAD_CONST')) -IMPORT_NAME = chr(dis.opname.index('IMPORT_NAME')) -STORE_NAME = chr(dis.opname.index('STORE_NAME')) -STORE_GLOBAL = chr(dis.opname.index('STORE_GLOBAL')) +LOAD_CONST = dis.opname.index('LOAD_CONST') +IMPORT_NAME = dis.opname.index('IMPORT_NAME') +STORE_NAME = dis.opname.index('STORE_NAME') +STORE_GLOBAL = dis.opname.index('STORE_GLOBAL') STORE_OPS = [STORE_NAME, STORE_GLOBAL] -HAVE_ARGUMENT = chr(dis.HAVE_ARGUMENT) +HAVE_ARGUMENT = dis.HAVE_ARGUMENT # Modulefinder does a good job at simulating Python's, but it can not # handle __path__ modifications packages make at runtime. Therefore there Index: Lib/trace.py =================================================================== --- Lib/trace.py (revision 57473) +++ Lib/trace.py (working copy) @@ -367,7 +367,7 @@ """Return dict where keys are lines in the line number table.""" linenos = {} - line_increments = [ord(c) for c in code.co_lnotab[1::2]] + line_increments = [c for c in code.co_lnotab[1::2]] table_length = len(line_increments) docstring = False Index: Lib/test/test_compile.py =================================================================== --- Lib/test/test_compile.py (revision 57473) +++ Lib/test/test_compile.py (working copy) @@ -157,7 +157,7 @@ s256 = "".join(["\n"] * 256 + ["spam"]) co = compile(s256, 'fn', 'exec') self.assertEqual(co.co_firstlineno, 257) - self.assertEqual(co.co_lnotab, '') + self.assertEqual(co.co_lnotab, b'') def test_literals_with_leading_zeroes(self): for arg in ["077787", "0xj", "0x.", "0e", "090000000000000", Index: Lib/dis.py =================================================================== --- Lib/dis.py (revision 57473) +++ Lib/dis.py (working copy) @@ -117,8 +117,7 @@ extended_arg = 0 free = None while i < n: - c = code[i] - op = ord(c) + op = code[i] if i in linestarts: if i > 0: print() @@ -134,7 +133,7 @@ print(opname[op].ljust(20), end=' ') i = i+1 if op >= HAVE_ARGUMENT: - oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg + oparg = code[i] + code[i+1]*256 + extended_arg extended_arg = 0 i = i+2 if op == EXTENDED_ARG: @@ -162,8 +161,7 @@ n = len(code) i = 0 while i < n: - c = code[i] - op = ord(c) + op = code[i] if i == lasti: print('-->', end=' ') else: print(' ', end=' ') if i in labels: print('>>', end=' ') @@ -172,7 +170,7 @@ print(opname[op].ljust(15), end=' ') i = i+1 if op >= HAVE_ARGUMENT: - oparg = ord(code[i]) + ord(code[i+1])*256 + oparg = code[i] + code[i+1]*256 i = i+2 print(repr(oparg).rjust(5), end=' ') if op in hasconst: @@ -208,11 +206,10 @@ n = len(code) i = 0 while i < n: - c = code[i] - op = ord(c) + op = code[i] i = i+1 if op >= HAVE_ARGUMENT: - oparg = ord(code[i]) + ord(code[i+1])*256 + oparg = code[i] + code[i+1]*256 i = i+2 label = -1 if op in hasjrel: @@ -230,8 +227,8 @@ Generate pairs (offset, lineno) as described in Python/compile.c. """ - byte_increments = [ord(c) for c in code.co_lnotab[0::2]] - line_increments = [ord(c) for c in code.co_lnotab[1::2]] + byte_increments = [c for c in code.co_lnotab[0::2]] + line_increments = [c for c in code.co_lnotab[1::2]] lastlineno = None lineno = code.co_firstlineno Index: Modules/_ctypes/callbacks.c =================================================================== --- Modules/_ctypes/callbacks.c (revision 57473) +++ Modules/_ctypes/callbacks.c (working copy) @@ -30,7 +30,7 @@ PyObject *py_funcname = 0; PyObject *py_globals = 0; PyObject *empty_tuple = 0; - PyObject *empty_string = 0; + PyObject *empty_bytes = 0; PyCodeObject *py_code = 0; PyFrameObject *py_frame = 0; @@ -42,15 +42,15 @@ if (!py_globals) goto bad; empty_tuple = PyTuple_New(0); if (!empty_tuple) goto bad; - empty_string = PyString_FromString(""); - if (!empty_string) goto bad; + empty_bytes = PyBytes_FromStringAndSize(NULL, 0); + if (!empty_bytes) goto bad; py_code = PyCode_New( 0, /*int argcount,*/ 0, /*int kwonlyargcount,*/ 0, /*int nlocals,*/ 0, /*int stacksize,*/ 0, /*int flags,*/ - empty_string, /*PyObject *code,*/ + empty_bytes, /*PyObject *code,*/ empty_tuple, /*PyObject *consts,*/ empty_tuple, /*PyObject *names,*/ empty_tuple, /*PyObject *varnames,*/ @@ -59,7 +59,7 @@ py_srcfile, /*PyObject *filename,*/ py_funcname, /*PyObject *name,*/ lineno, /*int firstlineno,*/ - empty_string /*PyObject *lnotab*/ + empty_bytes /*PyObject *lnotab*/ ); if (!py_code) goto bad; py_frame = PyFrame_New( @@ -76,7 +76,7 @@ Py_XDECREF(py_srcfile); Py_XDECREF(py_funcname); Py_XDECREF(empty_tuple); - Py_XDECREF(empty_string); + Py_XDECREF(empty_bytes); Py_XDECREF(py_code); Py_XDECREF(py_frame); } Index: Modules/pyexpat.c =================================================================== --- Modules/pyexpat.c (revision 57473) +++ Modules/pyexpat.c (working copy) @@ -224,20 +224,24 @@ getcode(enum HandlerTypes slot, char* func_name, int lineno) { PyObject *code = NULL; + PyObject *lnotab = NULL; PyObject *name = NULL; PyObject *nulltuple = NULL; PyObject *filename = NULL; if (handler_info[slot].tb_code == NULL) { - code = PyString_FromString(""); + code = PyBytes_FromStringAndSize(NULL, 0); if (code == NULL) goto failed; - name = PyString_FromString(func_name); + name = PyUnicode_FromString(func_name); if (name == NULL) goto failed; nulltuple = PyTuple_New(0); if (nulltuple == NULL) goto failed; + lnotab = PyBytes_FromStringAndSize(NULL, 0); + if (lnotab == NULL) + goto failed; filename = PyString_FromString(__FILE__); handler_info[slot].tb_code = PyCode_New(0, /* argcount */ @@ -256,11 +260,12 @@ filename, /* filename */ name, /* name */ lineno, /* firstlineno */ - code /* lnotab */ + lnotab /* lnotab */ ); if (handler_info[slot].tb_code == NULL) goto failed; Py_DECREF(code); + Py_DECREF(lnotab); Py_DECREF(nulltuple); Py_DECREF(filename); Py_DECREF(name); @@ -268,6 +273,9 @@ return handler_info[slot].tb_code; failed: Py_XDECREF(code); + Py_XDECREF(lnotab); + Py_XDECREF(nulltuple); + Py_XDECREF(filename); Py_XDECREF(name); return NULL; }