# HG changeset patch # Parent 96d0cffe12e72f3965509c890703f4afef26fa78 diff -r 96d0cffe12e7 Lib/pickle.py --- a/Lib/pickle.py Mon Dec 12 13:39:05 2011 +0000 +++ b/Lib/pickle.py Mon Dec 12 21:36:56 2011 +0000 @@ -33,6 +33,7 @@ import io import codecs import _compat_pickle +import _codecs __all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler", "Unpickler", "dump", "dumps", "load", "loads"] @@ -485,7 +486,11 @@ def save_bytes(self, obj, pack=struct.pack): if self.proto < 3: - self.save_reduce(bytes, (list(obj),), obj=obj) + if len(obj) == 0: + self.save_reduce(bytes, (), obj=obj) + else: + self.save_reduce(_codecs.encode, + (obj.decode('latin1'), 'latin1'), obj=obj) return n = len(obj) if n < 256: diff -r 96d0cffe12e7 Lib/pickletools.py --- a/Lib/pickletools.py Mon Dec 12 13:39:05 2011 +0000 +++ b/Lib/pickletools.py Mon Dec 12 21:36:56 2011 +0000 @@ -2080,27 +2080,22 @@ 29: ( MARK 30: d DICT (MARK at 29) 31: p PUT 2 - 34: c GLOBAL '__builtin__ bytes' - 53: p PUT 3 - 56: ( MARK - 57: ( MARK - 58: l LIST (MARK at 57) + 34: c GLOBAL '_codecs encode' + 50: p PUT 3 + 53: ( MARK + 54: V UNICODE 'abc' 59: p PUT 4 - 62: L LONG 97 - 67: a APPEND - 68: L LONG 98 - 73: a APPEND - 74: L LONG 99 - 79: a APPEND - 80: t TUPLE (MARK at 56) - 81: p PUT 5 - 84: R REDUCE - 85: p PUT 6 - 88: V UNICODE 'def' - 93: p PUT 7 - 96: s SETITEM - 97: a APPEND - 98: . STOP + 62: V UNICODE 'latin1' + 70: p PUT 5 + 73: t TUPLE (MARK at 53) + 74: p PUT 6 + 77: R REDUCE + 78: p PUT 7 + 81: V UNICODE 'def' + 86: p PUT 8 + 89: s SETITEM + 90: a APPEND + 91: . STOP highest protocol among opcodes = 0 Try again with a "binary" pickle. @@ -2119,25 +2114,22 @@ 14: q BINPUT 1 16: } EMPTY_DICT 17: q BINPUT 2 - 19: c GLOBAL '__builtin__ bytes' - 38: q BINPUT 3 - 40: ( MARK - 41: ] EMPTY_LIST - 42: q BINPUT 4 - 44: ( MARK - 45: K BININT1 97 - 47: K BININT1 98 - 49: K BININT1 99 - 51: e APPENDS (MARK at 44) - 52: t TUPLE (MARK at 40) - 53: q BINPUT 5 - 55: R REDUCE - 56: q BINPUT 6 - 58: X BINUNICODE 'def' - 66: q BINPUT 7 - 68: s SETITEM - 69: e APPENDS (MARK at 3) - 70: . STOP + 19: c GLOBAL '_codecs encode' + 35: q BINPUT 3 + 37: ( MARK + 38: X BINUNICODE 'abc' + 46: q BINPUT 4 + 48: X BINUNICODE 'latin1' + 59: q BINPUT 5 + 61: t TUPLE (MARK at 37) + 62: q BINPUT 6 + 64: R REDUCE + 65: q BINPUT 7 + 67: X BINUNICODE 'def' + 75: q BINPUT 8 + 77: s SETITEM + 78: e APPENDS (MARK at 3) + 79: . STOP highest protocol among opcodes = 1 Exercise the INST/OBJ/BUILD family. diff -r 96d0cffe12e7 Modules/_pickle.c --- a/Modules/_pickle.c Mon Dec 12 13:39:05 2011 +0000 +++ b/Modules/_pickle.c Mon Dec 12 21:36:56 2011 +0000 @@ -136,6 +136,10 @@ /* For looking up name pairs in copyreg._extension_registry. */ static PyObject *two_tuple = NULL; +/* For latin_1 codec used by save_bytes() */ +static PyObject *codecs_encode = NULL; +static PyObject *codecs_decode = NULL; + static int stack_underflow(void) { @@ -1709,25 +1713,42 @@ /* Older pickle protocols do not have an opcode for pickling bytes objects. Therefore, we need to fake the copy protocol (i.e., the __reduce__ method) to permit bytes object unpickling. */ + static PyObject *latin1 = NULL; PyObject *reduce_value = NULL; - PyObject *bytelist = NULL; + PyObject *unicode = NULL; int status; - bytelist = PySequence_List(obj); - if (bytelist == NULL) - return -1; - - reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type, - bytelist); - if (reduce_value == NULL) { - Py_DECREF(bytelist); + if (latin1 == NULL) { + latin1 = PyUnicode_InternFromString("latin1"); + if (latin1 == NULL) + return -1; + } + + if (!PyBytes_CheckExact(obj)) { + PyErr_SetString(PyExc_TypeError, "save_bytes() expected a " + "bytes object"); return -1; } + if (PyBytes_GET_SIZE(obj) == 0) { + reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type); + } + else { + unicode = PyObject_CallFunctionObjArgs(codecs_decode, obj, + latin1, NULL); + if (unicode == NULL) + return -1; + reduce_value = Py_BuildValue("(O(OO))", + codecs_encode, unicode, latin1); + Py_XDECREF(unicode); + } + + if (reduce_value == NULL) + return -1; + /* save_reduce() will memoize the object automatically. */ status = save_reduce(self, reduce_value, obj); Py_DECREF(reduce_value); - Py_DECREF(bytelist); return status; } else { @@ -6236,6 +6257,7 @@ { PyObject *copyreg = NULL; PyObject *compat_pickle = NULL; + PyObject *_codecs = NULL; /* XXX: We should ensure that the types of the dictionaries imported are exactly PyDict objects. Otherwise, it is possible to crash the pickle @@ -6317,6 +6339,17 @@ */ PyObject_GC_UnTrack(two_tuple); + _codecs = PyImport_ImportModule("_codecs"); + if (!_codecs) + goto error; + codecs_encode = PyObject_GetAttrString(_codecs, "encode"); + if (!codecs_encode) + goto error; + codecs_decode = PyObject_GetAttrString(_codecs, "decode"); + if (!codecs_decode) + goto error; + Py_CLEAR(_codecs); + return 0; error: @@ -6332,6 +6365,9 @@ Py_CLEAR(import_mapping_3to2); Py_CLEAR(empty_tuple); Py_CLEAR(two_tuple); + Py_CLEAR(_codecs); + Py_CLEAR(codecs_encode); + Py_CLEAR(codecs_decode); return -1; }