Index: Doc/library/pickle.rst =================================================================== --- Doc/library/pickle.rst (revision 73195) +++ Doc/library/pickle.rst (working copy) @@ -141,7 +141,7 @@ The :mod:`pickle` module provides the following functions to make the pickling process more convenient: -.. function:: dump(obj, file[, protocol]) +.. function:: dump(obj, file[, protocol, \*, fix_imports=True]) Write a pickled representation of *obj* to the open file object *file*. This is equivalent to ``Pickler(file, protocol).dump(obj)``. @@ -158,8 +158,12 @@ argument. It can thus be a file object opened for binary writing, a io.BytesIO instance, or any other custom object that meets this interface. -.. function:: dumps(obj[, protocol]) + If *fix_imports* is True and *protocol* is less than 3, pickle will try to + map the new Python 3.x names to the old module names used in Python 2.x, + so that the pickle data stream is readable with Python 2.x. +.. function:: dumps(obj[, protocol, \*, fix_imports=True]) + Return the pickled representation of the object as a :class:`bytes` object, instead of writing it to a file. @@ -171,8 +175,12 @@ supported. The higher the protocol used, the more recent the version of Python needed to read the pickle produced. -.. function:: load(file, [\*, encoding="ASCII", errors="strict"]) + If *fix_imports* is True and *protocol* is less than 3, pickle will try to + map the new Python 3.x names to the old module names used in Python 2.x, + so that the pickle data stream is readable with Python 2.x. +.. function:: load(file, [\*, fix_imports=True, encoding="ASCII", errors="strict"]) + Read a pickled object representation from the open file object *file* and return the reconstituted object hierarchy specified therein. This is equivalent to ``Unpickler(file).load()``. @@ -187,11 +195,14 @@ for reading, a BytesIO object, or any other custom object that meets this interface. 
- Optional keyword arguments are encoding and errors, which are used to decode - 8-bit string instances pickled by Python 2.x. These default to 'ASCII' and - 'strict', respectively. + Optional keyword arguments are *fix_imports*, *encoding* and *errors*, + which are used to control compatibility support for pickle streams generated + by Python 2.x. If *fix_imports* is True, pickle will try to map the old + Python 2.x names to the new names used in Python 3.x. The *encoding* and + *errors* tell pickle how to decode 8-bit string instances pickled by Python + 2.x; these default to 'ASCII' and 'strict', respectively. -.. function:: loads(bytes_object, [\*, encoding="ASCII", errors="strict"]) +.. function:: loads(bytes_object, [\*, fix_imports=True, encoding="ASCII", errors="strict"]) Read a pickled object hierarchy from a :class:`bytes` object and return the reconstituted object hierarchy specified therein @@ -200,9 +211,12 @@ argument is needed. Bytes past the pickled object's representation are ignored. - Optional keyword arguments are encoding and errors, which are used to decode - 8-bit string instances pickled by Python 2.x. These default to 'ASCII' and - 'strict', respectively. + Optional keyword arguments are *fix_imports*, *encoding* and *errors*, + which are used to control compatibility support for pickle streams generated + by Python 2.x. If *fix_imports* is True, pickle will try to map the old + Python 2.x names to the new names used in Python 3.x. The *encoding* and + *errors* tell pickle how to decode 8-bit string instances pickled by Python + 2.x; these default to 'ASCII' and 'strict', respectively. The :mod:`pickle` module defines three exceptions: @@ -233,7 +247,7 @@ The :mod:`pickle` module exports two classes, :class:`Pickler` and :class:`Unpickler`: -.. class:: Pickler(file[, protocol]) +.. class:: Pickler(file[, protocol, \*, fix_imports=True]) This takes a binary file for writing a pickle data stream. @@ -249,6 +263,10 @@ argument. 
It can thus be a file object opened for binary writing, a io.BytesIO instance, or any other custom object that meets this interface. + If *fix_imports* is True and *protocol* is less than 3, pickle will try to + map the new Python 3.x names to the old module names used in Python 2.x, + so that the pickle data stream is readable with Python 2.x. + .. method:: dump(obj) Write a pickled representation of *obj* to the open file object given in @@ -277,7 +295,7 @@ Use :func:`pickletools.optimize` if you need more compact pickles. -.. class:: Unpickler(file, [\*, encoding="ASCII", errors="strict"]) +.. class:: Unpickler(file, [\*, fix_imports=True, encoding="ASCII", errors="strict"]) This takes a binary file for reading a pickle data stream. @@ -290,9 +308,12 @@ for reading, a BytesIO object, or any other custom object that meets this interface. - Optional keyword arguments are encoding and errors, which are used to decode - 8-bit string instances pickled by Python 2.x. These default to 'ASCII' and - 'strict', respectively. + Optional keyword arguments are *fix_imports*, *encoding* and *errors*, + which are used to control compatibility support for pickle streams generated + by Python 2.x. If *fix_imports* is True, pickle will try to map the old + Python 2.x names to the new names used in Python 3.x. The *encoding* and + *errors* tell pickle how to decode 8-bit string instances pickled by Python + 2.x; these default to 'ASCII' and 'strict', respectively. .. 
method:: load() Index: Lib/pickletools.py =================================================================== --- Lib/pickletools.py (revision 73195) +++ Lib/pickletools.py (working copy) @@ -2066,27 +2066,27 @@ 29: ( MARK 30: d DICT (MARK at 29) 31: p PUT 2 - 34: c GLOBAL 'builtins bytes' - 50: p PUT 3 - 53: ( MARK - 54: ( MARK - 55: l LIST (MARK at 54) - 56: p PUT 4 - 59: L LONG 97 - 64: a APPEND - 65: L LONG 98 - 70: a APPEND - 71: L LONG 99 - 76: a APPEND - 77: t TUPLE (MARK at 53) - 78: p PUT 5 - 81: R REDUCE - 82: p PUT 6 - 85: V UNICODE 'def' - 90: p PUT 7 - 93: s SETITEM - 94: a APPEND - 95: . STOP + 34: c GLOBAL '__builtin__ bytes' + 53: p PUT 3 + 56: ( MARK + 57: ( MARK + 58: l LIST (MARK at 57) + 59: p PUT 4 + 62: L LONG 97 + 67: a APPEND + 68: L LONG 98 + 73: a APPEND + 74: L LONG 99 + 79: a APPEND + 80: t TUPLE (MARK at 56) + 81: p PUT 5 + 84: R REDUCE + 85: p PUT 6 + 88: V UNICODE 'def' + 93: p PUT 7 + 96: s SETITEM + 97: a APPEND + 98: . STOP highest protocol among opcodes = 0 Try again with a "binary" pickle. @@ -2105,25 +2105,25 @@ 14: q BINPUT 1 16: } EMPTY_DICT 17: q BINPUT 2 - 19: c GLOBAL 'builtins bytes' - 35: q BINPUT 3 - 37: ( MARK - 38: ] EMPTY_LIST - 39: q BINPUT 4 - 41: ( MARK - 42: K BININT1 97 - 44: K BININT1 98 - 46: K BININT1 99 - 48: e APPENDS (MARK at 41) - 49: t TUPLE (MARK at 37) - 50: q BINPUT 5 - 52: R REDUCE - 53: q BINPUT 6 - 55: X BINUNICODE 'def' - 63: q BINPUT 7 - 65: s SETITEM - 66: e APPENDS (MARK at 3) - 67: . STOP + 19: c GLOBAL '__builtin__ bytes' + 38: q BINPUT 3 + 40: ( MARK + 41: ] EMPTY_LIST + 42: q BINPUT 4 + 44: ( MARK + 45: K BININT1 97 + 47: K BININT1 98 + 49: K BININT1 99 + 51: e APPENDS (MARK at 44) + 52: t TUPLE (MARK at 40) + 53: q BINPUT 5 + 55: R REDUCE + 56: q BINPUT 6 + 58: X BINUNICODE 'def' + 66: q BINPUT 7 + 68: s SETITEM + 69: e APPENDS (MARK at 3) + 70: . STOP highest protocol among opcodes = 1 Exercise the INST/OBJ/BUILD family. 
@@ -2141,58 +2141,58 @@ 0: ( MARK 1: l LIST (MARK at 0) 2: p PUT 0 - 5: c GLOBAL 'copyreg _reconstructor' - 29: p PUT 1 - 32: ( MARK - 33: c GLOBAL 'pickletools _Example' - 55: p PUT 2 - 58: c GLOBAL 'builtins object' - 75: p PUT 3 - 78: N NONE - 79: t TUPLE (MARK at 32) - 80: p PUT 4 - 83: R REDUCE - 84: p PUT 5 - 87: ( MARK - 88: d DICT (MARK at 87) - 89: p PUT 6 - 92: V UNICODE 'value' - 99: p PUT 7 - 102: L LONG 42 - 107: s SETITEM - 108: b BUILD - 109: a APPEND - 110: g GET 5 + 5: c GLOBAL 'copy_reg _reconstructor' + 30: p PUT 1 + 33: ( MARK + 34: c GLOBAL 'pickletools _Example' + 56: p PUT 2 + 59: c GLOBAL '__builtin__ object' + 79: p PUT 3 + 82: N NONE + 83: t TUPLE (MARK at 33) + 84: p PUT 4 + 87: R REDUCE + 88: p PUT 5 + 91: ( MARK + 92: d DICT (MARK at 91) + 93: p PUT 6 + 96: V UNICODE 'value' + 103: p PUT 7 + 106: L LONG 42 + 111: s SETITEM + 112: b BUILD 113: a APPEND - 114: . STOP + 114: g GET 5 + 117: a APPEND + 118: . STOP highest protocol among opcodes = 0 >>> dis(pickle.dumps(x, 1)) 0: ] EMPTY_LIST 1: q BINPUT 0 3: ( MARK - 4: c GLOBAL 'copyreg _reconstructor' - 28: q BINPUT 1 - 30: ( MARK - 31: c GLOBAL 'pickletools _Example' - 53: q BINPUT 2 - 55: c GLOBAL 'builtins object' - 72: q BINPUT 3 - 74: N NONE - 75: t TUPLE (MARK at 30) - 76: q BINPUT 4 - 78: R REDUCE - 79: q BINPUT 5 - 81: } EMPTY_DICT - 82: q BINPUT 6 - 84: X BINUNICODE 'value' - 94: q BINPUT 7 - 96: K BININT1 42 - 98: s SETITEM - 99: b BUILD - 100: h BINGET 5 - 102: e APPENDS (MARK at 3) - 103: . STOP + 4: c GLOBAL 'copy_reg _reconstructor' + 29: q BINPUT 1 + 31: ( MARK + 32: c GLOBAL 'pickletools _Example' + 54: q BINPUT 2 + 56: c GLOBAL '__builtin__ object' + 76: q BINPUT 3 + 78: N NONE + 79: t TUPLE (MARK at 31) + 80: q BINPUT 4 + 82: R REDUCE + 83: q BINPUT 5 + 85: } EMPTY_DICT + 86: q BINPUT 6 + 88: X BINUNICODE 'value' + 98: q BINPUT 7 + 100: K BININT1 42 + 102: s SETITEM + 103: b BUILD + 104: h BINGET 5 + 106: e APPENDS (MARK at 3) + 107: . 
STOP highest protocol among opcodes = 1 Try "the canonical" recursive-object test. Index: Lib/test/pickletester.py =================================================================== --- Lib/test/pickletester.py (revision 73195) +++ Lib/test/pickletester.py (working copy) @@ -3,6 +3,7 @@ import pickle import pickletools import copyreg +from http.cookies import SimpleCookie from test.support import TestFailed, TESTFN, run_with_locale @@ -342,6 +343,24 @@ highest protocol among opcodes = 2 """ +# set([1,2]) pickled from 2.x with protocol 2 +DATA3 = b'\x80\x02c__builtin__\nset\nq\x00]q\x01(K\x01K\x02e\x85q\x02Rq\x03.' + +# xrange(5) pickled from 2.x with protocol 2 +DATA4 = b'\x80\x02c__builtin__\nxrange\nq\x00K\x00K\x05K\x01\x87q\x01Rq\x02.' + +# a SimpleCookie() object pickled from 2.x with protocol 2 +DATA5 = (b'\x80\x02cCookie\nSimpleCookie\nq\x00)\x81q\x01U\x03key' + b'q\x02cCookie\nMorsel\nq\x03)\x81q\x04(U\x07commentq\x05U' + b'\x00q\x06U\x06domainq\x07h\x06U\x06secureq\x08h\x06U\x07' + b'expiresq\th\x06U\x07max-ageq\nh\x06U\x07versionq\x0bh\x06U' + b'\x04pathq\x0ch\x06U\x08httponlyq\rh\x06u}q\x0e(U\x0b' + b'coded_valueq\x0fU\x05valueq\x10h\x10h\x10h\x02h\x02ubs}q\x11b.') + +# set([3]) pickled from 2.x with protocol 2 +DATA6 = b'\x80\x02c__builtin__\nset\nq\x00]q\x01K\x03a\x85q\x02Rq\x03.' + + def create_data(): c = C() c.foo = 1 @@ -956,6 +975,29 @@ for x_key, y_key in zip(x_keys, y_keys): self.assertIs(x_key, y_key) + def test_unpickle_from_2x(self): + # Unpickle non-trivial data from Python 2.x. 
+ loaded = self.loads(DATA3) + self.assertEqual(loaded, set([1, 2])) + loaded = self.loads(DATA4) + self.assertEqual(type(loaded), type(range(0))) + self.assertEqual(list(loaded), list(range(5))) + loaded = self.loads(DATA5) + self.assertEqual(type(loaded), SimpleCookie) + self.assertEqual(list(loaded.keys()), ["key"]) + self.assertEqual(loaded["key"].value, "Set-Cookie: key=value") + + def test_pickle_to_2x(self): + # Pickle non-trivial data with protocol 2, expecting that it yields + # the same result as Python 2.x did. + # NOTE: this test is a bit too strong since we can produce different + # bytecode that 2.x will still understand. + dumped = self.dumps(range(5), 2) + self.assertEqual(dumped, DATA4) + dumped = self.dumps(set([3]), 2) + self.assertEqual(dumped, DATA6) + + # Test classes for reduce_ex class REX_one(object): Index: Lib/test/test_pickletools.py =================================================================== --- Lib/test/test_pickletools.py (revision 73195) +++ Lib/test/test_pickletools.py (working copy) @@ -12,7 +12,10 @@ def loads(self, buf): return pickle.loads(buf) + # Test relies on precise output of dumps() + test_pickle_to_2x = None + def test_main(): support.run_unittest(OptimizedPickleTests) support.run_doctest(pickletools) Index: Lib/pickle.py =================================================================== --- Lib/pickle.py (revision 73195) +++ Lib/pickle.py (working copy) @@ -34,6 +34,7 @@ import re import io import codecs +import _compat_pickle __all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler", "Unpickler", "dump", "dumps", "load", "loads"] @@ -171,12 +172,11 @@ __all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)]) - # Pickling machinery class _Pickler: - def __init__(self, file, protocol=None): + def __init__(self, file, protocol=None, *, fix_imports=True): """This takes a binary file for writing a pickle data stream. 
The optional protocol argument tells the pickler to use the @@ -193,6 +193,10 @@ bytes argument. It can thus be a file object opened for binary writing, a io.BytesIO instance, or any other custom object that meets this interface. + + If fix_imports is True and protocol is less than 3, pickle will try to + map the new Python 3.x names to the old module names used in Python + 2.x, so that the pickle data stream is readable with Python 2.x. """ if protocol is None: protocol = DEFAULT_PROTOCOL @@ -208,6 +212,7 @@ self.proto = int(protocol) self.bin = protocol >= 1 self.fast = 0 + self.fix_imports = fix_imports and protocol < 3 def clear_memo(self): """Clears the pickler's "memo". @@ -698,6 +703,11 @@ write(GLOBAL + bytes(module, "utf-8") + b'\n' + bytes(name, "utf-8") + b'\n') else: + if self.fix_imports: + if (module, name) in _compat_pickle.REVERSE_NAME_MAPPING: + module, name = _compat_pickle.REVERSE_NAME_MAPPING[(module, name)] + if module in _compat_pickle.REVERSE_IMPORT_MAPPING: + module = _compat_pickle.REVERSE_IMPORT_MAPPING[module] try: write(GLOBAL + bytes(module, "ascii") + b'\n' + bytes(name, "ascii") + b'\n') @@ -766,7 +776,8 @@ class _Unpickler: - def __init__(self, file, *, encoding="ASCII", errors="strict"): + def __init__(self, file, *, fix_imports=True, + encoding="ASCII", errors="strict"): """This takes a binary file for reading a pickle data stream. The protocol version of the pickle is detected automatically, so no @@ -779,15 +790,21 @@ reading, a BytesIO object, or any other custom object that meets this interface. - Optional keyword arguments are encoding and errors, which are - used to decode 8-bit string instances pickled by Python 2.x. - These default to 'ASCII' and 'strict', respectively. + Optional keyword arguments are *fix_imports*, *encoding* and *errors*, + which are used to control compatiblity support for pickle stream + generated by Python 2.x. 
If *fix_imports* is True, pickle will try to + map the old Python 2.x names to the new names used in Python 3.x. The + *encoding* and *errors* tell pickle how to decode 8-bit string + instances pickled by Python 2.x; these default to 'ASCII' and + 'strict', respectively. """ self.readline = file.readline self.read = file.read self.memo = {} self.encoding = encoding self.errors = errors + self.proto = 0 + self.fix_imports = fix_imports def load(self): """Read a pickled object representation from the open file. @@ -838,6 +855,7 @@ proto = ord(self.read(1)) if not 0 <= proto <= HIGHEST_PROTOCOL: raise ValueError("unsupported pickle protocol: %d" % proto) + self.proto = proto dispatch[PROTO[0]] = load_proto def load_persid(self): @@ -1088,7 +1106,12 @@ self.append(obj) def find_class(self, module, name): - # Subclasses may override this + # Subclasses may override this. + if self.proto < 3 and self.fix_imports: + if (module, name) in _compat_pickle.NAME_MAPPING: + module, name = _compat_pickle.NAME_MAPPING[(module, name)] + if module in _compat_pickle.IMPORT_MAPPING: + module = _compat_pickle.IMPORT_MAPPING[module] __import__(module, level=0) mod = sys.modules[module] klass = getattr(mod, name) @@ -1327,27 +1350,28 @@ # Shorthands -def dump(obj, file, protocol=None): - Pickler(file, protocol).dump(obj) +def dump(obj, file, protocol=None, *, fix_imports=True): + Pickler(file, protocol, fix_imports=fix_imports).dump(obj) -def dumps(obj, protocol=None): +def dumps(obj, protocol=None, *, fix_imports=True): f = io.BytesIO() - Pickler(f, protocol).dump(obj) + Pickler(f, protocol, fix_imports=fix_imports).dump(obj) res = f.getvalue() assert isinstance(res, bytes_types) return res -def load(file, *, encoding="ASCII", errors="strict"): - return Unpickler(file, encoding=encoding, errors=errors).load() +def load(file, *, fix_imports=True, encoding="ASCII", errors="strict"): + return Unpickler(file, fix_imports=fix_imports, + encoding=encoding, errors=errors).load() -def loads(s, 
*, encoding="ASCII", errors="strict"): +def loads(s, *, fix_imports=True, encoding="ASCII", errors="strict"): if isinstance(s, str): raise TypeError("Can't load pickle from unicode string") file = io.BytesIO(s) - return Unpickler(file, encoding=encoding, errors=errors).load() + return Unpickler(file, fix_imports=fix_imports, + encoding=encoding, errors=errors).load() # Doctest - def _test(): import doctest return doctest.testmod() Index: Modules/_pickle.c =================================================================== --- Modules/_pickle.c (revision 73195) +++ Modules/_pickle.c (working copy) @@ -103,25 +103,33 @@ /* Exception classes for pickle. These should override the ones defined in pickle.py, when the C-optimized Pickler and Unpickler are used. */ -static PyObject *PickleError; -static PyObject *PicklingError; -static PyObject *UnpicklingError; +static PyObject *PickleError = NULL; +static PyObject *PicklingError = NULL; +static PyObject *UnpicklingError = NULL; /* copyreg.dispatch_table, {type_object: pickling_function} */ -static PyObject *dispatch_table; +static PyObject *dispatch_table = NULL; /* For EXT[124] opcodes. 
*/ /* copyreg._extension_registry, {(module_name, function_name): code} */ -static PyObject *extension_registry; +static PyObject *extension_registry = NULL; /* copyreg._inverted_registry, {code: (module_name, function_name)} */ -static PyObject *inverted_registry; +static PyObject *inverted_registry = NULL; /* copyreg._extension_cache, {code: object} */ -static PyObject *extension_cache; +static PyObject *extension_cache = NULL; +/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */ +static PyObject *name_mapping_2to3 = NULL; +/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */ +static PyObject *import_mapping_2to3 = NULL; +/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */ +static PyObject *name_mapping_3to2 = NULL; +static PyObject *import_mapping_3to2 = NULL; + /* XXX: Are these really nescessary? */ /* As the name says, an empty tuple. */ -static PyObject *empty_tuple; +static PyObject *empty_tuple = NULL; /* For looking up name pairs in copyreg._extension_registry. */ -static PyObject *two_tuple; +static PyObject *two_tuple = NULL; static int stack_underflow(void) @@ -315,6 +323,8 @@ should not be used if with self-referential objects. */ int fast_nesting; + int fix_imports; /* Indicate whether Pickler should fix + the name of globals for Python 2.x. */ PyObject *fast_memo; } PicklerObject; @@ -340,6 +350,9 @@ objects. */ Py_ssize_t num_marks; /* Number of marks in the mark stack. */ Py_ssize_t marks_size; /* Current allocated size of the mark stack. */ + int proto; /* Protocol of the pickle loaded. */ + int fix_imports; /* Indicate whether Unpickler should fix + the name of globals pickled by Python 2.x. */ } UnpicklerObject; /* Forward declarations */ @@ -1972,6 +1985,63 @@ unicode_encoder = PyUnicode_AsASCIIString; } + /* For protocol < 3 and if the user didn't request against doing so, + we convert module names to the old 2.x module names. 
*/ + if (self->fix_imports) { + PyObject *key; + PyObject *item; + + key = PyTuple_Pack(2, module_name, global_name); + if (key == NULL) + goto error; + item = PyDict_GetItemWithError(name_mapping_3to2, key); + Py_DECREF(key); + if (item) { + if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.REVERSE_NAME_MAPPING values " + "should be 2-tuples, not %.200s", + Py_TYPE(item)->tp_name); + goto error; + } + Py_CLEAR(module_name); + Py_CLEAR(global_name); + module_name = PyTuple_GET_ITEM(item, 0); + global_name = PyTuple_GET_ITEM(item, 1); + if (!PyUnicode_Check(module_name) || + !PyUnicode_Check(global_name)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.REVERSE_NAME_MAPPING values " + "should be pairs of str, not (%.200s, %.200s)", + Py_TYPE(module_name)->tp_name, + Py_TYPE(global_name)->tp_name); + goto error; + } + Py_INCREF(module_name); + Py_INCREF(global_name); + } + else if (PyErr_Occurred()) { + goto error; + } + + item = PyDict_GetItemWithError(import_mapping_3to2, module_name); + if (item) { + if (!PyUnicode_Check(item)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.REVERSE_IMPORT_MAPPING values " + "should be strings, not %.200s", + Py_TYPE(item)->tp_name); + goto error; + } + Py_CLEAR(module_name); + module_name = item; + Py_INCREF(module_name); + } + else if (PyErr_Occurred()) { + goto error; + } + } + /* Save the name of the module. */ encoded = unicode_encoder(module_name); if (encoded == NULL) { @@ -2608,18 +2678,23 @@ "The file argument must have a write() method that accepts a single\n" "bytes argument. 
It can thus be a file object opened for binary\n" "writing, a io.BytesIO instance, or any other custom object that\n" -"meets this interface.\n"); +"meets this interface.\n" +"\n" +"If fix_imports is True and protocol is less than 3, pickle will try to\n" +"map the new Python 3.x names to the old module names used in Python\n" +"2.x, so that the pickle data stream is readable with Python 2.x.\n"); static int Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds) { - static char *kwlist[] = {"file", "protocol", 0}; + static char *kwlist[] = {"file", "protocol", "fix_imports", 0}; PyObject *file; PyObject *proto_obj = NULL; long proto = 0; + int fix_imports = 1; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:Pickler", - kwlist, &file, &proto_obj)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oi:Pickler", + kwlist, &file, &proto_obj, &fix_imports)) return -1; /* In case of multiple __init__() calls, clear previous content. */ @@ -2628,8 +2703,11 @@ if (proto_obj == NULL || proto_obj == Py_None) proto = DEFAULT_PROTOCOL; - else + else { proto = PyLong_AsLong(proto_obj); + if (proto == -1 && PyErr_Occurred()) + return -1; + } if (proto < 0) proto = HIGHEST_PROTOCOL; @@ -2639,12 +2717,13 @@ return -1; } - self->proto = proto; - self->bin = proto > 0; - self->arg = NULL; - self->fast = 0; - self->fast_nesting = 0; - self->fast_memo = NULL; + self->proto = proto; + self->bin = proto > 0; + self->arg = NULL; + self->fast = 0; + self->fast_nesting = 0; + self->fast_memo = NULL; + self->fix_imports = fix_imports && proto < 3; if (!PyObject_HasAttrString(file, "write")) { PyErr_SetString(PyExc_TypeError, @@ -4220,8 +4299,10 @@ return -1; i = (unsigned char)s[0]; - if (i <= HIGHEST_PROTOCOL) + if (i <= HIGHEST_PROTOCOL) { + self->proto = i; return 0; + } PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i); return -1; @@ -4383,12 +4464,67 @@ &module_name, &global_name)) return NULL; + /* Try to map the old names used in Python 2.x to the new 
ones used in + Python 3.x. We do this only with old pickle protocols and when the + user has not disabled the feature. */ + if (self->proto < 3 && self->fix_imports) { + PyObject *key; + PyObject *item; + + /* Check if the global (i.e., a function or a class) was renamed + or moved to another module. */ + key = PyTuple_Pack(2, module_name, global_name); + if (key == NULL) + return NULL; + item = PyDict_GetItemWithError(name_mapping_2to3, key); + Py_DECREF(key); + if (item) { + if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.NAME_MAPPING values should be " + "2-tuples, not %.200s", Py_TYPE(item)->tp_name); + return NULL; + } + module_name = PyTuple_GET_ITEM(item, 0); + global_name = PyTuple_GET_ITEM(item, 1); + if (!PyUnicode_Check(module_name) || + !PyUnicode_Check(global_name)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.NAME_MAPPING values should be " + "pairs of str, not (%.200s, %.200s)", + Py_TYPE(module_name)->tp_name, + Py_TYPE(global_name)->tp_name); + return NULL; + } + } + else if (PyErr_Occurred()) { + return NULL; + } + + /* Check if the module was renamed. 
*/ + item = PyDict_GetItemWithError(import_mapping_2to3, module_name); + if (item) { + if (!PyUnicode_Check(item)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.IMPORT_MAPPING values should be " + "strings, not %.200s", Py_TYPE(item)->tp_name); + return NULL; + } + module_name = item; + } + else if (PyErr_Occurred()) { + return NULL; + } + } + modules_dict = PySys_GetObject("modules"); if (modules_dict == NULL) return NULL; - module = PyDict_GetItem(modules_dict, module_name); + module = PyDict_GetItemWithError(modules_dict, module_name); if (module == NULL) { + if (PyErr_Occurred()) + return NULL; module = PyImport_Import(module_name); if (module == NULL) return NULL; @@ -4477,15 +4613,20 @@ "reading, a BytesIO object, or any other custom object that\n" "meets this interface.\n" "\n" -"Optional keyword arguments are encoding and errors, which are\n" -"used to decode 8-bit string instances pickled by Python 2.x.\n" -"These default to 'ASCII' and 'strict', respectively.\n"); +"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n" +"which are used to control compatiblity support for pickle stream\n" +"generated by Python 2.x. If *fix_imports* is True, pickle will try to\n" +"map the old Python 2.x names to the new names used in Python 3.x. The\n" +"*encoding* and *errors* tell pickle how to decode 8-bit string\n" +"instances pickled by Python 2.x; these default to 'ASCII' and\n" +"'strict', respectively.\n"); static int Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds) { - static char *kwlist[] = {"file", "encoding", "errors", 0}; + static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0}; PyObject *file; + int fix_imports = 1; char *encoding = NULL; char *errors = NULL; @@ -4504,8 +4645,8 @@ extra careful in the other Unpickler methods, since a subclass could forget to call Unpickler.__init__() thus breaking our internal invariants. 
*/ - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|ss:Unpickler", kwlist, - &file, &encoding, &errors)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|iss:Unpickler", kwlist, + &file, &fix_imports, &encoding, &errors)) return -1; /* In case of multiple __init__() calls, clear previous content. */ @@ -4549,6 +4690,8 @@ self->last_string = NULL; self->arg = NULL; + self->proto = 0; + self->fix_imports = fix_imports; return 0; } @@ -4672,40 +4815,85 @@ }; static int -init_stuff(void) +initmodule(void) { - PyObject *copyreg; + PyObject *copyreg = NULL; + PyObject *compat_pickle = NULL; + /* XXX: We should ensure that the types of the dictionaries imported are + exactly PyDict objects. Otherwise, it is possible to crash the pickle + since we use the PyDict API directly to access these dictionaries. */ + copyreg = PyImport_ImportModule("copyreg"); if (!copyreg) - return -1; - + goto error; dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table"); if (!dispatch_table) goto error; - extension_registry = \ PyObject_GetAttrString(copyreg, "_extension_registry"); if (!extension_registry) goto error; - inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry"); if (!inverted_registry) goto error; - extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache"); if (!extension_cache) goto error; + Py_CLEAR(copyreg); - Py_DECREF(copyreg); + /* Load the 2.x -> 3.x stdlib module mapping tables */ + compat_pickle = PyImport_ImportModule("_compat_pickle"); + if (!compat_pickle) + goto error; + name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING"); + if (!name_mapping_2to3) + goto error; + if (!PyDict_CheckExact(name_mapping_2to3)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.NAME_MAPPING should be a dict, not %.200s", + Py_TYPE(name_mapping_2to3)->tp_name); + goto error; + } + import_mapping_2to3 = PyObject_GetAttrString(compat_pickle, + "IMPORT_MAPPING"); + if (!import_mapping_2to3) + goto error; + if 
(!PyDict_CheckExact(import_mapping_2to3)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.IMPORT_MAPPING should be a dict, " + "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name); + goto error; + } + /* ... and the 3.x -> 2.x mapping tables */ + name_mapping_3to2 = PyObject_GetAttrString(compat_pickle, + "REVERSE_NAME_MAPPING"); + if (!name_mapping_3to2) + goto error; + if (!PyDict_CheckExact(name_mapping_3to2)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.REVERSE_NAME_MAPPING shouldbe a dict, " + "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name); + goto error; + } + import_mapping_3to2 = PyObject_GetAttrString(compat_pickle, + "REVERSE_IMPORT_MAPPING"); + if (!import_mapping_3to2) + goto error; + if (!PyDict_CheckExact(import_mapping_3to2)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, " + "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name); + goto error; + } + Py_CLEAR(compat_pickle); empty_tuple = PyTuple_New(0); if (empty_tuple == NULL) - return -1; - + goto error; two_tuple = PyTuple_New(2); if (two_tuple == NULL) - return -1; + goto error; /* We use this temp container with no regard to refcounts, or to * keeping containees alive. Exempt from GC, because we don't * want anything looking at two_tuple() by magic. @@ -4715,7 +4903,18 @@ return 0; error: - Py_DECREF(copyreg); + Py_CLEAR(copyreg); + Py_CLEAR(dispatch_table); + Py_CLEAR(extension_registry); + Py_CLEAR(inverted_registry); + Py_CLEAR(extension_cache); + Py_CLEAR(compat_pickle); + Py_CLEAR(name_mapping_2to3); + Py_CLEAR(import_mapping_2to3); + Py_CLEAR(name_mapping_3to2); + Py_CLEAR(import_mapping_3to2); + Py_CLEAR(empty_tuple); + Py_CLEAR(two_tuple); return -1; } @@ -4773,7 +4972,7 @@ if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0) return NULL; - if (init_stuff() < 0) + if (initmodule() < 0) return NULL; return m;