# HG changeset patch # User Antoine Pitrou # Date 1244069467 -7200 diff -r 4d41e2e9665f -r bec60b4d6b4b Doc/library/pickle.rst --- a/Doc/library/pickle.rst +++ b/Doc/library/pickle.rst @@ -141,7 +141,7 @@ an unpickler, then you call the unpickle The :mod:`pickle` module provides the following functions to make the pickling process more convenient: -.. function:: dump(obj, file[, protocol]) +.. function:: dump(obj, file[, protocol, \*, fix_imports=True]) Write a pickled representation of *obj* to the open file object *file*. This is equivalent to ``Pickler(file, protocol).dump(obj)``. @@ -158,7 +158,11 @@ process more convenient: argument. It can thus be a file object opened for binary writing, a io.BytesIO instance, or any other custom object that meets this interface. -.. function:: dumps(obj[, protocol]) + If *fix_imports* is True and *protocol* is 2 or lower, pickle will try to + map the new Python 3.x names to the old module names used in Python 2.x, + so that the pickle is readable under Python 2.x. + +.. function:: dumps(obj[, protocol, \*, fix_imports=True]) Return the pickled representation of the object as a :class:`bytes` object, instead of writing it to a file. @@ -171,7 +175,11 @@ process more convenient: supported. The higher the protocol used, the more recent the version of Python needed to read the pickle produced. -.. function:: load(file, [\*, encoding="ASCII", errors="strict"]) + If *fix_imports* is True and *protocol* is 2 or lower, pickle will try to + map the new Python 3.x names to the old module names used in Python 2.x, + so that the pickle is readable under Python 2.x. + +.. function:: load(file, [\*, fix_imports=True, encoding="ASCII", errors="strict"]) Read a pickled object representation from the open file object *file* and return the reconstituted object hierarchy specified therein. This is @@ -187,11 +195,14 @@ process more convenient: for reading, a BytesIO object, or any other custom object that meets this interface. 
- Optional keyword arguments are encoding and errors, which are used to decode - 8-bit string instances pickled by Python 2.x. These default to 'ASCII' and - 'strict', respectively. + Optional keyword arguments are fix_imports, encoding and errors, which are + used to control compatibility support for pickle streams generated by + Python 2.x. If *fix_imports* is True, pickle will try to map the old + Python 2.x names to the new names used in Python 3.x. The *encoding* and + *errors* tell pickle how to decode 8-bit string instances pickled by + Python 2.x; these default to 'ASCII' and 'strict', respectively. -.. function:: loads(bytes_object, [\*, encoding="ASCII", errors="strict"]) +.. function:: loads(bytes_object, [\*, fix_imports=True, encoding="ASCII", errors="strict"]) Read a pickled object hierarchy from a :class:`bytes` object and return the reconstituted object hierarchy specified therein @@ -200,9 +211,12 @@ process more convenient: argument is needed. Bytes past the pickled object's representation are ignored. - Optional keyword arguments are encoding and errors, which are used to decode - 8-bit string instances pickled by Python 2.x. These default to 'ASCII' and - 'strict', respectively. + Optional keyword arguments are fix_imports, encoding and errors, which are + used to control compatibility support for pickle streams generated by + Python 2.x. If *fix_imports* is True, pickle will try to map the old + Python 2.x names to the new names used in Python 3.x. The *encoding* and + *errors* tell pickle how to decode 8-bit string instances pickled by + Python 2.x; these default to 'ASCII' and 'strict', respectively. The :mod:`pickle` module defines three exceptions: @@ -233,7 +247,7 @@ The :mod:`pickle` module defines three e The :mod:`pickle` module exports two classes, :class:`Pickler` and :class:`Unpickler`: -.. class:: Pickler(file[, protocol]) +.. 
class:: Pickler(file[, protocol, \*, fix_imports=True]) This takes a binary file for writing a pickle data stream. @@ -249,6 +263,10 @@ The :mod:`pickle` module exports two cla argument. It can thus be a file object opened for binary writing, a io.BytesIO instance, or any other custom object that meets this interface. + If *fix_imports* is True and *protocol* is 2 or lower, pickle will try to + map the new Python 3.x names to the old module names used in Python 2.x, + so that the pickle is readable under Python 2.x. + .. method:: dump(obj) Write a pickled representation of *obj* to the open file object given in @@ -277,7 +295,7 @@ The :mod:`pickle` module exports two cla Use :func:`pickletools.optimize` if you need more compact pickles. -.. class:: Unpickler(file, [\*, encoding="ASCII", errors="strict"]) +.. class:: Unpickler(file, [\*, fix_imports=True, encoding="ASCII", errors="strict"]) This takes a binary file for reading a pickle data stream. @@ -290,9 +308,12 @@ The :mod:`pickle` module exports two cla for reading, a BytesIO object, or any other custom object that meets this interface. - Optional keyword arguments are encoding and errors, which are used to decode - 8-bit string instances pickled by Python 2.x. These default to 'ASCII' and - 'strict', respectively. + Optional keyword arguments are fix_imports, encoding and errors, which are + used to control compatibility support for pickle streams generated by + Python 2.x. If *fix_imports* is True, pickle will try to map the old + Python 2.x names to the new names used in Python 3.x. The *encoding* and + *errors* tell pickle how to decode 8-bit string instances pickled by + Python 2.x; these default to 'ASCII' and 'strict', respectively. .. method:: load() diff -r 4d41e2e9665f -r bec60b4d6b4b Lib/_compat_pickle.py --- /dev/null +++ b/Lib/_compat_pickle.py @@ -0,0 +1,81 @@ +# This module is used to map the old Python 2 names to the new names used in +# Python 3 for the pickle module. 
This is needed to make pickle streams +# generated with Python 2 loadable by Python 3. + +# This is a copy of lib2to3.fixes.fix_imports.MAPPING. We cannot import +# lib2to3 and use the mapping defined there, because lib2to3 uses pickle. +# Thus, this could cause the module to be imported recursively. +IMPORT_MAPPING = { + 'StringIO': 'io', + 'cStringIO': 'io', + 'cPickle': 'pickle', + '__builtin__' : 'builtins', + 'copy_reg': 'copyreg', + 'Queue': 'queue', + 'SocketServer': 'socketserver', + 'ConfigParser': 'configparser', + 'repr': 'reprlib', + 'FileDialog': 'tkinter.filedialog', + 'tkFileDialog': 'tkinter.filedialog', + 'SimpleDialog': 'tkinter.simpledialog', + 'tkSimpleDialog': 'tkinter.simpledialog', + 'tkColorChooser': 'tkinter.colorchooser', + 'tkCommonDialog': 'tkinter.commondialog', + 'Dialog': 'tkinter.dialog', + 'Tkdnd': 'tkinter.dnd', + 'tkFont': 'tkinter.font', + 'tkMessageBox': 'tkinter.messagebox', + 'ScrolledText': 'tkinter.scrolledtext', + 'Tkconstants': 'tkinter.constants', + 'Tix': 'tkinter.tix', + 'ttk': 'tkinter.ttk', + 'Tkinter': 'tkinter', + 'markupbase': '_markupbase', + '_winreg': 'winreg', + 'thread': '_thread', + 'dummy_thread': '_dummy_thread', + 'dbhash': 'dbm.bsd', + 'dumbdbm': 'dbm.dumb', + 'dbm': 'dbm.ndbm', + 'gdbm': 'dbm.gnu', + 'xmlrpclib': 'xmlrpc.client', + 'DocXMLRPCServer': 'xmlrpc.server', + 'SimpleXMLRPCServer': 'xmlrpc.server', + 'httplib': 'http.client', + 'htmlentitydefs' : 'html.entities', + 'HTMLParser' : 'html.parser', + 'Cookie': 'http.cookies', + 'cookielib': 'http.cookiejar', + 'BaseHTTPServer': 'http.server', + 'SimpleHTTPServer': 'http.server', + 'CGIHTTPServer': 'http.server', + 'test.test_support': 'test.support', + 'commands': 'subprocess', + 'UserString' : 'collections', + 'UserList' : 'collections', + 'urlparse' : 'urllib.parse', + 'robotparser' : 'urllib.robotparser', + 'whichdb': 'dbm', + 'anydbm': 'dbm' +} + + +# This contains rename rules that are easy to handle. We ignore the more +# complex stuff (e.g. 
mapping the names in the urllib and types modules). +# These rules should be run before import names are fixed. +NAME_MAPPING = { + ('__builtin__', 'xrange'): ('builtins', 'range'), + ('__builtin__', 'reduce'): ('functools', 'reduce'), + ('__builtin__', 'intern'): ('sys', 'intern'), + ('__builtin__', 'unichr'): ('builtins', 'chr'), + ('__builtin__', 'basestring'): ('builtins', 'str'), + ('__builtin__', 'long'): ('builtins', 'int'), + ('itertools', 'izip'): ('builtins', 'zip'), + ('itertools', 'imap'): ('builtins', 'map'), + ('itertools', 'ifilter'): ('builtins', 'filter'), + ('itertools', 'ifilterfalse'): ('itertools', 'filterfalse'), +} + +# Same, but for 3.x to 2.x +REVERSE_IMPORT_MAPPING = dict((v, k) for (k, v) in IMPORT_MAPPING.items()) +REVERSE_NAME_MAPPING = dict((v, k) for (k, v) in NAME_MAPPING.items()) diff -r 4d41e2e9665f -r bec60b4d6b4b Lib/pickle.py --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -34,6 +34,7 @@ import struct import re import io import codecs +import _compat_pickle __all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler", "Unpickler", "dump", "dumps", "load", "loads"] @@ -171,12 +172,11 @@ SHORT_BINBYTES = b'C' # " " ; __all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)]) - # Pickling machinery class _Pickler: - def __init__(self, file, protocol=None): + def __init__(self, file, protocol=None, *, fix_imports=True): """This takes a binary file for writing a pickle data stream. The optional protocol argument tells the pickler to use the @@ -208,6 +208,7 @@ class _Pickler: self.proto = int(protocol) self.bin = protocol >= 1 self.fast = 0 + self.fix_imports = fix_imports def clear_memo(self): """Clears the pickler's "memo". 
@@ -698,6 +699,11 @@ class _Pickler: write(GLOBAL + bytes(module, "utf-8") + b'\n' + bytes(name, "utf-8") + b'\n') else: + if self.fix_imports: + if (module, name) in _compat_pickle.REVERSE_NAME_MAPPING: + module, name = _compat_pickle.REVERSE_NAME_MAPPING[(module, name)] + if module in _compat_pickle.REVERSE_IMPORT_MAPPING: + module = _compat_pickle.REVERSE_IMPORT_MAPPING[module] try: write(GLOBAL + bytes(module, "ascii") + b'\n' + bytes(name, "ascii") + b'\n') @@ -766,7 +772,8 @@ def whichmodule(func, funcname): class _Unpickler: - def __init__(self, file, *, encoding="ASCII", errors="strict"): + def __init__(self, file, *, fix_imports=True, + encoding="ASCII", errors="strict"): """This takes a binary file for reading a pickle data stream. The protocol version of the pickle is detected automatically, so no @@ -788,6 +795,8 @@ class _Unpickler: self.memo = {} self.encoding = encoding self.errors = errors + self.proto = 0 + self.fix_imports = fix_imports def load(self): """Read a pickled object representation from the open file. @@ -838,6 +847,7 @@ class _Unpickler: proto = ord(self.read(1)) if not 0 <= proto <= HIGHEST_PROTOCOL: raise ValueError("unsupported pickle protocol: %d" % proto) + self.proto = proto dispatch[PROTO[0]] = load_proto def load_persid(self): @@ -1088,7 +1098,12 @@ class _Unpickler: self.append(obj) def find_class(self, module, name): - # Subclasses may override this + # Subclasses may override this. 
+ if self.proto < 3 and self.fix_imports: + if (module, name) in _compat_pickle.NAME_MAPPING: + module, name = _compat_pickle.NAME_MAPPING[(module, name)] + if module in _compat_pickle.IMPORT_MAPPING: + module = _compat_pickle.IMPORT_MAPPING[module] __import__(module, level=0) mod = sys.modules[module] klass = getattr(mod, name) @@ -1327,27 +1342,28 @@ except ImportError: # Shorthands -def dump(obj, file, protocol=None): - Pickler(file, protocol).dump(obj) +def dump(obj, file, protocol=None, *, fix_imports=True): + Pickler(file, protocol, fix_imports=fix_imports).dump(obj) -def dumps(obj, protocol=None): +def dumps(obj, protocol=None, *, fix_imports=True): f = io.BytesIO() - Pickler(f, protocol).dump(obj) + Pickler(f, protocol, fix_imports=fix_imports).dump(obj) res = f.getvalue() assert isinstance(res, bytes_types) return res -def load(file, *, encoding="ASCII", errors="strict"): - return Unpickler(file, encoding=encoding, errors=errors).load() +def load(file, *, fix_imports=True, encoding="ASCII", errors="strict"): + return Unpickler(file, fix_imports=fix_imports, + encoding=encoding, errors=errors).load() -def loads(s, *, encoding="ASCII", errors="strict"): +def loads(s, *, fix_imports=True, encoding="ASCII", errors="strict"): if isinstance(s, str): raise TypeError("Can't load pickle from unicode string") file = io.BytesIO(s) - return Unpickler(file, encoding=encoding, errors=errors).load() + return Unpickler(file, fix_imports=fix_imports, + encoding=encoding, errors=errors).load() # Doctest - def _test(): import doctest return doctest.testmod() diff -r 4d41e2e9665f -r bec60b4d6b4b Lib/pickletools.py --- a/Lib/pickletools.py +++ b/Lib/pickletools.py @@ -2066,27 +2066,27 @@ _dis_test = r""" 29: ( MARK 30: d DICT (MARK at 29) 31: p PUT 2 - 34: c GLOBAL 'builtins bytes' - 50: p PUT 3 - 53: ( MARK - 54: ( MARK - 55: l LIST (MARK at 54) - 56: p PUT 4 - 59: L LONG 97 - 64: a APPEND - 65: L LONG 98 - 70: a APPEND - 71: L LONG 99 - 76: a APPEND - 77: t TUPLE (MARK at 
53) - 78: p PUT 5 - 81: R REDUCE - 82: p PUT 6 - 85: V UNICODE 'def' - 90: p PUT 7 - 93: s SETITEM - 94: a APPEND - 95: . STOP + 34: c GLOBAL '__builtin__ bytes' + 53: p PUT 3 + 56: ( MARK + 57: ( MARK + 58: l LIST (MARK at 57) + 59: p PUT 4 + 62: L LONG 97 + 67: a APPEND + 68: L LONG 98 + 73: a APPEND + 74: L LONG 99 + 79: a APPEND + 80: t TUPLE (MARK at 56) + 81: p PUT 5 + 84: R REDUCE + 85: p PUT 6 + 88: V UNICODE 'def' + 93: p PUT 7 + 96: s SETITEM + 97: a APPEND + 98: . STOP highest protocol among opcodes = 0 Try again with a "binary" pickle. @@ -2105,25 +2105,25 @@ Try again with a "binary" pickle. 14: q BINPUT 1 16: } EMPTY_DICT 17: q BINPUT 2 - 19: c GLOBAL 'builtins bytes' - 35: q BINPUT 3 - 37: ( MARK - 38: ] EMPTY_LIST - 39: q BINPUT 4 - 41: ( MARK - 42: K BININT1 97 - 44: K BININT1 98 - 46: K BININT1 99 - 48: e APPENDS (MARK at 41) - 49: t TUPLE (MARK at 37) - 50: q BINPUT 5 - 52: R REDUCE - 53: q BINPUT 6 - 55: X BINUNICODE 'def' - 63: q BINPUT 7 - 65: s SETITEM - 66: e APPENDS (MARK at 3) - 67: . STOP + 19: c GLOBAL '__builtin__ bytes' + 38: q BINPUT 3 + 40: ( MARK + 41: ] EMPTY_LIST + 42: q BINPUT 4 + 44: ( MARK + 45: K BININT1 97 + 47: K BININT1 98 + 49: K BININT1 99 + 51: e APPENDS (MARK at 44) + 52: t TUPLE (MARK at 40) + 53: q BINPUT 5 + 55: R REDUCE + 56: q BINPUT 6 + 58: X BINUNICODE 'def' + 66: q BINPUT 7 + 68: s SETITEM + 69: e APPENDS (MARK at 3) + 70: . STOP highest protocol among opcodes = 1 Exercise the INST/OBJ/BUILD family. 
@@ -2141,58 +2141,58 @@ highest protocol among opcodes = 0 0: ( MARK 1: l LIST (MARK at 0) 2: p PUT 0 - 5: c GLOBAL 'copyreg _reconstructor' - 29: p PUT 1 - 32: ( MARK - 33: c GLOBAL 'pickletools _Example' - 55: p PUT 2 - 58: c GLOBAL 'builtins object' - 75: p PUT 3 - 78: N NONE - 79: t TUPLE (MARK at 32) - 80: p PUT 4 - 83: R REDUCE - 84: p PUT 5 - 87: ( MARK - 88: d DICT (MARK at 87) - 89: p PUT 6 - 92: V UNICODE 'value' - 99: p PUT 7 - 102: L LONG 42 - 107: s SETITEM - 108: b BUILD - 109: a APPEND - 110: g GET 5 + 5: c GLOBAL 'copy_reg _reconstructor' + 30: p PUT 1 + 33: ( MARK + 34: c GLOBAL 'pickletools _Example' + 56: p PUT 2 + 59: c GLOBAL '__builtin__ object' + 79: p PUT 3 + 82: N NONE + 83: t TUPLE (MARK at 33) + 84: p PUT 4 + 87: R REDUCE + 88: p PUT 5 + 91: ( MARK + 92: d DICT (MARK at 91) + 93: p PUT 6 + 96: V UNICODE 'value' + 103: p PUT 7 + 106: L LONG 42 + 111: s SETITEM + 112: b BUILD 113: a APPEND - 114: . STOP + 114: g GET 5 + 117: a APPEND + 118: . STOP highest protocol among opcodes = 0 >>> dis(pickle.dumps(x, 1)) 0: ] EMPTY_LIST 1: q BINPUT 0 3: ( MARK - 4: c GLOBAL 'copyreg _reconstructor' - 28: q BINPUT 1 - 30: ( MARK - 31: c GLOBAL 'pickletools _Example' - 53: q BINPUT 2 - 55: c GLOBAL 'builtins object' - 72: q BINPUT 3 - 74: N NONE - 75: t TUPLE (MARK at 30) - 76: q BINPUT 4 - 78: R REDUCE - 79: q BINPUT 5 - 81: } EMPTY_DICT - 82: q BINPUT 6 - 84: X BINUNICODE 'value' - 94: q BINPUT 7 - 96: K BININT1 42 - 98: s SETITEM - 99: b BUILD - 100: h BINGET 5 - 102: e APPENDS (MARK at 3) - 103: . STOP + 4: c GLOBAL 'copy_reg _reconstructor' + 29: q BINPUT 1 + 31: ( MARK + 32: c GLOBAL 'pickletools _Example' + 54: q BINPUT 2 + 56: c GLOBAL '__builtin__ object' + 76: q BINPUT 3 + 78: N NONE + 79: t TUPLE (MARK at 31) + 80: q BINPUT 4 + 82: R REDUCE + 83: q BINPUT 5 + 85: } EMPTY_DICT + 86: q BINPUT 6 + 88: X BINUNICODE 'value' + 98: q BINPUT 7 + 100: K BININT1 42 + 102: s SETITEM + 103: b BUILD + 104: h BINGET 5 + 106: e APPENDS (MARK at 3) + 107: . 
STOP highest protocol among opcodes = 1 Try "the canonical" recursive-object test. diff -r 4d41e2e9665f -r bec60b4d6b4b Lib/test/pickletester.py --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -3,6 +3,7 @@ import unittest import pickle import pickletools import copyreg +from http.cookies import SimpleCookie from test.support import TestFailed, TESTFN, run_with_locale @@ -342,6 +343,24 @@ DATA2_DIS = """\ highest protocol among opcodes = 2 """ +# set([1,2]) pickled from 2.x with protocol 2 +DATA3 = b'\x80\x02c__builtin__\nset\nq\x00]q\x01(K\x01K\x02e\x85q\x02Rq\x03.' + +# xrange(5) pickled from 2.x with protocol 2 +DATA4 = b'\x80\x02c__builtin__\nxrange\nq\x00K\x00K\x05K\x01\x87q\x01Rq\x02.' + +# a SimpleCookie() object pickled from 2.x with protocol 2 +DATA5 = (b'\x80\x02cCookie\nSimpleCookie\nq\x00)\x81q\x01U\x03key' + b'q\x02cCookie\nMorsel\nq\x03)\x81q\x04(U\x07commentq\x05U' + b'\x00q\x06U\x06domainq\x07h\x06U\x06secureq\x08h\x06U\x07' + b'expiresq\th\x06U\x07max-ageq\nh\x06U\x07versionq\x0bh\x06U' + b'\x04pathq\x0ch\x06U\x08httponlyq\rh\x06u}q\x0e(U\x0b' + b'coded_valueq\x0fU\x05valueq\x10h\x10h\x10h\x02h\x02ubs}q\x11b.') + +# set([3]) pickled from 2.x with protocol 2 +DATA6 = b'\x80\x02c__builtin__\nset\nq\x00]q\x01K\x03a\x85q\x02Rq\x03.' + + def create_data(): c = C() c.foo = 1 @@ -956,6 +975,29 @@ class AbstractPickleTests(unittest.TestC for x_key, y_key in zip(x_keys, y_keys): self.assertIs(x_key, y_key) + def test_unpickle_from_2x(self): + # Unpickle non-trivial data from Python 2.x. 
+ loaded = self.loads(DATA3) + self.assertEqual(loaded, set([1, 2])) + loaded = self.loads(DATA4) + self.assertEqual(type(loaded), type(range(0))) + self.assertEqual(list(loaded), list(range(5))) + loaded = self.loads(DATA5) + self.assertEqual(type(loaded), SimpleCookie) + self.assertEqual(list(loaded.keys()), ["key"]) + self.assertEqual(loaded["key"].value, "Set-Cookie: key=value") + + def test_pickle_to_2x(self): + # Pickle non-trivial data with protocol 2, expecting that it yields + # the same result as Python 2.x did. + # NOTE: this test is a bit too strong since we can produce different + # bytecode that 2.x will still understand. + dumped = self.dumps(range(5), 2) + self.assertEqual(dumped, DATA4) + dumped = self.dumps(set([3]), 2) + self.assertEqual(dumped, DATA6) + + # Test classes for reduce_ex class REX_one(object): diff -r 4d41e2e9665f -r bec60b4d6b4b Lib/test/test_pickletools.py --- a/Lib/test/test_pickletools.py +++ b/Lib/test/test_pickletools.py @@ -12,6 +12,9 @@ class OptimizedPickleTests(AbstractPickl def loads(self, buf): return pickle.loads(buf) + # Test relies on precise output of dumps() + test_pickle_to_2x = None + def test_main(): support.run_unittest(OptimizedPickleTests) diff -r 4d41e2e9665f -r bec60b4d6b4b Modules/_pickle.c --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -103,25 +103,33 @@ enum { /* Exception classes for pickle. These should override the ones defined in pickle.py, when the C-optimized Pickler and Unpickler are used. */ -static PyObject *PickleError; -static PyObject *PicklingError; -static PyObject *UnpicklingError; +static PyObject *PickleError = NULL; +static PyObject *PicklingError = NULL; +static PyObject *UnpicklingError = NULL; /* copyreg.dispatch_table, {type_object: pickling_function} */ -static PyObject *dispatch_table; +static PyObject *dispatch_table = NULL; /* For EXT[124] opcodes. 
*/ /* copyreg._extension_registry, {(module_name, function_name): code} */ -static PyObject *extension_registry; +static PyObject *extension_registry = NULL; /* copyreg._inverted_registry, {code: (module_name, function_name)} */ -static PyObject *inverted_registry; +static PyObject *inverted_registry = NULL; /* copyreg._extension_cache, {code: object} */ -static PyObject *extension_cache; +static PyObject *extension_cache = NULL; + +/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */ +static PyObject *name_mapping_2to3 = NULL; +/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */ +static PyObject *import_mapping_2to3 = NULL; +/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */ +static PyObject *name_mapping_3to2 = NULL; +static PyObject *import_mapping_3to2 = NULL; /* XXX: Are these really nescessary? */ /* As the name says, an empty tuple. */ -static PyObject *empty_tuple; +static PyObject *empty_tuple = NULL; /* For looking up name pairs in copyreg._extension_registry. */ -static PyObject *two_tuple; +static PyObject *two_tuple = NULL; static int stack_underflow(void) @@ -315,6 +323,8 @@ typedef struct PicklerObject { should not be used if with self-referential objects. */ int fast_nesting; + int fix_imports; /* Indicate whether Pickler should fix + the name of globals for Python 2.x. */ PyObject *fast_memo; } PicklerObject; @@ -340,6 +350,9 @@ typedef struct UnpicklerObject { objects. */ Py_ssize_t num_marks; /* Number of marks in the mark stack. */ Py_ssize_t marks_size; /* Current allocated size of the mark stack. */ + int proto; /* Protocol of the pickle loaded. */ + int fix_imports; /* Indicate whether Unpickler should fix + the name of globals pickled by Python 2.x. 
*/ } UnpicklerObject; /* Forward declarations */ @@ -1972,6 +1985,43 @@ save_global(PicklerObject *self, PyObjec unicode_encoder = PyUnicode_AsASCIIString; } + /* For protocol <= 2 and if the user didn't request against doing so, + we convert module names to the old 2.x module names. */ + if (self->proto < 3 && self->fix_imports) { + PyObject *key = Py_BuildValue("(OO)", module_name, global_name); + PyObject *item; + if (key == NULL) + goto error; + item = PyDict_GetItem(name_mapping_3to2, key); + Py_DECREF(key); + if (item != NULL) { + if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.REVERSE_NAME_MAPPING values should be " + "2-tuples, not %.200s", Py_TYPE(item)->tp_name); + goto error; + } + Py_CLEAR(module_name); + Py_CLEAR(global_name); + module_name = PyTuple_GET_ITEM(item, 0); + global_name = PyTuple_GET_ITEM(item, 1); + Py_INCREF(module_name); + Py_INCREF(global_name); + } + item = PyDict_GetItem(import_mapping_3to2, module_name); + if (item != NULL) { + if (!PyUnicode_Check(item)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.REVERSE_IMPORT_MAPPING values should be " + "strings, not %.200s", Py_TYPE(item)->tp_name); + goto error; + } + Py_CLEAR(module_name); + module_name = item; + Py_INCREF(module_name); + } + } + /* Save the name of the module. 
*/ encoded = unicode_encoder(module_name); if (encoded == NULL) { @@ -2613,13 +2663,14 @@ PyDoc_STRVAR(Pickler_doc, static int Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds) { - static char *kwlist[] = {"file", "protocol", 0}; + static char *kwlist[] = {"file", "protocol", "fix_imports", 0}; PyObject *file; PyObject *proto_obj = NULL; long proto = 0; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:Pickler", - kwlist, &file, &proto_obj)) + int fix_imports = 1; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oi:Pickler", + kwlist, &file, &proto_obj, &fix_imports)) return -1; /* In case of multiple __init__() calls, clear previous content. */ @@ -2628,8 +2679,11 @@ Pickler_init(PicklerObject *self, PyObje if (proto_obj == NULL || proto_obj == Py_None) proto = DEFAULT_PROTOCOL; - else + else { proto = PyLong_AsLong(proto_obj); + if (proto == -1 && PyErr_Occurred()) + return -1; + } if (proto < 0) proto = HIGHEST_PROTOCOL; @@ -2639,12 +2693,13 @@ Pickler_init(PicklerObject *self, PyObje return -1; } - self->proto = proto; - self->bin = proto > 0; - self->arg = NULL; - self->fast = 0; - self->fast_nesting = 0; - self->fast_memo = NULL; + self->proto = proto; + self->bin = proto > 0; + self->arg = NULL; + self->fast = 0; + self->fast_nesting = 0; + self->fast_memo = NULL; + self->fix_imports = fix_imports; if (!PyObject_HasAttrString(file, "write")) { PyErr_SetString(PyExc_TypeError, @@ -4220,8 +4275,10 @@ load_proto(UnpicklerObject *self) return -1; i = (unsigned char)s[0]; - if (i <= HIGHEST_PROTOCOL) - return 0; + if (i <= HIGHEST_PROTOCOL) { + self->proto = i; + return 0; + } PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i); return -1; @@ -4383,6 +4440,38 @@ Unpickler_find_class(UnpicklerObject *se &module_name, &global_name)) return NULL; + /* Try to map the old names used in Python 2.x to the new ones used in + Python 3.x. We do this only with old pickle protocols and when the user + has not disabled the feature. 
*/ + if (self->proto < 3 && self->fix_imports) { + PyObject *key = Py_BuildValue("(OO)", module_name, global_name); + PyObject *item; + if (key == NULL) + return NULL; + item = PyDict_GetItem(name_mapping_2to3, key); + Py_DECREF(key); + if (item != NULL) { + if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.NAME_MAPPING values should be " + "2-tuples, not %.200s", Py_TYPE(item)->tp_name); + return NULL; + } + module_name = PyTuple_GET_ITEM(item, 0); + global_name = PyTuple_GET_ITEM(item, 1); + } + item = PyDict_GetItem(import_mapping_2to3, module_name); + if (item != NULL) { + if (!PyUnicode_Check(item)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.IMPORT_MAPPING values should be " + "strings, not %.200s", Py_TYPE(item)->tp_name); + return NULL; + } + module_name = item; + } + } + modules_dict = PySys_GetObject("modules"); if (modules_dict == NULL) return NULL; @@ -4484,8 +4573,9 @@ PyDoc_STRVAR(Unpickler_doc, static int Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds) { - static char *kwlist[] = {"file", "encoding", "errors", 0}; + static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0}; PyObject *file; + int fix_imports = 1; char *encoding = NULL; char *errors = NULL; @@ -4504,8 +4594,8 @@ Unpickler_init(UnpicklerObject *self, Py extra careful in the other Unpickler methods, since a subclass could forget to call Unpickler.__init__() thus breaking our internal invariants. */ - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|ss:Unpickler", kwlist, - &file, &encoding, &errors)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|iss:Unpickler", kwlist, + &file, &fix_imports, &encoding, &errors)) return -1; /* In case of multiple __init__() calls, clear previous content. 
*/ @@ -4549,6 +4639,8 @@ Unpickler_init(UnpicklerObject *self, Py self->last_string = NULL; self->arg = NULL; + self->proto = 0; + self->fix_imports = fix_imports; return 0; } @@ -4672,40 +4764,84 @@ static PyTypeObject Unpickler_Type = { }; static int -init_stuff(void) -{ - PyObject *copyreg; +initmodule(void) +{ + PyObject *copyreg = NULL; + PyObject *compat_pickle = NULL; + + /* XXX: We should ensure that the types of the dictionaries imported are + exactly PyDict objects. Otherwise, it is possible to crash the pickle + since we use the PyDict API directly to access these dictionaries. */ copyreg = PyImport_ImportModule("copyreg"); if (!copyreg) - return -1; - + goto error; dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table"); if (!dispatch_table) goto error; - extension_registry = \ PyObject_GetAttrString(copyreg, "_extension_registry"); if (!extension_registry) goto error; - inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry"); if (!inverted_registry) goto error; - extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache"); if (!extension_cache) goto error; - - Py_DECREF(copyreg); + Py_CLEAR(copyreg); + + /* Load the 2.x -> 3.x stdlib module mapping tables */ + compat_pickle = PyImport_ImportModule("_compat_pickle"); + if (!compat_pickle) + goto error; + name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING"); + if (!name_mapping_2to3) + goto error; + if (!PyDict_CheckExact(name_mapping_2to3)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.NAME_MAPPING should be a dict, not %.200s", + Py_TYPE(name_mapping_2to3)->tp_name); + goto error; + } + import_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "IMPORT_MAPPING"); + if (!import_mapping_2to3) + goto error; + if (!PyDict_CheckExact(import_mapping_2to3)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.IMPORT_MAPPING should be a dict, not %.200s", + Py_TYPE(import_mapping_2to3)->tp_name); + goto error; + } + /* ... 
and the 3.x -> 2.x mapping tables */ + name_mapping_3to2 = PyObject_GetAttrString(compat_pickle, + "REVERSE_NAME_MAPPING"); + if (!name_mapping_3to2) + goto error; + if (!PyDict_CheckExact(name_mapping_3to2)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, not %.200s", + Py_TYPE(name_mapping_3to2)->tp_name); + goto error; + } + import_mapping_3to2 = PyObject_GetAttrString(compat_pickle, + "REVERSE_IMPORT_MAPPING"); + if (!import_mapping_3to2) + goto error; + if (!PyDict_CheckExact(import_mapping_3to2)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, not %.200s", + Py_TYPE(import_mapping_3to2)->tp_name); + goto error; + } + Py_CLEAR(compat_pickle); empty_tuple = PyTuple_New(0); if (empty_tuple == NULL) - return -1; - + goto error; two_tuple = PyTuple_New(2); if (two_tuple == NULL) - return -1; + goto error; /* We use this temp container with no regard to refcounts, or to * keeping containees alive. Exempt from GC, because we don't * want anything looking at two_tuple() by magic. @@ -4715,7 +4851,18 @@ init_stuff(void) return 0; error: - Py_DECREF(copyreg); + Py_CLEAR(copyreg); + Py_CLEAR(dispatch_table); + Py_CLEAR(extension_registry); + Py_CLEAR(inverted_registry); + Py_CLEAR(extension_cache); + Py_CLEAR(compat_pickle); + Py_CLEAR(name_mapping_2to3); + Py_CLEAR(import_mapping_2to3); + Py_CLEAR(name_mapping_3to2); + Py_CLEAR(import_mapping_3to2); + Py_CLEAR(empty_tuple); + Py_CLEAR(two_tuple); return -1; } @@ -4773,7 +4920,7 @@ PyInit__pickle(void) if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0) return NULL; - if (init_stuff() < 0) + if (initmodule() < 0) return NULL; return m;