Index: Doc/library/pickle.rst =================================================================== --- Doc/library/pickle.rst (revision 73040) +++ Doc/library/pickle.rst (working copy) @@ -171,7 +171,7 @@ supported. The higher the protocol used, the more recent the version of Python needed to read the pickle produced. -.. function:: load(file, [\*, encoding="ASCII", errors="strict"]) +.. function:: load(file, [\*, fix_imports=True, encoding="ASCII", errors="strict"]) Read a pickled object representation from the open file object *file* and return the reconstituted object hierarchy specified therein. This is @@ -187,11 +187,14 @@ for reading, a BytesIO object, or any other custom object that meets this interface. - Optional keyword arguments are encoding and errors, which are used to decode - 8-bit string instances pickled by Python 2.x. These default to 'ASCII' and - 'strict', respectively. + Optional keyword arguments are fix_imports, encoding and errors, which are + used to control compatibility support for pickle streams generated by + Python 2.x. If *fix_imports* is True, pickle will try to map the old + Python 2.x names to the new names used in Python 3.x. The *encoding* and + *errors* tell pickle how to decode 8-bit string instances pickled by + Python 2.x; these default to 'ASCII' and 'strict', respectively. -.. function:: loads(bytes_object, [\*, encoding="ASCII", errors="strict"]) +.. function:: loads(bytes_object, [\*, fix_imports=True, encoding="ASCII", errors="strict"]) Read a pickled object hierarchy from a :class:`bytes` object and return the reconstituted object hierarchy specified therein @@ -200,9 +203,12 @@ argument is needed. Bytes past the pickled object's representation are ignored. - Optional keyword arguments are encoding and errors, which are used to decode - 8-bit string instances pickled by Python 2.x. These default to 'ASCII' and - 'strict', respectively. 
+ Optional keyword arguments are fix_imports, encoding and errors, which are + used to control compatibility support for pickle streams generated by + Python 2.x. If *fix_imports* is True, pickle will try to map the old + Python 2.x names to the new names used in Python 3.x. The *encoding* and + *errors* tell pickle how to decode 8-bit string instances pickled by + Python 2.x; these default to 'ASCII' and 'strict', respectively. The :mod:`pickle` module defines three exceptions: @@ -277,7 +283,7 @@ Use :func:`pickletools.optimize` if you need more compact pickles. -.. class:: Unpickler(file, [\*, encoding="ASCII", errors="strict"]) +.. class:: Unpickler(file, [\*, fix_imports=True, encoding="ASCII", errors="strict"]) This takes a binary file for reading a pickle data stream. @@ -290,9 +296,12 @@ for reading, a BytesIO object, or any other custom object that meets this interface. - Optional keyword arguments are encoding and errors, which are used to decode - 8-bit string instances pickled by Python 2.x. These default to 'ASCII' and - 'strict', respectively. + Optional keyword arguments are fix_imports, encoding and errors, which are + used to control compatibility support for pickle streams generated by + Python 2.x. If *fix_imports* is True, pickle will try to map the old + Python 2.x names to the new names used in Python 3.x. The *encoding* and + *errors* tell pickle how to decode 8-bit string instances pickled by + Python 2.x; these default to 'ASCII' and 'strict', respectively. .. method:: load() Index: Lib/_compat_pickle.py =================================================================== --- Lib/_compat_pickle.py (revision 0) +++ Lib/_compat_pickle.py (revision 0) @@ -0,0 +1,76 @@ +# This module is used to map the old Python 2 names to the new names used in +# Python 3 for the pickle module. This is needed to make pickle streams +# generated with Python 2 loadable by Python 3. + +# This is a copy of lib2to3.fixes.fix_imports.MAPPING. 
We cannot import +# lib2to3 and use the mapping defined there, because lib2to3 uses pickle. +# Thus, this could cause the module to be imported recursively. +IMPORT_MAPPING = { + 'StringIO': 'io', + 'cStringIO': 'io', + 'cPickle': 'pickle', + '__builtin__' : 'builtins', + 'copy_reg': 'copyreg', + 'Queue': 'queue', + 'SocketServer': 'socketserver', + 'ConfigParser': 'configparser', + 'repr': 'reprlib', + 'FileDialog': 'tkinter.filedialog', + 'tkFileDialog': 'tkinter.filedialog', + 'SimpleDialog': 'tkinter.simpledialog', + 'tkSimpleDialog': 'tkinter.simpledialog', + 'tkColorChooser': 'tkinter.colorchooser', + 'tkCommonDialog': 'tkinter.commondialog', + 'Dialog': 'tkinter.dialog', + 'Tkdnd': 'tkinter.dnd', + 'tkFont': 'tkinter.font', + 'tkMessageBox': 'tkinter.messagebox', + 'ScrolledText': 'tkinter.scrolledtext', + 'Tkconstants': 'tkinter.constants', + 'Tix': 'tkinter.tix', + 'ttk': 'tkinter.ttk', + 'Tkinter': 'tkinter', + 'markupbase': '_markupbase', + '_winreg': 'winreg', + 'thread': '_thread', + 'dummy_thread': '_dummy_thread', + 'dbhash': 'dbm.bsd', + 'dumbdbm': 'dbm.dumb', + 'dbm': 'dbm.ndbm', + 'gdbm': 'dbm.gnu', + 'xmlrpclib': 'xmlrpc.client', + 'DocXMLRPCServer': 'xmlrpc.server', + 'SimpleXMLRPCServer': 'xmlrpc.server', + 'httplib': 'http.client', + 'htmlentitydefs' : 'html.entities', + 'HTMLParser' : 'html.parser', + 'Cookie': 'http.cookies', + 'cookielib': 'http.cookiejar', + 'BaseHTTPServer': 'http.server', + 'SimpleHTTPServer': 'http.server', + 'CGIHTTPServer': 'http.server', + 'test.test_support': 'test.support', + 'commands': 'subprocess', + 'UserString' : 'collections', + 'UserList' : 'collections', + 'urlparse' : 'urllib.parse', + 'robotparser' : 'urllib.robotparser', + 'whichdb': 'dbm', + 'anydbm': 'dbm' +} + +# This contains rename rules that are easy to handle. We ignore the more +# complex stuff (e.g. mapping the names in the urllib and types modules). +# These rules should be run before import names are fixed. 
+NAME_MAPPING = { + ('__builtin__', 'xrange'): ('builtins', 'range'), + ('__builtin__', 'reduce'): ('functools', 'reduce'), + ('__builtin__', 'intern'): ('sys', 'intern'), + ('__builtin__', 'unichr'): ('builtins', 'chr'), + ('__builtin__', 'basestring'): ('builtins', 'str'), + ('__builtin__', 'long'): ('builtins', 'int'), + ('itertools', 'izip'): ('builtins', 'zip'), + ('itertools', 'imap'): ('builtins', 'map'), + ('itertools', 'ifilter'): ('builtins', 'filter'), + ('itertools', 'ifilterfalse'): ('itertools', 'filterfalse'), +} Index: Lib/pickle.py =================================================================== --- Lib/pickle.py (revision 73040) +++ Lib/pickle.py (working copy) @@ -34,6 +34,7 @@ import re import io import codecs +import _compat_pickle __all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler", "Unpickler", "dump", "dumps", "load", "loads"] @@ -171,7 +172,6 @@ __all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)]) - # Pickling machinery class _Pickler: @@ -766,7 +766,8 @@ class _Unpickler: - def __init__(self, file, *, encoding="ASCII", errors="strict"): + def __init__(self, file, *, fix_imports=True, + encoding="ASCII", errors="strict"): """This takes a binary file for reading a pickle data stream. The protocol version of the pickle is detected automatically, so no @@ -788,6 +789,8 @@ self.memo = {} self.encoding = encoding self.errors = errors + self.proto = 0 + self.fix_imports = fix_imports def load(self): """Read a pickled object representation from the open file. @@ -838,6 +841,7 @@ proto = ord(self.read(1)) if not 0 <= proto <= HIGHEST_PROTOCOL: raise ValueError("unsupported pickle protocol: %d" % proto) + self.proto = proto dispatch[PROTO[0]] = load_proto def load_persid(self): @@ -1088,7 +1092,12 @@ self.append(obj) def find_class(self, module, name): - # Subclasses may override this + # Subclasses may override this. 
+ if self.proto < 3 and self.fix_imports: + if (module, name) in _compat_pickle.NAME_MAPPING: + module, name = _compat_pickle.NAME_MAPPING[(module, name)] + if module in _compat_pickle.IMPORT_MAPPING: + module = _compat_pickle.IMPORT_MAPPING[module] __import__(module, level=0) mod = sys.modules[module] klass = getattr(mod, name) @@ -1337,17 +1346,18 @@ assert isinstance(res, bytes_types) return res -def load(file, *, encoding="ASCII", errors="strict"): - return Unpickler(file, encoding=encoding, errors=errors).load() +def load(file, *, fix_imports=True, encoding="ASCII", errors="strict"): + return Unpickler(file, fix_imports=fix_imports, + encoding=encoding, errors=errors).load() -def loads(s, *, encoding="ASCII", errors="strict"): +def loads(s, *, fix_imports=True, encoding="ASCII", errors="strict"): if isinstance(s, str): raise TypeError("Can't load pickle from unicode string") file = io.BytesIO(s) - return Unpickler(file, encoding=encoding, errors=errors).load() + return Unpickler(file, fix_imports=fix_imports, + encoding=encoding, errors=errors).load() # Doctest - def _test(): import doctest return doctest.testmod() Index: Modules/_pickle.c =================================================================== --- Modules/_pickle.c (revision 73040) +++ Modules/_pickle.c (working copy) @@ -103,25 +103,30 @@ /* Exception classes for pickle. These should override the ones defined in pickle.py, when the C-optimized Pickler and Unpickler are used. */ -static PyObject *PickleError; -static PyObject *PicklingError; -static PyObject *UnpicklingError; +static PyObject *PickleError = NULL; +static PyObject *PicklingError = NULL; +static PyObject *UnpicklingError = NULL; /* copyreg.dispatch_table, {type_object: pickling_function} */ -static PyObject *dispatch_table; +static PyObject *dispatch_table = NULL; /* For EXT[124] opcodes. 
*/ /* copyreg._extension_registry, {(module_name, function_name): code} */ -static PyObject *extension_registry; +static PyObject *extension_registry = NULL; /* copyreg._inverted_registry, {code: (module_name, function_name)} */ -static PyObject *inverted_registry; +static PyObject *inverted_registry = NULL; /* copyreg._extension_cache, {code: object} */ -static PyObject *extension_cache; +static PyObject *extension_cache = NULL; +/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */ +static PyObject *name_mapping = NULL; +/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */ +static PyObject *import_mapping = NULL; + /* XXX: Are these really nescessary? */ /* As the name says, an empty tuple. */ -static PyObject *empty_tuple; +static PyObject *empty_tuple = NULL; /* For looking up name pairs in copyreg._extension_registry. */ -static PyObject *two_tuple; +static PyObject *two_tuple = NULL; static int stack_underflow(void) @@ -340,6 +345,9 @@ objects. */ Py_ssize_t num_marks; /* Number of marks in the mark stack. */ Py_ssize_t marks_size; /* Current allocated size of the mark stack. */ + int proto; /* Protocol of the pickle loaded. */ + int fix_imports; /* Indicate whether Unpickler should fix + the names of globals pickled by Python 2.x. 
*/ } UnpicklerObject; /* Forward declarations */ @@ -2628,8 +2636,11 @@ if (proto_obj == NULL || proto_obj == Py_None) proto = DEFAULT_PROTOCOL; - else + else { proto = PyLong_AsLong(proto_obj); + if (proto == -1 && PyErr_Occurred()) + return -1; + } if (proto < 0) proto = HIGHEST_PROTOCOL; @@ -4220,8 +4231,10 @@ return -1; i = (unsigned char)s[0]; - if (i <= HIGHEST_PROTOCOL) + if (i <= HIGHEST_PROTOCOL) { + self->proto = i; return 0; + } PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i); return -1; @@ -4378,11 +4391,36 @@ PyObject *modules_dict; PyObject *module; PyObject *module_name, *global_name; + int result; if (!PyArg_UnpackTuple(args, "find_class", 2, 2, &module_name, &global_name)) return NULL; + /* Try to map the old names used in Python 2.x to the new ones used in + Python 3.x. We do this only with old pickle protocols and when the user + has not disabled the feature. */ + if (self->proto < 3 && self->fix_imports) { + PyTuple_SET_ITEM(two_tuple, 0, module_name); + PyTuple_SET_ITEM(two_tuple, 1, global_name); + result = PyDict_Contains(name_mapping, two_tuple); + if (result) { + PyObject *item; + if (result < 0) + return NULL; + item = PyDict_GetItem(name_mapping, two_tuple); + module_name = PyTuple_GET_ITEM(item, 0); + global_name = PyTuple_GET_ITEM(item, 1); + } + + result = PyDict_Contains(import_mapping, module_name); + if (result) { + if (result < 0) + return NULL; + module_name = PyDict_GetItem(import_mapping, module_name); + } + } + modules_dict = PySys_GetObject("modules"); if (modules_dict == NULL) return NULL; @@ -4484,8 +4522,9 @@ static int Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds) { - static char *kwlist[] = {"file", "encoding", "errors", 0}; + static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0}; PyObject *file; + int fix_imports = 1; char *encoding = NULL; char *errors = NULL; @@ -4504,8 +4543,8 @@ extra careful in the other Unpickler methods, since a subclass could forget 
to call Unpickler.__init__() thus breaking our internal invariants. */ - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|ss:Unpickler", kwlist, - &file, &encoding, &errors)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|iss:Unpickler", kwlist, + &file, &fix_imports, &encoding, &errors)) return -1; /* In case of multiple __init__() calls, clear previous content. */ @@ -4549,6 +4588,8 @@ self->last_string = NULL; self->arg = NULL; + self->proto = 0; + self->fix_imports = fix_imports; return 0; } @@ -4672,40 +4713,50 @@ }; static int -init_stuff(void) +initmodule(void) { - PyObject *copyreg; + PyObject *copyreg = NULL; + PyObject *compat_pickle = NULL; + /* XXX: We should ensure that the types of the dictionaries imported are + exactly PyDict objects. Otherwise, it is possible to crash the pickle + since we use the PyDict API directly to access these dictionaries. */ + copyreg = PyImport_ImportModule("copyreg"); if (!copyreg) - return -1; - + goto error; dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table"); if (!dispatch_table) goto error; - extension_registry = \ PyObject_GetAttrString(copyreg, "_extension_registry"); if (!extension_registry) goto error; - inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry"); if (!inverted_registry) goto error; - extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache"); if (!extension_cache) goto error; - - Py_DECREF(copyreg); + Py_CLEAR(copyreg); + compat_pickle = PyImport_ImportModule("_compat_pickle"); + if (!compat_pickle) + goto error; + name_mapping = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING"); + if (!name_mapping) + goto error; + import_mapping = PyObject_GetAttrString(compat_pickle, "IMPORT_MAPPING"); + if (!import_mapping) + goto error; + Py_CLEAR(compat_pickle); + empty_tuple = PyTuple_New(0); if (empty_tuple == NULL) - return -1; - + goto error; two_tuple = PyTuple_New(2); if (two_tuple == NULL) - return -1; + goto error; /* We use this temp container with no regard to refcounts, or 
to * keeping containees alive. Exempt from GC, because we don't * want anything looking at two_tuple() by magic. @@ -4715,7 +4766,17 @@ return 0; error: - Py_DECREF(copyreg); + /* Py_CLEAR NULLs each pointer: no double release, no dangling global. */ + Py_CLEAR(copyreg); + Py_CLEAR(dispatch_table); + Py_CLEAR(extension_registry); + Py_CLEAR(inverted_registry); + Py_CLEAR(extension_cache); + Py_CLEAR(compat_pickle); + Py_CLEAR(name_mapping); + Py_CLEAR(import_mapping); + Py_CLEAR(empty_tuple); + Py_CLEAR(two_tuple); return -1; } @@ -4773,7 +4834,7 @@ if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0) return NULL; - if (init_stuff() < 0) + if (initmodule() < 0) return NULL; return m;