# HG changeset patch # User Antoine Pitrou # Date 1244064703 -7200 diff -r 4d41e2e9665f -r e0333a24b9e5 Doc/library/pickle.rst --- a/Doc/library/pickle.rst +++ b/Doc/library/pickle.rst @@ -171,7 +171,7 @@ process more convenient: supported. The higher the protocol used, the more recent the version of Python needed to read the pickle produced. -.. function:: load(file, [\*, encoding="ASCII", errors="strict"]) +.. function:: load(file, [\*, fix_imports=True, encoding="ASCII", errors="strict"]) Read a pickled object representation from the open file object *file* and return the reconstituted object hierarchy specified therein. This is @@ -187,11 +187,14 @@ process more convenient: for reading, a BytesIO object, or any other custom object that meets this interface. - Optional keyword arguments are encoding and errors, which are used to decode - 8-bit string instances pickled by Python 2.x. These default to 'ASCII' and - 'strict', respectively. + Optional keyword arguments are fix_imports, encoding and errors, which are + used to control compatibility support for pickle streams generated by + Python 2.x. If *fix_imports* is True, pickle will try to map the old + Python 2.x names to the new names used in Python 3.x. The *encoding* and + *errors* tell pickle how to decode 8-bit string instances pickled by + Python 2.x; these default to 'ASCII' and 'strict', respectively. -.. function:: loads(bytes_object, [\*, encoding="ASCII", errors="strict"]) +.. function:: loads(bytes_object, [\*, fix_imports=True, encoding="ASCII", errors="strict"]) Read a pickled object hierarchy from a :class:`bytes` object and return the reconstituted object hierarchy specified therein @@ -200,9 +203,12 @@ process more convenient: argument is needed. Bytes past the pickled object's representation are ignored. - Optional keyword arguments are encoding and errors, which are used to decode - 8-bit string instances pickled by Python 2.x. These default to 'ASCII' and - 'strict', respectively. 
+ Optional keyword arguments are fix_imports, encoding and errors, which are + used to control compatibility support for pickle streams generated by + Python 2.x. If *fix_imports* is True, pickle will try to map the old + Python 2.x names to the new names used in Python 3.x. The *encoding* and + *errors* tell pickle how to decode 8-bit string instances pickled by + Python 2.x; these default to 'ASCII' and 'strict', respectively. The :mod:`pickle` module defines three exceptions: @@ -277,7 +283,7 @@ The :mod:`pickle` module exports two cla Use :func:`pickletools.optimize` if you need more compact pickles. -.. class:: Unpickler(file, [\*, encoding="ASCII", errors="strict"]) +.. class:: Unpickler(file, [\*, fix_imports=True, encoding="ASCII", errors="strict"]) This takes a binary file for reading a pickle data stream. @@ -290,9 +296,12 @@ The :mod:`pickle` module exports two cla for reading, a BytesIO object, or any other custom object that meets this interface. - Optional keyword arguments are encoding and errors, which are used to decode - 8-bit string instances pickled by Python 2.x. These default to 'ASCII' and - 'strict', respectively. + Optional keyword arguments are fix_imports, encoding and errors, which are + used to control compatibility support for pickle streams generated by + Python 2.x. If *fix_imports* is True, pickle will try to map the old + Python 2.x names to the new names used in Python 3.x. The *encoding* and + *errors* tell pickle how to decode 8-bit string instances pickled by + Python 2.x; these default to 'ASCII' and 'strict', respectively. .. method:: load() diff -r 4d41e2e9665f -r e0333a24b9e5 Lib/_compat_pickle.py --- /dev/null +++ b/Lib/_compat_pickle.py @@ -0,0 +1,76 @@ +# This module is used to map the old Python 2 names to the new names used in +# Python 3 for the pickle module. This is needed to make pickle streams +# generated with Python 2 loadable by Python 3. + +# This is a copy of lib2to3.fixes.fix_imports.MAPPING. 
We cannot import +# lib2to3 and use the mapping defined there, because lib2to3 uses pickle. +# Thus, this could cause the module to be imported recursively. +IMPORT_MAPPING = { + 'StringIO': 'io', + 'cStringIO': 'io', + 'cPickle': 'pickle', + '__builtin__' : 'builtins', + 'copy_reg': 'copyreg', + 'Queue': 'queue', + 'SocketServer': 'socketserver', + 'ConfigParser': 'configparser', + 'repr': 'reprlib', + 'FileDialog': 'tkinter.filedialog', + 'tkFileDialog': 'tkinter.filedialog', + 'SimpleDialog': 'tkinter.simpledialog', + 'tkSimpleDialog': 'tkinter.simpledialog', + 'tkColorChooser': 'tkinter.colorchooser', + 'tkCommonDialog': 'tkinter.commondialog', + 'Dialog': 'tkinter.dialog', + 'Tkdnd': 'tkinter.dnd', + 'tkFont': 'tkinter.font', + 'tkMessageBox': 'tkinter.messagebox', + 'ScrolledText': 'tkinter.scrolledtext', + 'Tkconstants': 'tkinter.constants', + 'Tix': 'tkinter.tix', + 'ttk': 'tkinter.ttk', + 'Tkinter': 'tkinter', + 'markupbase': '_markupbase', + '_winreg': 'winreg', + 'thread': '_thread', + 'dummy_thread': '_dummy_thread', + 'dbhash': 'dbm.bsd', + 'dumbdbm': 'dbm.dumb', + 'dbm': 'dbm.ndbm', + 'gdbm': 'dbm.gnu', + 'xmlrpclib': 'xmlrpc.client', + 'DocXMLRPCServer': 'xmlrpc.server', + 'SimpleXMLRPCServer': 'xmlrpc.server', + 'httplib': 'http.client', + 'htmlentitydefs' : 'html.entities', + 'HTMLParser' : 'html.parser', + 'Cookie': 'http.cookies', + 'cookielib': 'http.cookiejar', + 'BaseHTTPServer': 'http.server', + 'SimpleHTTPServer': 'http.server', + 'CGIHTTPServer': 'http.server', + 'test.test_support': 'test.support', + 'commands': 'subprocess', + 'UserString' : 'collections', + 'UserList' : 'collections', + 'urlparse' : 'urllib.parse', + 'robotparser' : 'urllib.robotparser', + 'whichdb': 'dbm', + 'anydbm': 'dbm' +} + +# This contains rename rules that are easy to handle. We ignore the more +# complex stuff (e.g. mapping the names in the urllib and types modules). +# These rules should be run before import names are fixed. 
+NAME_MAPPING = { + ('__builtin__', 'xrange'): ('builtins', 'range'), + ('__builtin__', 'reduce'): ('functools', 'reduce'), + ('__builtin__', 'intern'): ('sys', 'intern'), + ('__builtin__', 'unichr'): ('builtins', 'chr'), + ('__builtin__', 'basestring'): ('builtins', 'str'), + ('__builtin__', 'long'): ('builtins', 'int'), + ('itertools', 'izip'): ('builtins', 'zip'), + ('itertools', 'imap'): ('builtins', 'map'), + ('itertools', 'ifilter'): ('builtins', 'filter'), + ('itertools', 'ifilterfalse'): ('itertools', 'filterfalse'), +} diff -r 4d41e2e9665f -r e0333a24b9e5 Lib/pickle.py --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -34,6 +34,7 @@ import struct import re import io import codecs +import _compat_pickle __all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler", "Unpickler", "dump", "dumps", "load", "loads"] @@ -171,7 +172,6 @@ SHORT_BINBYTES = b'C' # " " ; __all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)]) - # Pickling machinery class _Pickler: @@ -766,7 +766,8 @@ def whichmodule(func, funcname): class _Unpickler: - def __init__(self, file, *, encoding="ASCII", errors="strict"): + def __init__(self, file, *, fix_imports=True, + encoding="ASCII", errors="strict"): """This takes a binary file for reading a pickle data stream. The protocol version of the pickle is detected automatically, so no @@ -788,6 +789,8 @@ class _Unpickler: self.memo = {} self.encoding = encoding self.errors = errors + self.proto = 0 + self.fix_imports = fix_imports def load(self): """Read a pickled object representation from the open file. @@ -838,6 +841,7 @@ class _Unpickler: proto = ord(self.read(1)) if not 0 <= proto <= HIGHEST_PROTOCOL: raise ValueError("unsupported pickle protocol: %d" % proto) + self.proto = proto dispatch[PROTO[0]] = load_proto def load_persid(self): @@ -1088,7 +1092,12 @@ class _Unpickler: self.append(obj) def find_class(self, module, name): - # Subclasses may override this + # Subclasses may override this. 
+ if self.proto < 3 and self.fix_imports: + if (module, name) in _compat_pickle.NAME_MAPPING: + module, name = _compat_pickle.NAME_MAPPING[(module, name)] + if module in _compat_pickle.IMPORT_MAPPING: + module = _compat_pickle.IMPORT_MAPPING[module] __import__(module, level=0) mod = sys.modules[module] klass = getattr(mod, name) @@ -1337,17 +1346,18 @@ def dumps(obj, protocol=None): assert isinstance(res, bytes_types) return res -def load(file, *, encoding="ASCII", errors="strict"): - return Unpickler(file, encoding=encoding, errors=errors).load() +def load(file, *, fix_imports=True, encoding="ASCII", errors="strict"): + return Unpickler(file, fix_imports=fix_imports, + encoding=encoding, errors=errors).load() -def loads(s, *, encoding="ASCII", errors="strict"): +def loads(s, *, fix_imports=True, encoding="ASCII", errors="strict"): if isinstance(s, str): raise TypeError("Can't load pickle from unicode string") file = io.BytesIO(s) - return Unpickler(file, encoding=encoding, errors=errors).load() + return Unpickler(file, fix_imports=fix_imports, + encoding=encoding, errors=errors).load() # Doctest - def _test(): import doctest return doctest.testmod() diff -r 4d41e2e9665f -r e0333a24b9e5 Lib/test/pickletester.py --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -3,6 +3,7 @@ import unittest import pickle import pickletools import copyreg +from http.cookies import SimpleCookie from test.support import TestFailed, TESTFN, run_with_locale @@ -342,6 +343,21 @@ DATA2_DIS = """\ highest protocol among opcodes = 2 """ +# set([1,2]) pickled from 2.x with protocol 2 +DATA3 = b'\x80\x02c__builtin__\nset\nq\x00]q\x01(K\x01K\x02e\x85q\x02Rq\x03.' + +# xrange(5) pickled from 2.x with protocol 2 +DATA4 = b'\x80\x02c__builtin__\nxrange\nq\x00K\x00K\x05K\x01\x87q\x01Rq\x02.' 
+ +# a SimpleCookie() object pickled from 2.x with protocol 2 +DATA5 = (b'\x80\x02cCookie\nSimpleCookie\nq\x00)\x81q\x01U\x03key' + b'q\x02cCookie\nMorsel\nq\x03)\x81q\x04(U\x07commentq\x05U' + b'\x00q\x06U\x06domainq\x07h\x06U\x06secureq\x08h\x06U\x07' + b'expiresq\th\x06U\x07max-ageq\nh\x06U\x07versionq\x0bh\x06U' + b'\x04pathq\x0ch\x06U\x08httponlyq\rh\x06u}q\x0e(U\x0b' + b'coded_valueq\x0fU\x05valueq\x10h\x10h\x10h\x02h\x02ubs}q\x11b.') + + def create_data(): c = C() c.foo = 1 @@ -956,6 +972,18 @@ class AbstractPickleTests(unittest.TestC for x_key, y_key in zip(x_keys, y_keys): self.assertIs(x_key, y_key) + def test_unpickle_from_2x(self): + # Unpickle non-trivial data from Python 2.x. + loaded = self.loads(DATA3) + self.assertEqual(loaded, set([1, 2])) + loaded = self.loads(DATA4) + self.assertEqual(type(loaded), type(range(0))) + self.assertEqual(list(loaded), list(range(5))) + loaded = self.loads(DATA5) + self.assertEqual(type(loaded), SimpleCookie) + self.assertEqual(list(loaded.keys()), ["key"]) + self.assertEqual(loaded["key"].value, "Set-Cookie: key=value") + # Test classes for reduce_ex class REX_one(object): diff -r 4d41e2e9665f -r e0333a24b9e5 Modules/_pickle.c --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -103,25 +103,30 @@ enum { /* Exception classes for pickle. These should override the ones defined in pickle.py, when the C-optimized Pickler and Unpickler are used. */ -static PyObject *PickleError; -static PyObject *PicklingError; -static PyObject *UnpicklingError; +static PyObject *PickleError = NULL; +static PyObject *PicklingError = NULL; +static PyObject *UnpicklingError = NULL; /* copyreg.dispatch_table, {type_object: pickling_function} */ -static PyObject *dispatch_table; +static PyObject *dispatch_table = NULL; /* For EXT[124] opcodes. 
*/ /* copyreg._extension_registry, {(module_name, function_name): code} */ -static PyObject *extension_registry; +static PyObject *extension_registry = NULL; /* copyreg._inverted_registry, {code: (module_name, function_name)} */ -static PyObject *inverted_registry; +static PyObject *inverted_registry = NULL; /* copyreg._extension_cache, {code: object} */ -static PyObject *extension_cache; +static PyObject *extension_cache = NULL; + +/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */ +static PyObject *name_mapping = NULL; +/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */ +static PyObject *import_mapping = NULL; /* XXX: Are these really nescessary? */ /* As the name says, an empty tuple. */ -static PyObject *empty_tuple; +static PyObject *empty_tuple = NULL; /* For looking up name pairs in copyreg._extension_registry. */ -static PyObject *two_tuple; +static PyObject *two_tuple = NULL; static int stack_underflow(void) @@ -340,6 +345,9 @@ typedef struct UnpicklerObject { objects. */ Py_ssize_t num_marks; /* Number of marks in the mark stack. */ Py_ssize_t marks_size; /* Current allocated size of the mark stack. */ + int proto; /* Protocol of the pickle loaded. */ + int fix_imports; /* Indicate whether Unpickler should fix + the names of globals pickled by Python 2.x. 
*/ } UnpicklerObject; /* Forward declarations */ @@ -2628,8 +2636,11 @@ Pickler_init(PicklerObject *self, PyObje if (proto_obj == NULL || proto_obj == Py_None) proto = DEFAULT_PROTOCOL; - else + else { proto = PyLong_AsLong(proto_obj); + if (proto == -1 && PyErr_Occurred()) + return -1; + } if (proto < 0) proto = HIGHEST_PROTOCOL; @@ -4220,8 +4231,10 @@ load_proto(UnpicklerObject *self) return -1; i = (unsigned char)s[0]; - if (i <= HIGHEST_PROTOCOL) - return 0; + if (i <= HIGHEST_PROTOCOL) { + self->proto = i; + return 0; + } PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i); return -1; @@ -4383,6 +4396,40 @@ Unpickler_find_class(UnpicklerObject *se &module_name, &global_name)) return NULL; + /* Try to map the old names used in Python 2.x to the new ones used in + Python 3.x. We do this only with old pickle protocols and when the user + has not disabled the feature. */ + if (self->proto < 3 && self->fix_imports) { + PyObject *key = Py_BuildValue("(OO)", module_name, global_name); + PyObject *item; + if (key == NULL) + return NULL; + item = PyDict_GetItem(name_mapping, key); + Py_DECREF(key); + if (item != NULL) { + if (!PyTuple_Check(item)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.NAME_MAPPING values should be " + "tuples, not %.200s", Py_TYPE(item)->tp_name); + return NULL; + } + module_name = PyTuple_GET_ITEM(item, 0); + global_name = PyTuple_GET_ITEM(item, 1); + } + else { + item = PyDict_GetItem(import_mapping, module_name); + if (item != NULL) { + if (!PyUnicode_Check(item)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.IMPORT_MAPPING values should be " + "strings, not %.200s", Py_TYPE(item)->tp_name); + return NULL; + } + module_name = item; + } + } + } + modules_dict = PySys_GetObject("modules"); if (modules_dict == NULL) return NULL; @@ -4484,8 +4531,9 @@ PyDoc_STRVAR(Unpickler_doc, static int Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds) { - static char *kwlist[] = {"file", "encoding", 
"errors", 0}; + static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0}; PyObject *file; + int fix_imports = 1; char *encoding = NULL; char *errors = NULL; @@ -4504,8 +4552,8 @@ Unpickler_init(UnpicklerObject *self, Py extra careful in the other Unpickler methods, since a subclass could forget to call Unpickler.__init__() thus breaking our internal invariants. */ - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|ss:Unpickler", kwlist, - &file, &encoding, &errors)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|iss:Unpickler", kwlist, + &file, &fix_imports, &encoding, &errors)) return -1; /* In case of multiple __init__() calls, clear previous content. */ @@ -4549,6 +4597,8 @@ Unpickler_init(UnpicklerObject *self, Py self->last_string = NULL; self->arg = NULL; + self->proto = 0; + self->fix_imports = fix_imports; return 0; } @@ -4672,40 +4722,63 @@ static PyTypeObject Unpickler_Type = { }; static int -init_stuff(void) -{ - PyObject *copyreg; +initmodule(void) +{ + PyObject *copyreg = NULL; + PyObject *compat_pickle = NULL; + + /* XXX: We should ensure that the types of the dictionaries imported are + exactly PyDict objects. Otherwise, it is possible to crash the pickle + since we use the PyDict API directly to access these dictionaries. 
*/ copyreg = PyImport_ImportModule("copyreg"); if (!copyreg) - return -1; - + goto error; dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table"); if (!dispatch_table) goto error; - extension_registry = \ PyObject_GetAttrString(copyreg, "_extension_registry"); if (!extension_registry) goto error; - inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry"); if (!inverted_registry) goto error; - extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache"); if (!extension_cache) goto error; - - Py_DECREF(copyreg); + Py_CLEAR(copyreg); + + /* Load the 2.x -> 3.x stdlib module mapping tables */ + compat_pickle = PyImport_ImportModule("_compat_pickle"); + if (!compat_pickle) + goto error; + name_mapping = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING"); + if (!name_mapping) + goto error; + if (!PyDict_CheckExact(name_mapping)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.NAME_MAPPING should be a dict, not %.200s", + Py_TYPE(name_mapping)->tp_name); + goto error; + } + import_mapping = PyObject_GetAttrString(compat_pickle, "IMPORT_MAPPING"); + if (!import_mapping) + goto error; + if (!PyDict_CheckExact(import_mapping)) { + PyErr_Format(PyExc_RuntimeError, + "_compat_pickle.IMPORT_MAPPING should be a dict, not %.200s", + Py_TYPE(import_mapping)->tp_name); + goto error; + } + Py_CLEAR(compat_pickle); empty_tuple = PyTuple_New(0); if (empty_tuple == NULL) - return -1; - + goto error; two_tuple = PyTuple_New(2); if (two_tuple == NULL) - return -1; + goto error; /* We use this temp container with no regard to refcounts, or to * keeping containees alive. Exempt from GC, because we don't * want anything looking at two_tuple() by magic. 
@@ -4715,7 +4788,16 @@ init_stuff(void) return 0; error: - Py_DECREF(copyreg); + Py_CLEAR(copyreg); + Py_CLEAR(dispatch_table); + Py_CLEAR(extension_registry); + Py_CLEAR(inverted_registry); + Py_CLEAR(extension_cache); + Py_CLEAR(compat_pickle); + Py_CLEAR(name_mapping); + Py_CLEAR(import_mapping); + Py_CLEAR(empty_tuple); + Py_CLEAR(two_tuple); return -1; } @@ -4773,7 +4855,7 @@ PyInit__pickle(void) if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0) return NULL; - if (init_stuff() < 0) + if (initmodule() < 0) return NULL; return m;