--- Modules/_pickle.c~old 2008-06-02 01:14:34.000000000 -0400 +++ Modules/_pickle.c 2008-06-04 13:50:09.000000000 -0400 @@ -117,6 +117,7 @@ /* copyreg._extension_cache, {code: object} */ static PyObject *extension_cache; +/* XXX: Are these really nescessary? */ /* As the name says, an empty tuple. */ static PyObject *empty_tuple; /* For looking up name pairs in copyreg._extension_registry. */ @@ -372,7 +373,7 @@ XXX: Does caching the argument tuple provides any real performance benefits? - A quick benchmark, on a 2.0GHz Atlon64 3200+ running Linux 2.6.24 with + A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then immediately DECREF it) and 1,200,000 calls when allocating brand new tuples @@ -383,7 +384,7 @@ object allocation overhead. So, I really doubt these functions provide any real benefits. - On the other hand, oprofile reports that pickle spend a lot of time in + On the other hand, oprofile reports that pickle spends a lot of time in these functions. But, that is probably more related to the function call overhead, than the argument tuple allocation. @@ -457,7 +458,7 @@ } /* XXX: These read/readline functions ought to be optimized. Buffered I/O - might help a lot, especially with the new (but mush slower) io library. + might help a lot, especially with the new (but much slower) io library. */ static Py_ssize_t unpickler_read(UnpicklerObject *self, char **s, Py_ssize_t n) @@ -1707,7 +1708,7 @@ * so generate an EXT opcode. */ PyObject *code_obj; /* extension code as Python object */ - long code; /* extension code as C value */ + long code; /* extension code as C value */ char pdata[5]; int n; @@ -1974,7 +1975,8 @@ Thus when save() is called on newargstup (the 2nd item) recursion ensue. Of course, the bug in the complex class which has a broken __getnewargs__() that emits another complex object. But, the point, - here, is it is quite easy to end up with a broken reduce function. */ + here, is it is quite easy to end up with a broken reduce + function. */ /* Save the class and its __new__ arguments. */ if (save(self, cls, 0) < 0) @@ -2031,6 +2033,7 @@ PyObject *memo_key = NULL; int status = 0; + /* XXX: Use Py_EnterRecursiveCall()? */ if (++self->nesting > Py_GetRecursionLimit()) { PyErr_SetString(PyExc_RuntimeError, "maximum recursion depth exceeded"); @@ -2165,10 +2168,10 @@ /* XXX: If the __reduce__ method is defined, __reduce_ex__ is automatically defined as __reduce__. While this is convenient, this make it impossible to know which method was actually called. Of - course, this is not a big deal, but still it would be nice to let + course, this is not a big deal. But still, it would be nice to let the user know which method was called when something go wrong. Incidentally, this means if __reduce_ex__ is not defined, we - don't have to check for a __reduce__ method. */ + don't actually have to check for a __reduce__ method. */ /* Check for a __reduce_ex__ method. */ reduce_func = PyObject_GetAttr(obj, reduce_ex_str); @@ -2276,7 +2279,6 @@ Py_RETURN_NONE; } - PyDoc_STRVAR(Pickler_dump_doc, "dump(obj) -> None. Write a pickled representation of obj to the open file."); @@ -2302,10 +2304,51 @@ {NULL, NULL} /* sentinel */ }; -static PyObject * -Pickler_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static void +Pickler_dealloc(PicklerObject *self) +{ + PyObject_GC_UnTrack(self); + + Py_XDECREF(self->write); + Py_XDECREF(self->memo); + Py_XDECREF(self->pers_func); + Py_XDECREF(self->arg); + Py_XDECREF(self->fast_memo); + + PyMem_Free(self->write_buf); + + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static int +Pickler_traverse(PicklerObject *self, visitproc visit, void *arg) +{ + Py_VISIT(self->write); + Py_VISIT(self->memo); + Py_VISIT(self->pers_func); + Py_VISIT(self->arg); + Py_VISIT(self->fast_memo); + return 0; +} + +static int +Pickler_clear(PicklerObject *self) +{ + Py_CLEAR(self->write); + Py_CLEAR(self->memo); + Py_CLEAR(self->pers_func); + Py_CLEAR(self->arg); + Py_CLEAR(self->fast_memo); + + PyMem_Free(self->write_buf); + self->write_buf = NULL; + + return 0; +} + +static int +Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds) { - PicklerObject *self; static char *kwlist[] = {"file", "protocol", 0}; PyObject *file; PyObject *proto_obj = NULL; @@ -2313,7 +2356,11 @@ if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:Pickler", kwlist, &file, &proto_obj)) - return NULL; + return -1; + + /* In case of multiple __init__() calls, clear previous content. */ + if (self->read != NULL) + (void)Pickler_clear(self); if (proto_obj == NULL || proto_obj == Py_None) proto = DEFAULT_PROTOCOL; @@ -2325,16 +2372,11 @@ if (proto > HIGHEST_PROTOCOL) { PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d", HIGHEST_PROTOCOL); - return NULL; + return -1; } - self = (PicklerObject *)type->tp_alloc(type, 0); - if (self == NULL) - return NULL; - self->proto = proto; self->bin = proto > 0; - self->memo = NULL; self->arg = NULL; self->nesting = 0; self->fast = 0; @@ -2344,70 +2386,28 @@ if (!PyObject_HasAttrString(file, "write")) { PyErr_SetString(PyExc_TypeError, "file must have a 'write' attribute"); - goto error; + return -1; } self->write = PyObject_GetAttrString(file, "write"); if (self->write == NULL) - goto error; + return -1; self->buf_size = 0; self->write_buf = (char *)PyMem_Malloc(WRITE_BUF_SIZE); if (self->write_buf == NULL) { PyErr_NoMemory(); - goto error; + return -1; } self->pers_func = NULL; if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) { self->pers_func = PyObject_GetAttrString((PyObject *)self, "persistent_id"); if (self->pers_func == NULL) - goto error; + return -1; } self->memo = PyDict_New(); if (self->memo == NULL) - goto error; - - return (PyObject *)self; - - error: - Py_DECREF(self); - return NULL; -} - -static void -Pickler_dealloc(PicklerObject *self) -{ - PyObject_GC_UnTrack(self); - - Py_XDECREF(self->write); - Py_XDECREF(self->memo); - Py_XDECREF(self->pers_func); - Py_XDECREF(self->arg); - Py_XDECREF(self->fast_memo); - - PyMem_Free(self->write_buf); - - Py_TYPE(self)->tp_free((PyObject *)self); -} + return -1; -static int -Pickler_traverse(PicklerObject *self, visitproc visit, void *arg) -{ - Py_VISIT(self->write); - Py_VISIT(self->memo); - Py_VISIT(self->pers_func); - Py_VISIT(self->arg); - Py_VISIT(self->fast_memo); - return 0; -} - -static int -Pickler_clear(PicklerObject *self) -{ - Py_CLEAR(self->write); - Py_CLEAR(self->memo); - Py_CLEAR(self->pers_func); - Py_CLEAR(self->arg); - Py_CLEAR(self->fast_memo); return 0; } @@ -2554,39 +2554,25 @@ 0, /*tp_descr_get*/ 0, /*tp_descr_set*/ 0, /*tp_dictoffset*/ - 0, /*tp_init*/ - 0, /*tp_alloc*/ - Pickler_new, /*tp_new*/ - 0, /*tp_free*/ + (initproc)Pickler_init, /*tp_init*/ + PyType_GenericAlloc, /*tp_alloc*/ + PyType_GenericNew, /*tp_new*/ + PyObject_GC_Del, /*tp_free*/ 0, /*tp_is_gc*/ }; -/* The name of find_class() is misleading. In newer pickle protocols, this - function is used for loading any global (i.e., functions), not just - classes. The name is kept only for backward compatibility. */ +/* Temporary helper for calling self.find_class(). + + XXX: It would be nice to able to avoid Python function call overhead, by + using directly the C version of find_class(), when find_class() is not + overridden by a subclass. Although, this could become rather hackish. A + simpler optimization would be to call the C function when self is not a + subclass instance. */ static PyObject * -find_class(PyObject *module_name, PyObject *global_name) +find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name) { - PyObject *global; - PyObject *modules_dict; - PyObject *module; - - modules_dict = PySys_GetObject("modules"); - if (modules_dict == NULL) - return NULL; - - module = PyDict_GetItem(modules_dict, module_name); - if (module == NULL) { - module = PyImport_Import(module_name); - if (module == NULL) - return NULL; - global = PyObject_GetAttr(module, global_name); - Py_DECREF(module); - } - else { - global = PyObject_GetAttr(module, global_name); - } - return global; + return PyObject_CallMethod((PyObject *)self, "find_class", "OO", + module_name, global_name); } static int @@ -3231,7 +3217,7 @@ return bad_readline(); class_name = PyUnicode_FromStringAndSize(s, len - 1); if (class_name == NULL) { - cls = find_class(module_name, class_name); + cls = find_class(self, module_name, class_name); Py_DECREF(class_name); } } @@ -3327,7 +3313,7 @@ } global_name = PyUnicode_FromStringAndSize(s, len - 1); if (global_name) { - global = find_class(module_name, global_name); + global = find_class(self, module_name, global_name); Py_DECREF(global_name); } } @@ -3594,7 +3580,7 @@ return -1; } /* Load the object. */ - obj = find_class(module_name, class_name); + obj = find_class(self, module_name, class_name); if (obj == NULL) { Py_DECREF(py_code); return -1; @@ -4052,6 +4038,10 @@ case STOP: break; + case '\0': + PyErr_SetNone(PyExc_EOFError); + return NULL; + default: PyErr_Format(UnpicklingError, "invalid load key, '%c'.", s[0]); @@ -4073,83 +4063,82 @@ return value; } +PyDoc_STRVAR(Unpickler_load_doc, +"load() -> object. Load a pickle." +"\n" +"Read a pickled object representation from the open file object given in\n" +"the constructor, and return the reconstituted object hierarchy specified\n" +"therein.\n"); + static PyObject * Unpickler_load(UnpicklerObject *self) { + /* Check whether the Unpickler was initialized correctly. This prevents + segfaulting if a subclass overridden __init__ with a function that does + not call Unpickler.__init__(). Here, we simply ensure that self->read + is not NULL. */ + if (self->read == NULL) { + PyErr_Format(UnpicklingError, + "Unpickler.__init__() was not called by %s.__init__()", + Py_TYPE(self)->tp_name); + return NULL; + } + return load(self); } -static struct PyMethodDef Unpickler_methods[] = { - {"load", (PyCFunction)Unpickler_load, METH_NOARGS, - PyDoc_STR("load() -> None. Load a pickle")}, - {NULL, NULL} /* sentinel */ -}; +/* The name of find_class() is misleading. In newer pickle protocols, this + function is used for loading any global (i.e., functions), not just + classes. The name is kept only for backward compatibility. */ + +PyDoc_STRVAR(Unpickler_find_class_doc, +"find_class(module_name, global_name) -> object.\n" +"\n" +"Return an object from a specified module, importing the module if\n" +"necessary. Subclasses may override this method (e.g. to restrict\n" +"unpickling of arbitrary classes and functions).\n" +"\n" +"This method is called whenever a class or a function object is\n" +"needed. Both arguments passed are str objects.\n"); static PyObject * -Unpickler_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +Unpickler_find_class(UnpicklerObject *self, PyObject *args) { - UnpicklerObject *self; - static char *kwlist[] = {"file", "encoding", "errors", 0}; - PyObject *file; - char *encoding = NULL; - char *errors = NULL; + PyObject *global; + PyObject *modules_dict; + PyObject *module; + PyObject *module_name, *global_name; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|ss:Unpickler", kwlist, - &file, &encoding, &errors)) + if (!PyArg_UnpackTuple(args, "find_class", 2, 2, + &module_name, &global_name)) return NULL; - self = (UnpicklerObject *)type->tp_alloc(type, 0); - if (self == NULL) + modules_dict = PySys_GetObject("modules"); + if (modules_dict == NULL) return NULL; - self->arg = NULL; - self->last_string = NULL; - self->marks = NULL; - self->num_marks = 0; - self->marks_size = 0; - - if (encoding == NULL) - encoding = "ASCII"; - if (errors == NULL) - errors = "strict"; - - self->encoding = strdup(encoding); - self->errors = strdup(errors); - if (self->encoding == NULL || self->errors == NULL) { - PyErr_NoMemory(); - goto error; - } - - self->read = PyObject_GetAttrString(file, "read"); - self->readline = PyObject_GetAttrString(file, "readline"); - if (self->readline == NULL || self->read == NULL) - goto error; - - if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) { - self->pers_func = PyObject_GetAttrString((PyObject *)self, - "persistent_load"); - if (self->pers_func == NULL) - goto error; + module = PyDict_GetItem(modules_dict, module_name); + if (module == NULL) { + module = PyImport_Import(module_name); + if (module == NULL) + return NULL; + global = PyObject_GetAttr(module, global_name); + Py_DECREF(module); } - else { - self->pers_func = NULL; + else { + global = PyObject_GetAttr(module, global_name); } - - self->memo = PyDict_New(); - if (self->memo == NULL) - goto error; - - self->stack = (Pdata *)Pdata_New(); - if (self->stack == NULL) - goto error; - - return (PyObject *)self; - - error: - Py_DECREF(self); - return NULL; + return global; } +static struct PyMethodDef Unpickler_methods[] = { + {"load", (PyCFunction)Unpickler_load, METH_NOARGS, + Unpickler_load_doc}, + {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS, + Unpickler_find_class_doc}, + {NULL, NULL} /* sentinel */ +}; + static void Unpickler_dealloc(UnpicklerObject *self) { @@ -4162,9 +4151,7 @@ Py_XDECREF(self->arg); Py_XDECREF(self->last_string); - if (self->marks) { - PyMem_Free(self->marks); - } + PyMem_Free(self->marks); free(self->encoding); free(self->errors); @@ -4194,11 +4181,79 @@ Py_CLEAR(self->pers_func); Py_CLEAR(self->arg); Py_CLEAR(self->last_string); + + PyMem_Free(self->marks); + self->marks = NULL; + free(self->encoding); + self->encoding = NULL; + free(self->errors); + self->errors = NULL; + + return 0; +} + +static int +Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds) +{ + static char *kwlist[] = {"file", "encoding", "errors", 0}; + PyObject *file; + char *encoding = NULL; + char *errors = NULL; + + /* Arguments parsing needs to be done in the __init__() method to allow + subclasses to define their own __init__() method, which may (or may + not) support Unpickler arguments. However, this means we need to be + extra careful in the other Unpickler methods, since a subclass could + forget to call Unpickler.__init__() thus breaking our internal + invariants. */ + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|ss:Unpickler", kwlist, + &file, &encoding, &errors)) + return -1; + + /* In case of multiple __init__() calls, clear previous content. */ + if (self->read != NULL) + (void)Unpickler_clear(self); + + self->read = PyObject_GetAttrString(file, "read"); + self->readline = PyObject_GetAttrString(file, "readline"); + if (self->readline == NULL || self->read == NULL) + return -1; + + if (encoding == NULL) + encoding = "ASCII"; + if (errors == NULL) + errors = "strict"; + + self->encoding = strdup(encoding); + self->errors = strdup(errors); + if (self->encoding == NULL || self->errors == NULL) { + PyErr_NoMemory(); + return -1; + } + + if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) { + self->pers_func = PyObject_GetAttrString((PyObject *)self, + "persistent_load"); + if (self->pers_func == NULL) + return -1; + } + else { + self->pers_func = NULL; + } + + self->stack = (Pdata *)Pdata_New(); + if (self->stack == NULL) + return -1; + + self->memo = PyDict_New(); + if (self->memo == NULL) + return -1; + return 0; } static PyObject * -Unpickler_get_memo(PicklerObject *self) +Unpickler_get_memo(UnpicklerObject *self) { if (self->memo == NULL) PyErr_SetString(PyExc_AttributeError, "memo"); @@ -4208,7 +4263,7 @@ } static int -Unpickler_set_memo(PicklerObject *self, PyObject *value) +Unpickler_set_memo(UnpicklerObject *self, PyObject *value) { PyObject *tmp; @@ -4225,7 +4280,7 @@ tmp = self->memo; Py_INCREF(value); self->memo = value; - Py_DECREF(tmp); + Py_XDECREF(tmp); return 0; } @@ -4304,7 +4359,7 @@ 0, /*tp_hash*/ 0, /*tp_call*/ 0, /*tp_str*/ - 0, /*tp_getattro*/ + PyObject_GenericGetAttr, /*tp_getattro*/ 0, /*tp_setattro*/ 0, /*tp_as_buffer*/ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, @@ -4323,10 +4378,10 @@ 0, /*tp_descr_get*/ 0, /*tp_descr_set*/ 0, /*tp_dictoffset*/ - 0, /*tp_init*/ - 0, /*tp_alloc*/ - Unpickler_new, /*tp_new*/ - 0, /*tp_free*/ + (initproc)Unpickler_init, /*tp_init*/ + PyType_GenericAlloc, /*tp_alloc*/ + PyType_GenericNew, /*tp_new*/ + PyObject_GC_Del, /*tp_free*/ 0, /*tp_is_gc*/ };