--- Modules/_pickle.c~old 2008-06-02 01:14:34.000000000 -0400 +++ Modules/_pickle.c 2008-06-09 20:45:00.000000000 -0400 @@ -98,7 +98,7 @@ /* Size of the write buffer of Pickler. Higher values will reduce the number of calls to the write() method of the output stream. */ - WRITE_BUF_SIZE = 256 + WRITE_BUF_SIZE = 256, }; /* Exception classes for pickle. These should override the ones defined in @@ -117,6 +117,7 @@ /* copyreg._extension_cache, {code: object} */ static PyObject *extension_cache; +/* XXX: Are these really nescessary? */ /* As the name says, an empty tuple. */ static PyObject *empty_tuple; /* For looking up name pairs in copyreg._extension_registry. */ @@ -372,7 +373,7 @@ XXX: Does caching the argument tuple provides any real performance benefits? - A quick benchmark, on a 2.0GHz Atlon64 3200+ running Linux 2.6.24 with + A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then immediately DECREF it) and 1,200,000 calls when allocating brand new tuples @@ -383,7 +384,7 @@ object allocation overhead. So, I really doubt these functions provide any real benefits. - On the other hand, oprofile reports that pickle spend a lot of time in + On the other hand, oprofile reports that pickle spends a lot of time in these functions. But, that is probably more related to the function call overhead, than the argument tuple allocation. @@ -457,8 +458,12 @@ } /* XXX: These read/readline functions ought to be optimized. Buffered I/O - might help a lot, especially with the new (but mush slower) io library. + might help a lot, especially with the new (but much slower) io library. + On the other hand, the added complexity might not worth it. */ + +/* Read at least n characters from the input stream and set s to the current + reading position. */ static Py_ssize_t unpickler_read(UnpicklerObject *self, char **s, Py_ssize_t n) { @@ -473,7 +478,7 @@ if (data == NULL) return -1; - /* XXX: Should bytearray be supported too? */ + /* XXX: Should bytearray be supported too? */ if (!PyBytes_Check(data)) { PyErr_SetString(PyExc_ValueError, "read() from the underlying stream did not" @@ -499,7 +504,7 @@ if (data == NULL) return -1; - /* XXX: Should bytearray be supported too? */ + /* XXX: Should bytearray be supported too? */ if (!PyBytes_Check(data)) { PyErr_SetString(PyExc_ValueError, "readline() from the underlying stream did not" @@ -1646,8 +1651,6 @@ PyObject *module = NULL; PyObject *cls; int status = 0; - char *string; - Py_ssize_t size; const char global_op = GLOBAL; @@ -1682,21 +1685,21 @@ module = PyImport_Import(module_name); if (module == NULL) { PyErr_Format(PicklingError, - "Can't pickle %R: import of module %R failed", + "can't pickle %R: import of module %R failed", obj, module_name); goto error; } cls = PyObject_GetAttr(module, global_name); if (cls == NULL) { PyErr_Format(PicklingError, - "Can't pickle %R: attribute lookup %S.%S failed", + "can't pickle %R: attribute lookup %S.%S failed", obj, module_name, global_name); goto error; } if (cls != obj) { Py_DECREF(cls); PyErr_Format(PicklingError, - "Can't pickle %R: it's not the same object as %S.%S", + "can't pickle %R: it's not the same object as %S.%S", obj, module_name, global_name); goto error; } @@ -1707,7 +1710,7 @@ * so generate an EXT opcode. */ PyObject *code_obj; /* extension code as Python object */ - long code; /* extension code as C value */ + long code; /* extension code as C value */ char pdata[5]; int n; @@ -1726,14 +1729,14 @@ /* Verify code_obj has the right type and value. */ if (!PyLong_Check(code_obj)) { PyErr_Format(PicklingError, - "Can't pickle %R: extension code %R isn't an integer", + "can't pickle %R: extension code %R isn't an integer", obj, code_obj); goto error; } code = PyLong_AS_LONG(code_obj); if (code <= 0 || code > 0x7fffffffL) { PyErr_Format(PicklingError, - "Can't pickle %R: extension code %i is out of range", + "can't pickle %R: extension code %ld is out of range", obj, code); goto error; } @@ -1766,20 +1769,58 @@ /* Generate a normal global opcode if we are using a pickle protocol <= 2, or if the object is not registered in the extension registry. */ + PyObject *encoded; + PyObject *(*unicode_encoder)(PyObject *); + gen_global: if (pickler_write(self, &global_op, 1) < 0) goto error; + /* Since Python 3.0 now supports non-ASCII identifiers, we encode both + the module name and the global name using UTF-8. We do so only when + we are using the pickle protocol newer than version 3. This is to + ensure compatibility with older Unpickler running on Python 2.x. */ + if (self->proto >= 3) { + unicode_encoder = PyUnicode_AsUTF8String; + } + else { + unicode_encoder = PyUnicode_AsASCIIString; + } + /* Save the name of the module. */ - string = PyUnicode_AsStringAndSize(module_name, &size); - if (pickler_write(self, string, size) < 0 || - pickler_write(self, "\n", 1) < 0) + encoded = unicode_encoder(module_name); + if (encoded == NULL) { + if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) + PyErr_Format(PicklingError, + "can't pickle module identifier '%S' using " + "pickle protocol %i", module_name, self->proto); + goto error; + } + if (pickler_write(self, PyBytes_AS_STRING(encoded), + PyBytes_GET_SIZE(encoded)) < 0) { + Py_DECREF(encoded); + goto error; + } + Py_DECREF(encoded); + if(pickler_write(self, "\n", 1) < 0) goto error; - /* Save the name of the object. */ - string = PyUnicode_AsStringAndSize(global_name, &size); - if (pickler_write(self, string, size) < 0 || - pickler_write(self, "\n", 1) < 0) + /* Save the name of the module. */ + encoded = unicode_encoder(global_name); + if (encoded == NULL) { + if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) + PyErr_Format(PicklingError, + "can't pickle global identifier '%S' using " + "pickle protocol %i", global_name, self->proto); + goto error; + } + if (pickler_write(self, PyBytes_AS_STRING(encoded), + PyBytes_GET_SIZE(encoded)) < 0) { + Py_DECREF(encoded); + goto error; + } + Py_DECREF(encoded); + if(pickler_write(self, "\n", 1) < 0) goto error; /* Memoize the object. */ @@ -1955,7 +1996,8 @@ what happen if the value returned by the __reduce__() method of some extension type contains another object of the same type. Ouch! - Here is a quick example of what I mean: + Here is a quick example, that I ran into, to illustrate what I + mean: >>> import pickle, copyreg >>> copyreg.dispatch_table.pop(complex) @@ -1964,17 +2006,18 @@ ... RuntimeError: maximum recursion depth exceeded - Removing the complex class from copyreg.dispatch_table makes the + Removing the complex class from copyreg.dispatch_table made the __reduce_ex__() method emit another complex object: >>> (1+1j).__reduce_ex__(2) (, (, (1+1j)), None, None, None) - Thus when save() is called on newargstup (the 2nd item) recursion - ensue. Of course, the bug in the complex class which has a broken - __getnewargs__() that emits another complex object. But, the point, - here, is it is quite easy to end up with a broken reduce function. */ + Thus when save() was called on newargstup (the 2nd item) recursion + ensued. Of course, the bug was in the complex class which had a + broken __getnewargs__() that emitted another complex object. But, + the point, here, is it is quite easy to end up with a broken reduce + function. */ /* Save the class and its __new__ arguments. */ if (save(self, cls, 0) < 0) @@ -2031,6 +2074,7 @@ PyObject *memo_key = NULL; int status = 0; + /* XXX: Use Py_EnterRecursiveCall()? */ if (++self->nesting > Py_GetRecursionLimit()) { PyErr_SetString(PyExc_RuntimeError, "maximum recursion depth exceeded"); @@ -2165,10 +2209,10 @@ /* XXX: If the __reduce__ method is defined, __reduce_ex__ is automatically defined as __reduce__. While this is convenient, this make it impossible to know which method was actually called. Of - course, this is not a big deal, but still it would be nice to let + course, this is not a big deal. But still, it would be nice to let the user know which method was called when something go wrong. Incidentally, this means if __reduce_ex__ is not defined, we - don't have to check for a __reduce__ method. */ + don't actually have to check for a __reduce__ method. */ /* Check for a __reduce_ex__ method. */ reduce_func = PyObject_GetAttr(obj, reduce_ex_str); @@ -2276,7 +2320,6 @@ Py_RETURN_NONE; } - PyDoc_STRVAR(Pickler_dump_doc, "dump(obj) -> None. Write a pickled representation of obj to the open file."); @@ -2302,10 +2345,71 @@ {NULL, NULL} /* sentinel */ }; -static PyObject * -Pickler_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +static void +Pickler_dealloc(PicklerObject *self) +{ + PyObject_GC_UnTrack(self); + + Py_XDECREF(self->write); + Py_XDECREF(self->memo); + Py_XDECREF(self->pers_func); + Py_XDECREF(self->arg); + Py_XDECREF(self->fast_memo); + + PyMem_Free(self->write_buf); + + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static int +Pickler_traverse(PicklerObject *self, visitproc visit, void *arg) +{ + Py_VISIT(self->write); + Py_VISIT(self->memo); + Py_VISIT(self->pers_func); + Py_VISIT(self->arg); + Py_VISIT(self->fast_memo); + return 0; +} + +static int +Pickler_clear(PicklerObject *self) +{ + Py_CLEAR(self->write); + Py_CLEAR(self->memo); + Py_CLEAR(self->pers_func); + Py_CLEAR(self->arg); + Py_CLEAR(self->fast_memo); + + PyMem_Free(self->write_buf); + self->write_buf = NULL; + + return 0; +} + +PyDoc_STRVAR(Pickler_doc, +"Pickler(file, protocol=None)" +"\n" +"This takes a binary file for writing a pickle data stream.\n" +"\n" +"The optional protocol argument tells the pickler to use the\n" +"given protocol; supported protocols are 0, 1, 2, 3. The default\n" +"protocol is 3; a backward-incompatible protocol designed for\n" +"Python 3.0.\n" +"\n" +"Specifying a negative protocol version selects the highest\n" +"protocol version supported. The higher the protocol used, the\n" +"more recent the version of Python needed to read the pickle\n" +"produced.\n" +"\n" +"The file argument must have a write() method that accepts a single\n" +"bytes argument. It can thus be a file object opened for binary\n" +"writing, a io.BytesIO instance, or any other custom object that\n" +"meets this interface.\n"); + +static int +Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds) { - PicklerObject *self; static char *kwlist[] = {"file", "protocol", 0}; PyObject *file; PyObject *proto_obj = NULL; @@ -2313,7 +2417,11 @@ if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:Pickler", kwlist, &file, &proto_obj)) - return NULL; + return -1; + + /* In case of multiple __init__() calls, clear previous content. */ + if (self->write != NULL) + (void)Pickler_clear(self); if (proto_obj == NULL || proto_obj == Py_None) proto = DEFAULT_PROTOCOL; @@ -2325,16 +2433,11 @@ if (proto > HIGHEST_PROTOCOL) { PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d", HIGHEST_PROTOCOL); - return NULL; + return -1; } - self = (PicklerObject *)type->tp_alloc(type, 0); - if (self == NULL) - return NULL; - self->proto = proto; self->bin = proto > 0; - self->memo = NULL; self->arg = NULL; self->nesting = 0; self->fast = 0; @@ -2344,70 +2447,28 @@ if (!PyObject_HasAttrString(file, "write")) { PyErr_SetString(PyExc_TypeError, "file must have a 'write' attribute"); - goto error; + return -1; } self->write = PyObject_GetAttrString(file, "write"); if (self->write == NULL) - goto error; + return -1; self->buf_size = 0; self->write_buf = (char *)PyMem_Malloc(WRITE_BUF_SIZE); if (self->write_buf == NULL) { PyErr_NoMemory(); - goto error; + return -1; } self->pers_func = NULL; if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) { self->pers_func = PyObject_GetAttrString((PyObject *)self, "persistent_id"); if (self->pers_func == NULL) - goto error; + return -1; } self->memo = PyDict_New(); if (self->memo == NULL) - goto error; - - return (PyObject *)self; - - error: - Py_DECREF(self); - return NULL; -} - -static void -Pickler_dealloc(PicklerObject *self) -{ - PyObject_GC_UnTrack(self); - - Py_XDECREF(self->write); - Py_XDECREF(self->memo); - Py_XDECREF(self->pers_func); - Py_XDECREF(self->arg); - Py_XDECREF(self->fast_memo); - - PyMem_Free(self->write_buf); - - Py_TYPE(self)->tp_free((PyObject *)self); -} - -static int -Pickler_traverse(PicklerObject *self, visitproc visit, void *arg) -{ - Py_VISIT(self->write); - Py_VISIT(self->memo); - Py_VISIT(self->pers_func); - Py_VISIT(self->arg); - Py_VISIT(self->fast_memo); - return 0; -} + return -1; -static int -Pickler_clear(PicklerObject *self) -{ - Py_CLEAR(self->write); - Py_CLEAR(self->memo); - Py_CLEAR(self->pers_func); - Py_CLEAR(self->arg); - Py_CLEAR(self->fast_memo); return 0; } @@ -2492,32 +2553,6 @@ {NULL} }; -PyDoc_STRVAR(Pickler_doc, -"Pickler(file, protocol=0) -> new pickler object" -"\n" -"This takes a file-like object for writing a pickle data stream.\n" -"\n" -"The optional protocol argument tells the pickler to use the\n" -"given protocol; supported protocols are 0, 1, 2. The default\n" -"protocol is 0, to be backwards compatible. (Protocol 0 is the\n" -"only protocol that can be written to a file opened in text\n" -"mode and read back successfully. When using a protocol higher\n" -"than 0, make sure the file is opened in binary mode, both when\n" -"pickling and unpickling.)\n" -"\n" -"Protocol 1 is more efficient than protocol 0; protocol 2 is\n" -"more efficient than protocol 1.\n" -"\n" -"Specifying a negative protocol version selects the highest\n" -"protocol version supported. The higher the protocol used, the\n" -"more recent the version of Python needed to read the pickle\n" -"produced.\n" -"\n" -"The file parameter must have a write() method that accepts a single\n" -"string argument. It can thus be an open file object, a StringIO\n" -"object, or any other custom object that meets this interface.\n"); - - static PyTypeObject Pickler_Type = { PyVarObject_HEAD_INIT(NULL, 0) "_pickle.Pickler" , /*tp_name*/ @@ -2554,39 +2589,25 @@ 0, /*tp_descr_get*/ 0, /*tp_descr_set*/ 0, /*tp_dictoffset*/ - 0, /*tp_init*/ - 0, /*tp_alloc*/ - Pickler_new, /*tp_new*/ - 0, /*tp_free*/ + (initproc)Pickler_init, /*tp_init*/ + PyType_GenericAlloc, /*tp_alloc*/ + PyType_GenericNew, /*tp_new*/ + PyObject_GC_Del, /*tp_free*/ 0, /*tp_is_gc*/ }; -/* The name of find_class() is misleading. In newer pickle protocols, this - function is used for loading any global (i.e., functions), not just - classes. The name is kept only for backward compatibility. */ +/* Temporary helper for calling self.find_class(). + + XXX: It would be nice to able to avoid Python function call overhead, by + using directly the C version of find_class(), when find_class() is not + overridden by a subclass. Although, this could become rather hackish. A + simpler optimization would be to call the C function when self is not a + subclass instance. */ static PyObject * -find_class(PyObject *module_name, PyObject *global_name) +find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name) { - PyObject *global; - PyObject *modules_dict; - PyObject *module; - - modules_dict = PySys_GetObject("modules"); - if (modules_dict == NULL) - return NULL; - - module = PyDict_GetItem(modules_dict, module_name); - if (module == NULL) { - module = PyImport_Import(module_name); - if (module == NULL) - return NULL; - global = PyObject_GetAttr(module, global_name); - Py_DECREF(module); - } - else { - global = PyObject_GetAttr(module, global_name); - } - return global; + return PyObject_CallMethod((PyObject *)self, "find_class", "OO", + module_name, global_name); } static int @@ -2753,6 +2774,7 @@ if (len < 2) return bad_readline(); + /* XXX: Should the base argument explicitly set to 10? */ if ((value = PyLong_FromString(s, NULL, 0)) == NULL) return -1; @@ -2983,7 +3005,7 @@ unsigned char x; char *s; - if (unpickler_read(self, &s, 4) < 0) + if (unpickler_read(self, &s, 1) < 0) return -1; x = (unsigned char)s[0]; @@ -3222,16 +3244,19 @@ if (len < 2) return bad_readline(); - module_name = PyUnicode_FromStringAndSize(s, len - 1); + /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII + identifiers are permitted in Python 3.0, since the INST opcode is only + supported by older protocols on Python 2.x. */ + module_name = PyUnicode_DecodeASCII(s, len - 1, "strict"); if (module_name == NULL) return -1; if ((len = unpickler_readline(self, &s)) >= 0) { if (len < 2) return bad_readline(); - class_name = PyUnicode_FromStringAndSize(s, len - 1); + class_name = PyUnicode_DecodeASCII(s, len - 1, "strict"); if (class_name == NULL) { - cls = find_class(module_name, class_name); + cls = find_class(self, module_name, class_name); Py_DECREF(class_name); } } @@ -3316,7 +3341,7 @@ return -1; if (len < 2) return bad_readline(); - module_name = PyUnicode_FromStringAndSize(s, len - 1); + module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict"); if (!module_name) return -1; @@ -3325,9 +3350,9 @@ Py_DECREF(module_name); return bad_readline(); } - global_name = PyUnicode_FromStringAndSize(s, len - 1); + global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict"); if (global_name) { - global = find_class(module_name, global_name); + global = find_class(self, module_name, global_name); Py_DECREF(global_name); } } @@ -3463,7 +3488,7 @@ if (len < 2) return bad_readline(); - key = PyBytes_FromStringAndSize(s, len - 1); + key = PyLong_FromString(s, NULL, 10); if (key == NULL) return -1; @@ -3594,7 +3619,7 @@ return -1; } /* Load the object. */ - obj = find_class(module_name, class_name); + obj = find_class(self, module_name, class_name); if (obj == NULL) { Py_DECREF(py_code); return -1; @@ -3622,10 +3647,10 @@ return -1; if (len < 2) return bad_readline(); - if ((x = self->stack->length) <= 0) return stack_underflow(); - key = PyBytes_FromStringAndSize(s, len - 1); + + key = PyLong_FromString(s, NULL, 10); if (key == NULL) return -1; value = self->stack->data[x - 1]; @@ -3644,9 +3669,9 @@ if (unpickler_read(self, &s, 1) < 0) return -1; - if ((x = self->stack->length) <= 0) return stack_underflow(); + key = PyLong_FromLong((long)(unsigned char)s[0]); if (key == NULL) return -1; @@ -3667,7 +3692,6 @@ if (unpickler_read(self, &s, 4) < 0) return -1; - if ((x = self->stack->length) <= 0) return stack_underflow(); @@ -4052,6 +4076,10 @@ case STOP: break; + case '\0': + PyErr_SetNone(PyExc_EOFError); + return NULL; + default: PyErr_Format(UnpicklingError, "invalid load key, '%c'.", s[0]); @@ -4073,83 +4101,82 @@ return value; } +PyDoc_STRVAR(Unpickler_load_doc, +"load() -> object. Load a pickle." +"\n" +"Read a pickled object representation from the open file object given in\n" +"the constructor, and return the reconstituted object hierarchy specified\n" +"therein.\n"); + static PyObject * Unpickler_load(UnpicklerObject *self) { + /* Check whether the Unpickler was initialized correctly. This prevents + segfaulting if a subclass overridden __init__ with a function that does + not call Unpickler.__init__(). Here, we simply ensure that self->read + is not NULL. */ + if (self->read == NULL) { + PyErr_Format(UnpicklingError, + "Unpickler.__init__() was not called by %s.__init__()", + Py_TYPE(self)->tp_name); + return NULL; + } + return load(self); } -static struct PyMethodDef Unpickler_methods[] = { - {"load", (PyCFunction)Unpickler_load, METH_NOARGS, - PyDoc_STR("load() -> None. Load a pickle")}, - {NULL, NULL} /* sentinel */ -}; +/* The name of find_class() is misleading. In newer pickle protocols, this + function is used for loading any global (i.e., functions), not just + classes. The name is kept only for backward compatibility. */ + +PyDoc_STRVAR(Unpickler_find_class_doc, +"find_class(module_name, global_name) -> object.\n" +"\n" +"Return an object from a specified module, importing the module if\n" +"necessary. Subclasses may override this method (e.g. to restrict\n" +"unpickling of arbitrary classes and functions).\n" +"\n" +"This method is called whenever a class or a function object is\n" +"needed. Both arguments passed are str objects.\n"); static PyObject * -Unpickler_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +Unpickler_find_class(UnpicklerObject *self, PyObject *args) { - UnpicklerObject *self; - static char *kwlist[] = {"file", "encoding", "errors", 0}; - PyObject *file; - char *encoding = NULL; - char *errors = NULL; + PyObject *global; + PyObject *modules_dict; + PyObject *module; + PyObject *module_name, *global_name; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|ss:Unpickler", kwlist, - &file, &encoding, &errors)) + if (!PyArg_UnpackTuple(args, "find_class", 2, 2, + &module_name, &global_name)) return NULL; - self = (UnpicklerObject *)type->tp_alloc(type, 0); - if (self == NULL) + modules_dict = PySys_GetObject("modules"); + if (modules_dict == NULL) return NULL; - self->arg = NULL; - self->last_string = NULL; - self->marks = NULL; - self->num_marks = 0; - self->marks_size = 0; - - if (encoding == NULL) - encoding = "ASCII"; - if (errors == NULL) - errors = "strict"; - - self->encoding = strdup(encoding); - self->errors = strdup(errors); - if (self->encoding == NULL || self->errors == NULL) { - PyErr_NoMemory(); - goto error; - } - - self->read = PyObject_GetAttrString(file, "read"); - self->readline = PyObject_GetAttrString(file, "readline"); - if (self->readline == NULL || self->read == NULL) - goto error; - - if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) { - self->pers_func = PyObject_GetAttrString((PyObject *)self, - "persistent_load"); - if (self->pers_func == NULL) - goto error; + module = PyDict_GetItem(modules_dict, module_name); + if (module == NULL) { + module = PyImport_Import(module_name); + if (module == NULL) + return NULL; + global = PyObject_GetAttr(module, global_name); + Py_DECREF(module); } - else { - self->pers_func = NULL; + else { + global = PyObject_GetAttr(module, global_name); } - - self->memo = PyDict_New(); - if (self->memo == NULL) - goto error; - - self->stack = (Pdata *)Pdata_New(); - if (self->stack == NULL) - goto error; - - return (PyObject *)self; - - error: - Py_DECREF(self); - return NULL; + return global; } +static struct PyMethodDef Unpickler_methods[] = { + {"load", (PyCFunction)Unpickler_load, METH_NOARGS, + Unpickler_load_doc}, + {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS, + Unpickler_find_class_doc}, + {NULL, NULL} /* sentinel */ +}; + static void Unpickler_dealloc(UnpicklerObject *self) { @@ -4162,9 +4189,7 @@ Py_XDECREF(self->arg); Py_XDECREF(self->last_string); - if (self->marks) { - PyMem_Free(self->marks); - } + PyMem_Free(self->marks); free(self->encoding); free(self->errors); @@ -4194,11 +4219,107 @@ Py_CLEAR(self->pers_func); Py_CLEAR(self->arg); Py_CLEAR(self->last_string); + + PyMem_Free(self->marks); + self->marks = NULL; + free(self->encoding); + self->encoding = NULL; + free(self->errors); + self->errors = NULL; + + return 0; +} + +PyDoc_STRVAR(Unpickler_doc, +"Unpickler(file, *, encoding='ASCII', errors='strict')" +"\n" +"This takes a binary file for reading a pickle data stream.\n" +"\n" +"The protocol version of the pickle is detected automatically, so no\n" +"proto argument is needed.\n" +"\n" +"The file-like object must have two methods, a read() method\n" +"that takes an integer argument, and a readline() method that\n" +"requires no arguments. Both methods should return bytes.\n" +"Thus file-like object can be a binary file object opened for\n" +"reading, a BytesIO object, or any other custom object that\n" +"meets this interface.\n" +"\n" +"Optional keyword arguments are encoding and errors, which are\n" +"used to decode 8-bit string instances pickled by Python 2.x.\n" +"These default to 'ASCII' and 'strict', respectively.\n"); + +static int +Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds) +{ + static char *kwlist[] = {"file", "encoding", "errors", 0}; + PyObject *file; + char *encoding = NULL; + char *errors = NULL; + + /* XXX: That is an horrible error message. But, I don't know how to do + better... */ + if (Py_SIZE(args) != 1) { + PyErr_Format(PyExc_TypeError, + "%s takes exactly one positional argument (%zd given)", + Py_TYPE(self)->tp_name, Py_SIZE(args)); + return -1; + } + + /* Arguments parsing needs to be done in the __init__() method to allow + subclasses to define their own __init__() method, which may (or may + not) support Unpickler arguments. However, this means we need to be + extra careful in the other Unpickler methods, since a subclass could + forget to call Unpickler.__init__() thus breaking our internal + invariants. */ + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|ss:Unpickler", kwlist, + &file, &encoding, &errors)) + return -1; + + /* In case of multiple __init__() calls, clear previous content. */ + if (self->read != NULL) + (void)Unpickler_clear(self); + + self->read = PyObject_GetAttrString(file, "read"); + self->readline = PyObject_GetAttrString(file, "readline"); + if (self->readline == NULL || self->read == NULL) + return -1; + + if (encoding == NULL) + encoding = "ASCII"; + if (errors == NULL) + errors = "strict"; + + self->encoding = strdup(encoding); + self->errors = strdup(errors); + if (self->encoding == NULL || self->errors == NULL) { + PyErr_NoMemory(); + return -1; + } + + if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) { + self->pers_func = PyObject_GetAttrString((PyObject *)self, + "persistent_load"); + if (self->pers_func == NULL) + return -1; + } + else { + self->pers_func = NULL; + } + + self->stack = (Pdata *)Pdata_New(); + if (self->stack == NULL) + return -1; + + self->memo = PyDict_New(); + if (self->memo == NULL) + return -1; + return 0; } static PyObject * -Unpickler_get_memo(PicklerObject *self) +Unpickler_get_memo(UnpicklerObject *self) { if (self->memo == NULL) PyErr_SetString(PyExc_AttributeError, "memo"); @@ -4208,7 +4329,7 @@ } static int -Unpickler_set_memo(PicklerObject *self, PyObject *value) +Unpickler_set_memo(UnpicklerObject *self, PyObject *value) { PyObject *tmp; @@ -4225,7 +4346,7 @@ tmp = self->memo; Py_INCREF(value); self->memo = value; - Py_DECREF(tmp); + Py_XDECREF(tmp); return 0; } @@ -4272,21 +4393,6 @@ {NULL} }; -/* XXX: Need to be updated for the new decoding arguments. */ -PyDoc_STRVAR(Unpickler_doc, -"Unpickler(file) -> new unpickler object" -"\n" -"This takes a file-like object for reading a pickle data stream.\n" -"\n" -"The protocol version of the pickle is detected automatically, so no\n" -"proto argument is needed.\n" -"\n" -"The file-like object must have two methods, a read() method that\n" -"takes an integer argument, and a readline() method that requires no\n" -"arguments. Both methods should return a string. Thus file-like\n" -"object can be a file object opened for reading, a StringIO object,\n" -"or any other custom object that meets this interface.\n"); - static PyTypeObject Unpickler_Type = { PyVarObject_HEAD_INIT(NULL, 0) "_pickle.Unpickler", /*tp_name*/ @@ -4323,10 +4429,10 @@ 0, /*tp_descr_get*/ 0, /*tp_descr_set*/ 0, /*tp_dictoffset*/ - 0, /*tp_init*/ - 0, /*tp_alloc*/ - Unpickler_new, /*tp_new*/ - 0, /*tp_free*/ + (initproc)Unpickler_init, /*tp_init*/ + PyType_GenericAlloc, /*tp_alloc*/ + PyType_GenericNew, /*tp_new*/ + PyObject_GC_Del, /*tp_free*/ 0, /*tp_is_gc*/ };