Pickler: reuse the per-type save function inside the list and dict batch loops.

save() becomes a thin wrapper around save_with_type_caching().  The new
function reports, through two out-parameters, the exact type it dispatched on
and the save_* function it called.  The list and dict batch loops keep that
pair and call the function directly for following items of the same type,
skipping the dispatch chain.  The shortcut is not taken while self->pers_func
is set.  Because the shortcut bypasses the dispatcher, the memo lookup moves
out of the dispatcher and into the individual save functions.

Review notes:
  * The memo check added near the end of save_with_type_caching() leaves
    through the function's `error` / `done` labels, exactly as the lookup it
    replaces did and as every other branch of the dispatcher does, rather than
    returning directly.  Presumably the `done` tail performs the function's
    cleanup; confirm against the full file.
  * NOTE(review): the memo checks added to the list and dict savers sit right
    after `if (self->fast && !fast_save_enter(self, obj)) goto error;` and
    return directly.  Confirm that skipping the tail of those functions on a
    memo hit is intended.
  * Whitespace in this patch was reconstructed from a copy whose line breaks
    had been lost; apply with whitespace tolerance (e.g. `patch -l`) if a
    context line does not match.

diff -r 3e02d70cd07b Modules/_pickle.c
--- a/Modules/_pickle.c	Thu Apr 18 01:44:27 2013 +0200
+++ b/Modules/_pickle.c	Thu Apr 18 02:37:35 2013 -0700
@@ -380,6 +380,9 @@
 } UnpicklerObject;
 
 /* Forward declarations */
+typedef int (*save_func_t)(PicklerObject *, PyObject *);
+static int save_with_type_caching(PicklerObject *, PyObject *, int,
+                                  PyTypeObject **, save_func_t *);
 static int save(PicklerObject *, PyObject *, int);
 static int save_reduce(PicklerObject *, PyObject *, PyObject *);
 static PyTypeObject Pickler_Type;
@@ -489,6 +492,8 @@
     PyMemoEntry *entry;
     Py_hash_t hash = (Py_hash_t)key >> 3;
 
+    assert(key != NULL);
+
     i = hash & mask;
     entry = &table[i];
     if (entry->me_key == NULL || entry->me_key == key)
@@ -1282,6 +1287,8 @@
     if (self->fast)
         return 0;
 
+    assert(!PyMemoTable_Get(self->memo, obj));
+
     x = PyMemoTable_Size(self->memo);
     if (PyMemoTable_Set(self->memo, obj, x) < 0)
         goto error;
@@ -1707,6 +1714,15 @@
 static int
 save_bytes(PicklerObject *self, PyObject *obj)
 {
+    /* Check the memo to see if it has the object. If so, generate
+       a GET (or BINGET) opcode, instead of pickling the object
+       once again. */
+    if (PyMemoTable_Get(self->memo, obj)) {
+        if (memo_get(self, obj) < 0)
+            return -1;
+        return 0;
+    }
+
     if (self->proto < 3) {
         /* Older pickle protocols do not have an opcode for pickling bytes
            objects. Therefore, we need to fake the copy protocol (i.e.,
@@ -1932,6 +1948,15 @@
 static int
 save_unicode(PicklerObject *self, PyObject *obj)
 {
+    /* Check the memo to see if it has the object. If so, generate
+       a GET (or BINGET) opcode, instead of pickling the object
+       once again. */
+    if (PyMemoTable_Get(self->memo, obj)) {
+        if (memo_get(self, obj) < 0)
+            return -1;
+        return 0;
+    }
+
     if (self->bin) {
         if (write_unicode_binary(self, obj) < 0)
             return -1;
@@ -2003,6 +2028,15 @@
     const char pop_mark_op = POP_MARK;
     const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
 
+    /* Check the memo to see if it has the object. If so, generate
+       a GET (or BINGET) opcode, instead of pickling the object
+       once again. */
+    if (PyMemoTable_Get(self->memo, obj)) {
+        if (memo_get(self, obj) < 0)
+            return -1;
+        return 0;
+    }
+
     if ((len = PyTuple_Size(obj)) < 0)
         return -1;
 
@@ -2219,6 +2253,8 @@
 {
     PyObject *item = NULL;
     Py_ssize_t this_batch, total;
+    save_func_t save_func = NULL;
+    PyTypeObject *cached_type = NULL;
 
     const char append_op = APPEND;
     const char appends_op = APPENDS;
@@ -2245,8 +2281,22 @@
             return -1;
         while (total < PyList_GET_SIZE(obj)) {
             item = PyList_GET_ITEM(obj, total);
-            if (save(self, item, 0) < 0)
+
+            /* If the type of this item is the same as the previous item,
+               reuse the pickling function that was used for the previous
+               item. This saves us the dispatch cost in the save() function,
+               which can be significant in this hot loop. We disable this
+               optimization if the persistent IDs feature of pickle is being
+               used. */
+            if (Py_TYPE(item) == cached_type && !self->pers_func) {
+                if (save_func(self, item) < 0)
+                    return -1;
+            }
+            else if (save_with_type_caching(self, item, 0,
+                                            &cached_type,
+                                            &save_func) < 0) {
                 return -1;
+            }
             total++;
             if (++this_batch == BATCHSIZE)
                 break;
@@ -2269,6 +2319,15 @@
     if (self->fast && !fast_save_enter(self, obj))
         goto error;
 
+    /* Check the memo to see if it has the object. If so, generate
+       a GET (or BINGET) opcode, instead of pickling the object
+       once again. */
+    if (PyMemoTable_Get(self->memo, obj)) {
+        if (memo_get(self, obj) < 0)
+            return -1;
+        return 0;
+    }
+
     /* Create an empty list. */
     if (self->bin) {
         header[0] = EMPTY_LIST;
@@ -2470,6 +2529,8 @@
     PyObject *key = NULL, *value = NULL;
     int i;
     Py_ssize_t dict_size, ppos = 0;
+    save_func_t key_save_func = NULL, value_save_func = NULL;
+    PyTypeObject *key_cached_type = NULL, *value_cached_type = NULL;
 
     const char mark_op = MARK;
     const char setitem_op = SETITEM;
@@ -2497,14 +2558,36 @@
         i = 0;
         if (_Pickler_Write(self, &mark_op, 1) < 0)
             return -1;
+
         while (PyDict_Next(obj, &ppos, &key, &value)) {
-            if (save(self, key, 0) < 0)
+            /* If the type of this item is the same as the previous item,
+               reuse the pickling function that was used for the previous
+               item. This saves us the dispatch cost in the save() function,
+               which can be significant in this hot loop. We disable this
+               optimization if the persistent IDs feature of pickle is being
+               used. */
+            if (Py_TYPE(key) == key_cached_type && !self->pers_func) {
+                if (key_save_func(self, key) < 0)
+                    return -1;
+            }
+            else if (save_with_type_caching(self, key, 0,
+                                            &key_cached_type,
+                                            &key_save_func) < 0) {
                 return -1;
-            if (save(self, value, 0) < 0)
+            }
+            if (Py_TYPE(value) == value_cached_type && !self->pers_func) {
+                if (value_save_func(self, value) < 0)
+                    return -1;
+            }
+            else if (save_with_type_caching(self, value, 0,
+                                            &value_cached_type,
+                                            &value_save_func) < 0) {
                 return -1;
+            }
             if (++i == BATCHSIZE)
                 break;
         }
+
         if (_Pickler_Write(self, &setitems_op, 1) < 0)
             return -1;
         if (PyDict_Size(obj) != dict_size) {
@@ -2529,6 +2612,15 @@
     if (self->fast && !fast_save_enter(self, obj))
         goto error;
 
+    /* Check the memo to see if it has the object. If so, generate
+       a GET (or BINGET) opcode, instead of pickling the object
+       once again. */
+    if (PyMemoTable_Get(self->memo, obj)) {
+        if (memo_get(self, obj) < 0)
+            return -1;
+        return 0;
+    }
+
     /* Create an empty dict. */
     if (self->bin) {
         header[0] = EMPTY_DICT;
@@ -2602,6 +2694,15 @@
 
     const char global_op = GLOBAL;
 
+    /* Check the memo to see if it has the object. If so, generate
+       a GET (or BINGET) opcode, instead of pickling the object
+       once again. */
+    if (PyMemoTable_Get(self->memo, obj)) {
+        if (memo_get(self, obj) < 0)
+            return -1;
+        return 0;
+    }
+
     if (name_str == NULL) {
         name_str = PyUnicode_InternFromString("__name__");
         if (name_str == NULL)
@@ -2964,6 +3065,15 @@
     const char build_op = BUILD;
     const char newobj_op = NEWOBJ;
 
+    /* Check the memo to see if it has the object. If so, generate
+       a GET (or BINGET) opcode, instead of pickling the object
+       once again. */
+    if (PyMemoTable_Get(self->memo, obj)) {
+        if (memo_get(self, obj) < 0)
+            return -1;
+        return 0;
+    }
+
     size = PyTuple_Size(args);
     if (size < 2 || size > 5) {
         PyErr_SetString(PicklingError, "tuple returned by "
@@ -3137,7 +3247,8 @@
 }
 
 static int
-save(PicklerObject *self, PyObject *obj, int pers_save)
+save_with_type_caching(PicklerObject *self, PyObject *obj, int pers_save,
+                       PyTypeObject **cached_type, save_func_t *save_func)
 {
     PyTypeObject *type;
     PyObject *reduce_func = NULL;
@@ -3166,8 +3277,6 @@
        since benchmarks shown that this optimization was actually slowing
        things down. */
 
-    /* Atom types; these aren't memoized, so don't check the memo. */
-
     if (obj == Py_None) {
         status = save_none(self, obj);
         goto done;
@@ -3180,46 +3289,52 @@
         status = save_notimplemented(self, obj);
         goto done;
     }
-    else if (obj == Py_False || obj == Py_True) {
+    else if (type == &PyBool_Type) {
         status = save_bool(self, obj);
+        *cached_type = type;
+        *save_func = save_bool;
         goto done;
     }
     else if (type == &PyLong_Type) {
         status = save_long(self, obj);
+        *cached_type = type;
+        *save_func = save_long;
         goto done;
     }
     else if (type == &PyFloat_Type) {
         status = save_float(self, obj);
+        *cached_type = type;
+        *save_func = save_float;
         goto done;
     }
-
-    /* Check the memo to see if it has the object. If so, generate
-       a GET (or BINGET) opcode, instead of pickling the object
-       once again. */
-    if (PyMemoTable_Get(self->memo, obj)) {
-        if (memo_get(self, obj) < 0)
-            goto error;
-        goto done;
-    }
-
-    if (type == &PyBytes_Type) {
+    else if (type == &PyBytes_Type) {
         status = save_bytes(self, obj);
+        *cached_type = type;
+        *save_func = save_bytes;
         goto done;
     }
     else if (type == &PyUnicode_Type) {
         status = save_unicode(self, obj);
+        *cached_type = type;
+        *save_func = save_unicode;
         goto done;
     }
     else if (type == &PyDict_Type) {
         status = save_dict(self, obj);
+        *cached_type = type;
+        *save_func = save_dict;
         goto done;
     }
     else if (type == &PyList_Type) {
         status = save_list(self, obj);
+        *cached_type = type;
+        *save_func = save_list;
         goto done;
     }
     else if (type == &PyTuple_Type) {
         status = save_tuple(self, obj);
+        *cached_type = type;
+        *save_func = save_tuple;
         goto done;
     }
     else if (type == &PyType_Type) {
@@ -3241,6 +3356,16 @@
         goto done;
     }
 
+    /* We don't really need to check the memo here, since save_global() and
+       save_reduce() will check it for us. But it is cheap to verify before
+       going through the reduce protocol again and again for objects which
+       are already memoized. */
+    if (PyMemoTable_Get(self->memo, obj)) {
+        if (memo_get(self, obj) < 0)
+            goto error;
+        goto done;
+    }
+
     /* XXX: This part needs some unit tests. */
 
     /* Get a reduction callable, and call it.  This may come from
@@ -3347,6 +3472,16 @@
 }
 
 static int
+save(PicklerObject *self, PyObject *obj, int pers_save)
+{
+    PyTypeObject *cached_type = NULL;
+    save_func_t save_func = NULL;
+    return save_with_type_caching(self, obj, pers_save,
+                                  &cached_type,
+                                  &save_func);
+}
+
+static int
 dump(PicklerObject *self, PyObject *obj)
 {
     const char stop_op = STOP;