diff -r a8bb571b41ab Modules/_pickle.c
--- a/Modules/_pickle.c	Sat Mar 29 16:14:52 2014 -0600
+++ b/Modules/_pickle.c	Sun Mar 30 15:01:10 2014 +0100
@@ -1,4 +1,5 @@
 #include "Python.h"
+#include "hashtable.h"
 #include "structmember.h"
 
 PyDoc_STRVAR(pickle_module_doc,
@@ -513,15 +514,7 @@
 }
 
 typedef struct {
-    PyObject *me_key;
-    Py_ssize_t me_value;
-} PyMemoEntry;
-
-typedef struct {
-    Py_ssize_t mt_mask;
-    Py_ssize_t mt_used;
-    Py_ssize_t mt_allocated;
-    PyMemoEntry *mt_table;
+    _Py_hashtable_t *hashtable;
 } PyMemoTable;
 
 typedef struct PicklerObject {
@@ -619,10 +612,6 @@
    a bunch of unnecessary object creation. This makes a huge performance
    difference. */
 
-#define MT_MINSIZE 8
-#define PERTURB_SHIFT 5
-
-
 static PyMemoTable *
 PyMemoTable_New(void)
 {
@@ -632,45 +621,53 @@
         return NULL;
     }
 
-    memo->mt_used = 0;
-    memo->mt_allocated = MT_MINSIZE;
-    memo->mt_mask = MT_MINSIZE - 1;
-    memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
-    if (memo->mt_table == NULL) {
+    memo->hashtable = _Py_hashtable_new(sizeof(Py_ssize_t), _Py_hashtable_hash_ptr,
+                                        _Py_hashtable_compare_direct);
+    if (memo->hashtable == NULL) {
         PyMem_FREE(memo);
         PyErr_NoMemory();
         return NULL;
     }
-    memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
 
     return memo;
 }
 
+/* _Py_hashtable_foreach() callback: take a reference on the entry's key. */
+static int
+PyMemoTable_incref_entry(_Py_hashtable_entry_t *entry, void *unused)
+{
+    Py_XINCREF(entry->key);
+    return 0;
+}
+
+/* _Py_hashtable_foreach() callback: drop the reference held on the key. */
+static int
+PyMemoTable_decref_entry(_Py_hashtable_entry_t *entry, void *unused)
+{
+    Py_XDECREF(entry->key);
+    return 0;
+}
+
 static PyMemoTable *
 PyMemoTable_Copy(PyMemoTable *self)
 {
-    Py_ssize_t i;
     PyMemoTable *new = PyMemoTable_New();
     if (new == NULL)
         return NULL;
 
-    new->mt_used = self->mt_used;
-    new->mt_allocated = self->mt_allocated;
-    new->mt_mask = self->mt_mask;
-    /* The table we get from _New() is probably smaller than we wanted.
-       Free it and allocate one that's the right size. */
-    PyMem_FREE(new->mt_table);
-    new->mt_table = PyMem_MALLOC(self->mt_allocated * sizeof(PyMemoEntry));
-    if (new->mt_table == NULL) {
+    /* _New() already allocated an empty table; destroy it before installing
+       the copy, otherwise it is leaked. */
+    _Py_hashtable_destroy(new->hashtable);
+    new->hashtable = _Py_hashtable_copy(self->hashtable);
+
+    if (new->hashtable == NULL) {
         PyMem_FREE(new);
         PyErr_NoMemory();
         return NULL;
     }
-    for (i = 0; i < self->mt_allocated; i++) {
-        Py_XINCREF(self->mt_table[i].me_key);
-    }
-    memcpy(new->mt_table, self->mt_table,
-           sizeof(PyMemoEntry) * self->mt_allocated);
+
+    /* The copy holds the same keys, so it needs its own references. */
+    _Py_hashtable_foreach(self->hashtable, PyMemoTable_incref_entry, NULL);
 
     return new;
 }
@@ -678,19 +675,15 @@
 static Py_ssize_t
 PyMemoTable_Size(PyMemoTable *self)
 {
-    return self->mt_used;
+    return self->hashtable->entries;
 }
 
 static int
 PyMemoTable_Clear(PyMemoTable *self)
 {
-    Py_ssize_t i = self->mt_allocated;
-
-    while (--i >= 0) {
-        Py_XDECREF(self->mt_table[i].me_key);
-    }
-    self->mt_used = 0;
-    memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
+    _Py_hashtable_foreach(self->hashtable, PyMemoTable_decref_entry, NULL);
+    _Py_hashtable_clear(self->hashtable);
+
     return 0;
 }
 
@@ -699,137 +692,45 @@
 {
     if (self == NULL)
         return;
+
     PyMemoTable_Clear(self);
-
-    PyMem_FREE(self->mt_table);
+    _Py_hashtable_destroy(self->hashtable);
     PyMem_FREE(self);
 }
 
-/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
-   can be considerably simpler than dictobject.c's lookdict(). */
-static PyMemoEntry *
-_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
-{
-    size_t i;
-    size_t perturb;
-    size_t mask = (size_t)self->mt_mask;
-    PyMemoEntry *table = self->mt_table;
-    PyMemoEntry *entry;
-    Py_hash_t hash = (Py_hash_t)key >> 3;
-
-    i = hash & mask;
-    entry = &table[i];
-    if (entry->me_key == NULL || entry->me_key == key)
-        return entry;
-
-    for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
-        i = (i << 2) + i + perturb + 1;
-        entry = &table[i & mask];
-        if (entry->me_key == NULL || entry->me_key == key)
-            return entry;
-    }
-    assert(0);  /* Never reached */
-    return NULL;
-}
-
-/* Returns -1 on failure, 0 on success. */
-static int
-_PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size)
-{
-    PyMemoEntry *oldtable = NULL;
-    PyMemoEntry *oldentry, *newentry;
-    Py_ssize_t new_size = MT_MINSIZE;
-    Py_ssize_t to_process;
-
-    assert(min_size > 0);
-
-    /* Find the smallest valid table size >= min_size. */
-    while (new_size < min_size && new_size > 0)
-        new_size <<= 1;
-    if (new_size <= 0) {
-        PyErr_NoMemory();
-        return -1;
-    }
-    /* new_size needs to be a power of two. */
-    assert((new_size & (new_size - 1)) == 0);
-
-    /* Allocate new table. */
-    oldtable = self->mt_table;
-    self->mt_table = PyMem_MALLOC(new_size * sizeof(PyMemoEntry));
-    if (self->mt_table == NULL) {
-        self->mt_table = oldtable;
-        PyErr_NoMemory();
-        return -1;
-    }
-    self->mt_allocated = new_size;
-    self->mt_mask = new_size - 1;
-    memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
-
-    /* Copy entries from the old table. */
-    to_process = self->mt_used;
-    for (oldentry = oldtable; to_process > 0; oldentry++) {
-        if (oldentry->me_key != NULL) {
-            to_process--;
-            /* newentry is a pointer to a chunk of the new
-               mt_table, so we're setting the key:value pair
-               in-place. */
-            newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
-            newentry->me_key = oldentry->me_key;
-            newentry->me_value = oldentry->me_value;
-        }
-    }
-
-    /* Deallocate the old table. */
-    PyMem_FREE(oldtable);
-    return 0;
-}
-
 /* Returns NULL on failure, a pointer to the value otherwise. */
 static Py_ssize_t *
 PyMemoTable_Get(PyMemoTable *self, PyObject *key)
 {
-    PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
-    if (entry->me_key == NULL)
+    _Py_hashtable_entry_t *entry;
+
+    if ((entry = _Py_hashtable_get_entry(self->hashtable, key))) {
+        return (Py_ssize_t *)_Py_HASHTABLE_ENTRY_DATA(entry);
+    } else {
         return NULL;
-    return &entry->me_value;
+    }
 }
 
 /* Returns -1 on failure, 0 on success. */
 static int
 PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
 {
-    PyMemoEntry *entry;
-
-    assert(key != NULL);
-
-    entry = _PyMemoTable_Lookup(self, key);
-    if (entry->me_key != NULL) {
-        entry->me_value = value;
-        return 0;
-    }
-    Py_INCREF(key);
-    entry->me_key = key;
-    entry->me_value = value;
-    self->mt_used++;
-
-    /* If we added a key, we can safely resize. Otherwise just return!
-     * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
-     *
-     * Quadrupling the size improves average table sparseness
-     * (reducing collisions) at the cost of some memory. It also halves
-     * the number of expensive resize operations in a growing memo table.
-     *
-     * Very large memo tables (over 50K items) use doubling instead.
-     * This may help applications with severe memory constraints.
-     */
-    if (!(self->mt_used * 3 >= (self->mt_mask + 1) * 2))
-        return 0;
-    return _PyMemoTable_ResizeTable(self,
-        (self->mt_used > 50000 ? 2 : 4) * self->mt_used);
-}
-
-#undef MT_MINSIZE
-#undef PERTURB_SHIFT
+    _Py_hashtable_entry_t *entry;
+
+    if ((entry = _Py_hashtable_get_entry(self->hashtable, key))) {
+        *(Py_ssize_t *)_Py_HASHTABLE_ENTRY_DATA(entry) = value;
+    } else {
+        if (_Py_HASHTABLE_SET(self->hashtable, key, value) != 0) {
+            /* Raise MemoryError, as PyMemoTable_New() does on failure. */
+            PyErr_NoMemory();
+            return -1;
+        }
+        /* The table now refers to key; keep the object alive. */
+        Py_INCREF(key);
+    }
+
+    return 0;
+}
 
 /*************************************************************************/
 
@@ -4087,6 +3988,31 @@
     Py_RETURN_NONE;
 }
 
+/* _Py_hashtable_foreach() callback: add one memo entry to the dict passed
+   as arg, mapping the key's address to a (value, key) tuple.
+   Returns 0 on success, -1 on failure. */
+static int
+_pickle_PicklerMemoProxy_add_entry(_Py_hashtable_entry_t *entry, void *arg)
+{
+    PyObject *new_memo = arg;
+    int status;
+    PyObject *key, *value;
+
+    key = PyLong_FromVoidPtr((void *)entry->key);
+    value = Py_BuildValue("nO", *(Py_ssize_t *)_Py_HASHTABLE_ENTRY_DATA(entry),
+                          entry->key);
+
+    if (key == NULL || value == NULL) {
+        Py_XDECREF(key);
+        Py_XDECREF(value);
+        return -1;
+    }
+    status = PyDict_SetItem(new_memo, key, value);
+    Py_DECREF(key);
+    Py_DECREF(value);
+    return (status == 0) ? 0 : -1;
+}
+
 /*[clinic input]
 _pickle.PicklerMemoProxy.copy
 
@@ -4097,39 +4023,20 @@
 _pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject *self)
 /*[clinic end generated code: output=bb83a919d29225ef input=b73043485ac30b36]*/
 {
-    Py_ssize_t i;
     PyMemoTable *memo;
     PyObject *new_memo = PyDict_New();
     if (new_memo == NULL)
         return NULL;
 
     memo = self->pickler->memo;
-    for (i = 0; i < memo->mt_allocated; ++i) {
-        PyMemoEntry entry = memo->mt_table[i];
-        if (entry.me_key != NULL) {
-            int status;
-            PyObject *key, *value;
-
-            key = PyLong_FromVoidPtr(entry.me_key);
-            value = Py_BuildValue("nO", entry.me_value, entry.me_key);
-
-            if (key == NULL || value == NULL) {
-                Py_XDECREF(key);
-                Py_XDECREF(value);
-                goto error;
-            }
-            status = PyDict_SetItem(new_memo, key, value);
-            Py_DECREF(key);
-            Py_DECREF(value);
-            if (status < 0)
-                goto error;
-        }
-    }
+
+    if (_Py_hashtable_foreach(memo->hashtable,
+            _pickle_PicklerMemoProxy_add_entry, new_memo) != 0) {
+        Py_XDECREF(new_memo);
+        return NULL;
+    }
+
     return new_memo;
-
-  error:
-    Py_XDECREF(new_memo);
-    return NULL;
 }
 
 /*[clinic input]