diff --git a/Objects/dictobject.c b/Objects/dictobject.c --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -281,29 +281,37 @@ void PyDict_Fini(void) { PyDict_ClearFreeList(); } +static inline Py_ssize_t +index_size(Py_ssize_t size) +{ + if (size <= 0xff) { + return 1; + } + else if (size <= 0xffff) { + return 2; + } +#if SIZEOF_VOID_P > 4 + else if (size <= 0xffffffff) { + return 4; + } +#endif + else { + return sizeof(Py_ssize_t); + } +} + #define DK_SIZE(dk) ((dk)->dk_size) -#if SIZEOF_VOID_P > 4 -#define DK_IXSIZE(dk) \ - (DK_SIZE(dk) <= 0xff ? \ - 1 : DK_SIZE(dk) <= 0xffff ? \ - 2 : DK_SIZE(dk) <= 0xffffffff ? \ - 4 : sizeof(int64_t)) -#else -#define DK_IXSIZE(dk) \ - (DK_SIZE(dk) <= 0xff ? \ - 1 : DK_SIZE(dk) <= 0xffff ? \ - 2 : sizeof(int32_t)) -#endif +#define DK_IXSIZE(dk) (index_size(DK_SIZE(dk))) #define DK_ENTRIES(dk) \ ((PyDictKeyEntry*)(&(dk)->dk_indices.as_1[DK_SIZE(dk) * DK_IXSIZE(dk)])) #define DK_DEBUG_INCREF _Py_INC_REFTOTAL _Py_REF_DEBUG_COMMA #define DK_DEBUG_DECREF _Py_DEC_REFTOTAL _Py_REF_DEBUG_COMMA #define DK_INCREF(dk) (DK_DEBUG_INCREF ++(dk)->dk_refcnt) #define DK_DECREF(dk) if (DK_DEBUG_DECREF (--(dk)->dk_refcnt) == 0) free_keys_object(dk) @@ -501,37 +509,22 @@ return 1; } #endif static PyDictKeysObject *new_keys_object(Py_ssize_t size) { - PyDictKeysObject *dk; - Py_ssize_t es, usable; - assert(size >= PyDict_MINSIZE); assert(IS_POWER_OF_2(size)); - usable = USABLE_FRACTION(size); - if (size <= 0xff) { - es = 1; - } - else if (size <= 0xffff) { - es = 2; - } -#if SIZEOF_VOID_P > 4 - else if (size <= 0xffffffff) { - es = 4; - } -#endif - else { - es = sizeof(Py_ssize_t); - } + Py_ssize_t usable = USABLE_FRACTION(size); + Py_ssize_t es = index_size(size); + PyDictKeysObject *dk; if (size == PyDict_MINSIZE && numfreekeys > 0) { dk = keys_free_list[--numfreekeys]; } else { dk = PyObject_MALLOC(sizeof(PyDictKeysObject) - Py_MEMBER_SIZE(PyDictKeysObject, dk_indices) + es * size @@ -1184,126 +1177,128 @@ *value_addr = value; 
mp->ma_used++; mp->ma_version_tag = DICT_NEXT_VERSION(); assert(_PyDict_CheckConsistency(mp)); return 0; } /* -Internal routine used by dictresize() to insert an item which is -known to be absent from the dict. This routine also assumes that -the dict contains no deleted entries. Besides the performance benefit, -using insertdict() in dictresize() is dangerous (SF bug #1456209). -Note that no refcounts are changed by this routine; if needed, the caller -is responsible for incref'ing `key` and `value`. -Neither mp->ma_used nor k->dk_usable are modified by this routine; the caller -must set them correctly +Internal routine used by dictresize() to insert only an index of an entry into +dk_indices. +Neither k->dk_usable nor k->dk_nentries are modified by this routine; the +caller must set them correctly. */ static void -insertdict_clean(PyDictObject *mp, PyObject *key, Py_hash_t hash, - PyObject *value) +insertdict_hash(PyDictKeysObject *k, Py_hash_t hash, Py_ssize_t ix) { - size_t i, perturb; - PyDictKeysObject *k = mp->ma_keys; - size_t mask = (size_t)DK_SIZE(k)-1; - PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys); - PyDictKeyEntry *ep; - - assert(k->dk_lookup != NULL); - assert(value != NULL); - assert(key != NULL); - assert(PyUnicode_CheckExact(key) || k->dk_lookup == lookdict); - i = hash & mask; - for (perturb = hash; dk_get_index(k, i) != DKIX_EMPTY; + size_t mask = (size_t)DK_SIZE(k) - 1; + size_t i = hash & mask; + for (size_t perturb = hash; dk_get_index(k, i) != DKIX_EMPTY; perturb >>= PERTURB_SHIFT) { i = mask & ((i << 2) + i + perturb + 1); } - ep = &ep0[k->dk_nentries]; - assert(ep->me_value == NULL); - dk_set_index(k, i, k->dk_nentries); - k->dk_nentries++; - ep->me_key = key; - ep->me_hash = hash; - ep->me_value = value; + dk_set_index(k, i, ix); } /* Restructure the table by allocating a new table and reinserting all items again. When entries have been deleted, the new table may actually be smaller than the old one. 
If a table is split (its keys and hashes are shared, its values are not), then the values are temporarily copied into the table, it is resized as a combined table, then the me_value slots in the old table are NULLed out. After resizing a table is always combined, but can be resplit by make_keys_shared(). */ static int dictresize(PyDictObject *mp, Py_ssize_t minused) { - Py_ssize_t i, newsize; - PyDictKeysObject *oldkeys; - PyObject **oldvalues; - PyDictKeyEntry *ep0; - + Py_ssize_t newsize; /* Find the smallest table size > minused. */ for (newsize = PyDict_MINSIZE; newsize <= minused && newsize > 0; newsize <<= 1) ; if (newsize <= 0) { PyErr_NoMemory(); return -1; } - oldkeys = mp->ma_keys; - oldvalues = mp->ma_values; - /* Allocate a new table. */ + + PyDictKeysObject *oldkeys = mp->ma_keys; + + /* NOTE: Current odict checks mp->ma_keys to detect that a resize happened. + * So we can't reuse oldkeys even if oldkeys->dk_size == newsize. + * TODO: Try reusing oldkeys when odict is reimplemented. + */ + + // Allocate a new table. mp->ma_keys = new_keys_object(newsize); if (mp->ma_keys == NULL) { mp->ma_keys = oldkeys; return -1; } - if (oldkeys->dk_lookup == lookdict) + if (oldkeys->dk_lookup == lookdict) { mp->ma_keys->dk_lookup = lookdict; + } + + Py_ssize_t num_oldentries = oldkeys->dk_nentries, num_newentries = 0; + PyDictKeyEntry *oldentries = DK_ENTRIES(oldkeys); + PyDictKeyEntry *newentries = DK_ENTRIES(mp->ma_keys); + + PyObject **oldvalues = mp->ma_values; mp->ma_values = NULL; - ep0 = DK_ENTRIES(oldkeys); - /* Main loop below assumes we can transfer refcount to new keys - * and that value is stored in me_value. - * Increment ref-counts and copy values here to compensate - * This (resizing a split table) should be relatively rare */ + if (oldvalues != NULL) { - for (i = 0; i < oldkeys->dk_nentries; i++) { - if (oldvalues[i] != NULL) { - Py_INCREF(ep0[i].me_key); - ep0[i].me_value = oldvalues[i]; - } + /* Convert split table into new combined table. 
+ * We must incref keys; we can transfer values. + * Note that the values of a split table are always dense. + */ + for (Py_ssize_t i = 0; i < mp->ma_used; i++) { + assert(oldvalues[i] != NULL); + PyDictKeyEntry *ep = &oldentries[i]; + + Py_INCREF(ep->me_key); + newentries[i].me_key = ep->me_key; + newentries[i].me_hash = ep->me_hash; + newentries[i].me_value = oldvalues[i]; + + insertdict_hash(mp->ma_keys, ep->me_hash, i); } - } - /* Main loop */ - for (i = 0; i < oldkeys->dk_nentries; i++) { - PyDictKeyEntry *ep = &ep0[i]; - if (ep->me_value != NULL) { - insertdict_clean(mp, ep->me_key, ep->me_hash, ep->me_value); - } - } - mp->ma_keys->dk_usable -= mp->ma_used; - if (oldvalues != NULL) { - /* NULL out me_value slot in oldkeys, in case it was shared */ - for (i = 0; i < oldkeys->dk_nentries; i++) - ep0[i].me_value = NULL; + num_newentries = mp->ma_used; + DK_DECREF(oldkeys); if (oldvalues != empty_values) { free_values(oldvalues); } } - else { + else { // combined table. + for (Py_ssize_t i = 0; i < num_oldentries; i++) { + PyDictKeyEntry *ep = &oldentries[i]; + if (ep->me_value != NULL) { + newentries[num_newentries] = *ep; + insertdict_hash(mp->ma_keys, ep->me_hash, num_newentries); + num_newentries++; + } + } + assert(num_newentries == mp->ma_used); assert(oldkeys->dk_lookup != lookdict_split); assert(oldkeys->dk_refcnt == 1); - DK_DEBUG_DECREF PyObject_FREE(oldkeys); + + if (oldkeys->dk_size == PyDict_MINSIZE && + numfreekeys < PyDict_MAXFREELIST) { + DK_DEBUG_DECREF keys_free_list[numfreekeys++] = oldkeys; + } + else { + DK_DEBUG_DECREF PyObject_FREE(oldkeys); + } } + + mp->ma_keys->dk_usable -= mp->ma_used; + mp->ma_keys->dk_nentries = num_newentries; return 0; } /* Returns NULL if unable to split table. * A NULL return does not necessarily indicate an error */ static PyDictKeysObject * make_keys_shared(PyObject *op) {