diff -r 34fb29694c76 Objects/dictobject.c --- a/Objects/dictobject.c Wed Jan 25 23:35:46 2017 -0800 +++ b/Objects/dictobject.c Fri Jan 27 03:39:47 2017 +0900 @@ -631,29 +631,20 @@ PyDict_New(void) static Py_ssize_t lookdict_index(PyDictKeysObject *k, Py_hash_t hash, Py_ssize_t index) { - size_t i; size_t mask = DK_MASK(k); - Py_ssize_t ix; - - i = (size_t)hash & mask; - ix = dk_get_index(k, i); - if (ix == index) { - return i; - } - if (ix == DKIX_EMPTY) { - return DKIX_EMPTY; - } - - for (size_t perturb = hash;;) { - perturb >>= PERTURB_SHIFT; - i = mask & ((i << 2) + i + perturb + 1); - ix = dk_get_index(k, i); + size_t perturb = (size_t)hash; + size_t i = (size_t)hash & mask; + + for (;;) { + Py_ssize_t ix = dk_get_index(k, i); if (ix == index) { return i; } if (ix == DKIX_EMPTY) { return DKIX_EMPTY; } + perturb >>= PERTURB_SHIFT; + i = mask & (i*5 + perturb + 1); } assert(0); /* NOT REACHED */ return DKIX_ERROR; @@ -680,16 +671,16 @@ lookdict_unicode() below is specialized never raise an exception; that function can never return DKIX_ERROR. lookdict_unicode_nodummy is further specialized for string keys that cannot be the <dummy> value. -For both, when the key isn't found a DKIX_EMPTY is returned. hashpos returns -where the key index should be inserted. +For both, when the key isn't found a DKIX_EMPTY is returned. + +*hashpos is set to the key's position in dk_indices only when the key is found. +Do not use *hashpos when the return value is < 0. 
*/ static Py_ssize_t _Py_HOT_FUNCTION lookdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **value_addr, Py_ssize_t *hashpos) { - size_t i, mask; - Py_ssize_t ix, freeslot; - int cmp; + size_t i, mask, perturb; PyDictKeysObject *dk; PyDictKeyEntry *ep0, *ep; PyObject *startkey; @@ -698,100 +689,49 @@ top: dk = mp->ma_keys; mask = DK_MASK(dk); ep0 = DK_ENTRIES(dk); + perturb = (size_t)hash; i = (size_t)hash & mask; - ix = dk_get_index(dk, i); - if (ix == DKIX_EMPTY) { - if (hashpos != NULL) - *hashpos = i; - *value_addr = NULL; - return DKIX_EMPTY; - } - if (ix == DKIX_DUMMY) { - freeslot = i; - } - else { - ep = &ep0[ix]; - assert(ep->me_key != NULL); - if (ep->me_key == key) { - *value_addr = ep->me_value; - if (hashpos != NULL) - *hashpos = i; - return ix; + for (;;) { + Py_ssize_t ix = dk_get_index(dk, i); + if (ix == DKIX_EMPTY) { + *value_addr = NULL; + return DKIX_EMPTY; } - if (ep->me_hash == hash) { - startkey = ep->me_key; - Py_INCREF(startkey); - cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); - Py_DECREF(startkey); - if (cmp < 0) { - *value_addr = NULL; - return DKIX_ERROR; + if (ix >= 0) { + ep = &ep0[ix]; + assert(ep->me_key != NULL); + if (ep->me_key == key) { + *value_addr = ep->me_value; + if (hashpos != NULL) + *hashpos = (Py_ssize_t)i; + return ix; } - if (dk == mp->ma_keys && ep->me_key == startkey) { - if (cmp > 0) { - *value_addr = ep->me_value; - if (hashpos != NULL) - *hashpos = i; - return ix; + if (ep->me_hash == hash) { + startkey = ep->me_key; + Py_INCREF(startkey); + int cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); + Py_DECREF(startkey); + if (cmp < 0) { + *value_addr = NULL; + return DKIX_ERROR; } - } - else { - /* The dict was mutated, restart */ - goto top; + if (dk == mp->ma_keys && ep->me_key == startkey) { + if (cmp > 0) { + *value_addr = ep->me_value; + if (hashpos != NULL) + *hashpos = (Py_ssize_t)i; + return ix; + } + } + else { + /* The dict was mutated, restart */ + goto top; + } } } - freeslot = 
-1; - } - - for (size_t perturb = hash;;) { perturb >>= PERTURB_SHIFT; - i = ((i << 2) + i + perturb + 1) & mask; - ix = dk_get_index(dk, i); - if (ix == DKIX_EMPTY) { - if (hashpos != NULL) { - *hashpos = (freeslot == -1) ? (Py_ssize_t)i : freeslot; - } - *value_addr = NULL; - return ix; - } - if (ix == DKIX_DUMMY) { - if (freeslot == -1) - freeslot = i; - continue; - } - ep = &ep0[ix]; - assert(ep->me_key != NULL); - if (ep->me_key == key) { - if (hashpos != NULL) { - *hashpos = i; - } - *value_addr = ep->me_value; - return ix; - } - if (ep->me_hash == hash) { - startkey = ep->me_key; - Py_INCREF(startkey); - cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); - Py_DECREF(startkey); - if (cmp < 0) { - *value_addr = NULL; - return DKIX_ERROR; - } - if (dk == mp->ma_keys && ep->me_key == startkey) { - if (cmp > 0) { - if (hashpos != NULL) { - *hashpos = i; - } - *value_addr = ep->me_value; - return ix; - } - } - else { - /* The dict was mutated, restart */ - goto top; - } - } + i = (i*5 + perturb + 1) & mask; } assert(0); /* NOT REACHED */ return 0; @@ -802,11 +742,6 @@ static Py_ssize_t _Py_HOT_FUNCTION lookdict_unicode(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **value_addr, Py_ssize_t *hashpos) { - size_t i; - size_t mask = DK_MASK(mp->ma_keys); - Py_ssize_t ix, freeslot; - PyDictKeyEntry *ep, *ep0 = DK_ENTRIES(mp->ma_keys); - assert(mp->ma_values == NULL); /* Make sure this function doesn't have to handle non-unicode keys, including subclasses of str; e.g., one reason to subclass @@ -816,56 +751,32 @@ lookdict_unicode(PyDictObject *mp, PyObj mp->ma_keys->dk_lookup = lookdict; return lookdict(mp, key, hash, value_addr, hashpos); } - i = (size_t)hash & mask; - ix = dk_get_index(mp->ma_keys, i); - if (ix == DKIX_EMPTY) { - if (hashpos != NULL) - *hashpos = i; - *value_addr = NULL; - return DKIX_EMPTY; - } - if (ix == DKIX_DUMMY) { - freeslot = i; - } - else { - ep = &ep0[ix]; - assert(ep->me_key != NULL); - if (ep->me_key == key - || (ep->me_hash 
== hash && unicode_eq(ep->me_key, key))) { - if (hashpos != NULL) - *hashpos = i; - *value_addr = ep->me_value; - return ix; - } - freeslot = -1; - } - - for (size_t perturb = hash;;) { - perturb >>= PERTURB_SHIFT; - i = mask & ((i << 2) + i + perturb + 1); - ix = dk_get_index(mp->ma_keys, i); + + size_t mask = DK_MASK(mp->ma_keys); + PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys); + size_t perturb = hash; + size_t i = (size_t)hash & mask; + + for (;;) { + Py_ssize_t ix = dk_get_index(mp->ma_keys, i); if (ix == DKIX_EMPTY) { - if (hashpos != NULL) { - *hashpos = (freeslot == -1) ? (Py_ssize_t)i : freeslot; - } *value_addr = NULL; return DKIX_EMPTY; } - if (ix == DKIX_DUMMY) { - if (freeslot == -1) - freeslot = i; - continue; + if (ix >= 0) { + PyDictKeyEntry *ep = &ep0[ix]; + assert(ep->me_key != NULL); + if (ep->me_key == key + || (ep->me_hash == hash && unicode_eq(ep->me_key, key))) { + if (hashpos != NULL) + *hashpos = i; + *value_addr = ep->me_value; + return ix; + } } - ep = &ep0[ix]; - assert(ep->me_key != NULL); - if (ep->me_key == key - || (ep->me_hash == hash && unicode_eq(ep->me_key, key))) { - *value_addr = ep->me_value; - if (hashpos != NULL) { - *hashpos = i; - } - return ix; - } + + perturb >>= PERTURB_SHIFT; + i = mask & (i*5 + perturb + 1); } assert(0); /* NOT REACHED */ return 0; @@ -878,11 +789,6 @@ lookdict_unicode_nodummy(PyDictObject *m Py_hash_t hash, PyObject **value_addr, Py_ssize_t *hashpos) { - size_t i; - size_t mask = DK_MASK(mp->ma_keys); - Py_ssize_t ix; - PyDictKeyEntry *ep, *ep0 = DK_ENTRIES(mp->ma_keys); - assert(mp->ma_values == NULL); /* Make sure this function doesn't have to handle non-unicode keys, including subclasses of str; e.g., one reason to subclass @@ -892,38 +798,24 @@ lookdict_unicode_nodummy(PyDictObject *m mp->ma_keys->dk_lookup = lookdict; return lookdict(mp, key, hash, value_addr, hashpos); } - i = (size_t)hash & mask; - ix = dk_get_index(mp->ma_keys, i); - assert (ix != DKIX_DUMMY); - if (ix == DKIX_EMPTY) { - if 
(hashpos != NULL) - *hashpos = i; - *value_addr = NULL; - return DKIX_EMPTY; - } - ep = &ep0[ix]; - assert(ep->me_key != NULL); - assert(PyUnicode_CheckExact(ep->me_key)); - if (ep->me_key == key || - (ep->me_hash == hash && unicode_eq(ep->me_key, key))) { - if (hashpos != NULL) - *hashpos = i; - *value_addr = ep->me_value; - return ix; - } - for (size_t perturb = hash;;) { - perturb >>= PERTURB_SHIFT; - i = mask & ((i << 2) + i + perturb + 1); - ix = dk_get_index(mp->ma_keys, i); + + PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys); + size_t mask = DK_MASK(mp->ma_keys); + size_t i = (size_t)hash & mask; + size_t perturb = hash; + + for (;;) { + Py_ssize_t ix = dk_get_index(mp->ma_keys, i); assert (ix != DKIX_DUMMY); + if (ix == DKIX_EMPTY) { - if (hashpos != NULL) - *hashpos = i; *value_addr = NULL; return DKIX_EMPTY; } - ep = &ep0[ix]; - assert(ep->me_key != NULL && PyUnicode_CheckExact(ep->me_key)); + + PyDictKeyEntry *ep = &ep0[ix]; + assert(ep->me_key != NULL); + assert(PyUnicode_CheckExact(ep->me_key)); if (ep->me_key == key || (ep->me_hash == hash && unicode_eq(ep->me_key, key))) { if (hashpos != NULL) @@ -931,6 +823,9 @@ lookdict_unicode_nodummy(PyDictObject *m *value_addr = ep->me_value; return ix; } + + perturb >>= PERTURB_SHIFT; + i = mask & (i*5 + perturb + 1); } assert(0); /* NOT REACHED */ return 0; @@ -945,52 +840,34 @@ static Py_ssize_t _Py_HOT_FUNCTION lookdict_split(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **value_addr, Py_ssize_t *hashpos) { - size_t i; - size_t mask = DK_MASK(mp->ma_keys); - Py_ssize_t ix; - PyDictKeyEntry *ep, *ep0 = DK_ENTRIES(mp->ma_keys); - /* mp must split table */ assert(mp->ma_values != NULL); + if (!PyUnicode_CheckExact(key)) { - ix = lookdict(mp, key, hash, value_addr, hashpos); + Py_ssize_t ix = lookdict(mp, key, hash, value_addr, hashpos); if (ix >= 0) { *value_addr = mp->ma_values[ix]; } return ix; } - i = (size_t)hash & mask; - ix = dk_get_index(mp->ma_keys, i); - if (ix == DKIX_EMPTY) { - if (hashpos 
!= NULL) - *hashpos = i; - *value_addr = NULL; - return DKIX_EMPTY; - } - assert(ix >= 0); - ep = &ep0[ix]; - assert(ep->me_key != NULL && PyUnicode_CheckExact(ep->me_key)); - if (ep->me_key == key || - (ep->me_hash == hash && unicode_eq(ep->me_key, key))) { - if (hashpos != NULL) - *hashpos = i; - *value_addr = mp->ma_values[ix]; - return ix; - } - for (size_t perturb = hash;;) { - perturb >>= PERTURB_SHIFT; - i = mask & ((i << 2) + i + perturb + 1); - ix = dk_get_index(mp->ma_keys, i); + PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys); + size_t mask = DK_MASK(mp->ma_keys); + size_t i = (size_t)hash & mask; + size_t perturb = hash; + + for (;;) { + Py_ssize_t ix = dk_get_index(mp->ma_keys, i); + assert (ix != DKIX_DUMMY); + if (ix == DKIX_EMPTY) { - if (hashpos != NULL) - *hashpos = i; *value_addr = NULL; return DKIX_EMPTY; } - assert(ix >= 0); - ep = &ep0[ix]; - assert(ep->me_key != NULL && PyUnicode_CheckExact(ep->me_key)); + + PyDictKeyEntry *ep = &ep0[ix]; + assert(ep->me_key != NULL); + assert(PyUnicode_CheckExact(ep->me_key)); if (ep->me_key == key || (ep->me_hash == hash && unicode_eq(ep->me_key, key))) { if (hashpos != NULL) @@ -998,6 +875,9 @@ lookdict_split(PyDictObject *mp, PyObjec *value_addr = mp->ma_values[ix]; return ix; } + + perturb >>= PERTURB_SHIFT; + i = mask & (i*5 + perturb + 1); } assert(0); /* NOT REACHED */ return 0; @@ -1064,26 +944,24 @@ void _PyObject_GC_UNTRACK(op); } -/* Internal function to find slot for an item from its hash +/* Internal function to find empty or dummy slot for an item from its hash when it is known that the key is not present in the dict. - - The dict must be combined. 
*/ +*/ static Py_ssize_t find_empty_slot(PyDictKeysObject *keys, PyObject *key, Py_hash_t hash) { - size_t i; - size_t mask = DK_MASK(keys); - Py_ssize_t ix; - assert(key != NULL); - i = hash & mask; - ix = dk_get_index(keys, i); - for (size_t perturb = hash; ix != DKIX_EMPTY;) { + size_t mask = DK_MASK(keys); + size_t i = hash & mask; + Py_ssize_t ix = dk_get_index(keys, i); + + for (size_t perturb = hash; ix >= 0;) { perturb >>= PERTURB_SHIFT; - i = (i << 2) + i + perturb + 1; + i = i*5 + perturb + 1; ix = dk_get_index(keys, i & mask); } + assert(DK_ENTRIES(keys)[keys->dk_nentries].me_value == NULL); return i & mask; } @@ -1102,6 +980,7 @@ Returns -1 if an error occurred, or 0 on static int insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value) { + // NOTE: PyDict_SetDefault() also adds entries to the dict. PyObject *old_value; PyDictKeyEntry *ep; Py_ssize_t hashpos, ix; @@ -1130,7 +1009,6 @@ insertdict(PyDictObject *mp, PyObject *k Py_DECREF(value); return -1; } - hashpos = find_empty_slot(mp->ma_keys, key, hash); ix = DKIX_EMPTY; } @@ -1143,9 +1021,9 @@ insertdict(PyDictObject *mp, PyObject *k Py_DECREF(value); return -1; } - hashpos = find_empty_slot(mp->ma_keys, key, hash); } ep = &DK_ENTRIES(mp->ma_keys)[mp->ma_keys->dk_nentries]; + hashpos = find_empty_slot(mp->ma_keys, key, hash); dk_set_index(mp->ma_keys, hashpos, mp->ma_keys->dk_nentries); Py_INCREF(key); ep->me_key = key; @@ -1197,7 +1075,7 @@ build_indices(PyDictKeysObject *keys, Py size_t i = hash & mask; for (size_t perturb = hash; dk_get_index(keys, i) != DKIX_EMPTY;) { perturb >>= PERTURB_SHIFT; - i = mask & ((i << 2) + i + perturb + 1); + i = mask & (i*5 + perturb + 1); } dk_set_index(keys, i, ix); } @@ -2859,7 +2737,6 @@ PyDict_SetDefault(PyObject *d, PyObject if (insertion_resize(mp) < 0) { return NULL; } - hashpos = find_empty_slot(mp->ma_keys, key, hash); ix = DKIX_EMPTY; } @@ -2870,9 +2747,9 @@ PyDict_SetDefault(PyObject *d, PyObject if (insertion_resize(mp) < 0) { 
return NULL; } - hashpos = find_empty_slot(mp->ma_keys, key, hash); } ep0 = DK_ENTRIES(mp->ma_keys); + hashpos = find_empty_slot(mp->ma_keys, key, hash); ep = &ep0[mp->ma_keys->dk_nentries]; dk_set_index(mp->ma_keys, hashpos, mp->ma_keys->dk_nentries); Py_INCREF(key);