diff --git a/Misc/NEWS b/Misc/NEWS --- a/Misc/NEWS +++ b/Misc/NEWS @@ -5,16 +5,18 @@ What's New in Python 3.6.0 beta 2 ================================= *Release date: XXXX-XX-XX* Core and Builtins ----------------- +- Issue #28183: Optimize and cleanup dict iteration. + - Issue #21578: Fixed misleading error message when ImportError called with invalid keyword args. - Issue #28203: Fix incorrect type in complex(1.0, {2:3}) error message. Patch by Soumya Sharma. - Issue #28086: Single var-positional argument of tuple subtype was passed unscathed to the C-defined function. Now it is converted to exact tuple. diff --git a/Objects/dictobject.c b/Objects/dictobject.c --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -993,25 +993,29 @@ } assert(0); /* NOT REACHED */ return 0; } int _PyDict_HasOnlyStringKeys(PyObject *dict) { + assert(PyDict_Check(dict)); + // Shortcut + if (((PyDictObject *)dict)->ma_keys->dk_lookup != lookdict) + return 1; + Py_ssize_t pos = 0; PyObject *key, *value; - assert(PyDict_Check(dict)); - /* Shortcut */ - if (((PyDictObject *)dict)->ma_keys->dk_lookup != lookdict) - return 1; - while (PyDict_Next(dict, &pos, &key, &value)) - if (!PyUnicode_Check(key)) + Py_hash_t hash; + while (_PyDict_Next(dict, &pos, &key, &value, &hash)) { + if (!PyUnicode_Check(key)) { return 0; + } + } return 1; } #define MAINTAIN_TRACKING(mp, key, value) \ do { \ if (!_PyObject_GC_IS_TRACKED(mp)) { \ if (_PyObject_GC_MAY_BE_TRACKED(key) || \ _PyObject_GC_MAY_BE_TRACKED(value)) { \ @@ -1679,103 +1683,112 @@ } else { assert(oldkeys->dk_refcnt == 1); DK_DECREF(oldkeys); } assert(_PyDict_CheckConsistency(mp)); } -/* Returns -1 if no more items (or op is not a dict), - * index of item otherwise. Stores value in pvalue +/* Internal version of PyDict_Next that returns a hash value in addition + * to the key and value. + * + * Return 1 on success, return 0 when the reached the end of the dictionary. + * Returned &key and &value are borrowed reference. */ -static inline Py_ssize_t -dict_next(PyObject *op, Py_ssize_t i, PyObject **pvalue) +int +_PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, + PyObject **pvalue, Py_hash_t *phash) { - Py_ssize_t n; - PyDictObject *mp; - PyObject **value_ptr = NULL; - - if (!PyDict_Check(op)) - return -1; - mp = (PyDictObject *)op; - if (i < 0) - return -1; - - n = mp->ma_keys->dk_nentries; - if (mp->ma_values) { - for (; i < n; i++) { - value_ptr = &mp->ma_values[i]; - if (*value_ptr != NULL) - break; + assert(PyDict_Check(op)); + assert(ppos != NULL); + assert(pkey != NULL); + assert(pvalue != NULL); + assert(phash != NULL); + + PyDictObject *mp = (PyDictObject *)op; + Py_ssize_t i = *ppos; + if (i < 0) { + return 0; + } + + Py_ssize_t n = mp->ma_keys->dk_nentries; + PyDictKeyEntry *entry_ptr; + PyObject *value; + + if (_PyDict_HasSplitTable(mp)) { + PyObject **value_ptr = &mp->ma_values[i]; + while (i < n && *value_ptr == NULL) { + value_ptr++; + i++; } + if (i >= n) { + return 0; + } + entry_ptr = &DK_ENTRIES(mp->ma_keys)[i]; + value = *value_ptr; } else { - PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys); - for (; i < n; i++) { - value_ptr = &ep0[i].me_value; - if (*value_ptr != NULL) - break; + entry_ptr = &DK_ENTRIES(mp->ma_keys)[i]; + while (i < n && entry_ptr->me_value == NULL) { + entry_ptr++; + i++; } + if (i >= n) { + return 0; + } + value = entry_ptr->me_value; } - if (i >= n) - return -1; - if (pvalue) - *pvalue = *value_ptr; - return i; + + *ppos = i+1; + *pkey = entry_ptr->me_key; + *phash = entry_ptr->me_hash; + *pvalue = value; + return 1; } /* * Iterate over a dict. Use like so: * * Py_ssize_t i; * PyObject *key, *value; * i = 0; # important! i should not otherwise be changed by you * while (PyDict_Next(yourdict, &i, &key, &value)) { - * Refer to borrowed references in key and value. + * Refer to borrowed references in key and value. * } * + * Return 1 on success, return 0 when the reached the end of the dictionary. + * Returned &key and &value are borrowed reference. + * + * This function doesn't raise TypeError. When op is not dict, returns 0 on + * normal Python build, and C level assert fails on debug build. + * * CAUTION: In general, it isn't safe to use PyDict_Next in a loop that * mutates the dict. One exception: it is safe if the loop merely changes * the values associated with the keys (but doesn't insert new keys or * delete keys), via PyDict_SetItem(). */ int PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue) { - PyDictObject *mp = (PyDictObject*)op; - Py_ssize_t i = dict_next(op, *ppos, pvalue); - if (i < 0) + if (!PyDict_Check(op)) { + assert(0); return 0; - mp = (PyDictObject *)op; - *ppos = i+1; - if (pkey) - *pkey = DK_ENTRIES(mp->ma_keys)[i].me_key; - return 1; -} - -/* Internal version of PyDict_Next that returns a hash value in addition - * to the key and value. - */ -int -_PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, - PyObject **pvalue, Py_hash_t *phash) -{ - PyDictObject *mp; - PyDictKeyEntry *ep0; - Py_ssize_t i = dict_next(op, *ppos, pvalue); - if (i < 0) - return 0; - mp = (PyDictObject *)op; - ep0 = DK_ENTRIES(mp->ma_keys); - *ppos = i+1; - *phash = ep0[i].me_hash; - if (pkey) - *pkey = ep0[i].me_key; - return 1; + } + + PyObject *key, *value; + Py_hash_t hash; + if (pkey == NULL) { + pkey = &key; + } + if (pvalue == NULL) { + pvalue = &value; + } + + return _PyDict_Next(op, ppos, pkey, pvalue, &hash); } /* Internal version of dict.pop(). */ PyObject * _PyDict_Pop(PyDictObject *mp, PyObject *key, PyObject *deflt) { Py_hash_t hash; Py_ssize_t ix, hashpos; @@ -1978,17 +1991,18 @@ if (_PyUnicodeWriter_WriteChar(&writer, '{') < 0) goto error; /* Do repr() on each key+value pair, and insert ": " between them. Note that repr may mutate the dict. */ i = 0; first = 1; - while (PyDict_Next((PyObject *)mp, &i, &key, &value)) { + Py_hash_t hash; + while (_PyDict_Next((PyObject *)mp, &i, &key, &value, &hash)) { PyObject *s; int res; /* Prevent repr from deleting key or value during key format. */ Py_INCREF(key); Py_INCREF(value); if (!first) { @@ -3309,62 +3323,66 @@ static PyObject * dictiter_reduce(dictiterobject *di); PyDoc_STRVAR(reduce_doc, "Return state information for pickling."); static PyMethodDef dictiter_methods[] = { {"__length_hint__", (PyCFunction)dictiter_len, METH_NOARGS, length_hint_doc}, - {"__reduce__", (PyCFunction)dictiter_reduce, METH_NOARGS, - reduce_doc}, - {NULL, NULL} /* sentinel */ + {"__reduce__", (PyCFunction)dictiter_reduce, METH_NOARGS, reduce_doc}, + {NULL, NULL} // sentinel }; -static PyObject *dictiter_iternextkey(dictiterobject *di) +static PyObject* +dictiter_iternextkey(dictiterobject *di) { - PyObject *key; - Py_ssize_t i, n, offset; - PyDictKeysObject *k; PyDictObject *d = di->di_dict; - PyObject **value_ptr; - - if (d == NULL) + if (d == NULL) { return NULL; + } assert (PyDict_Check(d)); if (di->di_used != d->ma_used) { PyErr_SetString(PyExc_RuntimeError, "dictionary changed size during iteration"); di->di_used = -1; /* Make this state sticky */ return NULL; } - i = di->di_pos; - if (i < 0) - goto fail; - k = d->ma_keys; - if (d->ma_values) { - value_ptr = &d->ma_values[i]; - offset = sizeof(PyObject *); + PyObject *key; + Py_ssize_t i = di->di_pos; + PyDictKeysObject *k = d->ma_keys; + Py_ssize_t n = k->dk_nentries; + + if (_PyDict_HasSplitTable(d)) { + PyObject **value_ptr = &d->ma_values[i]; + while (i < n && *value_ptr == NULL) { + value_ptr++; + i++; + } + if (i >= n) { + goto fail; + } + key = DK_ENTRIES(k)[i].me_key; } else { - value_ptr = &DK_ENTRIES(k)[i].me_value; - offset = sizeof(PyDictKeyEntry); + PyDictKeyEntry *entry_ptr = &DK_ENTRIES(k)[i]; + while (i < n && entry_ptr->me_value == NULL) { + entry_ptr++; + i++; + } + if (i >= n) { + goto fail; + } + key = entry_ptr->me_key; } - n = k->dk_nentries - 1; - while (i <= n && *value_ptr == NULL) { - value_ptr = (PyObject **)(((char *)value_ptr) + offset); - i++; - } + di->di_pos = i+1; - if (i > n) - goto fail; di->len--; - key = DK_ENTRIES(k)[i].me_key; Py_INCREF(key); return key; fail: di->di_dict = NULL; Py_DECREF(d); return NULL; } @@ -3397,55 +3415,60 @@ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ PyObject_SelfIter, /* tp_iter */ (iternextfunc)dictiter_iternextkey, /* tp_iternext */ dictiter_methods, /* tp_methods */ 0, }; -static PyObject *dictiter_iternextvalue(dictiterobject *di) +static PyObject * +dictiter_iternextvalue(dictiterobject *di) { - PyObject *value; - Py_ssize_t i, n, offset; PyDictObject *d = di->di_dict; - PyObject **value_ptr; - if (d == NULL) return NULL; assert (PyDict_Check(d)); if (di->di_used != d->ma_used) { PyErr_SetString(PyExc_RuntimeError, "dictionary changed size during iteration"); di->di_used = -1; /* Make this state sticky */ return NULL; } - i = di->di_pos; - n = d->ma_keys->dk_nentries - 1; - if (i < 0 || i > n) - goto fail; - if (d->ma_values) { - value_ptr = &d->ma_values[i]; - offset = sizeof(PyObject *); + Py_ssize_t i = di->di_pos; + Py_ssize_t n = d->ma_keys->dk_nentries; + PyObject *value; + + if (_PyDict_HasSplitTable(d)) { + PyObject **value_ptr = &d->ma_values[i]; + while (i < n && *value_ptr == NULL) { + value_ptr++; + i++; + } + if (i >= n) { + goto fail; + } + value = *value_ptr; } else { - value_ptr = &DK_ENTRIES(d->ma_keys)[i].me_value; - offset = sizeof(PyDictKeyEntry); + PyDictKeyEntry *entry_ptr = &DK_ENTRIES(d->ma_keys)[i]; + while (i < n && entry_ptr->me_value == NULL) { + entry_ptr++; + i++; + } + if (i >= n) { + goto fail; + } + value = entry_ptr->me_value; } - while (i <= n && *value_ptr == NULL) { - value_ptr = (PyObject **)(((char *)value_ptr) + offset); - i++; - if (i > n) - goto fail; - } + di->di_pos = i+1; di->len--; - value = *value_ptr; Py_INCREF(value); return value; fail: di->di_dict = NULL; Py_DECREF(d); return NULL; } @@ -3466,78 +3489,86 @@ 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ 0, /* tp_hash */ 0, /* tp_call */ 0, /* tp_str */ PyObject_GenericGetAttr, /* tp_getattro */ 0, /* tp_setattro */ 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ 0, /* tp_doc */ (traverseproc)dictiter_traverse, /* tp_traverse */ 0, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ PyObject_SelfIter, /* tp_iter */ (iternextfunc)dictiter_iternextvalue, /* tp_iternext */ dictiter_methods, /* tp_methods */ 0, }; -static PyObject *dictiter_iternextitem(dictiterobject *di) +static PyObject * +dictiter_iternextitem(dictiterobject *di) { - PyObject *key, *value, *result = di->di_result; - Py_ssize_t i, n, offset; PyDictObject *d = di->di_dict; - PyObject **value_ptr; - - if (d == NULL) + if (d == NULL) { return NULL; + } assert (PyDict_Check(d)); if (di->di_used != d->ma_used) { PyErr_SetString(PyExc_RuntimeError, "dictionary changed size during iteration"); di->di_used = -1; /* Make this state sticky */ return NULL; } - i = di->di_pos; - if (i < 0) - goto fail; - n = d->ma_keys->dk_nentries - 1; - if (d->ma_values) { - value_ptr = &d->ma_values[i]; - offset = sizeof(PyObject *); + Py_ssize_t i = di->di_pos; + Py_ssize_t n = d->ma_keys->dk_nentries; + PyObject *key, *value, *result = di->di_result; + + if (_PyDict_HasSplitTable(d)) { + PyObject **value_ptr = &d->ma_values[i]; + while (i < n && *value_ptr == NULL) { + value_ptr++; + i++; + } + if (i >= n) { + goto fail; + } + key = DK_ENTRIES(d->ma_keys)[i].me_key; + value = *value_ptr; } else { - value_ptr = &DK_ENTRIES(d->ma_keys)[i].me_value; - offset = sizeof(PyDictKeyEntry); + PyDictKeyEntry *entry_ptr = &DK_ENTRIES(d->ma_keys)[i]; + while (i < n && entry_ptr->me_value == NULL) { + entry_ptr++; + i++; + } + if (i >= n) { + goto fail; + } + key = entry_ptr->me_key; + value = entry_ptr->me_value; } - while (i <= n && *value_ptr == NULL) { - value_ptr = (PyObject **)(((char *)value_ptr) + offset); - i++; - } + di->di_pos = i+1; - if (i > n) - goto fail; - + di->len--; if (result->ob_refcnt == 1) { Py_INCREF(result); Py_DECREF(PyTuple_GET_ITEM(result, 0)); Py_DECREF(PyTuple_GET_ITEM(result, 1)); - } else { + } + else { result = PyTuple_New(2); - if (result == NULL) + if (result == NULL) { return NULL; + } } - di->len--; - key = DK_ENTRIES(d->ma_keys)[i].me_key; - value = *value_ptr; Py_INCREF(key); Py_INCREF(value); PyTuple_SET_ITEM(result, 0, key); /* steals reference */ PyTuple_SET_ITEM(result, 1, value); /* steals reference */ return result; fail: di->di_dict = NULL;