diff --git a/Include/dictobject.h b/Include/dictobject.h --- a/Include/dictobject.h +++ b/Include/dictobject.h @@ -88,6 +88,8 @@ PyAPI_FUNC(int) PyDict_DelItem(PyObject #ifndef Py_LIMITED_API PyAPI_FUNC(int) _PyDict_DelItem_KnownHash(PyObject *mp, PyObject *key, Py_hash_t hash); +PyAPI_FUNC(int) _PyDict_DelItemIf(PyObject *mp, PyObject *key, + int (*predicate)(PyObject *value)); #endif PyAPI_FUNC(void) PyDict_Clear(PyObject *mp); PyAPI_FUNC(int) PyDict_Next( diff --git a/Lib/test/test_weakref.py b/Lib/test/test_weakref.py --- a/Lib/test/test_weakref.py +++ b/Lib/test/test_weakref.py @@ -1676,6 +1676,17 @@ class MappingTestCase(TestBase): x = d.pop(10, 10) self.assertIsNot(x, None) # we never put None in there! + def test_threaded_weak_valued_consistency(self): + d = weakref.WeakValueDictionary() + with collect_in_thread(): + for i in range(200000): + #o = None # lose previous ref + o = RefCycle() + d[10] = o + # o is still alive, so the dict can't be empty + self.assertEqual(len(d), 1) + o = None + from test import mapping_tests diff --git a/Lib/weakref.py b/Lib/weakref.py --- a/Lib/weakref.py +++ b/Lib/weakref.py @@ -16,7 +16,8 @@ from _weakref import ( proxy, CallableProxyType, ProxyType, - ReferenceType) + ReferenceType, + _remove_dead_weakref) from _weakrefset import WeakSet, _IterationGuard @@ -111,7 +112,9 @@ class WeakValueDictionary(collections.Mu if self._iterating: self._pending_removals.append(wr.key) else: - del self.data[wr.key] + # Atomic removal is necessary since this function + # can be called asynchronously by the GC + _remove_dead_weakref(d, wr.key) self._remove = remove # A list of keys to be removed self._pending_removals = [] @@ -125,9 +128,12 @@ class WeakValueDictionary(collections.Mu # We shouldn't encounter any KeyError, because this method should # always be called *before* mutating the dict. while l: - del d[l.pop()] + key = l.pop() + _remove_dead_weakref(d, key) def __getitem__(self, key): + if self._pending_removals: + self._commit_removals() o = self.data[key]() if o is None: raise KeyError(key) @@ -140,9 +146,13 @@ class WeakValueDictionary(collections.Mu del self.data[key] def __len__(self): - return len(self.data) - len(self._pending_removals) + if self._pending_removals: + self._commit_removals() + return len(self.data) def __contains__(self, key): + if self._pending_removals: + self._commit_removals() try: o = self.data[key]() except KeyError: @@ -158,6 +168,8 @@ class WeakValueDictionary(collections.Mu self.data[key] = KeyedRef(value, self._remove, key) def copy(self): + if self._pending_removals: + self._commit_removals() new = WeakValueDictionary() for key, wr in self.data.items(): o = wr() @@ -169,6 +181,8 @@ class WeakValueDictionary(collections.Mu def __deepcopy__(self, memo): from copy import deepcopy + if self._pending_removals: + self._commit_removals() new = self.__class__() for key, wr in self.data.items(): o = wr() @@ -177,6 +191,8 @@ class WeakValueDictionary(collections.Mu return new def get(self, key, default=None): + if self._pending_removals: + self._commit_removals() try: wr = self.data[key] except KeyError: @@ -190,6 +206,8 @@ class WeakValueDictionary(collections.Mu return o def items(self): + if self._pending_removals: + self._commit_removals() with _IterationGuard(self): for k, wr in self.data.items(): v = wr() @@ -197,6 +215,8 @@ class WeakValueDictionary(collections.Mu yield k, v def keys(self): + if self._pending_removals: + self._commit_removals() with _IterationGuard(self): for k, wr in self.data.items(): if wr() is not None: @@ -214,10 +234,14 @@ class WeakValueDictionary(collections.Mu keep the values around longer than needed. """ + if self._pending_removals: + self._commit_removals() with _IterationGuard(self): yield from self.data.values() def values(self): + if self._pending_removals: + self._commit_removals() with _IterationGuard(self): for wr in self.data.values(): obj = wr() @@ -249,13 +273,13 @@ class WeakValueDictionary(collections.Mu return o def setdefault(self, key, default=None): + if self._pending_removals: + self._commit_removals() try: o = self.data[key]() except KeyError: o = None if o is None: - if self._pending_removals: - self._commit_removals() self.data[key] = KeyedRef(default, self._remove, key) return default else: @@ -290,6 +314,8 @@ class WeakValueDictionary(collections.Mu keep the values around longer than needed. """ + if self._pending_removals: + self._commit_removals() return list(self.data.values()) diff --git a/Modules/_weakref.c b/Modules/_weakref.c --- a/Modules/_weakref.c +++ b/Modules/_weakref.c @@ -35,6 +35,47 @@ static Py_ssize_t } +static int +is_dead_weakref(PyObject *value) +{ + if (!PyWeakref_Check(value)) { + PyErr_SetString(PyExc_TypeError, "not a weakref"); + return -1; + } + return PyWeakref_GET_OBJECT(value) == Py_None; +} + +/*[clinic input] + +_weakref._remove_dead_weakref -> object + + dct: object(subclass_of='&PyDict_Type') + key: object + / + +Atomically remove key from dict if it points to a dead weakref. + +[clinic start generated code]*/ + +static PyObject * +_weakref__remove_dead_weakref_impl(PyObject *module, PyObject *dct, + PyObject *key) +/*[clinic end generated code: output=d9ff53061fcb875c input=db4f47f9ff9d33f9]*/ +{ + if (_PyDict_DelItemIf(dct, key, is_dead_weakref) < 0) { + if (PyErr_ExceptionMatches(PyExc_KeyError)) + /* This function is meant to allow safe weak-value dicts + with GC in another thread (see issue #28427), so it's + ok if the key doesn't exist anymore. + */ + PyErr_Clear(); + else + return NULL; + } + Py_RETURN_NONE; +} + + PyDoc_STRVAR(weakref_getweakrefs__doc__, "getweakrefs(object) -- return a list of all weak reference objects\n" "that point to 'object'."); @@ -88,6 +129,7 @@ weakref_proxy(PyObject *self, PyObject * static PyMethodDef weakref_functions[] = { _WEAKREF_GETWEAKREFCOUNT_METHODDEF + _WEAKREF__REMOVE_DEAD_WEAKREF_METHODDEF {"getweakrefs", weakref_getweakrefs, METH_O, weakref_getweakrefs__doc__}, {"proxy", weakref_proxy, METH_VARARGS, diff --git a/Modules/clinic/_weakref.c.h b/Modules/clinic/_weakref.c.h --- a/Modules/clinic/_weakref.c.h +++ b/Modules/clinic/_weakref.c.h @@ -29,4 +29,34 @@ static PyObject * exit: return return_value; } -/*[clinic end generated code: output=e1ad587147323e19 input=a9049054013a1b77]*/ + +PyDoc_STRVAR(_weakref__remove_dead_weakref__doc__, +"_remove_dead_weakref($module, dct, key, /)\n" +"--\n" +"\n" +"Atomically remove key from dict if it points to a dead weakref."); + +#define _WEAKREF__REMOVE_DEAD_WEAKREF_METHODDEF \ + {"_remove_dead_weakref", (PyCFunction)_weakref__remove_dead_weakref, METH_VARARGS, _weakref__remove_dead_weakref__doc__}, + +static PyObject * +_weakref__remove_dead_weakref_impl(PyObject *module, PyObject *dct, + PyObject *key); + +static PyObject * +_weakref__remove_dead_weakref(PyObject *module, PyObject *args) +{ + PyObject *return_value = NULL; + PyObject *dct; + PyObject *key; + + if (!PyArg_ParseTuple(args, "O!O:_remove_dead_weakref", + &PyDict_Type, &dct, &key)) { + goto exit; + } + return_value = _weakref__remove_dead_weakref_impl(module, dct, key); + +exit: + return return_value; +} +/*[clinic end generated code: output=e860dd818a44bc9b input=a9049054013a1b77]*/ diff --git a/Objects/dictobject.c b/Objects/dictobject.c --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1591,11 +1591,34 @@ int return insertdict(mp, key, hash, value); } +static int +delitem_common(PyDictObject *mp, Py_ssize_t hashpos, Py_ssize_t ix, + PyObject **value_addr) +{ + PyObject *old_key, *old_value; + PyDictKeyEntry *ep; + + old_value = *value_addr; + assert(old_value != NULL); + *value_addr = NULL; + mp->ma_used--; + mp->ma_version_tag = DICT_NEXT_VERSION(); + ep = &DK_ENTRIES(mp->ma_keys)[ix]; + dk_set_index(mp->ma_keys, hashpos, DKIX_DUMMY); + ENSURE_ALLOWS_DELETIONS(mp); + old_key = ep->me_key; + ep->me_key = NULL; + Py_DECREF(old_key); + Py_DECREF(old_value); + + assert(_PyDict_CheckConsistency(mp)); + return 0; +} + int PyDict_DelItem(PyObject *op, PyObject *key) { Py_hash_t hash; - assert(key); if (!PyUnicode_CheckExact(key) || (hash = ((PyASCIIObject *) key)->hash) == -1) { @@ -1612,8 +1635,6 @@ int { Py_ssize_t hashpos, ix; PyDictObject *mp; - PyDictKeyEntry *ep; - PyObject *old_key, *old_value; PyObject **value_addr; if (!PyDict_Check(op)) { @@ -1640,24 +1661,60 @@ int ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos); assert(ix >= 0); } - - old_value = *value_addr; - assert(old_value != NULL); - *value_addr = NULL; - mp->ma_used--; - mp->ma_version_tag = DICT_NEXT_VERSION(); - ep = &DK_ENTRIES(mp->ma_keys)[ix]; - dk_set_index(mp->ma_keys, hashpos, DKIX_DUMMY); - ENSURE_ALLOWS_DELETIONS(mp); - old_key = ep->me_key; - ep->me_key = NULL; - Py_DECREF(old_key); - Py_DECREF(old_value); - - assert(_PyDict_CheckConsistency(mp)); - return 0; + return delitem_common(mp, hashpos, ix, value_addr); } +/* This function promises that the predicate -> deletion sequence is atomic + * (i.e. protected by the GIL), assuming the predicate itself doesn't + * release the GIL. + */ +int +_PyDict_DelItemIf(PyObject *op, PyObject *key, + int (*predicate)(PyObject *value)) +{ + Py_ssize_t hashpos, ix; + PyDictObject *mp; + Py_hash_t hash; + PyObject **value_addr; + int res; + + if (!PyDict_Check(op)) { + PyErr_BadInternalCall(); + return -1; + } + assert(key); + hash = PyObject_Hash(key); + if (hash == -1) + return -1; + mp = (PyDictObject *)op; + ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos); + if (ix == DKIX_ERROR) + return -1; + if (ix == DKIX_EMPTY || *value_addr == NULL) { + _PyErr_SetKeyError(key); + return -1; + } + assert(dk_get_index(mp->ma_keys, hashpos) == ix); + + // Split table doesn't allow deletion. Combine it. + if (_PyDict_HasSplitTable(mp)) { + if (dictresize(mp, DK_SIZE(mp->ma_keys))) { + return -1; + } + ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos); + assert(ix >= 0); + } + + res = predicate(*value_addr); + if (res == -1) + return -1; + if (res > 0) + return delitem_common(mp, hashpos, ix, value_addr); + else + return 0; +} + + void PyDict_Clear(PyObject *op) {