Index: Python/pythonrun.c
===================================================================
--- Python/pythonrun.c	(revision 46986)
+++ Python/pythonrun.c	(working copy)
@@ -451,8 +451,8 @@
 	PyCFunction_Fini();
 	PyTuple_Fini();
 	PyList_Fini();
+	PyString_Fini();
 	PySet_Fini();
-	PyString_Fini();
 	PyInt_Fini();
 	PyFloat_Fini();
 
Index: Include/setobject.h
===================================================================
--- Include/setobject.h	(revision 46986)
+++ Include/setobject.h	(working copy)
@@ -85,6 +85,7 @@
 PyAPI_FUNC(int) _PySet_Next(PyObject *set, Py_ssize_t *pos, PyObject **entry);
 PyAPI_FUNC(PyObject *) PySet_Pop(PyObject *set);
 PyAPI_FUNC(int) _PySet_Update(PyObject *set, PyObject *iterable);
+PyAPI_FUNC(PyObject *) _PySet_InternString(PyObject *set, PyObject *key);
 
 #ifdef __cplusplus
 }
Index: Objects/setobject.c
===================================================================
--- Objects/setobject.c	(revision 46986)
+++ Objects/setobject.c	(working copy)
@@ -2049,6 +2049,51 @@
 	return set_update_internal((PySetObject *)set, iterable);
 }
 
+/* Intern the string `key` in `set` (no type checks are made: `set` is
+   read as a PySetObject and `key` as a PyStringObject).
+
+   If an equal string is already an element, a new reference to that
+   element is returned.  Otherwise `key` is inserted, the set keeping
+   its own reference to it, and `key` itself is returned without an
+   extra reference for the caller.  NULL is returned on failure; when
+   only the table resize failed, `key` has already been inserted. */
+PyObject *
+_PySet_InternString(PyObject *set, PyObject *key)
+{
+	register PySetObject *so = (PySetObject *)set;
+	register setentry *entry;
+	register PyObject *res;
+	register long hash;
+	register Py_ssize_t n_used;
+
+	if ((hash = ((PyStringObject *) key)->ob_shash) == -1) {
+		hash = PyObject_Hash(key);
+		if (hash == -1)
+			return NULL;
+	}
+	entry = set_lookkey_string(so, key, hash);
+	if (entry == NULL)
+		return NULL;
+	res = entry->key;
+	if (res != NULL && res != dummy) {
+		Py_INCREF(res);
+		return res;
+	}
+	/* XXX the following logic is copied too many times */
+	assert(so->fill <= so->mask);  /* at least one empty slot */
+	n_used = so->used;
+	Py_INCREF(key);
+	if (set_insert_key(so, key, hash) == -1) {
+		Py_DECREF(key);
+		return NULL;
+	}
+	if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2))
+		return key;
+	if (set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4) == -1)
+		return NULL;
+	return key;
+}
+
 #ifdef Py_DEBUG
 
 /* Test code to be called with any three element set. 
Index: Objects/stringobject.c
===================================================================
--- Objects/stringobject.c	(revision 46986)
+++ Objects/stringobject.c	(working copy)
@@ -13,13 +13,13 @@
 static PyStringObject *characters[UCHAR_MAX + 1];
 static PyStringObject *nullstring;
 
-/* This dictionary holds all interned strings. Note that references to
-   strings in this dictionary are *not* counted in the string's ob_refcnt.
+/* This set holds all interned strings. Note that references to
+   strings in this set are *not* counted in the string's ob_refcnt.
    When the interned string reaches a refcnt of 0 the string deallocation
-   function will delete the reference from this dictionary.
+   function will delete the reference from this set.
 
    Another way to look at this is that to say that the actual reference
-   count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
+   count of a string is: s->ob_refcnt + (s->ob_sstate?1:0)
 */
 static PyObject *interned;
 
@@ -520,9 +520,9 @@
 			break;
 
 		case SSTATE_INTERNED_MORTAL:
-			/* revive dead object temporarily for DelItem */
-			op->ob_refcnt = 3;
-			if (PyDict_DelItem(interned, op) != 0)
+			/* revive dead object temporarily for Discard */
+			op->ob_refcnt = 2;
+			if (PySet_Discard(interned, op) != 1)
 				Py_FatalError(
 					"deletion of interned string failed");
 			break;
@@ -4900,7 +4900,7 @@
 void
 PyString_InternInPlace(PyObject **p)
 {
-	register PyStringObject *s = (PyStringObject *)(*p);
+	register PyObject *s = *p;
 	PyObject *t;
 	if (s == NULL || !PyString_Check(s))
 		Py_FatalError("PyString_InternInPlace: strings only please!");
@@ -4911,27 +4911,28 @@
 	if (PyString_CHECK_INTERNED(s))
 		return;
 	if (interned == NULL) {
-		interned = PyDict_New();
+		interned = PySet_New(NULL);
 		if (interned == NULL) {
 			PyErr_Clear(); /* Don't leave an exception */
 			return;
 		}
 	}
-	t = PyDict_GetItem(interned, (PyObject *)s);
-	if (t) {
-		Py_INCREF(t);
+	t = _PySet_InternString(interned, s);
+	if (t == NULL) {
+		PyErr_Clear(); /* Don't leave an exception */
+		return;
+	}
+	if (t != s) {
+		/* An equal string was already interned; t is the new
+		   reference returned by _PySet_InternString. */
 		Py_DECREF(*p);
 		*p = t;
 		return;
 	}
 
-	if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
-		PyErr_Clear();
-		return;
-	}
-	/* The two references in interned are not counted by refcnt.
+	/* The reference in interned is not counted by refcnt.
 	   The string deallocator will take care of this */
-	s->ob_refcnt -= 2;
+	s->ob_refcnt -= 1;
 	PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
 }
 
@@ -4970,17 +4971,11 @@
 void
 _Py_ReleaseInternedStrings(void)
 {
-	PyObject *keys;
+	PyObject *key;
 	PyStringObject *s;
-	Py_ssize_t i, n;
 
-	if (interned == NULL || !PyDict_Check(interned))
+	if (interned == NULL || interned->ob_type != &PySet_Type)
 		return;
-	keys = PyDict_Keys(interned);
-	if (keys == NULL || !PyList_Check(keys)) {
-		PyErr_Clear();
-		return;
-	}
 
 	/* Since _Py_ReleaseInternedStrings() is intended to help a leak
 	   detector, interned strings are not forcibly deallocated; rather, we
@@ -4988,9 +4983,8 @@
-	   the interned dict. */
+	   the interned set. */
 
 	fprintf(stderr, "releasing interned strings\n");
-	n = PyList_GET_SIZE(keys);
-	for (i = 0; i < n; i++) {
-		s = (PyStringObject *) PyList_GET_ITEM(keys, i);
+	while ((key = PySet_Pop(interned))) {
+		s = (PyStringObject *) key;
 		switch (s->ob_sstate) {
 		case SSTATE_NOT_INTERNED:
 			/* XXX Shouldn't happen */
@@ -5006,8 +5000,12 @@
 		}
 		s->ob_sstate = SSTATE_NOT_INTERNED;
 	}
-	Py_DECREF(keys);
-	PyDict_Clear(interned);
+	/* PySet_Pop() raises KeyError once the set is empty; that is the
+	   normal way out of the loop above, so clear it.
+	   NOTE(review): each popped key arrives as an owned reference;
+	   confirm the per-state refcount fix-ups in the switch (outside
+	   this hunk) still balance now that a set replaces the dict. */
+	PyErr_Clear();
 	Py_DECREF(interned);
 	interned = NULL;
 }