Index: Include/stringobject.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/stringobject.h,v retrieving revision 2.34 diff -c -r2.34 stringobject.h *** Include/stringobject.h 24 May 2002 19:01:57 -0000 2.34 --- Include/stringobject.h 6 Jul 2002 14:15:08 -0000 *************** *** 25,31 **** */ /* Caching the hash (ob_shash) saves recalculation of a string's hash value. ! Interning strings (ob_sinterned) tries to ensure that only one string object with a given value exists, so equality tests can be one pointer comparison. This is generally restricted to strings that "look like" Python identifiers, although the intern() builtin can be used to force --- 25,31 ---- */ /* Caching the hash (ob_shash) saves recalculation of a string's hash value. ! Interning strings (ob_sstate) tries to ensure that only one string object with a given value exists, so equality tests can be one pointer comparison. This is generally restricted to strings that "look like" Python identifiers, although the intern() builtin can be used to force *************** *** 35,49 **** typedef struct { PyObject_VAR_HEAD long ob_shash; ! PyObject *ob_sinterned; char ob_sval[1]; } PyStringObject; extern DL_IMPORT(PyTypeObject) PyBaseString_Type; extern DL_IMPORT(PyTypeObject) PyString_Type; #define PyString_Check(op) PyObject_TypeCheck(op, &PyString_Type) #define PyString_CheckExact(op) ((op)->ob_type == &PyString_Type) extern DL_IMPORT(PyObject *) PyString_FromStringAndSize(const char *, int); extern DL_IMPORT(PyObject *) PyString_FromString(const char *); --- 35,55 ---- typedef struct { PyObject_VAR_HEAD long ob_shash; ! 
char ob_sstate; char ob_sval[1]; } PyStringObject; + #define SSTATE_NOT_INTERNED 0 + #define SSTATE_INTERNED_MORTAL 1 + #define SSTATE_INTERNED_IMMORTAL 2 + #define SSTATE_INTERNED_ZOMBIE 3 + extern DL_IMPORT(PyTypeObject) PyBaseString_Type; extern DL_IMPORT(PyTypeObject) PyString_Type; #define PyString_Check(op) PyObject_TypeCheck(op, &PyString_Type) #define PyString_CheckExact(op) ((op)->ob_type == &PyString_Type) + #define PyString_CheckInterned(op) (((PyStringObject *)(op))->ob_sstate) extern DL_IMPORT(PyObject *) PyString_FromStringAndSize(const char *, int); extern DL_IMPORT(PyObject *) PyString_FromString(const char *); *************** *** 61,66 **** --- 67,73 ---- extern DL_IMPORT(PyObject *) _PyString_FormatLong(PyObject*, int, int, int, char**, int*); + extern DL_IMPORT(void) PyString_Intern(PyObject **); extern DL_IMPORT(void) PyString_InternInPlace(PyObject **); extern DL_IMPORT(PyObject *) PyString_InternFromString(const char *); extern DL_IMPORT(void) _Py_ReleaseInternedStrings(void); Index: Mac/Python/macimport.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Mac/Python/macimport.c,v retrieving revision 1.15 diff -c -r1.15 macimport.c *** Mac/Python/macimport.c 11 Apr 2002 20:48:14 -0000 1.15 --- Mac/Python/macimport.c 6 Jul 2002 14:15:16 -0000 *************** *** 79,91 **** static int max_not_a_file = 0; int i; ! if (obj && obj->ob_sinterned ) { for( i=0; i< max_not_a_file; i++ ) if ( obj == not_a_file[i] ) return 0; } if ( FSMakeFSSpec(0, 0, Pstring(filename), &fss) != noErr ) { ! if ( obj && max_not_a_file < MAXPATHCOMPONENTS && obj->ob_sinterned ) not_a_file[max_not_a_file++] = obj; /* doesn't exist or is folder */ return 0; --- 79,92 ---- static int max_not_a_file = 0; int i; ! if (obj && PyString_CheckInterned(obj)) { for( i=0; i< max_not_a_file; i++ ) if ( obj == not_a_file[i] ) return 0; } if ( FSMakeFSSpec(0, 0, Pstring(filename), &fss) != noErr ) { ! 
if ( obj && max_not_a_file < MAXPATHCOMPONENTS && PyString_CheckInterned(obj) ) ! not_a_file[max_not_a_file++] = obj; /* doesn't exist or is folder */ return 0; *************** *** 101,107 **** filerh = -1; } else { if ( FSpGetFInfo(&fss, &finfo) != noErr ) { ! if ( obj && max_not_a_file < MAXPATHCOMPONENTS && obj->ob_sinterned ) not_a_file[max_not_a_file++] = obj; /* doesn't exist or is folder */ return 0; --- 102,109 ---- filerh = -1; } else { if ( FSpGetFInfo(&fss, &finfo) != noErr ) { ! if ( obj && max_not_a_file < MAXPATHCOMPONENTS && PyString_CheckInterned(obj) ) ! not_a_file[max_not_a_file++] = obj; /* doesn't exist or is folder */ return 0; Index: Objects/classobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/classobject.c,v retrieving revision 2.158 diff -c -r2.158 classobject.c *** Objects/classobject.c 18 Jun 2002 12:38:06 -0000 2.158 --- Objects/classobject.c 6 Jul 2002 14:15:23 -0000 *************** *** 2332,2338 **** Py_DECREF(name); return "?"; } ! PyString_InternInPlace(&name); Py_DECREF(name); return PyString_AS_STRING(name); } --- 2332,2338 ---- Py_DECREF(name); return "?"; } ! PyString_Intern(&name); Py_DECREF(name); return PyString_AS_STRING(name); } Index: Objects/dictobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/dictobject.c,v retrieving revision 2.126 diff -c -r2.126 dictobject.c *** Objects/dictobject.c 13 Jun 2002 20:32:57 -0000 2.126 --- Objects/dictobject.c 6 Jul 2002 14:15:26 -0000 *************** *** 511,525 **** } mp = (dictobject *)op; if (PyString_CheckExact(key)) { ! if (((PyStringObject *)key)->ob_sinterned != NULL) { ! key = ((PyStringObject *)key)->ob_sinterned; ! hash = ((PyStringObject *)key)->ob_shash; ! } ! else { ! hash = ((PyStringObject *)key)->ob_shash; ! if (hash == -1) ! hash = PyObject_Hash(key); ! 
} } else { hash = PyObject_Hash(key); --- 511,519 ---- } mp = (dictobject *)op; if (PyString_CheckExact(key)) { ! hash = ((PyStringObject *)key)->ob_shash; ! if (hash == -1) ! hash = PyObject_Hash(key); } else { hash = PyObject_Hash(key); Index: Objects/object.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/object.c,v retrieving revision 2.179 diff -c -r2.179 object.c *** Objects/object.c 13 Jun 2002 21:42:51 -0000 2.179 --- Objects/object.c 6 Jul 2002 14:15:30 -0000 *************** *** 1206,1212 **** else Py_INCREF(name); ! PyString_InternInPlace(&name); if (tp->tp_setattro != NULL) { err = (*tp->tp_setattro)(v, name, value); Py_DECREF(name); --- 1206,1212 ---- else Py_INCREF(name); ! PyString_Intern(&name); if (tp->tp_setattro != NULL) { err = (*tp->tp_setattro)(v, name, value); Py_DECREF(name); Index: Objects/stringobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/stringobject.c,v retrieving revision 2.168 diff -c -r2.168 stringobject.c *** Objects/stringobject.c 14 Jun 2002 00:50:41 -0000 2.168 --- Objects/stringobject.c 6 Jul 2002 14:15:37 -0000 *************** *** 15,20 **** --- 15,31 ---- static PyStringObject *characters[UCHAR_MAX + 1]; static PyStringObject *nullstring; + /* This dictionary holds all interned strings. Note that references to + strings in this dictionary are *not* counted in the string's ob_refcnt. + When the interned string reaches a refcnt of 0 the string deallocation + function will delete the reference from this dictionary. 
+ + Another way to look at this is to say that the actual reference count + of a string is: s->ob_refcnt + (s->ob_sstate ? 2 : 0) + */ + static PyObject *interned; + + /* For both PyString_FromString() and PyString_FromStringAndSize(), the parameter `size' denotes number of characters to allocate, not counting any *************** *** 69,88 **** return PyErr_NoMemory(); PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; ! op->ob_sinterned = NULL; if (str != NULL) memcpy(op->ob_sval, str, size); op->ob_sval[size] = '\0'; /* share short strings */ if (size == 0) { PyObject *t = (PyObject *)op; ! PyString_InternInPlace(&t); op = (PyStringObject *)t; nullstring = op; Py_INCREF(op); } else if (size == 1 && str != NULL) { PyObject *t = (PyObject *)op; ! PyString_InternInPlace(&t); op = (PyStringObject *)t; characters[*str & UCHAR_MAX] = op; Py_INCREF(op); --- 80,99 ---- return PyErr_NoMemory(); PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; ! op->ob_sstate = SSTATE_NOT_INTERNED; if (str != NULL) memcpy(op->ob_sval, str, size); op->ob_sval[size] = '\0'; /* share short strings */ if (size == 0) { PyObject *t = (PyObject *)op; ! PyString_Intern(&t); op = (PyStringObject *)t; nullstring = op; Py_INCREF(op); } else if (size == 1 && str != NULL) { PyObject *t = (PyObject *)op; ! PyString_Intern(&t); op = (PyStringObject *)t; characters[*str & UCHAR_MAX] = op; Py_INCREF(op); *************** *** 125,142 **** return PyErr_NoMemory(); PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; ! op->ob_sinterned = NULL; memcpy(op->ob_sval, str, size+1); /* share short strings */ if (size == 0) { PyObject *t = (PyObject *)op; ! PyString_InternInPlace(&t); op = (PyStringObject *)t; nullstring = op; Py_INCREF(op); } else if (size == 1) { PyObject *t = (PyObject *)op; ! 
PyString_InternInPlace(&t); op = (PyStringObject *)t; characters[*str & UCHAR_MAX] = op; Py_INCREF(op); --- 136,153 ---- return PyErr_NoMemory(); PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; ! op->ob_sstate = SSTATE_NOT_INTERNED; memcpy(op->ob_sval, str, size+1); /* share short strings */ if (size == 0) { PyObject *t = (PyObject *)op; ! PyString_Intern(&t); op = (PyStringObject *)t; nullstring = op; Py_INCREF(op); } else if (size == 1) { PyObject *t = (PyObject *)op; ! PyString_Intern(&t); op = (PyStringObject *)t; characters[*str & UCHAR_MAX] = op; Py_INCREF(op); *************** *** 479,487 **** --- 490,518 ---- return NULL; } + #define ZOMBIE_REFCNT_BIAS (INT_MAX>>1) + static void string_dealloc(PyObject *op) { + switch (PyString_CheckInterned(op)) { + case SSTATE_NOT_INTERNED: + break; + + case SSTATE_INTERNED_MORTAL: + /* revive dead object temporarily for DelItem */ + op->ob_refcnt = 3; + PyDict_DelItem(interned, op); + break; + + case SSTATE_INTERNED_IMMORTAL: + PyString_CheckInterned(op) = SSTATE_INTERNED_ZOMBIE; + op->ob_refcnt += ZOMBIE_REFCNT_BIAS; + return; + + default: + Py_FatalError("Inconsistend interned string state."); + } op->ob_type->tp_free(op); } *************** *** 724,730 **** return PyErr_NoMemory(); PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; ! op->ob_sinterned = NULL; memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size); memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size); op->ob_sval[size] = '\0'; --- 755,761 ---- return PyErr_NoMemory(); PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; ! op->ob_sstate = SSTATE_NOT_INTERNED; memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size); memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size); op->ob_sval[size] = '\0'; *************** *** 767,773 **** return PyErr_NoMemory(); PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; ! 
op->ob_sinterned = NULL; for (i = 0; i < size; i += a->ob_size) memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size); op->ob_sval[size] = '\0'; --- 798,804 ---- return PyErr_NoMemory(); PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; ! op->ob_sstate = SSTATE_NOT_INTERNED; for (i = 0; i < size; i += a->ob_size) memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size); op->ob_sval[size] = '\0'; *************** *** 925,933 **** if (a->ob_shash != -1) return a->ob_shash; - if (a->ob_sinterned != NULL) - return (a->ob_shash = - ((PyStringObject *)(a->ob_sinterned))->ob_shash); len = a->ob_size; p = (unsigned char *) a->ob_sval; x = *p << 7; --- 956,961 ---- *************** *** 2899,2906 **** memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1); ((PyStringObject *)pnew)->ob_shash = ((PyStringObject *)tmp)->ob_shash; ! ((PyStringObject *)pnew)->ob_sinterned = ! ((PyStringObject *)tmp)->ob_sinterned; } Py_DECREF(tmp); return pnew; --- 2927,2933 ---- memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1); ((PyStringObject *)pnew)->ob_shash = ((PyStringObject *)tmp)->ob_shash; ! ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED; } Py_DECREF(tmp); return pnew; *************** *** 3805,3876 **** return NULL; } - - - /* This dictionary will leak at PyString_Fini() time. That's acceptable - * because PyString_Fini() specifically frees interned strings that are - * only referenced by this dictionary. The CVS log entry for revision 2.45 - * says: - * - * Change the Fini function to only remove otherwise unreferenced - * strings from the interned table. There are references in - * hard-to-find static variables all over the interpreter, and it's not - * worth trying to get rid of all those; but "uninterning" isn't fair - * either and may cause subtle failures later -- so we have to keep them - * in the interned table. - */ - static PyObject *interned; - void ! 
PyString_InternInPlace(PyObject **p) { register PyStringObject *s = (PyStringObject *)(*p); PyObject *t; if (s == NULL || !PyString_Check(s)) ! Py_FatalError("PyString_InternInPlace: strings only please!"); ! if ((t = s->ob_sinterned) != NULL) { ! if (t == (PyObject *)s) ! return; ! Py_INCREF(t); ! *p = t; ! Py_DECREF(s); return; - } if (interned == NULL) { interned = PyDict_New(); if (interned == NULL) return; } if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) { Py_INCREF(t); ! *p = s->ob_sinterned = t; ! Py_DECREF(s); return; } ! /* Ensure that only true string objects appear in the intern dict, ! and as the value of ob_sinterned. */ ! if (PyString_CheckExact(s)) { ! t = (PyObject *)s; ! if (PyDict_SetItem(interned, t, t) == 0) { ! s->ob_sinterned = t; ! return; ! } ! } ! else { t = PyString_FromStringAndSize(PyString_AS_STRING(s), PyString_GET_SIZE(s)); ! if (t != NULL) { ! if (PyDict_SetItem(interned, t, t) == 0) { ! *p = s->ob_sinterned = t; ! Py_DECREF(s); ! return; ! } ! Py_DECREF(t); } } PyErr_Clear(); } PyObject * PyString_InternFromString(const char *cp) --- 3832,3895 ---- return NULL; } void ! PyString_Intern(PyObject **p) { register PyStringObject *s = (PyStringObject *)(*p); PyObject *t; if (s == NULL || !PyString_Check(s)) ! Py_FatalError("PyString_Intern: strings only please!"); ! if (PyString_CheckInterned(s)) return; if (interned == NULL) { interned = PyDict_New(); if (interned == NULL) return; } if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) { + if (PyString_CheckInterned(t) == SSTATE_INTERNED_ZOMBIE) { + /* Revive zombies as immortals, of course */ + PyString_CheckInterned(t) = SSTATE_INTERNED_IMMORTAL; + t->ob_refcnt -= ZOMBIE_REFCNT_BIAS; + } Py_INCREF(t); ! Py_DECREF(*p); ! *p = t; return; } ! /* Ensure that only true string objects appear in the intern dict */ ! if (!PyString_CheckExact(s)) { t = PyString_FromStringAndSize(PyString_AS_STRING(s), PyString_GET_SIZE(s)); ! if (t == NULL) { ! PyErr_Clear(); ! 
return; } + } else { + t = (PyObject*) s; + Py_INCREF(t); + } + + if (PyDict_SetItem(interned, t, t) == 0) { + /* The two references in interned are not counted by + refcnt. The string deallocator will take care of this */ + ((PyObject *)t)->ob_refcnt-=2; + PyString_CheckInterned(t) = SSTATE_INTERNED_MORTAL; + Py_DECREF(*p); + *p = t; + return; } + Py_DECREF(t); PyErr_Clear(); } + void + PyString_InternInPlace(PyObject **p) + { + PyString_Intern(p); + PyString_CheckInterned(*p) = SSTATE_INTERNED_IMMORTAL; + } + PyObject * PyString_InternFromString(const char *cp) *************** *** 3892,3919 **** } Py_XDECREF(nullstring); nullstring = NULL; - if (interned) { - int pos, changed; - PyObject *key, *value; - do { - changed = 0; - pos = 0; - while (PyDict_Next(interned, &pos, &key, &value)) { - if (key->ob_refcnt == 2 && key == value) { - PyDict_DelItem(interned, key); - changed = 1; - } - } - } while (changed); - } } void _Py_ReleaseInternedStrings(void) { - if (interned) { - fprintf(stderr, "releasing interned strings\n"); - PyDict_Clear(interned); - Py_DECREF(interned); - interned = NULL; - } } --- 3911,3918 ---- Index: Python/codecs.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/codecs.c,v retrieving revision 2.13 diff -c -r2.13 codecs.c *** Python/codecs.c 26 Sep 2000 05:46:01 -0000 2.13 --- Python/codecs.c 6 Jul 2002 14:15:42 -0000 *************** *** 148,154 **** v = normalizestring(encoding); if (v == NULL) goto onError; ! PyString_InternInPlace(&v); /* First, try to lookup the name in the registry dictionary */ result = PyDict_GetItem(_PyCodec_SearchCache, v); --- 148,154 ---- v = normalizestring(encoding); if (v == NULL) goto onError; ! 
PyString_Intern(&v); /* First, try to lookup the name in the registry dictionary */ result = PyDict_GetItem(_PyCodec_SearchCache, v); Index: Python/compile.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/compile.c,v retrieving revision 2.247 diff -c -r2.247 compile.c *** Python/compile.c 20 Jun 2002 22:23:14 -0000 2.247 --- Python/compile.c 6 Jul 2002 14:15:52 -0000 *************** *** 318,324 **** PyErr_BadInternalCall(); return -1; } ! PyString_InternInPlace(&PyTuple_GET_ITEM(tuple, i)); } return 0; } --- 318,324 ---- PyErr_BadInternalCall(); return -1; } ! PyString_Intern(&PyTuple_GET_ITEM(tuple, i)); } return 0; } *************** *** 358,364 **** continue; if (!all_name_chars((unsigned char *)PyString_AS_STRING(v))) continue; ! PyString_InternInPlace(&PyTuple_GET_ITEM(consts, i)); } co = PyObject_NEW(PyCodeObject, &PyCode_Type); if (co != NULL) { --- 358,364 ---- continue; if (!all_name_chars((unsigned char *)PyString_AS_STRING(v))) continue; ! PyString_Intern(&PyTuple_GET_ITEM(consts, i)); } co = PyObject_NEW(PyCodeObject, &PyCode_Type); if (co != NULL) {