Index: Include/stringobject.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Include/stringobject.h,v
retrieving revision 2.34
diff -c -r2.34 stringobject.h
*** Include/stringobject.h	24 May 2002 19:01:57 -0000	2.34
--- Include/stringobject.h	1 Jul 2002 15:19:36 -0000
***************
*** 25,41 ****
  */
  
  /* Caching the hash (ob_shash) saves recalculation of a string's hash value.
!    Interning strings (ob_sinterned) tries to ensure that only one string
!    object with a given value exists, so equality tests can be one pointer
!    comparison.  This is generally restricted to strings that "look like"
!    Python identifiers, although the intern() builtin can be used to force
!    interning of any string.
     Together, these sped the interpreter by up to 20%. */
  
  typedef struct {
      PyObject_VAR_HEAD
      long ob_shash;
!     PyObject *ob_sinterned;
      char ob_sval[1];
  } PyStringObject;
  
--- 25,41 ----
  */
  
  /* Caching the hash (ob_shash) saves recalculation of a string's hash value.
!    Interning strings (ob_sinterned_flag) tries to ensure that only one
!    string object with a given value exists, so equality tests can be one
!    pointer comparison.  This is generally restricted to strings that
!    "look like" Python identifiers, although the intern() builtin can be used
!    to force interning of any string.
     Together, these sped the interpreter by up to 20%. */
  
  typedef struct {
      PyObject_VAR_HEAD
      long ob_shash;
!     char ob_sinterned_flag;
      char ob_sval[1];
  } PyStringObject;
  
***************
*** 44,49 ****
--- 44,50 ----
  
  #define PyString_Check(op) PyObject_TypeCheck(op, &PyString_Type)
  #define PyString_CheckExact(op) ((op)->ob_type == &PyString_Type)
+ #define PyString_CheckInterned(op) (((PyStringObject *)(op))->ob_sinterned_flag)
  
  extern DL_IMPORT(PyObject *) PyString_FromStringAndSize(const char *, int);
  extern DL_IMPORT(PyObject *) PyString_FromString(const char *);
***************
*** 63,69 ****
  
  extern DL_IMPORT(void) PyString_InternInPlace(PyObject **);
  extern DL_IMPORT(PyObject *) PyString_InternFromString(const char *);
- extern DL_IMPORT(void) _Py_ReleaseInternedStrings(void);
  
  /* Macro, trading safety for speed */
  #define PyString_AS_STRING(op) (((PyStringObject *)(op))->ob_sval)
--- 64,69 ----
Index: Mac/Python/macimport.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Mac/Python/macimport.c,v
retrieving revision 1.15
diff -c -r1.15 macimport.c
*** Mac/Python/macimport.c	11 Apr 2002 20:48:14 -0000	1.15
--- Mac/Python/macimport.c	1 Jul 2002 15:19:44 -0000
***************
*** 79,91 ****
  	static int max_not_a_file = 0;
  	int i;
  
! 	if (obj && obj->ob_sinterned ) {
  		for( i=0; i< max_not_a_file; i++ )
  			if ( obj == not_a_file[i] )
  				return 0;
  	}
  	if ( FSMakeFSSpec(0, 0, Pstring(filename), &fss) != noErr ) {
! 		if ( obj && max_not_a_file < MAXPATHCOMPONENTS && obj->ob_sinterned )
  			not_a_file[max_not_a_file++] = obj;
  		/* doesn't exist or is folder */
  		return 0;
--- 79,92 ----
  	static int max_not_a_file = 0;
  	int i;
  
! 	if (obj && PyString_CheckInterned(obj)) {
  		for( i=0; i< max_not_a_file; i++ )
  			if ( obj == not_a_file[i] )
  				return 0;
  	}
  	if ( FSMakeFSSpec(0, 0, Pstring(filename), &fss) != noErr ) {
! 		if ( obj && max_not_a_file < MAXPATHCOMPONENTS && PyString_CheckInterned(obj) )
! 
  			not_a_file[max_not_a_file++] = obj;
  		/* doesn't exist or is folder */
  		return 0;
***************
*** 101,107 ****
  			filerh = -1;
  	} else {
  		if ( FSpGetFInfo(&fss, &finfo) != noErr ) {
! 			if ( obj && max_not_a_file < MAXPATHCOMPONENTS && obj->ob_sinterned )
  				not_a_file[max_not_a_file++] = obj;
  			/* doesn't exist or is folder */
  			return 0;
--- 102,109 ----
  			filerh = -1;
  	} else {
  		if ( FSpGetFInfo(&fss, &finfo) != noErr ) {
! 			if ( obj && max_not_a_file < MAXPATHCOMPONENTS && PyString_CheckInterned(obj) )
! 
  				not_a_file[max_not_a_file++] = obj;
  			/* doesn't exist or is folder */
  			return 0;
Index: Modules/main.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/main.c,v
retrieving revision 1.64
diff -c -r1.64 main.c
*** Modules/main.c	3 Mar 2002 02:59:16 -0000	1.64
--- Modules/main.c	1 Jul 2002 15:19:45 -0000
***************
*** 382,400 ****
  	fprintf(stderr, "\x0cq\x0c"); /* make frontend quit */
  #endif
  
- #ifdef __INSURE__
- 	/* Insure++ is a memory analysis tool that aids in discovering
- 	 * memory leaks and other memory problems.  On Python exit, the
- 	 * interned string dictionary is flagged as being in use at exit
- 	 * (which it is).  Under normal circumstances, this is fine because
- 	 * the memory will be automatically reclaimed by the system.  Under
- 	 * memory debugging, it's a huge source of useless noise, so we
- 	 * trade off slower shutdown for less distraction in the memory
- 	 * reports.  -baw
- 	 */
- 	_Py_ReleaseInternedStrings();
- #endif /* __INSURE__ */
- 
  	return sts;
  }
  
--- 382,387 ----
Index: Objects/dictobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/dictobject.c,v
retrieving revision 2.126
diff -c -r2.126 dictobject.c
*** Objects/dictobject.c	13 Jun 2002 20:32:57 -0000	2.126
--- Objects/dictobject.c	1 Jul 2002 15:19:50 -0000
***************
*** 511,525 ****
  	}
  	mp = (dictobject *)op;
  	if (PyString_CheckExact(key)) {
! 		if (((PyStringObject *)key)->ob_sinterned != NULL) {
! 			key = ((PyStringObject *)key)->ob_sinterned;
! 			hash = ((PyStringObject *)key)->ob_shash;
! 		}
! 		else {
! 			hash = ((PyStringObject *)key)->ob_shash;
! 			if (hash == -1)
! 				hash = PyObject_Hash(key);
! 		}
  	}
  	else {
  		hash = PyObject_Hash(key);
--- 511,519 ----
  	}
  	mp = (dictobject *)op;
  	if (PyString_CheckExact(key)) {
! 		hash = ((PyStringObject *)key)->ob_shash;
! 		if (hash == -1)
! 			hash = PyObject_Hash(key);
  	}
  	else {
  		hash = PyObject_Hash(key);
Index: Objects/stringobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/stringobject.c,v
retrieving revision 2.168
diff -c -r2.168 stringobject.c
*** Objects/stringobject.c	14 Jun 2002 00:50:41 -0000	2.168
--- Objects/stringobject.c	1 Jul 2002 15:19:57 -0000
***************
*** 15,20 ****
--- 15,31 ----
  static PyStringObject *characters[UCHAR_MAX + 1];
  static PyStringObject *nullstring;
  
+ /* This dictionary holds all interned strings.  Note that references to
+    strings in this dictionary are *not* counted in the string's ob_refcnt.
+    When the interned string reaches a refcnt of 0 the string deallocation
+    function will delete the reference from this dictionary.
+ 
+    Another way to look at this is to say that the actual reference count
+    of a string is:  s->ob_refcnt + 2*s->ob_sinterned_flag
+ */
+ static PyObject *interned;
+ 
+ 
  /*
     For both PyString_FromString() and PyString_FromStringAndSize(), the
     parameter `size' denotes number of characters to allocate, not counting any
***************
*** 69,75 ****
  		return PyErr_NoMemory();
  	PyObject_INIT_VAR(op, &PyString_Type, size);
  	op->ob_shash = -1;
! 	op->ob_sinterned = NULL;
  	if (str != NULL)
  		memcpy(op->ob_sval, str, size);
  	op->ob_sval[size] = '\0';
--- 80,86 ----
  		return PyErr_NoMemory();
  	PyObject_INIT_VAR(op, &PyString_Type, size);
  	op->ob_shash = -1;
! 	op->ob_sinterned_flag = 0;
  	if (str != NULL)
  		memcpy(op->ob_sval, str, size);
  	op->ob_sval[size] = '\0';
***************
*** 125,131 ****
  		return PyErr_NoMemory();
  	PyObject_INIT_VAR(op, &PyString_Type, size);
  	op->ob_shash = -1;
! 	op->ob_sinterned = NULL;
  	memcpy(op->ob_sval, str, size+1);
  	/* share short strings */
  	if (size == 0) {
--- 136,142 ----
  		return PyErr_NoMemory();
  	PyObject_INIT_VAR(op, &PyString_Type, size);
  	op->ob_shash = -1;
! 	op->ob_sinterned_flag = 0;
  	memcpy(op->ob_sval, str, size+1);
  	/* share short strings */
  	if (size == 0) {
***************
*** 482,487 ****
--- 493,509 ----
  static void
  string_dealloc(PyObject *op)
  {
+ 	if (PyString_CheckInterned(op)) {
+ 		/* Revive the dead object: PyDict_DelItem() releases the key
+ 		   and value references that ob_refcnt does not count. */
+ 		op->ob_refcnt = 3;
+ 		if (PyDict_DelItem(interned, op) != 0)
+ 			Py_FatalError("deletion of interned string failed");
+ 		if (PyDict_Size(interned) == 0) {
+ 			Py_DECREF(interned);
+ 			interned = NULL;
+ 		}
+ 	}
  	op->ob_type->tp_free(op);
  }
  
***************
*** 724,730 ****
  		return PyErr_NoMemory();
  	PyObject_INIT_VAR(op, &PyString_Type, size);
  	op->ob_shash = -1;
! 	op->ob_sinterned = NULL;
  	memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
  	memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
  	op->ob_sval[size] = '\0';
--- 746,752 ----
  		return PyErr_NoMemory();
  	PyObject_INIT_VAR(op, &PyString_Type, size);
  	op->ob_shash = -1;
! 	op->ob_sinterned_flag = 0;
  	memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
  	memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
  	op->ob_sval[size] = '\0';
***************
*** 767,773 ****
  		return PyErr_NoMemory();
  	PyObject_INIT_VAR(op, &PyString_Type, size);
  	op->ob_shash = -1;
! 	op->ob_sinterned = NULL;
  	for (i = 0; i < size; i += a->ob_size)
  		memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
  	op->ob_sval[size] = '\0';
--- 789,795 ----
  		return PyErr_NoMemory();
  	PyObject_INIT_VAR(op, &PyString_Type, size);
  	op->ob_shash = -1;
! 	op->ob_sinterned_flag = 0;
  	for (i = 0; i < size; i += a->ob_size)
  		memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
  	op->ob_sval[size] = '\0';
***************
*** 925,933 ****
  
  	if (a->ob_shash != -1)
  		return a->ob_shash;
- 	if (a->ob_sinterned != NULL)
- 		return (a->ob_shash =
- 			((PyStringObject *)(a->ob_sinterned))->ob_shash);
  	len = a->ob_size;
  	p = (unsigned char *) a->ob_sval;
  	x = *p << 7;
--- 947,952 ----
***************
*** 2899,2906 ****
  		memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
  		((PyStringObject *)pnew)->ob_shash =
  			((PyStringObject *)tmp)->ob_shash;
! 		((PyStringObject *)pnew)->ob_sinterned =
! 			((PyStringObject *)tmp)->ob_sinterned;
  	}
  	Py_DECREF(tmp);
  	return pnew;
--- 2918,2924 ----
  		memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
  		((PyStringObject *)pnew)->ob_shash =
  			((PyStringObject *)tmp)->ob_shash;
! 		((PyStringObject *)pnew)->ob_sinterned_flag = 0;
  	}
  	Py_DECREF(tmp);
  	return pnew;
***************
*** 3805,3826 ****
  	return NULL;
  }
  
- 
- 
- /* This dictionary will leak at PyString_Fini() time.  That's acceptable
-  * because PyString_Fini() specifically frees interned strings that are
-  * only referenced by this dictionary.  The CVS log entry for revision 2.45
-  * says:
-  *
-  *    Change the Fini function to only remove otherwise unreferenced
-  *    strings from the interned table.  There are references in
-  *    hard-to-find static variables all over the interpreter, and it's not
-  *    worth trying to get rid of all those; but "uninterning" isn't fair
-  *    either and may cause subtle failures later -- so we have to keep them
-  *    in the interned table.
-  */
- static PyObject *interned;
- 
  void
  PyString_InternInPlace(PyObject **p)
  {
--- 3823,3828 ----
***************
*** 3828,3841 ****
  	PyObject *t;
  	if (s == NULL || !PyString_Check(s))
  		Py_FatalError("PyString_InternInPlace: strings only please!");
! 	if ((t = s->ob_sinterned) != NULL) {
! 		if (t == (PyObject *)s)
! 			return;
! 		Py_INCREF(t);
! 		*p = t;
! 		Py_DECREF(s);
  		return;
- 	}
  	if (interned == NULL) {
  		interned = PyDict_New();
  		if (interned == NULL)
--- 3830,3837 ----
  	PyObject *t;
  	if (s == NULL || !PyString_Check(s))
  		Py_FatalError("PyString_InternInPlace: strings only please!");
! 	if (PyString_CheckInterned(s))
  		return;
  	if (interned == NULL) {
  		interned = PyDict_New();
  		if (interned == NULL)
***************
*** 3843,3873 ****
  	}
  	if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
  		Py_INCREF(t);
! 		*p = s->ob_sinterned = t;
! 		Py_DECREF(s);
  		return;
  	}
! 	/* Ensure that only true string objects appear in the intern dict,
! 	   and as the value of ob_sinterned. */
! 	if (PyString_CheckExact(s)) {
! 		t = (PyObject *)s;
! 		if (PyDict_SetItem(interned, t, t) == 0) {
! 			s->ob_sinterned = t;
! 			return;
! 		}
! 	}
! 	else {
  		t = PyString_FromStringAndSize(PyString_AS_STRING(s),
  						PyString_GET_SIZE(s));
! 		if (t != NULL) {
! 			if (PyDict_SetItem(interned, t, t) == 0) {
! 				*p = s->ob_sinterned = t;
! 				Py_DECREF(s);
! 				return;
! 			}
! 			Py_DECREF(t);
  		}
  	}
  	PyErr_Clear();
  }
  
--- 3839,3871 ----
  	}
  	if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
  		Py_INCREF(t);
! 		Py_DECREF(*p);
! 		*p = t;
  		return;
  	}
! 	/* Ensure that only true string objects appear in the intern dict */
! 	if (!PyString_CheckExact(s)) {
  		t = PyString_FromStringAndSize(PyString_AS_STRING(s),
  						PyString_GET_SIZE(s));
! 		if (t == NULL) {
! 			PyErr_Clear();
! 			return;
  		}
+ 	} else {
+ 		t = (PyObject*) s;
+ 		Py_INCREF(t);
  	}
+ 
+ 	if (PyDict_SetItem(interned, t, t) == 0) {
+ 		/* The two references in interned are not counted by
+ 		   refcnt.  The string deallocator will take care of this */
+ 		((PyObject *)t)->ob_refcnt -= 2;
+ 		((PyStringObject *)t)->ob_sinterned_flag = 1;
+ 		Py_DECREF(*p);
+ 		*p = t;
+ 		return;
+ 	}
+ 	Py_DECREF(t);
  	PyErr_Clear();
  }
  
***************
*** 3892,3919 ****
  	}
  	Py_XDECREF(nullstring);
  	nullstring = NULL;
- 	if (interned) {
- 		int pos, changed;
- 		PyObject *key, *value;
- 		do {
- 			changed = 0;
- 			pos = 0;
- 			while (PyDict_Next(interned, &pos, &key, &value)) {
- 				if (key->ob_refcnt == 2 && key == value) {
- 					PyDict_DelItem(interned, key);
- 					changed = 1;
- 				}
- 			}
- 		} while (changed);
- 	}
  }
  
- void _Py_ReleaseInternedStrings(void)
- {
- 	if (interned) {
- 		fprintf(stderr, "releasing interned strings\n");
- 		PyDict_Clear(interned);
- 		Py_DECREF(interned);
- 		interned = NULL;
- 	}
- }
--- 3890,3894 ----