Index: Include/modsupport.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/modsupport.h,v retrieving revision 2.39 diff -c -r2.39 modsupport.h *** Include/modsupport.h 12 Aug 2002 07:21:57 -0000 2.39 --- Include/modsupport.h 16 Aug 2002 18:37:03 -0000 *************** *** 23,30 **** PyAPI_FUNC(int) PyModule_AddIntConstant(PyObject *, char *, long); PyAPI_FUNC(int) PyModule_AddStringConstant(PyObject *, char *, char *); ! #define PYTHON_API_VERSION 1011 ! #define PYTHON_API_STRING "1011" /* The API version is maintained (independently from the Python version) so we can detect mismatches between the interpreter and dynamically loaded modules. These are diagnosed by an error message but --- 23,30 ---- PyAPI_FUNC(int) PyModule_AddIntConstant(PyObject *, char *, long); PyAPI_FUNC(int) PyModule_AddStringConstant(PyObject *, char *, char *); ! #define PYTHON_API_VERSION 1012 ! #define PYTHON_API_STRING "1012" /* The API version is maintained (independently from the Python version) so we can detect mismatches between the interpreter and dynamically loaded modules. These are diagnosed by an error message but *************** *** 37,42 **** --- 37,45 ---- Please add a line or two to the top of this log for each API version change: + + 14-Aug-2002 GvR 1012 Changes to string object struct for + interning changes, saving 3 bytes. 17-Jul-2001 GvR 1011 Descr-branch, just to be on the safe side Index: Include/stringobject.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/stringobject.h,v retrieving revision 2.36 diff -c -r2.36 stringobject.h *** Include/stringobject.h 14 Aug 2002 07:46:22 -0000 2.36 --- Include/stringobject.h 16 Aug 2002 18:37:03 -0000 *************** *** 25,31 **** */ /* Caching the hash (ob_shash) saves recalculation of a string's hash value. ! 
Interning strings (ob_sinterned) tries to ensure that only one string object with a given value exists, so equality tests can be one pointer comparison. This is generally restricted to strings that "look like" Python identifiers, although the intern() builtin can be used to force --- 25,31 ---- */ /* Caching the hash (ob_shash) saves recalculation of a string's hash value. ! Interning strings (ob_sstate) tries to ensure that only one string object with a given value exists, so equality tests can be one pointer comparison. This is generally restricted to strings that "look like" Python identifiers, although the intern() builtin can be used to force *************** *** 35,44 **** typedef struct { PyObject_VAR_HEAD long ob_shash; ! PyObject *ob_sinterned; char ob_sval[1]; } PyStringObject; PyAPI_DATA(PyTypeObject) PyBaseString_Type; PyAPI_DATA(PyTypeObject) PyString_Type; --- 35,48 ---- typedef struct { PyObject_VAR_HEAD long ob_shash; ! char ob_sstate; char ob_sval[1]; } PyStringObject; + #define SSTATE_NOT_INTERNED 0 + #define SSTATE_INTERNED_MORTAL 1 + #define SSTATE_INTERNED_IMMORTAL 2 + PyAPI_DATA(PyTypeObject) PyBaseString_Type; PyAPI_DATA(PyTypeObject) PyString_Type; *************** *** 66,73 **** --- 70,81 ---- const char *); PyAPI_FUNC(void) PyString_InternInPlace(PyObject **); + PyAPI_FUNC(void) PyString_InternImmortal(PyObject **); PyAPI_FUNC(PyObject *) PyString_InternFromString(const char *); PyAPI_FUNC(void) _Py_ReleaseInternedStrings(void); + + /* Use only if you know it's a string */ + #define PyString_CHECK_INTERNED(op) (((PyStringObject *)(op))->ob_sstate) /* Macro, trading safety for speed */ #define PyString_AS_STRING(op) (((PyStringObject *)(op))->ob_sval) Index: Objects/classobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/classobject.c,v retrieving revision 2.160 diff -c -r2.160 classobject.c *** Objects/classobject.c 11 Jul 2002 06:23:50 -0000 2.160 --- 
Objects/classobject.c 16 Aug 2002 18:37:05 -0000 *************** *** 2300,2336 **** return 0; } ! static char * ! getclassname(PyObject *class) { PyObject *name; if (class == NULL) ! name = NULL; ! else ! name = PyObject_GetAttrString(class, "__name__"); if (name == NULL) { /* This function cannot return an exception */ PyErr_Clear(); ! return "?"; } ! if (!PyString_Check(name)) { ! Py_DECREF(name); ! return "?"; } - PyString_InternInPlace(&name); Py_DECREF(name); - return PyString_AS_STRING(name); } ! static char * ! getinstclassname(PyObject *inst) { PyObject *class; - char *name; ! if (inst == NULL) ! return "nothing"; class = PyObject_GetAttrString(inst, "__class__"); if (class == NULL) { --- 2300,2337 ---- return 0; } ! static void ! getclassname(PyObject *class, char *buf, int bufsize) { PyObject *name; + assert(bufsize > 1); + strcpy(buf, "?"); /* Default outcome */ if (class == NULL) ! return; ! name = PyObject_GetAttrString(class, "__name__"); if (name == NULL) { /* This function cannot return an exception */ PyErr_Clear(); ! return; } ! if (PyString_Check(name)) { ! strncpy(buf, PyString_AS_STRING(name), bufsize); ! buf[bufsize-1] = '\0'; } Py_DECREF(name); } ! static void ! getinstclassname(PyObject *inst, char *buf, int bufsize) { PyObject *class; ! if (inst == NULL) { ! assert(bufsize > strlen("nothing")); ! strcpy(buf, "nothing"); ! return; ! } class = PyObject_GetAttrString(inst, "__class__"); if (class == NULL) { *************** *** 2339,2347 **** class = (PyObject *)(inst->ob_type); Py_INCREF(class); } ! name = getclassname(class); Py_XDECREF(class); - return name; } static PyObject * --- 2340,2347 ---- class = (PyObject *)(inst->ob_type); Py_INCREF(class); } ! 
getclassname(class, buf, bufsize); Py_XDECREF(class); } static PyObject * *************** *** 2366,2379 **** return NULL; } if (!ok) { PyErr_Format(PyExc_TypeError, "unbound method %s%s must be called with " "%s instance as first argument " "(got %s%s instead)", PyEval_GetFuncName(func), PyEval_GetFuncDesc(func), ! getclassname(class), ! getinstclassname(self), self == NULL ? "" : " instance"); return NULL; } --- 2366,2383 ---- return NULL; } if (!ok) { + char clsbuf[256]; + char instbuf[256]; + getclassname(class, clsbuf, sizeof(clsbuf)); + getinstclassname(self, instbuf, sizeof(instbuf)); PyErr_Format(PyExc_TypeError, "unbound method %s%s must be called with " "%s instance as first argument " "(got %s%s instead)", PyEval_GetFuncName(func), PyEval_GetFuncDesc(func), ! clsbuf, ! instbuf, self == NULL ? "" : " instance"); return NULL; } Index: Objects/dictobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/dictobject.c,v retrieving revision 2.128 diff -c -r2.128 dictobject.c *** Objects/dictobject.c 17 Jul 2002 16:30:37 -0000 2.128 --- Objects/dictobject.c 16 Aug 2002 18:37:07 -0000 *************** *** 511,525 **** } mp = (dictobject *)op; if (PyString_CheckExact(key)) { ! if (((PyStringObject *)key)->ob_sinterned != NULL) { ! key = ((PyStringObject *)key)->ob_sinterned; ! hash = ((PyStringObject *)key)->ob_shash; ! } ! else { ! hash = ((PyStringObject *)key)->ob_shash; ! if (hash == -1) ! hash = PyObject_Hash(key); ! } } else { hash = PyObject_Hash(key); --- 511,519 ---- } mp = (dictobject *)op; if (PyString_CheckExact(key)) { ! hash = ((PyStringObject *)key)->ob_shash; ! if (hash == -1) ! 
hash = PyObject_Hash(key); } else { hash = PyObject_Hash(key); Index: Objects/stringobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/stringobject.c,v retrieving revision 2.179 diff -c -r2.179 stringobject.c *** Objects/stringobject.c 14 Aug 2002 18:38:27 -0000 2.179 --- Objects/stringobject.c 16 Aug 2002 18:37:09 -0000 *************** *** 15,20 **** --- 15,31 ---- static PyStringObject *characters[UCHAR_MAX + 1]; static PyStringObject *nullstring; + /* This dictionary holds all interned strings. Note that references to + strings in this dictionary are *not* counted in the string's ob_refcnt. + When the interned string reaches a refcnt of 0 the string deallocation + function will delete the reference from this dictionary. + + Another way to look at this is to say that the actual reference + count of a string is: s->ob_refcnt + (s->ob_sstate?2:0) + */ + static PyObject *interned; + + /* For both PyString_FromString() and PyString_FromStringAndSize(), the parameter `size' denotes number of characters to allocate, not counting any *************** *** 69,75 **** return PyErr_NoMemory(); PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; ! op->ob_sinterned = NULL; if (str != NULL) memcpy(op->ob_sval, str, size); op->ob_sval[size] = '\0'; --- 80,86 ---- return PyErr_NoMemory(); PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; ! op->ob_sstate = SSTATE_NOT_INTERNED; if (str != NULL) memcpy(op->ob_sval, str, size); op->ob_sval[size] = '\0'; *************** *** 125,131 **** return PyErr_NoMemory(); PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; ! op->ob_sinterned = NULL; memcpy(op->ob_sval, str, size+1); /* share short strings */ if (size == 0) { --- 136,142 ---- return PyErr_NoMemory(); PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; ! 
op->ob_sstate = SSTATE_NOT_INTERNED; memcpy(op->ob_sval, str, size+1); /* share short strings */ if (size == 0) { *************** *** 486,491 **** --- 497,520 ---- static void string_dealloc(PyObject *op) { + switch (PyString_CHECK_INTERNED(op)) { + case SSTATE_NOT_INTERNED: + break; + + case SSTATE_INTERNED_MORTAL: + /* revive dead object temporarily for DelItem */ + op->ob_refcnt = 3; + if (PyDict_DelItem(interned, op) != 0) + Py_FatalError( + "deletion of interned string failed"); + break; + + case SSTATE_INTERNED_IMMORTAL: + Py_FatalError("Immortal interned string died."); + + default: + Py_FatalError("Inconsistent interned string state."); + } op->ob_type->tp_free(op); } *************** 891 return PyErr_NoMemory(); PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; ! op->ob_sinterned = NULL; memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size); memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size); op->ob_sval[size] = '\0'; --- 914,920 ---- return PyErr_NoMemory(); PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; ! op->ob_sstate = SSTATE_NOT_INTERNED; memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size); memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size); op->ob_sval[size] = '\0'; *************** 934 return PyErr_NoMemory(); PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; ! op->ob_sinterned = NULL; for (i = 0; i < size; i += a->ob_size) memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size); op->ob_sval[size] = '\0'; --- 957,963 ---- return PyErr_NoMemory(); PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; ! 
op->ob_sstate = SSTATE_NOT_INTERNED; for (i = 0; i < size; i += a->ob_size) memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size); op->ob_sval[size] = '\0'; *************** 1101 if (a->ob_shash != -1) return a->ob_shash; - if (a->ob_sinterned != NULL) - return (a->ob_shash = - ((PyStringObject *)(a->ob_sinterned))->ob_shash); len = a->ob_size; p = (unsigned char *) a->ob_sval; x = *p << 7; --- 1122,1127 ---- *************** 3074 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1); ((PyStringObject *)pnew)->ob_shash = ((PyStringObject *)tmp)->ob_shash; ! ((PyStringObject *)pnew)->ob_sinterned = ! ((PyStringObject *)tmp)->ob_sinterned; } Py_DECREF(tmp); return pnew; --- 3093,3099 ---- memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1); ((PyStringObject *)pnew)->ob_shash = ((PyStringObject *)tmp)->ob_shash; ! ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED; } Py_DECREF(tmp); return pnew; *************** 4054 return NULL; } - - - /* This dictionary will leak at PyString_Fini() time. That's acceptable - * because PyString_Fini() specifically frees interned strings that are - * only referenced by this dictionary. The CVS log entry for revision 2.45 - * says: - * - * Change the Fini function to only remove otherwise unreferenced - * strings from the interned table. There are references in - * hard-to-find static variables all over the interpreter, and it's not - * worth trying to get rid of all those; but "uninterning" isn't fair - * either and may cause subtle failures later -- so we have to keep them - * in the interned table. - */ - static PyObject *interned; - void PyString_InternInPlace(PyObject **p) { register PyStringObject *s = (PyStringObject *)(*p); PyObject *t; if (s == NULL || !PyString_Check(s)) ! Py_FatalError("PyString_InternInPlace: strings only please!"); ! if ((t = s->ob_sinterned) != NULL) { ! if (t == (PyObject *)s) ! return; ! Py_INCREF(t); ! *p = t; ! 
Py_DECREF(s); return; - } if (interned == NULL) { interned = PyDict_New(); ! if (interned == NULL) return; } if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) { Py_INCREF(t); ! *p = s->ob_sinterned = t; ! Py_DECREF(s); return; } ! /* Ensure that only true string objects appear in the intern dict, ! and as the value of ob_sinterned. */ ! if (PyString_CheckExact(s)) { ! t = (PyObject *)s; ! if (PyDict_SetItem(interned, t, t) == 0) { ! s->ob_sinterned = t; ! return; ! } ! } ! else { t = PyString_FromStringAndSize(PyString_AS_STRING(s), PyString_GET_SIZE(s)); ! if (t != NULL) { ! if (PyDict_SetItem(interned, t, t) == 0) { ! *p = s->ob_sinterned = t; ! Py_DECREF(s); ! return; ! } ! Py_DECREF(t); } } PyErr_Clear(); } PyObject * PyString_InternFromString(const char *cp) --- 4008,4071 ---- return NULL; } void PyString_InternInPlace(PyObject **p) { register PyStringObject *s = (PyStringObject *)(*p); PyObject *t; if (s == NULL || !PyString_Check(s)) ! Py_FatalError("PyString_InternInPlace: strings only please!"); ! if (PyString_CHECK_INTERNED(s)) return; if (interned == NULL) { interned = PyDict_New(); ! if (interned == NULL) { ! PyErr_Clear(); /* Don't leave an exception */ return; + } } if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) { Py_INCREF(t); ! Py_DECREF(*p); ! *p = t; return; } ! /* Ensure that only true string objects appear in the intern dict */ ! if (!PyString_CheckExact(s)) { t = PyString_FromStringAndSize(PyString_AS_STRING(s), PyString_GET_SIZE(s)); ! if (t == NULL) { ! PyErr_Clear(); ! return; } + } else { + t = (PyObject*) s; + Py_INCREF(t); + } + + if (PyDict_SetItem(interned, t, t) == 0) { + /* The two references in interned are not counted by + refcnt. 
The string deallocator will take care of this */ + ((PyObject *)t)->ob_refcnt-=2; + PyString_CHECK_INTERNED(t) = SSTATE_INTERNED_MORTAL; + Py_DECREF(*p); + *p = t; + return; } + Py_DECREF(t); PyErr_Clear(); } + void + PyString_InternImmortal(PyObject **p) + { + PyString_InternInPlace(p); + if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) { + PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL; + Py_INCREF(*p); + } + } + PyObject * PyString_InternFromString(const char *cp) *************** *** 4070,4097 **** } Py_XDECREF(nullstring); nullstring = NULL; - if (interned) { - int pos, changed; - PyObject *key, *value; - do { - changed = 0; - pos = 0; - while (PyDict_Next(interned, &pos, &key, &value)) { - if (key->ob_refcnt == 2 && key == value) { - PyDict_DelItem(interned, key); - changed = 1; - } - } - } while (changed); - } } void _Py_ReleaseInternedStrings(void) { ! if (interned) { ! fprintf(stderr, "releasing interned strings\n"); ! PyDict_Clear(interned); ! Py_DECREF(interned); ! interned = NULL; } } --- 4087,4134 ---- } Py_XDECREF(nullstring); nullstring = NULL; } void _Py_ReleaseInternedStrings(void) { ! PyObject *keys; ! PyStringObject *s; ! int i, n; ! ! if (interned == NULL || !PyDict_Check(interned)) ! return; ! keys = PyDict_Keys(interned); ! if (keys == NULL || !PyList_Check(keys)) { ! PyErr_Clear(); ! return; ! } ! ! /* Since _Py_ReleaseInternedStrings() is intended to help a leak ! detector, interned strings are not forcibly deallocated; rather, we ! give them their stolen references back, and then clear and DECREF ! the interned dict. */ ! ! fprintf(stderr, "releasing interned strings\n"); ! n = PyList_GET_SIZE(keys); ! for (i = 0; i < n; i++) { ! s = (PyStringObject *) PyList_GET_ITEM(keys, i); ! switch (s->ob_sstate) { ! case SSTATE_NOT_INTERNED: ! /* XXX Shouldn't happen */ ! break; ! case SSTATE_INTERNED_IMMORTAL: ! s->ob_refcnt += 1; ! break; ! case SSTATE_INTERNED_MORTAL: ! s->ob_refcnt += 2; ! break; ! default: ! 
Py_FatalError("Inconsistent interned string state."); ! } ! s->ob_sstate = SSTATE_NOT_INTERNED; } + Py_DECREF(keys); + PyDict_Clear(interned); + Py_DECREF(interned); + interned = NULL; }