diff -r 4b75cb552460 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Thu Mar 20 11:51:27 2008 +0100 +++ b/Objects/unicodeobject.c Thu Mar 20 20:25:50 2008 +0100 @@ -52,28 +52,32 @@ OF OR IN CONNECTION WITH THE USE OR PERF #include #endif -/* Limit for the Unicode object free list */ - -#define PyUnicode_MAXFREELIST 1024 - -/* Limit for the Unicode object free list stay alive optimization. +/* Number of free lists, one per unicode object size. The implementation will keep allocated Unicode memory intact for - all objects on the free list having a size less than this - limit. This reduces malloc() overhead for small Unicode objects. - - At worst this will result in PyUnicode_MAXFREELIST * - (sizeof(PyUnicodeObject) + KEEPALIVE_SIZE_LIMIT + - malloc()-overhead) bytes of unused garbage. + objects having a size less than this limit, within a certain number + of objects for each size (as defined by the CAN_SAVE macro below). Setting the limit to 0 effectively turns the feature off. - Note: This is an experimental feature ! If you get core dumps when - using Unicode objects, turn this feature off. - -*/ - -#define KEEPALIVE_SIZE_LIMIT 9 +*/ + +#define MAX_SAVED_SIZE 90 + +/* We keep lots of small objects in the free lists, but fewer larger ones. + NOTE: obj_length == 0 corresponds to strings of length MAX_SAVED_SIZE or more, + their memory buffer is freed in any case and only the PyUnicodeObject + is cached. 
+ + You shouldn't increase those numbers without first assessing the impact on + non cache-friendly situations, e.g.: + python -m timeit -s "s=open('Misc/HISTORY', 'r').read()" "s.split()" + */ + +#define CAN_SAVE(obj_length, list_size) \ + ((obj_length < 3 && list_size < 1000) \ + || (obj_length < 15 && list_size < 50) \ + || (list_size < 1)) /* Endianness switches; defaults to little endian */ @@ -105,9 +109,8 @@ extern "C" { */ static PyObject *interned; -/* Free list for Unicode objects */ -static PyUnicodeObject *free_list; -static int numfree; +/* Free lists for Unicode objects */ +static PyUnicodeObject *unicode_freelist[MAX_SAVED_SIZE]; /* The empty Unicode object is shared to improve performance. */ static PyUnicodeObject *unicode_empty; @@ -314,38 +317,30 @@ PyUnicodeObject *_PyUnicode_New(Py_ssize } /* Unicode freelist & memory allocation */ - if (free_list) { - unicode = free_list; - free_list = *(PyUnicodeObject **)unicode; - numfree--; - if (unicode->str) { - /* Keep-Alive optimization: we only upsize the buffer, - never downsize it. 
*/ - if ((unicode->length < length) && - unicode_resize(unicode, length) < 0) { - PyObject_DEL(unicode->str); - goto onError; - } - } - else { - size_t new_size = sizeof(Py_UNICODE) * ((size_t)length + 1); - unicode->str = (Py_UNICODE*) PyObject_MALLOC(new_size); - } - PyObject_INIT(unicode, &PyUnicode_Type); - } - else { - size_t new_size; + if (length > 0 && length < MAX_SAVED_SIZE + && (unicode = unicode_freelist[length])) { + _Py_NewReference(unicode); + unicode_freelist[length] = (PyUnicodeObject *) unicode->defenc; + } + else if ((unicode = unicode_freelist[0])) { + size_t new_size = sizeof(Py_UNICODE) * ((size_t)length + 1); + _Py_NewReference(unicode); + unicode_freelist[0] = (PyUnicodeObject *) unicode->defenc; + unicode->str = (Py_UNICODE*) PyObject_MALLOC(new_size); + } + else { + size_t new_size; unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type); if (unicode == NULL) return NULL; - new_size = sizeof(Py_UNICODE) * ((size_t)length + 1); - unicode->str = (Py_UNICODE*) PyObject_MALLOC(new_size); - } - + new_size = sizeof(Py_UNICODE) * ((size_t)length + 1); + unicode->str = (Py_UNICODE*) PyObject_MALLOC(new_size); + } if (!unicode->str) { - PyErr_NoMemory(); - goto onError; - } + PyErr_NoMemory(); + goto onError; + } + /* Initialize the first element to guard against cases where * the caller fails before initializing str -- unicode_resize() * reads str[0], and the Keep-Alive optimization can keep memory @@ -389,28 +384,43 @@ void unicode_dealloc(register PyUnicodeO Py_FatalError("Inconsistent interned unicode string state."); } - if (PyUnicode_CheckExact(unicode) && - numfree < PyUnicode_MAXFREELIST) { - /* Keep-Alive optimization */ - if (unicode->length >= KEEPALIVE_SIZE_LIMIT) { - PyObject_DEL(unicode->str); - unicode->str = NULL; - unicode->length = 0; - } - if (unicode->defenc) { - Py_DECREF(unicode->defenc); - unicode->defenc = NULL; - } - /* Add to free list */ - *(PyUnicodeObject **)unicode = free_list; - free_list = unicode; - numfree++; - } 
- else { - PyObject_DEL(unicode->str); - Py_XDECREF(unicode->defenc); - Py_TYPE(unicode)->tp_free((PyObject *)unicode); - } + Py_CLEAR(unicode->defenc); + + if (PyUnicode_CheckExact(unicode)) { + Py_ssize_t slot; + PyUnicodeObject *v; + if (unicode->length < MAX_SAVED_SIZE) { + slot = unicode->length; + v = unicode_freelist[slot]; + if (v && !CAN_SAVE(slot, v->length)) { + slot = 0; + v = unicode_freelist[slot]; + if (v && !CAN_SAVE(slot, v->length)) + goto release; + } + } + else { + slot = 0; + v = unicode_freelist[slot]; + if (v && !CAN_SAVE(slot, v->length)) + goto release; + } + /* Keep track of number of items stacked on the freelist */ + unicode->length = v ? v->length + 1 : 1; + unicode->defenc = (PyObject *) v; + if (slot == 0) { + /* Long strings => only cache PyUnicodeObject, not the memory + buffer. */ + PyObject_DEL(unicode->str); + unicode->str = NULL; + } + unicode_freelist[slot] = unicode; + return; + } + +release: + PyObject_DEL(unicode->str); + Py_TYPE(unicode)->tp_free((PyObject *)unicode); } int PyUnicode_Resize(PyObject **unicode, Py_ssize_t length) @@ -9120,8 +9130,6 @@ void _PyUnicode_Init(void) }; /* Init the implementation */ - free_list = NULL; - numfree = 0; unicode_empty = _PyUnicode_New(0); if (!unicode_empty) return; @@ -9144,21 +9152,21 @@ int int PyUnicode_ClearFreeList(void) { - int freelist_size = numfree; - PyUnicodeObject *u; - - for (u = free_list; u != NULL;) { - PyUnicodeObject *v = u; - u = *(PyUnicodeObject **)u; - if (v->str) - PyObject_DEL(v->str); - Py_XDECREF(v->defenc); - PyObject_Del(v); - numfree--; - } - free_list = NULL; - assert(numfree == 0); - return freelist_size; + int i, freed_objects = 0; + for (i = 0; i < MAX_SAVED_SIZE; i++) { + PyUnicodeObject *u, *v; + u = unicode_freelist[i]; + while (u != NULL) { + v = (PyUnicodeObject *) u->defenc; + if (u->str) + PyObject_DEL(u->str); + PyObject_DEL(u); + u = v; + freed_objects++; + } + unicode_freelist[i] = NULL; + } + return freed_objects; } void