diff -r 02e71956b124 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Wed Jan 30 13:01:19 2008 +0100 +++ b/Objects/unicodeobject.c Wed Jan 30 16:49:00 2008 +0100 @@ -52,28 +52,32 @@ OF OR IN CONNECTION WITH THE USE OR PERF #include #endif -/* Limit for the Unicode object free list */ - -#define MAX_UNICODE_FREELIST_SIZE 1024 - -/* Limit for the Unicode object free list stay alive optimization. +/* Number of free lists, one per unicode object size. The implementation will keep allocated Unicode memory intact for - all objects on the free list having a size less than this - limit. This reduces malloc() overhead for small Unicode objects. - - At worst this will result in MAX_UNICODE_FREELIST_SIZE * - (sizeof(PyUnicodeObject) + KEEPALIVE_SIZE_LIMIT + - malloc()-overhead) bytes of unused garbage. + objects having a size less than this limit, within a certain number + of objects for each size (as defined by the CAN_SAVE macro below). Setting the limit to 0 effectively turns the feature off. - Note: This is an experimental feature ! If you get core dumps when - using Unicode objects, turn this feature off. - -*/ - -#define KEEPALIVE_SIZE_LIMIT 9 +*/ + +#define MAX_SAVED_SIZE 90 + +/* We keep lots of small objects in the free lists, but fewer larger ones. + NOTE: obj_length == 0 selects the catch-all list (slot 0), used for + strings of length >= MAX_SAVED_SIZE and for strings whose own + list is full; the buffer is always freed, only the PyUnicodeObject is cached. 
+ + You shouldn't increase those numbers without first assessing the impact on + non cache-friendly situations, e.g.: + python -m timeit -s "s=open('Misc/HISTORY', 'r').read()" "s.split()" + CAN_SAVE evaluates its arguments more than once: pass side-effect-free expressions. */ + +#define CAN_SAVE(obj_length, list_size) \ + (((obj_length) < 3 && (list_size) < 1000) \ + || ((obj_length) < 15 && (list_size) < 50) \ + || ((list_size) < 1)) /* Endianness switches; defaults to little endian */ @@ -105,9 +109,8 @@ extern "C" { */ static PyObject *interned; -/* Free list for Unicode objects */ -static PyUnicodeObject *unicode_freelist; -static int unicode_freelist_size; +/* Free lists for Unicode objects */ +static PyUnicodeObject *unicode_freelist[MAX_SAVED_SIZE]; /* The empty Unicode object is shared to improve performance. */ static PyUnicodeObject *unicode_empty; @@ -313,35 +316,27 @@ PyUnicodeObject *_PyUnicode_New(Py_ssize } /* Unicode freelist & memory allocation */ - if (unicode_freelist) { - unicode = unicode_freelist; - unicode_freelist = *(PyUnicodeObject **)unicode; - unicode_freelist_size--; - if (unicode->str) { - /* Keep-Alive optimization: we only upsize the buffer, - never downsize it. 
*/ - if ((unicode->length < length) && - unicode_resize(unicode, length) < 0) { - PyMem_DEL(unicode->str); - goto onError; - } - } - else { - unicode->str = PyMem_NEW(Py_UNICODE, length + 1); - } - PyObject_INIT(unicode, &PyUnicode_Type); + if (length > 0 && length < MAX_SAVED_SIZE + && (unicode = unicode_freelist[length])) { + _Py_NewReference(unicode); + unicode_freelist[length] = (PyUnicodeObject *) unicode->defenc; + } + else if ((unicode = unicode_freelist[0])) { + _Py_NewReference(unicode); + unicode_freelist[0] = (PyUnicodeObject *) unicode->defenc; + unicode->str = PyMem_NEW(Py_UNICODE, length + 1); } else { unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type); if (unicode == NULL) return NULL; - unicode->str = PyMem_NEW(Py_UNICODE, length + 1); - } - + unicode->str = PyMem_NEW(Py_UNICODE, length + 1); + } if (!unicode->str) { - PyErr_NoMemory(); - goto onError; - } + PyErr_NoMemory(); + goto onError; + } + /* Initialize the first element to guard against cases where * the caller fails before initializing str -- unicode_resize() * reads str[0], and the Keep-Alive optimization can keep memory @@ -357,7 +352,7 @@ PyUnicodeObject *_PyUnicode_New(Py_ssize unicode->defenc = NULL; return unicode; - onError: +onError: _Py_ForgetReference((PyObject *)unicode); PyObject_Del(unicode); return NULL; @@ -385,28 +380,43 @@ void unicode_dealloc(register PyUnicodeO Py_FatalError("Inconsistent interned unicode string state."); } - if (PyUnicode_CheckExact(unicode) && - unicode_freelist_size < MAX_UNICODE_FREELIST_SIZE) { - /* Keep-Alive optimization */ - if (unicode->length >= KEEPALIVE_SIZE_LIMIT) { - PyMem_DEL(unicode->str); - unicode->str = NULL; - unicode->length = 0; - } - if (unicode->defenc) { - Py_DECREF(unicode->defenc); - unicode->defenc = NULL; - } - /* Add to free list */ - *(PyUnicodeObject **)unicode = unicode_freelist; - unicode_freelist = unicode; - unicode_freelist_size++; - } - else { - PyMem_DEL(unicode->str); - Py_XDECREF(unicode->defenc); - 
Py_TYPE(unicode)->tp_free((PyObject *)unicode); - } + Py_CLEAR(unicode->defenc); + + if (PyUnicode_CheckExact(unicode)) { + Py_ssize_t slot; + PyUnicodeObject *v; + if (unicode->length < MAX_SAVED_SIZE) { + slot = unicode->length; + v = unicode_freelist[slot]; + if (v && !CAN_SAVE(slot, v->length)) { + slot = 0; + v = unicode_freelist[slot]; + if (v && !CAN_SAVE(slot, v->length)) + goto release; + } + } + else { + slot = 0; + v = unicode_freelist[slot]; + if (v && !CAN_SAVE(slot, v->length)) + goto release; + } + /* Keep track of number of items stacked on the freelist */ + unicode->length = v ? v->length + 1 : 1; + unicode->defenc = (PyObject *) v; + if (slot == 0) { + /* Long strings => only cache PyUnicodeObject, not the memory + buffer. */ + PyMem_DEL(unicode->str); + unicode->str = NULL; + } + unicode_freelist[slot] = unicode; + return; + } + +release: + PyMem_DEL(unicode->str); + Py_TYPE(unicode)->tp_free((PyObject *)unicode); } int PyUnicode_Resize(PyObject **unicode, Py_ssize_t length) @@ -9090,8 +9100,6 @@ void _PyUnicode_Init(void) }; /* Init the implementation */ - unicode_freelist = NULL; - unicode_freelist_size = 0; unicode_empty = _PyUnicode_New(0); if (!unicode_empty) return; @@ -9117,26 +9125,28 @@ _PyUnicode_Fini(void) PyUnicodeObject *u; int i; - Py_XDECREF(unicode_empty); - unicode_empty = NULL; + Py_CLEAR(unicode_empty); for (i = 0; i < 256; i++) { - if (unicode_latin1[i]) { - Py_DECREF(unicode_latin1[i]); - unicode_latin1[i] = NULL; - } - } - - for (u = unicode_freelist; u != NULL;) { - PyUnicodeObject *v = u; - u = *(PyUnicodeObject **)u; - if (v->str) - PyMem_DEL(v->str); - Py_XDECREF(v->defenc); - PyObject_Del(v); - } - unicode_freelist = NULL; - unicode_freelist_size = 0; + if (unicode_latin1[i]) { + Py_CLEAR(unicode_latin1[i]); + } + } + + for (i = 0; i < MAX_SAVED_SIZE; i++) { + PyUnicodeObject *v; + u = unicode_freelist[i]; + while (u != NULL) { + v = (PyUnicodeObject *) u->defenc; + if (u->str) + PyMem_DEL(u->str); + PyObject_Del(u); +
u = v; + } + /* Every node of this list was just freed: reset the head so that + a re-initialized interpreter does not reuse dangling pointers. */ + unicode_freelist[i] = NULL; + } } void