Index: Include/listobject.h
===================================================================
--- Include/listobject.h	(revision 77795)
+++ Include/listobject.h	(working copy)
@@ -19,12 +19,20 @@
 extern "C" {
 #endif
 
+#define LIST_ALLOCATION_MASK (PY_SSIZE_T_MAX)
+#define LIST_FORWARD_MASK (~PY_SSIZE_T_MAX)
+
 typedef struct {
     PyObject_VAR_HEAD
     /* Vector of pointers to list elements.  list[0] is ob_item[0], etc. */
     PyObject **ob_item;
 
-    /* ob_item contains space for 'allocated' elements.  The number
+    /* allocation_bitmap
+     * most significant bit is set if excess_left exists
+     * rest of bits indicate allocated >= 0
+     * if excess_left bit is set, you find the count of excess_left in
+     * ob_item[-1]
+     * ob_item contains space for 'allocated' elements.  The number
      * currently in use is ob_size.
      * Invariants:
      *     0 <= ob_size <= allocated
@@ -35,7 +43,7 @@
      * Items must normally not be NULL, except during construction when
      * the list is not yet visible outside the function that builds it.
      */
-    Py_ssize_t allocated;
+    Py_ssize_t allocation_bitmap;
 } PyListObject;
 
 PyAPI_DATA(PyTypeObject) PyList_Type;
Index: Objects/listobject.c
===================================================================
--- Objects/listobject.c	(revision 77795)
+++ Objects/listobject.c	(working copy)
@@ -8,6 +8,66 @@
 #include <sys/types.h>		/* For size_t */
 #endif
 
+/*
+ * These macros are important to keep PyListObject compact.
+ */
+#define GET_ALLOCATION(ob) (ob->allocation_bitmap & LIST_ALLOCATION_MASK)
+
+static Py_ssize_t
+get_orphans(PyListObject *self)
+{
+	PyObject **alloc;
+
+	if (self->allocation_bitmap == -1)
+		return 0;
+
+	if (self->allocation_bitmap & LIST_FORWARD_MASK) {
+		alloc = (PyObject **)(self->ob_item[-1]);
+		return (self->ob_item - alloc);
+	}
+	return 0;
+}
+
+static void
+list_advance_front(PyListObject *self, Py_ssize_t delta)
+{
+	PyObject **alloc;
+
+	if (self->allocation_bitmap & LIST_FORWARD_MASK) {
+		alloc = (PyObject **)(self->ob_item[-1]);
+	} else {
+		alloc = self->ob_item;
+		self->allocation_bitmap |= LIST_FORWARD_MASK;
+	}
+	self->ob_item += delta;
+	self->ob_item[-1] = (PyObject *)alloc;
+}
+
+static void
+list_reclaim_orphans(PyListObject *self, Py_ssize_t delta)
+{
+	PyObject **alloc = (PyObject **)(self->ob_item[-1]);
+
+	self->ob_item -= delta;
+	if (self->ob_item > alloc) {
+		self->ob_item[-1] = (PyObject *)alloc;
+	}
+}
+
+static void
+list_compact_orphans(PyListObject *self)
+{
+	PyObject **items;
+	Py_ssize_t orphans = get_orphans(self);
+
+	self->allocation_bitmap &= LIST_ALLOCATION_MASK;
+	items = self->ob_item - orphans;
+	memmove(items, &items[orphans],
+		(Py_SIZE(self))*sizeof(PyObject *));
+	self->ob_item = items;
+}
+
+
 /* Ensure ob_item has room for at least newsize elements, and set
  * ob_size to newsize.  If newsize > ob_size on entry, the content
  * of the new slots at exit is undefined heap trash; it's the caller's
@@ -26,14 +86,24 @@
 {
 	PyObject **items;
 	size_t new_allocated;
-	Py_ssize_t allocated = self->allocated;
+	Py_ssize_t allocated = GET_ALLOCATION(self);
+	Py_ssize_t needed;
+	Py_ssize_t orphans = get_orphans(self);
 
+	if (orphans >= newsize) {
+		list_compact_orphans(self);
+		orphans = 0;
+	}
+
+	needed = newsize + orphans;
+	items = self->ob_item - orphans;
+
 	/* Bypass realloc() when a previous overallocation is large enough
 	   to accommodate the newsize.  If the newsize falls lower than half
 	   the allocated size, then proceed with the realloc() to shrink the list.
 	*/
-	if (allocated >= newsize && newsize >= (allocated >> 1)) {
-		assert(self->ob_item != NULL || newsize == 0);
+	if (allocated >= needed && needed >= (allocated >> 1)) {
+		assert(items != NULL || newsize == 0);
 		Py_SIZE(self) = newsize;
 		return 0;
 	}
@@ -45,30 +115,39 @@
 	 * system realloc().
 	 * The growth pattern is:  0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ...
 	 */
-	new_allocated = (newsize >> 3) + (newsize < 9 ? 3 : 6);
+	new_allocated = (needed >> 3) + (needed < 9 ? 3 : 6);
 
 	/* check for integer overflow */
-	if (new_allocated > PY_SIZE_MAX - newsize) {
+	if (new_allocated > PY_SIZE_MAX - needed) {
 		PyErr_NoMemory();
 		return -1;
-	} else {
-		new_allocated += newsize;
 	}
-	if (newsize == 0)
+	new_allocated += needed;
+
+	if (needed == 0)
 		new_allocated = 0;
-	items = self->ob_item;
-	if (new_allocated <= ((~(size_t)0) / sizeof(PyObject *)))
+
+	if (new_allocated <= ((~(size_t)0) / sizeof(PyObject *))) {
 		PyMem_RESIZE(items, PyObject *, new_allocated);
-	else
+	} else {
 		items = NULL;
+	}
+
 	if (items == NULL) {
 		PyErr_NoMemory();
 		return -1;
 	}
+
 	self->ob_item = items;
+	if (orphans) {
+		self->ob_item += orphans;
+		self->ob_item[-1] = (PyObject *)items;
+		self->allocation_bitmap = new_allocated | LIST_FORWARD_MASK;
+	} else {
+		self->allocation_bitmap = new_allocated;
+	}
 	Py_SIZE(self) = newsize;
-	self->allocated = new_allocated;
 	return 0;
 }
 
@@ -157,7 +236,7 @@
 		memset(op->ob_item, 0, nbytes);
 	}
 	Py_SIZE(op) = size;
-	op->allocated = size;
+	op->allocation_bitmap = size;
 	_PyObject_GC_TRACK(op);
 	return (PyObject *) op;
 }
@@ -297,6 +376,7 @@
 list_dealloc(PyListObject *op)
 {
 	Py_ssize_t i;
+	Py_ssize_t orphans;
 	PyObject_GC_UnTrack(op);
 	Py_TRASHCAN_SAFE_BEGIN(op)
 	if (op->ob_item != NULL) {
@@ -304,9 +384,11 @@
 		   There's a simple test case where somehow this reduces
 		   thrashing when a *very* large list is created and
 		   immediately deleted. */
+		orphans = get_orphans(op);
+		op->ob_item -= orphans;
 		i = Py_SIZE(op);
 		while (--i >= 0) {
-			Py_XDECREF(op->ob_item[i]);
+			Py_XDECREF(op->ob_item[i+orphans]);
 		}
 		PyMem_FREE(op->ob_item);
 	}
@@ -585,16 +667,18 @@
 list_clear(PyListObject *a)
 {
 	Py_ssize_t i;
+	Py_ssize_t orphans = get_orphans(a);
 	PyObject **item = a->ob_item;
 	if (item != NULL) {
 		/* Because XDECREF can recursively invoke operations on
 		   this list, we make it empty first. */
+		item -= orphans;
 		i = Py_SIZE(a);
 		Py_SIZE(a) = 0;
 		a->ob_item = NULL;
-		a->allocated = 0;
+		a->allocation_bitmap = 0;
 		while (--i >= 0) {
-			Py_XDECREF(item[i]);
+			Py_XDECREF(item[i + orphans]);
 		}
 		PyMem_FREE(item);
 	}
@@ -679,18 +763,35 @@
 	memcpy(recycle, &item[ilow], s);
 
 	if (d < 0) { /* Delete -d items */
-		memmove(&item[ihigh+d], &item[ihigh],
-			(Py_SIZE(a) - ihigh)*sizeof(PyObject *));
+		if (ilow == 0) {
+			list_advance_front(a, -d);
+		}
+		else {
+			/* Move right hand side backward,
+			 * even if ilow is pretty small.
+			 */
+			memmove(&item[ihigh+d], &item[ihigh],
+				(Py_SIZE(a) - ihigh)*sizeof(PyObject *));
+		}
 		list_resize(a, Py_SIZE(a) + d);
 		item = a->ob_item;
 	}
 	else if (d > 0) { /* Insert d items */
-		k = Py_SIZE(a);
-		if (list_resize(a, k+d) < 0)
-			goto Error;
-		item = a->ob_item;
-		memmove(&item[ihigh+d], &item[ihigh],
-			(k - ihigh)*sizeof(PyObject *));
+		if (ilow == 0 && d <= get_orphans(a)) {
+			/* reclaim free slots */
+			list_reclaim_orphans(a, d);
+			item = a->ob_item;
+			list_resize(a, Py_SIZE(a) + d);
+		}
+		else {
+			k = Py_SIZE(a);
+			if (list_resize(a, k+d) < 0)
+				goto Error;
+			item = a->ob_item;
+			/* Move right hand side forward */
+			memmove(&item[ihigh+d], &item[ihigh],
+				(k - ihigh)*sizeof(PyObject *));
+		}
 	}
 	for (k = 0; k < n; k++, ilow++) {
 		PyObject *w = vitem[k];
@@ -879,7 +980,7 @@
 			}
 			break;
 		}
-		if (Py_SIZE(self) < self->allocated) {
+		if (Py_SIZE(self) + get_orphans(self) < GET_ALLOCATION(self)) {
 			/* steals ref */
 			PyList_SET_ITEM(self, Py_SIZE(self), item);
 			++Py_SIZE(self);
@@ -893,7 +994,7 @@
 	}
 
 	/* Cut back result list if initial guess was too large. */
-	if (Py_SIZE(self) < self->allocated)
+	if (Py_SIZE(self) + get_orphans(self) < GET_ALLOCATION(self))
 		list_resize(self, Py_SIZE(self));  /* shrinking can't fail */
 
 	Py_DECREF(it);
@@ -2057,6 +2158,9 @@
 	PyObject *key, *value, *kvpair;
 	static char *kwlist[] = {"cmp", "key", "reverse", 0};
 
+	if (get_orphans(self))
+		list_compact_orphans(self);
+
 	assert(self != NULL);
 	assert (PyList_Check(self));
 	if (args != NULL) {
@@ -2083,12 +2187,14 @@
 	 * sorting (allowing mutations during sorting is a core-dump
 	 * factory, since ob_item may change).
 	 */
+
+
 	saved_ob_size = Py_SIZE(self);
 	saved_ob_item = self->ob_item;
-	saved_allocated = self->allocated;
+	saved_allocated = self->allocation_bitmap;
 	Py_SIZE(self) = 0;
 	self->ob_item = NULL;
-	self->allocated = -1; /* any operation will reset it to >= 0 */
+	self->allocation_bitmap = -1; /* any operation will reset it to >= 0 */
 
 	if (keyfunc != NULL) {
 		for (i=0 ; i < saved_ob_size ; i++) {
@@ -2177,7 +2283,7 @@
 		}
 	}
 
-	if (self->allocated != -1 && result != NULL) {
+	if (self->allocation_bitmap != -1 && result != NULL) {
 		/* The user mucked with the list during the sort,
 		 * and we don't already have another error to report.
 		 */
@@ -2195,7 +2301,7 @@
 	i = Py_SIZE(self);
 	Py_SIZE(self) = saved_ob_size;
 	self->ob_item = saved_ob_item;
-	self->allocated = saved_allocated;
+	self->allocation_bitmap = saved_allocated;
 	if (final_ob_item != NULL) {
 		/* we cannot use list_clear() for this because it does not
 		   guarantee that the list is really empty when it returns */
@@ -2446,9 +2552,9 @@
 
 	/* Verify list invariants established by PyType_GenericAlloc() */
 	assert(0 <= Py_SIZE(self));
-	assert(Py_SIZE(self) <= self->allocated || self->allocated == -1);
+	assert(Py_SIZE(self) <= GET_ALLOCATION(self) || self->allocation_bitmap == -1);
 	assert(self->ob_item != NULL ||
-	       self->allocated == 0 || self->allocated == -1);
+	       GET_ALLOCATION(self) == 0 || self->allocation_bitmap == -1);
 
 	/* Empty previous contents */
 	if (self->ob_item != NULL) {
@@ -2468,7 +2574,7 @@
 {
 	Py_ssize_t res;
 
-	res = sizeof(PyListObject) + self->allocated * sizeof(void*);
+	res = sizeof(PyListObject) + (GET_ALLOCATION(self) + get_orphans(self)) * sizeof(void*);
 	return PyInt_FromSsize_t(res);
 }
 
Index: Lib/test/test_list.py
===================================================================
--- Lib/test/test_list.py	(revision 77795)
+++ Lib/test/test_list.py	(working copy)
@@ -51,6 +51,15 @@
         self.assertEqual(len([0]), 1)
         self.assertEqual(len([0, 1, 2]), 3)
 
+    def test_pop_and_prepend(self):
+        # This guards against faulty optimizations on list that
+        # attempt to make pops and prepends at the beginning of the
+        # list work faster.
+        lst = [5] * 100
+        del lst[0]
+        lst.insert(0, 4)
+        self.assertEqual(lst, [4] + [5] * 99)
+
     def test_overflow(self):
         lst = [4, 5, 6, 7]
         n = int((sys.maxint*2+2) // len(lst))
Index: Lib/test/test_iter.py
===================================================================
--- Lib/test/test_iter.py	(revision 77795)
+++ Lib/test/test_iter.py	(working copy)
@@ -899,7 +899,17 @@
         except TypeError:
             pass
 
+    def test_extends(self):
+        # This test would break on an incomplete patch to listobject.c
+        def gen():
+            for i in range(500):
+                yield i
+        lst = [0] * 500
+        for i in range(240):
+            lst.pop(0)
+        lst.extend(gen())
+
 
 def test_main():
     run_unittest(TestCase)
Index: Lib/test/test_sys.py
===================================================================
--- Lib/test/test_sys.py	(revision 77795)
+++ Lib/test/test_sys.py	(working copy)
@@ -468,7 +468,7 @@
             size += self.gc_headsize
         msg = 'wrong size for %s: got %d, expected %d' \
               % (type(o), result, size)
-        self.assertEqual(result, size, msg)
+        self.assertEqual(result, size, msg + ' (%d gc)' % self.gc_headsize)
 
     def calcsize(self, fmt):
         """Wrapper around struct.calcsize which enforces the alignment of the