Index: Include/listobject.h =================================================================== --- Include/listobject.h (revision 77751) +++ Include/listobject.h (working copy) @@ -36,6 +36,7 @@ * the list is not yet visible outside the function that builds it. */ Py_ssize_t allocated; + Py_ssize_t orphans; } PyListObject; PyAPI_DATA(PyTypeObject) PyList_Type; Index: Objects/listobject.c =================================================================== --- Objects/listobject.c (revision 77751) +++ Objects/listobject.c (working copy) @@ -27,13 +27,25 @@ PyObject **items; size_t new_allocated; Py_ssize_t allocated = self->allocated; + Py_ssize_t needed; + if (self->orphans >= newsize) { + items = self->ob_item - self->orphans; + memmove(items, &items[self->orphans], + (newsize)*sizeof(PyObject *)); + self->ob_item = items; + self->orphans = 0; + } + + needed = newsize + self->orphans; + items = self->ob_item - self->orphans; + /* Bypass realloc() when a previous overallocation is large enough to accommodate the newsize. If the newsize falls lower than half the allocated size, then proceed with the realloc() to shrink the list. */ - if (allocated >= newsize && newsize >= (allocated >> 1)) { - assert(self->ob_item != NULL || newsize == 0); + if (allocated >= needed && needed >= (allocated >> 1)) { + assert(items != NULL || newsize == 0); Py_SIZE(self) = newsize; return 0; } @@ -45,28 +57,32 @@ * system realloc(). * The growth pattern is: 0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ... */ - new_allocated = (newsize >> 3) + (newsize < 9 ? 3 : 6); + new_allocated = (needed >> 3) + (needed < 9 ? 3 : 6); /* check for integer overflow */ - if (new_allocated > PY_SIZE_MAX - newsize) { + if (new_allocated > PY_SIZE_MAX - needed) { PyErr_NoMemory(); return -1; } else { - new_allocated += newsize; + new_allocated += needed; } - if (newsize == 0) + if (needed == 0) new_allocated = 0; - items = self->ob_item; - if (new_allocated <= ((~(size_t)0) / sizeof(PyObject *))) + /* + fprintf(stderr, "ob_item: %p", self->ob_item); + fprintf(stderr, "items: %p", items); + */ + if (new_allocated <= ((~(size_t)0) / sizeof(PyObject *))) { PyMem_RESIZE(items, PyObject *, new_allocated); + } else items = NULL; if (items == NULL) { PyErr_NoMemory(); return -1; } - self->ob_item = items; + self->ob_item = items + self->orphans; Py_SIZE(self) = newsize; self->allocated = new_allocated; return 0; @@ -158,6 +174,7 @@ } Py_SIZE(op) = size; op->allocated = size; + op->orphans = 0; _PyObject_GC_TRACK(op); return (PyObject *) op; } @@ -304,9 +321,10 @@ There's a simple test case where somehow this reduces thrashing when a *very* large list is created and immediately deleted. */ + op->ob_item -= op->orphans; i = Py_SIZE(op); while (--i >= 0) { - Py_XDECREF(op->ob_item[i]); + Py_XDECREF(op->ob_item[i+op->orphans]); } PyMem_FREE(op->ob_item); } @@ -548,12 +566,13 @@ if (item != NULL) { /* Because XDECREF can recursively invoke operations on this list, we make it empty first. */ + item -= a->orphans; i = Py_SIZE(a); Py_SIZE(a) = 0; a->ob_item = NULL; a->allocated = 0; while (--i >= 0) { - Py_XDECREF(item[i]); + Py_XDECREF(item[i + a->orphans]); } PyMem_FREE(item); } @@ -638,18 +657,32 @@ memcpy(recycle, &item[ilow], s); if (d < 0) { /* Delete -d items */ - memmove(&item[ihigh+d], &item[ihigh], - (Py_SIZE(a) - ihigh)*sizeof(PyObject *)); + if (ilow == 0) { + a->orphans += (-1 * d); + a->ob_item += (-1 * d); + } + else { + memmove(&item[ihigh+d], &item[ihigh], + (Py_SIZE(a) - ihigh)*sizeof(PyObject *)); + } list_resize(a, Py_SIZE(a) + d); item = a->ob_item; } else if (d > 0) { /* Insert d items */ - k = Py_SIZE(a); - if (list_resize(a, k+d) < 0) - goto Error; - item = a->ob_item; - memmove(&item[ihigh+d], &item[ihigh], - (k - ihigh)*sizeof(PyObject *)); + if (ilow == 0 && d <= a->orphans) { + a->orphans += (-1 * d); + a->ob_item += (-1 * d); + item = a->ob_item; + list_resize(a, Py_SIZE(a) + d); + } + else { + k = Py_SIZE(a); + if (list_resize(a, k+d) < 0) + goto Error; + item = a->ob_item; + memmove(&item[ihigh+d], &item[ihigh], + (k - ihigh)*sizeof(PyObject *)); + } } for (k = 0; k < n; k++, ilow++) { PyObject *w = vitem[k]; @@ -838,7 +871,7 @@ } break; } - if (Py_SIZE(self) < self->allocated) { + if (Py_SIZE(self) + self->orphans < self->allocated) { /* steals ref */ PyList_SET_ITEM(self, Py_SIZE(self), item); ++Py_SIZE(self); @@ -852,7 +885,7 @@ } /* Cut back result list if initial guess was too large. */ - if (Py_SIZE(self) < self->allocated) + if (Py_SIZE(self) + self->orphans < self->allocated) list_resize(self, Py_SIZE(self)); /* shrinking can't fail */ Py_DECREF(it); @@ -2290,7 +2323,7 @@ { Py_ssize_t res; - res = sizeof(PyListObject) + self->allocated * sizeof(void*); + res = sizeof(PyListObject) + (self->allocated + self->orphans) * sizeof(void*); return PyLong_FromSsize_t(res); } Index: Lib/test/test_list.py =================================================================== --- Lib/test/test_list.py (revision 77751) +++ Lib/test/test_list.py (working copy) @@ -51,6 +51,15 @@ self.assertEqual(len([0]), 1) self.assertEqual(len([0, 1, 2]), 3) + def test_pop_and_prepend(self): + # This guards against faulty optimizations on list that + # attempt to makes pops and prepends at the beginning of the + # list work faster. + lst = [5] * 100 + del lst[0] + lst.insert(0, 4) + self.assertEqual(lst, [4] + [5] * 99) + def test_overflow(self): lst = [4, 5, 6, 7] n = int((sys.maxsize*2+2) // len(lst)) Index: Lib/test/test_iter.py =================================================================== --- Lib/test/test_iter.py (revision 77751) +++ Lib/test/test_iter.py (working copy) @@ -875,7 +875,18 @@ except TypeError: pass + def test_extends(self): + # This test would break on an incomplete patch to listobject.c + def gen(): + for i in range(500): + yield i + lst = [0] * 500 + for i in range(240): + lst.pop(0) + lst.extend(gen()) + + def test_main(): run_unittest(TestCase) Index: Lib/test/test_sys.py =================================================================== --- Lib/test/test_sys.py (revision 77751) +++ Lib/test/test_sys.py (working copy) @@ -515,7 +515,7 @@ # bool objects are not gc tracked self.assertEqual(sys.getsizeof(True), size(vh) + self.longdigit) # but lists are - self.assertEqual(sys.getsizeof([]), size(vh + 'PP') + gc_header_size) + self.assertEqual(sys.getsizeof([]), size(vh + 'PPi') + gc_header_size) def test_default(self): h = self.header @@ -638,7 +638,7 @@ # list samples = [[], [1,2,3], ['1', '2', '3']] for sample in samples: - check(sample, size(vh + 'PP') + len(sample)*self.P) + check(sample, size(vh + 'PPi') + len(sample)*self.P) # sortwrapper (list) # XXX # cmpwrapper (list)