Track fewer tuples in the cyclic GC and do full collections less frequently.

- Include/objimpl.h: add _PyObject_GC_IS_TRACKED().
- Objects/tupleobject.c, Objects/abstract.c, Python/ceval.c,
  Python/modsupport.c: untrack a freshly built, non-empty tuple when none of
  its items is a GC-tracked object; add _PyTuple_Optimize() to do the same
  for an existing tuple.
- Modules/gcmodule.c: skip a full collection while
  long_lived_pending < long_lived_total / 4 (see issue #4074).

diff -r d9c555047fcc Include/objimpl.h
--- a/Include/objimpl.h	Sun Dec 14 12:09:40 2008 +0100
+++ b/Include/objimpl.h	Wed Dec 17 15:53:18 2008 +0100
@@ -285,6 +285,12 @@ extern PyGC_Head *_PyGC_generation0;
 	g->gc.gc_next = NULL; \
     } while (0);
 
+/* True if the object is tracked by the GC. This can be useful to implement
+   some optimizations. */
+#define _PyObject_GC_IS_TRACKED(o) \
+	((_Py_AS_GC(o))->gc.gc_refs != _PyGC_REFS_UNTRACKED)
+
+
 PyAPI_FUNC(PyObject *) _PyObject_GC_Malloc(size_t);
 PyAPI_FUNC(PyObject *) _PyObject_GC_New(PyTypeObject *);
 PyAPI_FUNC(PyVarObject *) _PyObject_GC_NewVar(PyTypeObject *, Py_ssize_t);
diff -r d9c555047fcc Include/tupleobject.h
--- a/Include/tupleobject.h	Sun Dec 14 12:09:40 2008 +0100
+++ b/Include/tupleobject.h	Wed Dec 17 15:53:18 2008 +0100
@@ -44,6 +44,7 @@ PyAPI_FUNC(int) PyTuple_SetItem(PyObject
 PyAPI_FUNC(PyObject *) PyTuple_GetSlice(PyObject *, Py_ssize_t, Py_ssize_t);
 PyAPI_FUNC(int) _PyTuple_Resize(PyObject **, Py_ssize_t);
 PyAPI_FUNC(PyObject *) PyTuple_Pack(Py_ssize_t, ...);
+PyAPI_FUNC(void) _PyTuple_Optimize(PyObject *);
 
 /* Macro, trading safety for speed */
 #define PyTuple_GET_ITEM(op, i) (((PyTupleObject *)(op))->ob_item[i])
diff -r d9c555047fcc Modules/gcmodule.c
--- a/Modules/gcmodule.c	Sun Dec 14 12:09:40 2008 +0100
+++ b/Modules/gcmodule.c	Wed Dec 17 15:53:18 2008 +0100
@@ -63,6 +63,22 @@ static PyObject *gc_str = NULL;
 
 /* Python string used to look for __del__ attribute. */
 static PyObject *delstr = NULL;
+/* This is the number of objects that survived the last full collection. It
+   approximates the number of long lived objects tracked by the GC. */
+static Py_ssize_t long_lived_total = 0;
+/* This is the number of objects that survived the last "before-full"
+   collection, and are awaiting a full collection for the first time.
+
+   When there are many long-lived objects, the ratio
+     long_lived_pending / long_lived_total
+   is used to decide when to trigger a full collection
+   (in collect_generations()).  Before Python 2.7, not doing so could lead
+   to exponential performance degradation when keeping lots (e.g. millions)
+   of GC-enabled objects in memory.
+
+*/
+static Py_ssize_t long_lived_pending = 0;
+
 /* set for debugging information */
 #define DEBUG_STATS		(1<<0) /* print collection statistics */
 #define DEBUG_COLLECTABLE	(1<<1) /* print collectable objects */
@@ -817,8 +833,16 @@ collect(int generation)
 	move_unreachable(young, &unreachable);
 
 	/* Move reachable objects to next generation. */
-	if (young != old)
+	if (young != old) {
+		if (generation == NUM_GENERATIONS - 2) {
+			long_lived_pending += gc_list_size(young);
+		}
 		gc_list_merge(young, old);
+	}
+	else {
+		long_lived_pending = 0;
+		long_lived_total = gc_list_size(young);
+	}
 
 	/* All objects in unreachable are trash, but objects reachable from
 	 * finalizers can't safely be deleted.  Python programmers should take
@@ -918,6 +942,13 @@ collect_generations(void)
 	 * generations younger than it will be collected. */
 	for (i = NUM_GENERATIONS-1; i >= 0; i--) {
 		if (generations[i].count > generations[i].threshold) {
+			/* If there are many long-lived objects, use MvL's
+			   heuristic to do full collections less frequently.
+			   See issue #4074.
+			*/
+			if (i == NUM_GENERATIONS - 1
+			    && long_lived_pending < long_lived_total / 4)
+				continue;
 			n = collect(i);
 			break;
 		}
diff -r d9c555047fcc Objects/abstract.c
--- a/Objects/abstract.c	Sun Dec 14 12:09:40 2008 +0100
+++ b/Objects/abstract.c	Wed Dec 17 15:53:18 2008 +0100
@@ -2115,7 +2115,7 @@ PySequence_Tuple(PyObject *v)
 	PyObject *it;  /* iter(v) */
 	Py_ssize_t n;  /* guess for result tuple size */
 	PyObject *result;
-	Py_ssize_t j;
+	Py_ssize_t j, nb_tracked;
 
 	if (v == NULL)
 		return null_error();
@@ -2144,13 +2144,15 @@ PySequence_Tuple(PyObject *v)
 		goto Fail;
 
 	/* Fill the tuple. */
-	for (j = 0; ; ++j) {
+	for (j = 0, nb_tracked = 0; ; ++j) {
 		PyObject *item = PyIter_Next(it);
 		if (item == NULL) {
 			if (PyErr_Occurred())
 				goto Fail;
 			break;
 		}
+		nb_tracked += (PyObject_IS_GC(item) &&
+			       _PyObject_GC_IS_TRACKED(item));
 		if (j >= n) {
 			Py_ssize_t oldn = n;
 			/* The over-allocation strategy can grow a bit faster
@@ -2180,6 +2182,8 @@ PySequence_Tuple(PyObject *v)
 	    _PyTuple_Resize(&result, j) != 0)
 		goto Fail;
 
+	if (j && !nb_tracked)
+		_PyObject_GC_UNTRACK(result);
 	Py_DECREF(it);
 	return result;
 
diff -r d9c555047fcc Objects/tupleobject.c
--- a/Objects/tupleobject.c	Sun Dec 14 12:09:40 2008 +0100
+++ b/Objects/tupleobject.c	Wed Dec 17 15:53:18 2008 +0100
@@ -131,10 +131,36 @@ PyTuple_SetItem(register PyObject *op, r
 	return 0;
 }
 
+/* Stop tracking the tuple _op in the GC if none of its items is itself
+   tracked by the GC.  Does nothing if _op is not an exact tuple, is already
+   untracked, or is empty.  A tuple with a NULL item is left tracked
+   (NOTE(review): presumably it is still being filled in by its creator). */
+void
+_PyTuple_Optimize(PyObject *_op)
+{
+	PyTupleObject *op;
+	Py_ssize_t i, n;
+	if (!PyTuple_CheckExact(_op) || !_PyObject_GC_IS_TRACKED(_op))
+		return;
+	op = (PyTupleObject *) _op;
+	n = Py_SIZE(op);
+	if (!n)
+		return;
+	for (i = 0; i < n; i++) {
+		PyObject *elt = op->ob_item[i];
+		/* Check for NULL first: a NULL slot must never be handed to
+		   PyObject_IS_GC(), which inspects the object's type. */
+		if (elt == NULL ||
+		    (PyObject_IS_GC(elt) && _PyObject_GC_IS_TRACKED(elt)))
+			return;
+	}
+	_PyObject_GC_UNTRACK(_op);
+}
+
 PyObject *
 PyTuple_Pack(Py_ssize_t n, ...)
 {
-	Py_ssize_t i;
+	Py_ssize_t i, nb_tracked;
 	PyObject *o;
 	PyObject *result;
 	PyObject **items;
@@ -145,11 +171,15 @@ PyTuple_Pack(Py_ssize_t n, ...)
 	if (result == NULL)
 		return NULL;
 	items = ((PyTupleObject *)result)->ob_item;
-	for (i = 0; i < n; i++) {
+	for (i = 0, nb_tracked = 0; i < n; i++) {
 		o = va_arg(vargs, PyObject *);
 		Py_INCREF(o);
 		items[i] = o;
+		nb_tracked += (PyObject_IS_GC(o)
+			       && _PyObject_GC_IS_TRACKED(o));
 	}
+	if (n && !nb_tracked)
+		_PyObject_GC_UNTRACK(result);
 	va_end(vargs);
 	return result;
 }
diff -r d9c555047fcc Python/ceval.c
--- a/Python/ceval.c	Sun Dec 14 12:09:40 2008 +0100
+++ b/Python/ceval.c	Wed Dec 17 15:53:18 2008 +0100
@@ -2014,9 +2014,19 @@ PyEval_EvalFrameEx(PyFrameObject *f, int
 		case BUILD_TUPLE:
 			x = PyTuple_New(oparg);
 			if (x != NULL) {
-				for (; --oparg >= 0;) {
-					w = POP();
-					PyTuple_SET_ITEM(x, oparg, w);
+				/* Can't call _PyObject_GC_UNTRACK on empty
+				   tuples. */
+				if (oparg) {
+					Py_ssize_t nb_tracked = 0;
+					for (; --oparg >= 0;) {
+						w = POP();
+						PyTuple_SET_ITEM(x, oparg, w);
+						nb_tracked +=
+							(PyObject_IS_GC(w) &&
+							 _PyObject_GC_IS_TRACKED(w));
+					}
+					if (!nb_tracked)
+						_PyObject_GC_UNTRACK(x);
 				}
 				PUSH(x);
 				continue;
diff -r d9c555047fcc Python/modsupport.c
--- a/Python/modsupport.c	Sun Dec 14 12:09:40 2008 +0100
+++ b/Python/modsupport.c	Wed Dec 17 15:53:18 2008 +0100
@@ -255,7 +255,7 @@ static PyObject *
 do_mktuple(const char **p_format, va_list *p_va, int endchar, int n, int flags)
 {
 	PyObject *v;
-	int i;
+	int i, nb_tracked;
 	int itemfailed = 0;
 	if (n < 0)
 		return NULL;
@@ -263,13 +263,15 @@ do_mktuple(const char **p_format, va_lis
 		return NULL;
 	/* Note that we can't bail immediately on error as this will leak
 	   refcounts on any 'N' arguments. */
-	for (i = 0; i < n; i++) {
+	for (i = 0, nb_tracked = 0; i < n; i++) {
 		PyObject *w = do_mkvalue(p_format, p_va, flags);
 		if (w == NULL) {
 			itemfailed = 1;
 			Py_INCREF(Py_None);
 			w = Py_None;
 		}
+		else nb_tracked += (PyObject_IS_GC(w)
+				    && _PyObject_GC_IS_TRACKED(w));
 		PyTuple_SET_ITEM(v, i, w);
 	}
 	if (itemfailed) {
@@ -283,6 +285,8 @@ do_mktuple(const char **p_format, va_lis
 				"Unmatched paren in format");
 		return NULL;
 	}
+	if (n && !nb_tracked)
+		_PyObject_GC_UNTRACK(v);
 	if (endchar)
 		++*p_format;
 	return v;