# HG changeset patch # User Antoine Pitrou # Date 1237045597 -3600 diff -r ae3115defec1 -r 8637b79ed9b6 Include/dictobject.h --- a/Include/dictobject.h +++ b/Include/dictobject.h @@ -111,6 +111,7 @@ PyAPI_FUNC(PyObject *) PyDict_Copy(PyObj PyAPI_FUNC(int) PyDict_Contains(PyObject *mp, PyObject *key); PyAPI_FUNC(int) _PyDict_Contains(PyObject *mp, PyObject *key, long hash); PyAPI_FUNC(PyObject *) _PyDict_NewPresized(Py_ssize_t minused); +PyAPI_FUNC(void) _PyDict_Optimize(PyObject *mp); /* PyDict_Update(mp, other) is equivalent to PyDict_Merge(mp, other, 1). */ PyAPI_FUNC(int) PyDict_Update(PyObject *mp, PyObject *other); diff -r ae3115defec1 -r 8637b79ed9b6 Include/objimpl.h --- a/Include/objimpl.h +++ b/Include/objimpl.h @@ -285,6 +285,17 @@ extern PyGC_Head *_PyGC_generation0; g->gc.gc_next = NULL; \ } while (0); +/* True if the object is currently tracked by the GC. */ +#define _PyObject_GC_IS_TRACKED(o) \ + ((_Py_AS_GC(o))->gc.gc_refs != _PyGC_REFS_UNTRACKED) + +/* True if the object may be tracked by the GC in the future, or already is. + This can be useful to implement some optimizations. 
*/ +#define _PyObject_GC_MAY_BE_TRACKED(obj) \ + (PyObject_IS_GC(obj) && \ + (!PyTuple_CheckExact(obj) || _PyObject_GC_IS_TRACKED(obj))) + + PyAPI_FUNC(PyObject *) _PyObject_GC_Malloc(size_t); PyAPI_FUNC(PyObject *) _PyObject_GC_New(PyTypeObject *); PyAPI_FUNC(PyVarObject *) _PyObject_GC_NewVar(PyTypeObject *, Py_ssize_t); diff -r ae3115defec1 -r 8637b79ed9b6 Include/tupleobject.h --- a/Include/tupleobject.h +++ b/Include/tupleobject.h @@ -44,6 +44,7 @@ PyAPI_FUNC(int) PyTuple_SetItem(PyObject PyAPI_FUNC(PyObject *) PyTuple_GetSlice(PyObject *, Py_ssize_t, Py_ssize_t); PyAPI_FUNC(int) _PyTuple_Resize(PyObject **, Py_ssize_t); PyAPI_FUNC(PyObject *) PyTuple_Pack(Py_ssize_t, ...); +PyAPI_FUNC(void) _PyTuple_Optimize(PyObject *); /* Macro, trading safety for speed */ #define PyTuple_GET_ITEM(op, i) (((PyTupleObject *)(op))->ob_item[i]) diff -r ae3115defec1 -r 8637b79ed9b6 Lib/test/test_dict.py --- a/Lib/test/test_dict.py +++ b/Lib/test/test_dict.py @@ -569,6 +569,104 @@ class DictTest(unittest.TestCase): gc.collect() self.assert_(ref() is None, "Cycle was not collected") + def _not_tracked(self, t): + # Nested containers can take several collections to untrack + gc.collect() + gc.collect() + self.assertFalse(gc.is_tracked(t), t) + + def _tracked(self, t): + self.assertTrue(gc.is_tracked(t), t) + gc.collect() + gc.collect() + self.assertTrue(gc.is_tracked(t), t) + + def test_track_literals(self): + # Test GC-optimization of dict literals + x, y, z, w = 1.5, "a", (1, None), [] + + self._not_tracked({}) + self._not_tracked({x:(), y:x, z:1}) + self._not_tracked({1: "a", "b": 2}) + self._not_tracked({1: 2, (None, True, False, ()): int}) + self._not_tracked({1: object()}) + + # Dicts with mutable elements are always tracked, even if those + # elements are not tracked right now. 
+ self._tracked({1: []}) + self._tracked({1: ([],)}) + self._tracked({1: {}}) + self._tracked({1: set()}) + + def test_track_dynamic(self): + # Test GC-optimization of dynamically-created dicts + class MyObject(object): + pass + x, y, z, w, o = 1.5, "a", (1, object()), [], MyObject() + + d = dict() + self._not_tracked(d) + d[1] = "a" + self._not_tracked(d) + d[y] = 2 + self._not_tracked(d) + d[z] = 3 + self._not_tracked(d) + self._not_tracked(d.copy()) + d[4] = w + self._tracked(d) + self._tracked(d.copy()) + d[4] = None + self._not_tracked(d) + self._not_tracked(d.copy()) + + # dd isn't tracked right now, but it may mutate and therefore d + # which contains it must be tracked. + d = dict() + dd = dict() + d[1] = dd + self._not_tracked(dd) + self._tracked(d) + dd[1] = d + self._tracked(dd) + + d = dict.fromkeys([x, y, z]) + self._not_tracked(d) + dd = dict() + dd.update(d) + self._not_tracked(dd) + d = dict.fromkeys([x, y, z, o]) + self._tracked(d) + dd = dict() + dd.update(d) + self._tracked(dd) + + d = dict(x=x, y=y, z=z) + self._not_tracked(d) + d = dict(x=x, y=y, z=z, w=w) + self._tracked(d) + d = dict() + d.update(x=x, y=y, z=z) + self._not_tracked(d) + d.update(w=w) + self._tracked(d) + + d = dict([(x, y), (z, 1)]) + self._not_tracked(d) + d = dict([(x, y), (z, w)]) + self._tracked(d) + d = dict() + d.update([(x, y), (z, 1)]) + self._not_tracked(d) + d.update([(x, y), (z, w)]) + self._tracked(d) + + def test_track_subtypes(self): + # Dict subtypes are always tracked + class MyDict(dict): + pass + self._tracked(MyDict()) + from test import mapping_tests diff -r ae3115defec1 -r 8637b79ed9b6 Lib/test/test_gc.py --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -415,6 +415,37 @@ class GCTests(unittest.TestCase): self.assertEqual(gc.get_referents(1, 'a', 4j), []) + def test_is_tracked(self): + # Atomic built-in types are not tracked, user-defined objects and + # mutable containers are. + # NOTE: types with special optimizations (e.g. 
tuple) have tests + # in their own test files instead. + self.assertFalse(gc.is_tracked(None)) + self.assertFalse(gc.is_tracked(1)) + self.assertFalse(gc.is_tracked(1.0)) + self.assertFalse(gc.is_tracked(1.0 + 5.0j)) + self.assertFalse(gc.is_tracked(True)) + self.assertFalse(gc.is_tracked(False)) + self.assertFalse(gc.is_tracked("a")) + self.assertFalse(gc.is_tracked(u"a")) + self.assertFalse(gc.is_tracked(bytearray("a"))) + self.assertFalse(gc.is_tracked(type)) + self.assertFalse(gc.is_tracked(int)) + self.assertFalse(gc.is_tracked(object)) + self.assertFalse(gc.is_tracked(object())) + + class OldStyle: + pass + class NewStyle(object): + pass + self.assertTrue(gc.is_tracked(gc)) + self.assertTrue(gc.is_tracked(OldStyle)) + self.assertTrue(gc.is_tracked(OldStyle())) + self.assertTrue(gc.is_tracked(NewStyle)) + self.assertTrue(gc.is_tracked(NewStyle())) + self.assertTrue(gc.is_tracked([])) + self.assertTrue(gc.is_tracked(set())) + def test_bug1055820b(self): # Corresponds to temp2b.py in the bug report. 
diff -r ae3115defec1 -r 8637b79ed9b6 Lib/test/test_tuple.py --- a/Lib/test/test_tuple.py +++ b/Lib/test/test_tuple.py @@ -1,5 +1,7 @@ from test import test_support, seq_tests +import gc + class TupleTest(seq_tests.CommonTest): type2test = tuple @@ -82,6 +84,69 @@ class TupleTest(seq_tests.CommonTest): self.assertEqual(repr(a0), "()") self.assertEqual(repr(a2), "(0, 1, 2)") + def _not_tracked(self, t): + # Nested tuples can take several collections to untrack + gc.collect() + gc.collect() + self.assertFalse(gc.is_tracked(t), t) + + def _tracked(self, t): + self.assertTrue(gc.is_tracked(t), t) + gc.collect() + gc.collect() + self.assertTrue(gc.is_tracked(t), t) + + def test_track_literals(self): + # Test GC-optimization of tuple literals + x, y, z = 1.5, "a", [] + + self._not_tracked(()) + self._not_tracked((1,)) + self._not_tracked((1, 2)) + self._not_tracked((1, 2, "a")) + self._not_tracked((1, 2, (None, True, False, ()), int)) + self._not_tracked((object(),)) + self._not_tracked(((1, x), y, (2, 3))) + + # Tuples with mutable elements are always tracked, even if those + # elements are not tracked right now. + self._tracked(([],)) + self._tracked(([1],)) + self._tracked(({},)) + self._tracked((set(),)) + self._tracked((x, y, z)) + + def check_track_dynamic(self, tp, always_track): + x, y, z = 1.5, "a", [] + + check = self._tracked if always_track else self._not_tracked + check(tp()) + check(tp([])) + check(tp(set())) + check(tp([1, x, y])) + check(tp(obj for obj in [1, x, y])) + check(tp(set([1, x, y]))) + check(tp(tuple([obj]) for obj in [1, x, y])) + check(tuple(tp([obj]) for obj in [1, x, y])) + + self._tracked(tp([z])) + self._tracked(tp([[x, y]])) + self._tracked(tp([{x: y}])) + self._tracked(tp(obj for obj in [x, y, z])) + self._tracked(tp(tuple([obj]) for obj in [x, y, z])) + self._tracked(tuple(tp([obj]) for obj in [x, y, z])) + + def test_track_dynamic(self): + # Test GC-optimization of dynamically constructed tuples. 
+ self.check_track_dynamic(tuple, False) + + def test_track_subtypes(self): + # Tuple subtypes must always be tracked + class MyTuple(tuple): + pass + self.check_track_dynamic(MyTuple, True) + + def test_main(): test_support.run_unittest(TupleTest) diff -r ae3115defec1 -r 8637b79ed9b6 Modules/gcmodule.c --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -432,7 +432,13 @@ move_unreachable(PyGC_Head *young, PyGC_ (void) traverse(op, (visitproc)visit_reachable, (void *)young); - next = gc->gc.gc_next; + next = gc->gc.gc_next; + if (PyTuple_CheckExact(op)) { + _PyTuple_Optimize(op); + } + else if (PyDict_CheckExact(op)) { + _PyDict_Optimize(op); + } } else { /* This *may* be unreachable. To make progress, @@ -1264,6 +1270,26 @@ gc_get_objects(PyObject *self, PyObject return result; } +PyDoc_STRVAR(gc_is_tracked__doc__, +"is_tracked(obj) -> bool\n" +"\n" +"Returns true if the object is tracked by the garbage collector.\n" +"Simple atomic objects will return false.\n" +); + +static PyObject * +gc_is_tracked(PyObject *self, PyObject *obj) +{ + PyObject *result; + + if (PyObject_IS_GC(obj) && IS_TRACKED(obj)) + result = Py_True; + else + result = Py_False; + Py_INCREF(result); + return result; +} + PyDoc_STRVAR(gc__doc__, "This module provides access to the garbage collector for reference cycles.\n" @@ -1278,6 +1304,7 @@ PyDoc_STRVAR(gc__doc__, "set_threshold() -- Set the collection thresholds.\n" "get_threshold() -- Return the current the collection thresholds.\n" "get_objects() -- Return a list of all objects tracked by the collector.\n" +"is_tracked() -- Returns true if a given object is tracked.\n" "get_referrers() -- Return the list of objects that refer to an object.\n" "get_referents() -- Return the list of objects that an object refers to.\n"); @@ -1293,6 +1320,7 @@ static PyMethodDef GcMethods[] = { {"collect", (PyCFunction)gc_collect, METH_VARARGS | METH_KEYWORDS, gc_collect__doc__}, {"get_objects", gc_get_objects,METH_NOARGS, gc_get_objects__doc__}, + 
{"is_tracked", gc_is_tracked, METH_O, gc_is_tracked__doc__}, {"get_referrers", gc_get_referrers, METH_VARARGS, gc_get_referrers__doc__}, {"get_referents", gc_get_referents, METH_VARARGS, diff -r ae3115defec1 -r 8637b79ed9b6 Objects/dictobject.c --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -180,6 +180,26 @@ show_alloc(void) } #endif +/* Debug statistic to count GC tracking of dicts */ +#undef SHOW_TRACK_COUNT +#ifdef SHOW_TRACK_COUNT +static Py_ssize_t count_untracked = 0; +static Py_ssize_t count_tracked = 0; + +static void +show_track(void) +{ + fprintf(stderr, "Dict creations: %" PY_FORMAT_SIZE_T "d\n", + count_tracked + count_untracked); + fprintf(stderr, "Dicts tracked by the GC: %" PY_FORMAT_SIZE_T + "d\n", count_tracked); + fprintf(stderr, "%.2f%% tracking rate\n\n", + (100.0*count_tracked/(count_untracked+count_tracked))); +} +#endif + + + /* Initialization macros. There are two ways to create a dict: PyDict_New() is the main C API function, and the tp_new slot maps to dict_new(). 
In the latter case we @@ -233,6 +253,9 @@ PyDict_New(void) #ifdef SHOW_ALLOC_COUNT Py_AtExit(show_alloc); #endif +#ifdef SHOW_TRACK_COUNT + Py_AtExit(show_track); +#endif } if (numfree) { mp = free_list[--numfree]; @@ -262,10 +285,12 @@ PyDict_New(void) #endif } mp->ma_lookup = lookdict_string; +#ifdef SHOW_TRACK_COUNT + count_untracked++; +#endif #ifdef SHOW_CONVERSION_COUNTS ++created; #endif - _PyObject_GC_TRACK(mp); return (PyObject *)mp; } @@ -433,6 +458,52 @@ lookdict_string(PyDictObject *mp, PyObje return 0; } +#ifdef SHOW_TRACK_COUNT +#define INCREASE_TRACK_COUNT \ + (count_tracked++, count_untracked--); +#define DECREASE_TRACK_COUNT \ + (count_tracked--, count_untracked++); +#else +#define INCREASE_TRACK_COUNT +#define DECREASE_TRACK_COUNT +#endif + +#define MAINTAIN_TRACKING(mp, key, value) \ + do { \ + if (!_PyObject_GC_IS_TRACKED(mp)) { \ + if (_PyObject_GC_MAY_BE_TRACKED(key) || \ + _PyObject_GC_MAY_BE_TRACKED(value)) { \ + _PyObject_GC_TRACK(mp); \ + INCREASE_TRACK_COUNT \ + } \ + } \ + } while(0) + +void +_PyDict_Optimize(PyObject *op) +{ + PyDictObject *mp; + PyObject *value; + Py_ssize_t mask, i; + PyDictEntry *ep; + + if (!PyDict_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op)) + return; + + mp = (PyDictObject *) op; + ep = mp->ma_table; + mask = mp->ma_mask; + for (i = 0; i <= mask; i++) { + if ((value = ep[i].me_value) == NULL) + continue; + if (_PyObject_GC_MAY_BE_TRACKED(value) || + _PyObject_GC_MAY_BE_TRACKED(ep[i].me_key)) + return; + } + _PyObject_GC_UNTRACK(op); +} + + /* Internal routine to insert a new item into the table. Used both by the internal resize routine and by the public insert routine. 
@@ -453,6 +524,7 @@ insertdict(register PyDictObject *mp, Py Py_DECREF(value); return -1; } + MAINTAIN_TRACKING(mp, key, value); if (ep->me_value != NULL) { old_value = ep->me_value; ep->me_value = value; @@ -492,6 +564,7 @@ insertdict_clean(register PyDictObject * PyDictEntry *ep0 = mp->ma_table; register PyDictEntry *ep; + MAINTAIN_TRACKING(mp, key, value); i = hash & mask; ep = &ep0[i]; for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) { @@ -2202,9 +2275,18 @@ dict_new(PyTypeObject *type, PyObject *a assert(d->ma_table == NULL && d->ma_fill == 0 && d->ma_used == 0); INIT_NONZERO_DICT_SLOTS(d); d->ma_lookup = lookdict_string; + /* The object has been implicitly tracked by tp_alloc */ + if (type == &PyDict_Type) + _PyObject_GC_UNTRACK(d); #ifdef SHOW_CONVERSION_COUNTS ++created; #endif +#ifdef SHOW_TRACK_COUNT + if (_PyObject_GC_IS_TRACKED(d)) + count_tracked++; + else + count_untracked++; +#endif } return self; } diff -r ae3115defec1 -r 8637b79ed9b6 Objects/tupleobject.c --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -131,6 +131,28 @@ PyTuple_SetItem(register PyObject *op, r return 0; } +void +_PyTuple_Optimize(PyObject *op) +{ + PyTupleObject *t; + Py_ssize_t i, n; + + if (!PyTuple_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op)) + return; + t = (PyTupleObject *) op; + n = Py_SIZE(t); + for (i = 0; i < n; i++) { + PyObject *elt = PyTuple_GET_ITEM(t, i); + /* Tuples with NULL elements aren't + fully constructed, don't untrack + them yet. */ + if (!elt || + _PyObject_GC_MAY_BE_TRACKED(elt)) + return; + } + _PyObject_GC_UNTRACK(op); +} + PyObject * PyTuple_Pack(Py_ssize_t n, ...) {