Issue #21507: presize the set's hash table from the iterable's length hint
so that a set built from a list or an iterator ends up with the same
allocation as a set built from another set.

set_prealloc() factors out the "grow before bulk insert" check that was
duplicated in set_merge() and in the dict branch of set_update_internal().
Because it now also receives an unverified length hint, it skips presizing
when the size arithmetic would overflow Py_ssize_t.

NOTE(review): line breaks and indentation in this patch were reconstructed
from a whitespace-collapsed copy.  If context lines fail to match, apply
with whitespace ignored (e.g. "patch -l").

diff -r 785e29d8ce13 Lib/test/test_set.py
--- a/Lib/test/test_set.py	Wed May 14 23:37:14 2014 +0200
+++ b/Lib/test/test_set.py	Thu May 15 00:14:15 2014 +0200
@@ -361,6 +361,20 @@ class TestJointOps:
         gc.collect()
         self.assertTrue(ref() is None, "Cycle was not collected")
 
+    @support.cpython_only
+    def test_length_hint(self):
+        # Issue #21507: set constructor should use operator.length_hint to
+        # guess the final length, and so limit the overallocation of the set
+        for n in (10, 100, 500, 1000):
+            data = list(range(n))
+            fs1 = self.thetype(self.thetype(data))
+            fs2 = self.thetype(data)
+            self.assertEqual(sys.getsizeof(fs2), sys.getsizeof(fs1))
+            fs3 = self.thetype(iter(data))
+            self.assertEqual(sys.getsizeof(fs3), sys.getsizeof(fs1))
+            fs4 = self.thetype(iter(set(data)))
+            self.assertEqual(sys.getsizeof(fs4), sys.getsizeof(fs1))
+
 class TestSet(TestJointOps, unittest.TestCase):
     thetype = set
     basetype = set
diff -r 785e29d8ce13 Objects/setobject.c
--- a/Objects/setobject.c	Wed May 14 23:37:14 2014 +0200
+++ b/Objects/setobject.c	Thu May 15 00:14:15 2014 +0200
@@ -608,6 +608,23 @@ set_len(PyObject *so)
 }
 
+/* Make room for 'length' additional keys up front instead of resizing
+   incrementally.  Return 0 on success, -1 if set_table_resize() fails. */
+static int
+set_prealloc(PySetObject *so, Py_ssize_t length)
+{
+    /* 'length' may be an unverified length hint.  Presizing is only an
+       optimization, so skip it rather than let the arithmetic below
+       overflow; keys are then inserted with incremental resizing. */
+    if (length < 0 || length > PY_SSIZE_T_MAX / 3 - so->fill)
+        return 0;
+    if ((so->fill + length)*3 >= (so->mask+1)*2) {
+        if (set_table_resize(so, (so->used + length)*2) != 0)
+            return -1;
+    }
+    return 0;
+}
+
 static int
 set_merge(PySetObject *so, PyObject *otherset)
 {
     PySetObject *other;
@@ -627,10 +644,8 @@ set_merge(PySetObject *so, PyObject *oth
      * incrementally resizing as we insert new keys. Expect
      * that there will be no (or few) overlapping keys.
      */
-    if ((so->fill + other->used)*3 >= (so->mask+1)*2) {
-        if (set_table_resize(so, (so->used + other->used)*2) != 0)
-            return -1;
-    }
+    if (set_prealloc(so, other->used) < 0)
+        return -1;
     for (i = 0; i <= other->mask; i++) {
         entry = &other->table[i];
         key = entry->key;
@@ -949,6 +964,7 @@ static int
 set_update_internal(PySetObject *so, PyObject *other)
 {
     PyObject *key, *it;
+    Py_ssize_t n;
 
     if (PyAnySet_Check(other))
         return set_merge(so, other);
@@ -965,10 +981,8 @@ set_update_internal(PySetObject *so, PyO
          */
         if (dictsize == -1)
             return -1;
-        if ((so->fill + dictsize)*3 >= (so->mask+1)*2) {
-            if (set_table_resize(so, (so->used + dictsize)*2) != 0)
-                return -1;
-        }
+        if (set_prealloc(so, dictsize) < 0)
+            return -1;
 
         while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
             setentry an_entry;
@@ -984,6 +998,17 @@ set_update_internal(PySetObject *so, PyO
     if (it == NULL)
         return -1;
 
+    /* Guess a result set size. */
+    n = PyObject_LengthHint(other, PySet_MINSIZE);
+    if (n < 0) {
+        Py_DECREF(it);
+        return -1;
+    }
+    if (set_prealloc(so, n) < 0) {
+        Py_DECREF(it);
+        return -1;
+    }
+
     while ((key = PyIter_Next(it)) != NULL) {
         if (set_add_key(so, key) == -1) {
             Py_DECREF(it);