diff -r 6c5f9c6c25ea Lib/test/test_dict.py --- a/Lib/test/test_dict.py Wed Sep 14 18:17:32 2016 +0300 +++ b/Lib/test/test_dict.py Thu Sep 15 05:19:17 2016 +0900 @@ -839,83 +839,6 @@ pass self._tracked(MyDict()) - def make_shared_key_dict(self, n): - class C: - pass - - dicts = [] - for i in range(n): - a = C() - a.x, a.y, a.z = 1, 2, 3 - dicts.append(a.__dict__) - - return dicts - - @support.cpython_only - def test_splittable_del(self): - """split table must be combined when del d[k]""" - a, b = self.make_shared_key_dict(2) - - orig_size = sys.getsizeof(a) - - del a['y'] # split table is combined - with self.assertRaises(KeyError): - del a['y'] - - self.assertGreater(sys.getsizeof(a), orig_size) - self.assertEqual(list(a), ['x', 'z']) - self.assertEqual(list(b), ['x', 'y', 'z']) - - # Two dicts have different insertion order. - a['y'] = 42 - self.assertEqual(list(a), ['x', 'z', 'y']) - self.assertEqual(list(b), ['x', 'y', 'z']) - - @support.cpython_only - def test_splittable_pop(self): - """split table must be combined when d.pop(k)""" - a, b = self.make_shared_key_dict(2) - - orig_size = sys.getsizeof(a) - - a.pop('y') # split table is combined - with self.assertRaises(KeyError): - a.pop('y') - - self.assertGreater(sys.getsizeof(a), orig_size) - self.assertEqual(list(a), ['x', 'z']) - self.assertEqual(list(b), ['x', 'y', 'z']) - - # Two dicts have different insertion order. 
- a['y'] = 42 - self.assertEqual(list(a), ['x', 'z', 'y']) - self.assertEqual(list(b), ['x', 'y', 'z']) - - @support.cpython_only - def test_splittable_pop_pending(self): - """pop a pending key in a splitted table should not crash""" - a, b = self.make_shared_key_dict(2) - - a['a'] = 4 - with self.assertRaises(KeyError): - b.pop('a') - - @support.cpython_only - def test_splittable_popitem(self): - """split table must be combined when d.popitem()""" - a, b = self.make_shared_key_dict(2) - - orig_size = sys.getsizeof(a) - - item = a.popitem() # split table is combined - self.assertEqual(item, ('z', 3)) - with self.assertRaises(KeyError): - del a['z'] - - self.assertGreater(sys.getsizeof(a), orig_size) - self.assertEqual(list(a), ['x', 'y']) - self.assertEqual(list(b), ['x', 'y', 'z']) - def test_iterator_pickling(self): for proto in range(pickle.HIGHEST_PROTOCOL + 1): data = {1:"a", 2:"b", 3:"c"} diff -r 6c5f9c6c25ea Lib/test/test_dict_split.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Lib/test/test_dict_split.py Thu Sep 15 05:19:17 2016 +0900 @@ -0,0 +1,144 @@ +"""Tests for split table form of dict""" + +import struct +import sys +from test import support +import unittest + + +def _usable_fraction(n): + return n * 2 // 3 + + +def _ixsize(n): + if n <= 128: + return 1 + if n <= 2**15: + return 2 + if n <= 2**31: + return 4 + return 8 + + +def combined_dict_size(keysize): + """size of combined dict""" + size = support.calcobjsize('nQ2P') # PyDictObject + size += struct.calcsize('2nP2n') # PyDictKeysObject + size += _ixsize(keysize) * keysize # dk_indices + size += struct.calcsize('n2P') * _usable_fraction(keysize) # dk_entries + return size + + +def split_dict_size(keysize): + """size of split, but not shared dict""" + size = combined_dict_size(keysize) + size += struct.calcsize('P') * _usable_fraction(keysize) # ma_values + return size + + +def shared_dict_size(keysize): + """size of shared dict""" + size = support.calcobjsize('nQ2P') # PyDictObject + size 
+= struct.calcsize('P') * _usable_fraction(keysize) # ma_values + return size + + +@support.cpython_only +class SplitTableTest(unittest.TestCase): + + check_sizeof = support.check_sizeof + + def make_shared_key_dict(self, n): + class C: + pass + + dicts = [] + for i in range(n): + a = C() + a.x, a.y, a.z = 1, 2, 3 + dicts.append(a.__dict__) + + return dicts + + def test_del(self): + """split table must be combined when del d[k]""" + a, b = self.make_shared_key_dict(2) + + orig_size = sys.getsizeof(a) + + del a['y'] # split table is combined + with self.assertRaises(KeyError): + del a['y'] + + self.assertGreater(sys.getsizeof(a), orig_size) + self.assertEqual(list(a), ['x', 'z']) + self.assertEqual(list(b), ['x', 'y', 'z']) + + # Two dicts have different insertion order. + a['y'] = 42 + self.assertEqual(list(a), ['x', 'z', 'y']) + self.assertEqual(list(b), ['x', 'y', 'z']) + + def test_pop(self): + """split table must be combined when d.pop(k)""" + a, b = self.make_shared_key_dict(2) + + orig_size = sys.getsizeof(a) + + a.pop('y') # split table is combined + with self.assertRaises(KeyError): + a.pop('y') + + self.assertGreater(sys.getsizeof(a), orig_size) + self.assertEqual(list(a), ['x', 'z']) + self.assertEqual(list(b), ['x', 'y', 'z']) + + # Two dicts have different insertion order. 
+ a['y'] = 42 + self.assertEqual(list(a), ['x', 'z', 'y']) + self.assertEqual(list(b), ['x', 'y', 'z']) + + def test_pop_pending(self): + """pop a pending key in a split table should not crash""" + a, b = self.make_shared_key_dict(2) + + a['a'] = 4 + with self.assertRaises(KeyError): + b.pop('a') + + def test_popitem(self): + """split table must be combined when d.popitem()""" + a, b = self.make_shared_key_dict(2) + + orig_size = sys.getsizeof(a) + + item = a.popitem() # split table is combined + self.assertEqual(item, ('z', 3)) + with self.assertRaises(KeyError): + del a['z'] + + self.assertGreater(sys.getsizeof(a), orig_size) + self.assertEqual(list(a), ['x', 'y']) + self.assertEqual(list(b), ['x', 'y', 'z']) + + def test_setitem_after_pop(self): + class C: + pass + a = C() + + a.a = 1 + self.check_sizeof({}, combined_dict_size(8)) + self.check_sizeof(a.__dict__, shared_dict_size(8)) + + # dict.pop() converts it to a combined table. + # C doesn't stop using shared keys when the deletion bypasses delitem. + a.__dict__.pop('a') + self.check_sizeof(a.__dict__, combined_dict_size(8)) + + # But C should not convert a.__dict__ to a split table again. + a.a = 1 + self.check_sizeof(a.__dict__, combined_dict_size(8)) + + +if __name__ == "__main__": + unittest.main() diff -r 6c5f9c6c25ea Objects/dictobject.c --- a/Objects/dictobject.c Wed Sep 14 18:17:32 2016 +0300 +++ b/Objects/dictobject.c Thu Sep 15 05:19:17 2016 +0900 @@ -388,7 +388,7 @@ * This can be used to reserve enough size to insert n entries without * resizing. */ -#define ESTIMATE_SIZE(n) (((n)*3) >> 1) +#define ESTIMATE_SIZE(n) (((n)*3+1) >> 1) /* Alternative fraction that is otherwise close enough to 2n/3 to make * little difference. 8 * 2/3 == 8 * 5/8 == 5. 16 * 2/3 == 16 * 5/8 == 10. @@ -1236,16 +1236,16 @@ but can be resplit by make_keys_shared(). 
*/ static int -dictresize(PyDictObject *mp, Py_ssize_t minused) +dictresize(PyDictObject *mp, Py_ssize_t minsize) { Py_ssize_t i, newsize; PyDictKeysObject *oldkeys; PyObject **oldvalues; PyDictKeyEntry *ep0; - /* Find the smallest table size > minused. */ + // Find the smallest table size >= minsize. for (newsize = PyDict_MINSIZE; - newsize <= minused && newsize > 0; + newsize < minsize && newsize > 0; newsize <<= 1) ; if (newsize <= 0) { @@ -4229,9 +4229,11 @@ CACHED_KEYS(tp) = NULL; DK_DECREF(cached); } - } else { + } + else { + int shared = cached == ((PyDictObject *)dict)->ma_keys; res = PyDict_SetItem(dict, key, value); - if (cached != ((PyDictObject *)dict)->ma_keys) { + if (shared && cached != ((PyDictObject *)dict)->ma_keys) { /* Either update tp->ht_cached_keys or delete it */ if (cached->dk_refcnt == 1) { CACHED_KEYS(tp) = make_keys_shared(dict);