diff -r 58bd6a58365d -r bc286099ce9a Include/dictobject.h --- a/Include/dictobject.h Wed Feb 08 04:09:37 2012 +0100 +++ b/Include/dictobject.h Wed Feb 08 16:33:32 2012 +0000 @@ -14,7 +14,7 @@ */ /* -There are three kinds of slots in the table: +There are four kinds of slots in the table: 1. Unused. me_key == me_value == NULL Does not hold an active (key, value) pair now and never did. Unused can @@ -22,8 +22,9 @@ me_key is NULL, and is each slot's initial state. 2. Active. me_key != NULL and me_key != dummy and me_value != NULL - Holds an active (key, value) pair. Active can transition to Dummy upon - key deletion. This is the only case in which me_value != NULL. + Holds an active (key, value) pair. Active can transition to Dummy or + Pending upon key deletion (for combined and split tables respectively). + This is the only case in which me_value != NULL. 3. Dummy. me_key == dummy and me_value == NULL Previously held an active (key, value) pair, but that was deleted and an @@ -32,59 +33,78 @@ (cannot have me_key set to NULL), else the probe sequence in case of collision would have no way to know they were once active. +4. Pending. Not yet inserted or deleted from a split-table. + key != NULL, key != dummy and value == NULL + +The DictObject can be in one of two forms. +Either: + A combined table: + ma_values == NULL, dk_refcnt == 1. + Values are stored in the _me_value field of the PyDictKeysObject. + Slot kind 4 is not allowed i.e. + key != NULL, key != dummy and value == NULL is illegal. +Or: + A split table: + ma_values != NULL, dk_refcnt >= 1 + Values are stored in the ma_values array. + Only string (unicode) keys are allowed, no keys are present. + Note: .popitem() abuses the me_hash field of an Unused or Dummy slot to hold a search finger. The me_hash field of Unused or Dummy slots has no -meaning otherwise. +meaning otherwise. As a consequence of this popitem always converts the dict +to the combined-table form. */ -/* PyDict_MINSIZE is the minimum size of a dictionary. This many slots are - * allocated directly in the dict object (in the ma_smalltable member). +/* PyDict_MINSIZE is the minimum size of a dictionary. * It must be a power of 2, and at least 4. 8 allows dicts with no more - * than 5 active entries to live in ma_smalltable (and so avoid an - * additional malloc); instrumentation suggested this suffices for the + * than 5 active entries; experiments suggested this suffices for the * majority of dicts (consisting mostly of usually-small instance dicts and * usually-small dicts created to pass keyword arguments). + * */ #ifndef Py_LIMITED_API -#define PyDict_MINSIZE 8 +#define PyDict_MINSIZE 4 typedef struct { /* Cached hash code of me_key. */ Py_hash_t me_hash; PyObject *me_key; - PyObject *me_value; -} PyDictEntry; + PyObject *_me_value; /* This field is only meaningful for combined tables */ +} PyDictKeyEntry; /* To ensure the lookup algorithm terminates, there must be at least one Unused slot (NULL key) in the table. -The value ma_fill is the number of non-NULL keys (sum of Active and Dummy); -ma_used is the number of non-NULL, non-dummy keys (== the number of non-NULL -values == the number of Active items). To avoid slowing down lookups on a near-full table, we resize the table when -it's two-thirds full. +it's USABLE_FRACTION (currently two-thirds) full. */ + +/* Note that if the dk_free slot of a PyDictKeysObject is less than zero + then that PyDictKeysObject is *immutable* */ +typedef struct _dictkeysobject PyDictKeysObject; typedef struct _dictobject PyDictObject; + +/* The ma_values pointer is NULL for a combined table + * or points to an array of PyObject* for a split table + */ struct _dictobject { PyObject_HEAD - Py_ssize_t ma_fill; /* # Active + # Dummy */ - Py_ssize_t ma_used; /* # Active */ + Py_ssize_t ma_used; + struct _dictkeysobject *ma_keys; + PyObject **ma_values; +}; - /* The table contains ma_mask + 1 slots, and that's a power of 2. - * We store the mask instead of the size because the mask is more - * frequently needed. - */ - Py_ssize_t ma_mask; +typedef PyDictKeyEntry *(*dict_lookup_func) +(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject ***value_addr); - /* ma_table points to ma_smalltable for small tables, else to - * additional malloc'ed memory. ma_table is never NULL! This rule - * saves repeated runtime null-tests in the workhorse getitem and - * setitem calls. - */ - PyDictEntry *ma_table; - PyDictEntry *(*ma_lookup)(PyDictObject *mp, PyObject *key, Py_hash_t hash); - PyDictEntry ma_smalltable[PyDict_MINSIZE]; +struct _dictkeysobject { + Py_ssize_t dk_refcnt; + Py_ssize_t dk_size; + dict_lookup_func dk_lookup; + Py_ssize_t dk_free; + PyDictKeyEntry dk_entries[1]; }; + #endif /* Py_LIMITED_API */ PyAPI_DATA(PyTypeObject) PyDict_Type; @@ -115,6 +135,8 @@ PyAPI_FUNC(int) PyDict_Next( PyObject *mp, Py_ssize_t *pos, PyObject **key, PyObject **value); #ifndef Py_LIMITED_API +PyDictKeysObject *_PyDict_NewKeysForClass(void); +PyAPI_FUNC(PyObject *) PyDict_NewForInstance(PyTypeObject *tp); PyAPI_FUNC(int) _PyDict_Next( PyObject *mp, Py_ssize_t *pos, PyObject **key, PyObject **value, Py_hash_t *hash); #endif @@ -129,6 +151,7 @@ PyAPI_FUNC(PyObject *) _PyDict_NewPresized(Py_ssize_t minused); PyAPI_FUNC(void) _PyDict_MaybeUntrack(PyObject *mp); PyAPI_FUNC(int) _PyDict_HasOnlyStringKeys(PyObject *mp); +#define _PyDict_HasSplitTable(d) ((d)->ma_values != NULL) PyAPI_FUNC(int) PyDict_ClearFreeList(void); #endif @@ -158,6 +181,10 @@ PyAPI_FUNC(int) PyDict_SetItemString(PyObject *dp, const char *key, PyObject *item); PyAPI_FUNC(int) PyDict_DelItemString(PyObject *dp, const char *key); +#ifndef Py_LIMITED_API +int _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, PyObject *name, PyObject *value); +#endif + #ifdef __cplusplus } #endif diff -r 58bd6a58365d -r bc286099ce9a Include/object.h --- a/Include/object.h Wed Feb 08 04:09:37 2012 +0100 +++ b/Include/object.h Wed Feb 08 16:33:32 2012 +0000 @@ -448,6 +448,7 @@ see add_operators() in typeobject.c . */ PyBufferProcs as_buffer; PyObject *ht_name, *ht_slots, *ht_qualname; + struct _dictkeysobject *ht_cached_keys; /* here are optional user slots, followed by the members. */ } PyHeapTypeObject; diff -r 58bd6a58365d -r bc286099ce9a Lib/test/test_sys.py --- a/Lib/test/test_sys.py Wed Feb 08 04:09:37 2012 +0100 +++ b/Lib/test/test_sys.py Wed Feb 08 16:33:32 2012 +0000 @@ -687,9 +687,9 @@ # method-wrapper (descriptor object) check({}.__iter__, size(h + '2P')) # dict - check({}, size(h + '3P2P' + 8*'P2P')) + check({}, size(h + '3P' + '4P' + 8*'P2P')) longdict = {1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8} - check(longdict, size(h + '3P2P' + 8*'P2P') + 16*size('P2P')) + check(longdict, size(h + '3P' + '4P') + 16*size('P2P')) # dictionary-keyiterator check({}.keys(), size(h + 'P')) # dictionary-valueiterator @@ -831,7 +831,7 @@ # type # (PyTypeObject + PyNumberMethods + PyMappingMethods + # PySequenceMethods + PyBufferProcs) - s = size(vh + 'P2P15Pl4PP9PP11PI') + size('16Pi17P 3P 10P 2P 3P') + s = size(vh + 'P2P15Pl4PP9PP11PIP') + size('16Pi17P 3P 10P 2P 3P') check(int, s) # class class newstyleclass(object): pass diff -r 58bd6a58365d -r bc286099ce9a Objects/dictnotes.txt --- a/Objects/dictnotes.txt Wed Feb 08 04:09:37 2012 +0100 +++ b/Objects/dictnotes.txt Wed Feb 08 16:33:32 2012 +0000 @@ -1,7 +1,6 @@ -NOTES ON OPTIMIZING DICTIONARIES +NOTES ON DICTIONARIES ================================ - Principal Use Cases for Dictionaries ------------------------------------ @@ -21,7 +20,6 @@ Builtins Frequent reads. Almost never written. - Size 126 interned strings (as of Py2.3b1). A few keys are accessed much more frequently than others. Uniquification @@ -59,26 +57,22 @@ Characterized by deletions interspersed with adds and replacements. Performance benefits greatly from the re-use of dummy entries. - -Data Layout (assuming a 32-bit box with 64 bytes per cache line) +Data Layout ---------------------------------------------------------------- -Smalldicts (8 entries) are attached to the dictobject structure -and the whole group nearly fills two consecutive cache lines. - -Larger dicts use the first half of the dictobject structure (one cache -line) and a separate, continuous block of entries (at 12 bytes each -for a total of 5.333 entries per cache line). +Dictionaries are composed of 3 components: +The dictobject struct itself +A dict-keys object (keys & hashes) +A values array Tunable Dictionary Parameters ----------------------------- -* PyDict_MINSIZE. Currently set to 8. +* PyDict_MINSIZE. Currently set to 4 (to keep instance dicts small). Must be a power of two. New dicts have to zero-out every cell. - Each additional 8 consumes 1.5 cache lines. Increasing improves - the sparseness of small dictionaries but costs time to read in - the additional cache lines if they are not already in cache. + Increasing improves the sparseness of small dictionaries but costs time + to read in the additional cache lines if they are not already in cache. That case is common when keyword arguments are passed. * Maximum dictionary load in PyDict_SetItem. Currently set to 2/3. @@ -126,8 +120,8 @@ Also, every dictionary iterates at least twice, once for the memset() when it is created and once by dealloc(). -Dictionary operations involving only a single key can be O(1) unless -resizing is possible. By checking for a resize only when the +Dictionary operations involving only a single key can be O(1) unless +resizing is possible. By checking for a resize only when the dictionary can grow (and may *require* resizing), other operations remain O(1), and the odds of resize thrashing or memory fragmentation are reduced. In particular, an algorithm that empties a dictionary @@ -135,136 +129,21 @@ not be necessary at all because the dictionary is eventually discarded entirely. +The key differences between this implementation and earlier versions are: + 1. The table can be split into two parts, the keys and the values. -Results of Cache Locality Experiments -------------------------------------- + 2. There is an additional key-value combination: (key, NULL). + Unlike (, NULL) which represents a deleted value, (key, NULL) + represented a yet to be inserted value. This combination can only occur + when the table is split. -When an entry is retrieved from memory, 4.333 adjacent entries are also -retrieved into a cache line. Since accessing items in cache is *much* -cheaper than a cache miss, an enticing idea is to probe the adjacent -entries as a first step in collision resolution. Unfortunately, the -introduction of any regularity into collision searches results in more -collisions than the current random chaining approach. + 3. No small table embedded in the dict, + as this would make sharing of key-tables impossible. -Exploiting cache locality at the expense of additional collisions fails -to payoff when the entries are already loaded in cache (the expense -is paid with no compensating benefit). This occurs in small dictionaries -where the whole dictionary fits into a pair of cache lines. It also -occurs frequently in large dictionaries which have a common access pattern -where some keys are accessed much more frequently than others. The -more popular entries *and* their collision chains tend to remain in cache. -To exploit cache locality, change the collision resolution section -in lookdict() and lookdict_string(). Set i^=1 at the top of the -loop and move the i = (i << 2) + i + perturb + 1 to an unrolled -version of the loop. +These changes have the following consequences. + 1. General dictionaries are slightly larger. -This optimization strategy can be leveraged in several ways: + 2. All object dictionaries of a single class can share a single key-table, + saving about 60% memory for such cases. -* If the dictionary is kept sparse (through the tunable parameters), -then the occurrence of additional collisions is lessened. - -* If lookdict() and lookdict_string() are specialized for small dicts -and for largedicts, then the versions for large_dicts can be given -an alternate search strategy without increasing collisions in small dicts -which already have the maximum benefit of cache locality. - -* If the use case for a dictionary is known to have a random key -access pattern (as opposed to a more common pattern with a Zipf's law -distribution), then there will be more benefit for large dictionaries -because any given key is no more likely than another to already be -in cache. - -* In use cases with paired accesses to the same key, the second access -is always in cache and gets no benefit from efforts to further improve -cache locality. - -Optimizing the Search of Small Dictionaries -------------------------------------------- - -If lookdict() and lookdict_string() are specialized for smaller dictionaries, -then a custom search approach can be implemented that exploits the small -search space and cache locality. - -* The simplest example is a linear search of contiguous entries. This is - simple to implement, guaranteed to terminate rapidly, never searches - the same entry twice, and precludes the need to check for dummy entries. - -* A more advanced example is a self-organizing search so that the most - frequently accessed entries get probed first. The organization - adapts if the access pattern changes over time. Treaps are ideally - suited for self-organization with the most common entries at the - top of the heap and a rapid binary search pattern. Most probes and - results are all located at the top of the tree allowing them all to - be located in one or two cache lines. - -* Also, small dictionaries may be made more dense, perhaps filling all - eight cells to take the maximum advantage of two cache lines. - - -Strategy Pattern ----------------- - -Consider allowing the user to set the tunable parameters or to select a -particular search method. Since some dictionary use cases have known -sizes and access patterns, the user may be able to provide useful hints. - -1) For example, if membership testing or lookups dominate runtime and memory - is not at a premium, the user may benefit from setting the maximum load - ratio at 5% or 10% instead of the usual 66.7%. This will sharply - curtail the number of collisions but will increase iteration time. - The builtin namespace is a prime example of a dictionary that can - benefit from being highly sparse. - -2) Dictionary creation time can be shortened in cases where the ultimate - size of the dictionary is known in advance. The dictionary can be - pre-sized so that no resize operations are required during creation. - Not only does this save resizes, but the key insertion will go - more quickly because the first half of the keys will be inserted into - a more sparse environment than before. The preconditions for this - strategy arise whenever a dictionary is created from a key or item - sequence and the number of *unique* keys is known. - -3) If the key space is large and the access pattern is known to be random, - then search strategies exploiting cache locality can be fruitful. - The preconditions for this strategy arise in simulations and - numerical analysis. - -4) If the keys are fixed and the access pattern strongly favors some of - the keys, then the entries can be stored contiguously and accessed - with a linear search or treap. This exploits knowledge of the data, - cache locality, and a simplified search routine. It also eliminates - the need to test for dummy entries on each probe. The preconditions - for this strategy arise in symbol tables and in the builtin dictionary. - - -Readonly Dictionaries ---------------------- -Some dictionary use cases pass through a build stage and then move to a -more heavily exercised lookup stage with no further changes to the -dictionary. - -An idea that emerged on python-dev is to be able to convert a dictionary -to a read-only state. This can help prevent programming errors and also -provide knowledge that can be exploited for lookup optimization. - -The dictionary can be immediately rebuilt (eliminating dummy entries), -resized (to an appropriate level of sparseness), and the keys can be -jostled (to minimize collisions). The lookdict() routine can then -eliminate the test for dummy entries (saving about 1/4 of the time -spent in the collision resolution loop). - -An additional possibility is to insert links into the empty spaces -so that dictionary iteration can proceed in len(d) steps instead of -(mp->mask + 1) steps. Alternatively, a separate tuple of keys can be -kept just for iteration. - - -Caching Lookups ---------------- -The idea is to exploit key access patterns by anticipating future lookups -based on previous lookups. - -The simplest incarnation is to save the most recently accessed entry. -This gives optimal performance for use cases where every get is followed -by a set or del to the same key. diff -r 58bd6a58365d -r bc286099ce9a Objects/dictobject.c --- a/Objects/dictobject.c Wed Feb 08 04:09:37 2012 +0100 +++ b/Objects/dictobject.c Wed Feb 08 16:33:32 2012 +0000 @@ -10,7 +10,6 @@ #include "Python.h" #include "stringlib/eq.h" - /* Set a key error with the specified argument, wrapping it in a * tuple automatically so that tuple keys are not unpacked as the * exception arguments. */ @@ -25,10 +24,6 @@ Py_DECREF(tup); } -/* Define this out if you don't want conversion statistics on exit. */ -#undef SHOW_CONVERSION_COUNTS - -/* See large comment block below. This must be >= 1. */ #define PERTURB_SHIFT 5 /* @@ -126,8 +121,13 @@ */ -/* Object used as dummy key to fill deleted entries */ -static PyObject *dummy = NULL; /* Initialized by first call to newPyDictObject() */ +/* Object used as dummy key to fill deleted entries + * This could be any unique object, + * use a custom type in order to minimise coupling. +*/ +static PyObject _dummy_struct; + +#define dummy (&_dummy_struct) #ifdef Py_REF_DEBUG PyObject * @@ -138,77 +138,18 @@ #endif /* forward declarations */ -static PyDictEntry * -lookdict_unicode(PyDictObject *mp, PyObject *key, Py_hash_t hash); - -#ifdef SHOW_CONVERSION_COUNTS -static long created = 0L; -static long converted = 0L; - -static void -show_counts(void) -{ - fprintf(stderr, "created %ld string dicts\n", created); - fprintf(stderr, "converted %ld to normal dicts\n", converted); - fprintf(stderr, "%.2f%% conversion rate\n", (100.0*converted)/created); -} -#endif - -/* Debug statistic to compare allocations with reuse through the free list */ -#undef SHOW_ALLOC_COUNT -#ifdef SHOW_ALLOC_COUNT -static size_t count_alloc = 0; -static size_t count_reuse = 0; - -static void -show_alloc(void) -{ - fprintf(stderr, "Dict allocations: %" PY_FORMAT_SIZE_T "d\n", - count_alloc); - fprintf(stderr, "Dict reuse through freelist: %" PY_FORMAT_SIZE_T - "d\n", count_reuse); - fprintf(stderr, "%.2f%% reuse rate\n\n", - (100.0*count_reuse/(count_alloc+count_reuse))); -} -#endif - -/* Debug statistic to count GC tracking of dicts */ -#ifdef SHOW_TRACK_COUNT -static Py_ssize_t count_untracked = 0; -static Py_ssize_t count_tracked = 0; - -static void -show_track(void) -{ - fprintf(stderr, "Dicts created: %" PY_FORMAT_SIZE_T "d\n", - count_tracked + count_untracked); - fprintf(stderr, "Dicts tracked by the GC: %" PY_FORMAT_SIZE_T - "d\n", count_tracked); - fprintf(stderr, "%.2f%% dict tracking rate\n\n", - (100.0*count_tracked/(count_untracked+count_tracked))); -} -#endif - - -/* Initialization macros. - There are two ways to create a dict: PyDict_New() is the main C API - function, and the tp_new slot maps to dict_new(). In the latter case we - can save a little time over what PyDict_New does because it's guaranteed - that the PyDictObject struct is already zeroed out. - Everyone except dict_new() should use EMPTY_TO_MINSIZE (unless they have - an excellent reason not to). -*/ - -#define INIT_NONZERO_DICT_SLOTS(mp) do { \ - (mp)->ma_table = (mp)->ma_smalltable; \ - (mp)->ma_mask = PyDict_MINSIZE - 1; \ - } while(0) - -#define EMPTY_TO_MINSIZE(mp) do { \ - memset((mp)->ma_smalltable, 0, sizeof((mp)->ma_smalltable)); \ - (mp)->ma_used = (mp)->ma_fill = 0; \ - INIT_NONZERO_DICT_SLOTS(mp); \ - } while(0) +static PyDictKeyEntry *lookdict(PyDictObject *mp, PyObject *key, + Py_hash_t hash, PyObject ***value_addr); +static PyDictKeyEntry *lookdict_unicode(PyDictObject *mp, PyObject *key, + Py_hash_t hash, PyObject ***value_addr); +static PyDictKeyEntry * +lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key, + Py_hash_t hash, PyObject ***value_addr); +static PyDictKeyEntry *lookdict_split(PyDictObject *mp, PyObject *key, + Py_hash_t hash, PyObject ***value_addr); + +static int +dictresize(PyDictObject *mp, Py_ssize_t minused); /* Dictionary reuse scheme to save calls to malloc, free, and memset */ #ifndef PyDict_MAXFREELIST @@ -236,61 +177,144 @@ PyDict_ClearFreeList(); } -PyObject * -PyDict_New(void) +#define DK_INCREF(dk) (++(dk)->dk_refcnt) +#define DK_DECREF(dk) if ((--(dk)->dk_refcnt) == 0) free_keys_object(dk) +#define DK_SIZE(dk) ((dk)->dk_size) +#define IS_POWER_OF_2(x) (((x) & (x-1)) == 0) + +/* USABLE_FRACTION must obey the following: + * USABLE_FRACTION(n) < n for n >= PyDict_MINSIZE + * USABLE_FRACTION(n) > 0 for n >= PyDict_MINSIZE + * 2n/3, (2n+1)/3 and n/2+n/4-n/8 (f(4)==3 otherwise f(n) == 5n/8) + * seem to work well in practice. + */ +/* Use (2n+1)/3 rather than 2n+3 because: it makes no difference for + * combined tables (the two fractions round to the same number n < ), + * but 2*4/3 is 2 whereas (2*4+1)/3 is 3 which potentially saves quite + * a lot of space for small, split tables */ +#define USABLE_FRACTION(n) ((((n) << 1)+1)/3) + +/* Alternative fraction that is otherwise close enough to (2n+1)/3 to make + * little difference. 8 * 2/3 == 8 * 5/8 == 5. 16 * 2/3 == 16 * 5/8 == 10. + * 32 * 2/3 = 21, 32 * 5/8 = 20. + * Its advantage is that it is faster to compute on machines with slow division. + * #define USABLE_FRACTION(n) (((n) >> 1) + ((n) >> 2) - ((n) >> 3)) +*/ + +#define ENSURE_DELETEABLE(d) \ + if ((d)->ma_keys->dk_lookup == lookdict_unicode_nodummy) { \ + (d)->ma_keys->dk_lookup = lookdict_unicode; \ + } + +static PyDictKeysObject *new_keys_object(Py_ssize_t size) { + PyDictKeysObject *dk; + Py_ssize_t i; + PyDictKeyEntry *ep0; + + assert(size >= PyDict_MINSIZE); + assert(IS_POWER_OF_2(size)); + dk = PyMem_Malloc(sizeof(PyDictKeysObject) + + sizeof(PyDictKeyEntry) * (size-1)); + if (dk == NULL) { + PyErr_NoMemory(); + return NULL; + } + dk->dk_refcnt = 1; + dk->dk_size = size; + dk->dk_free = USABLE_FRACTION(size); + ep0 = &dk->dk_entries[0]; + for (i = 0; i < size; i++) { + ep0[i].me_key = NULL; + ep0[i]._me_value = NULL; + } + dk->dk_lookup = lookdict_unicode_nodummy; + return dk; +} + +PyDictKeysObject * +_PyDict_NewKeysForClass(void) { - register PyDictObject *mp; - if (dummy == NULL) { /* Auto-initialize dummy */ - dummy = PyUnicode_FromString(""); - if (dummy == NULL) - return NULL; -#ifdef SHOW_CONVERSION_COUNTS - Py_AtExit(show_counts); -#endif -#ifdef SHOW_ALLOC_COUNT - Py_AtExit(show_alloc); -#endif -#ifdef SHOW_TRACK_COUNT - Py_AtExit(show_track); -#endif + PyDictKeysObject *keys = new_keys_object(4); + if (keys != NULL) + keys->dk_lookup = lookdict_split; + return keys; +} + +static void +free_keys_object(PyDictKeysObject *keys) +{ + PyDictKeyEntry *entries = &keys->dk_entries[0]; + Py_ssize_t i, n; + for (i = 0, n = DK_SIZE(keys); i < n; i++) { + Py_XDECREF(entries[i].me_key); + Py_XDECREF(entries[i]._me_value); } + PyMem_DEL(keys); +} + +/* Consumes a reference to the keys object */ +static PyObject * +new_dict(PyDictKeysObject *keys, PyObject **values) +{ + PyDictObject *mp; if (numfree) { mp = free_list[--numfree]; assert (mp != NULL); assert (Py_TYPE(mp) == &PyDict_Type); _Py_NewReference((PyObject *)mp); - if (mp->ma_fill) { - EMPTY_TO_MINSIZE(mp); - } else { - /* At least set ma_table and ma_mask; these are wrong - if an empty but presized dict is added to freelist */ - INIT_NONZERO_DICT_SLOTS(mp); - } - assert (mp->ma_used == 0); - assert (mp->ma_table == mp->ma_smalltable); - assert (mp->ma_mask == PyDict_MINSIZE - 1); -#ifdef SHOW_ALLOC_COUNT - count_reuse++; -#endif - } else { + } + else { mp = PyObject_GC_New(PyDictObject, &PyDict_Type); if (mp == NULL) return NULL; - EMPTY_TO_MINSIZE(mp); -#ifdef SHOW_ALLOC_COUNT - count_alloc++; -#endif + } - mp->ma_lookup = lookdict_unicode; -#ifdef SHOW_TRACK_COUNT - count_untracked++; -#endif -#ifdef SHOW_CONVERSION_COUNTS - ++created; -#endif + mp->ma_keys = keys; + mp->ma_values = (void *)values; + mp->ma_used = 0; return (PyObject *)mp; } +#define new_values(size) PyMem_New(PyObject *, size) + +#define free_values(values) PyMem_DEL(values) + +/* Consumes a reference to the keys object */ +static PyObject * +new_dict_with_shared_keys(PyDictKeysObject *keys) { + PyObject **values; + Py_ssize_t i, size; + + size = DK_SIZE(keys); + values = new_values(size); + if (values == NULL) { + return PyErr_NoMemory(); + } + for (i = 0; i < size; i++) { + values[i] = NULL; + } + return new_dict(keys, values); +} + +PyObject * +PyDict_New(void) { + return new_dict(new_keys_object(8), NULL); +} + +#define CACHED_KEYS(tp) (((PyHeapTypeObject*)tp)->ht_cached_keys) + +PyObject * +PyDict_NewForInstance(PyTypeObject *tp) { + if ((tp->tp_flags & Py_TPFLAGS_HEAPTYPE) && CACHED_KEYS(tp)) { + DK_INCREF(CACHED_KEYS(tp)); + return new_dict_with_shared_keys(CACHED_KEYS(tp)); + } else { + return PyDict_New(); + } +} + + + /* The basic lookup function used by all operations. This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4. @@ -309,29 +333,29 @@ lookdict() is general-purpose, and may return NULL if (and only if) a comparison raises an exception (this was new in Python 2.5). lookdict_unicode() below is specialized to string keys, comparison of which can -never raise an exception; that function can never return NULL. For both, when -the key isn't found a PyDictEntry* is returned for which the me_value field is -NULL; this is the slot in the dict at which the key would have been found, and -the caller can (if it wishes) add the pair to the returned -PyDictEntry*. +never raise an exception; that function can never return NULL. +lookdict_unicode_nodummy is further specialized for string keys that cannot be +the value. */ -static PyDictEntry * -lookdict(PyDictObject *mp, PyObject *key, register Py_hash_t hash) +static PyDictKeyEntry * +lookdict(PyDictObject *mp, PyObject *key, + Py_hash_t hash, PyObject ***value_addr) { register size_t i; register size_t perturb; - register PyDictEntry *freeslot; - register size_t mask = (size_t)mp->ma_mask; - PyDictEntry *ep0 = mp->ma_table; - register PyDictEntry *ep; + register PyDictKeyEntry *freeslot; + register size_t mask = (size_t)mp->ma_keys->dk_size-1; + PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0]; + register PyDictKeyEntry *ep; register int cmp; PyObject *startkey; i = (size_t)hash & mask; ep = &ep0[i]; - if (ep->me_key == NULL || ep->me_key == key) + if (ep->me_key == NULL || ep->me_key == key) { + *value_addr = &ep->_me_value; return ep; - + } if (ep->me_key == dummy) freeslot = ep; else { @@ -342,9 +366,11 @@ Py_DECREF(startkey); if (cmp < 0) return NULL; - if (ep0 == mp->ma_table && ep->me_key == startkey) { - if (cmp > 0) + if (ep0 == mp->ma_keys->dk_entries && ep->me_key == startkey) { + if (cmp > 0) { + *value_addr = &ep->_me_value; return ep; + } } else { /* The compare did major nasty stuff to the @@ -352,7 +378,7 @@ * XXX A clever adversary could prevent this * XXX from terminating. */ - return lookdict(mp, key, hash); + return lookdict(mp, key, hash, value_addr); } } freeslot = NULL; @@ -363,20 +389,33 @@ for (perturb = hash; ; perturb >>= PERTURB_SHIFT) { i = (i << 2) + i + perturb + 1; ep = &ep0[i & mask]; - if (ep->me_key == NULL) - return freeslot == NULL ? ep : freeslot; - if (ep->me_key == key) + if (ep->me_key == NULL) { + if (freeslot == NULL) { + *value_addr = &ep->_me_value; + return ep; + } else { + *value_addr = &freeslot->_me_value; + return freeslot; + } + } + if (ep->me_key == key) { + *value_addr = &ep->_me_value; return ep; + } if (ep->me_hash == hash && ep->me_key != dummy) { startkey = ep->me_key; Py_INCREF(startkey); cmp = PyObject_RichCompareBool(startkey, key, Py_EQ); Py_DECREF(startkey); - if (cmp < 0) + if (cmp < 0) { + *value_addr = NULL; return NULL; - if (ep0 == mp->ma_table && ep->me_key == startkey) { - if (cmp > 0) + } + if (ep0 == mp->ma_keys->dk_entries && ep->me_key == startkey) { + if (cmp > 0) { + *value_addr = &ep->_me_value; return ep; + } } else { /* The compare did major nasty stuff to the @@ -384,7 +423,7 @@ * XXX A clever adversary could prevent this * XXX from terminating. */ - return lookdict(mp, key, hash); + return lookdict(mp, key, hash, value_addr); } } else if (ep->me_key == dummy && freeslot == NULL) @@ -394,46 +433,39 @@ return 0; } -/* - * Hacked up version of lookdict which can assume keys are always - * unicodes; this assumption allows testing for errors during - * PyObject_RichCompareBool() to be dropped; unicode-unicode - * comparisons never raise exceptions. This also means we don't need - * to go through PyObject_RichCompareBool(); we can always use - * unicode_eq() directly. - * - * This is valuable because dicts with only unicode keys are very common. - */ -static PyDictEntry * -lookdict_unicode(PyDictObject *mp, PyObject *key, register Py_hash_t hash) +/* Specialized version for string-only keys */ +static PyDictKeyEntry * +lookdict_unicode(PyDictObject *mp, PyObject *key, + Py_hash_t hash, PyObject ***value_addr) { register size_t i; register size_t perturb; - register PyDictEntry *freeslot; - register size_t mask = (size_t)mp->ma_mask; - PyDictEntry *ep0 = mp->ma_table; - register PyDictEntry *ep; + register PyDictKeyEntry *freeslot; + register size_t mask = (size_t)mp->ma_keys->dk_size-1; + PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0]; + register PyDictKeyEntry *ep; /* Make sure this function doesn't have to handle non-unicode keys, including subclasses of str; e.g., one reason to subclass unicodes is to override __eq__, and for speed we don't cater to that here. */ if (!PyUnicode_CheckExact(key)) { -#ifdef SHOW_CONVERSION_COUNTS - ++converted; -#endif - mp->ma_lookup = lookdict; - return lookdict(mp, key, hash); + mp->ma_keys->dk_lookup = lookdict; + return lookdict(mp, key, hash, value_addr); } i = (size_t)hash & mask; ep = &ep0[i]; - if (ep->me_key == NULL || ep->me_key == key) + if (ep->me_key == NULL || ep->me_key == key) { + *value_addr = &ep->_me_value; return ep; + } if (ep->me_key == dummy) freeslot = ep; else { - if (ep->me_hash == hash && unicode_eq(ep->me_key, key)) + if (ep->me_hash == hash && unicode_eq(ep->me_key, key)) { + *value_addr = &ep->_me_value; return ep; + } freeslot = NULL; } @@ -442,13 +474,22 @@ for (perturb = hash; ; perturb >>= PERTURB_SHIFT) { i = (i << 2) + i + perturb + 1; ep = &ep0[i & mask]; - if (ep->me_key == NULL) - return freeslot == NULL ? ep : freeslot; + if (ep->me_key == NULL) { + if (freeslot == NULL) { + *value_addr = &ep->_me_value; + return ep; + } else { + *value_addr = &freeslot->_me_value; + return freeslot; + } + } if (ep->me_key == key || (ep->me_hash == hash && ep->me_key != dummy - && unicode_eq(ep->me_key, key))) + && unicode_eq(ep->me_key, key))) { + *value_addr = &ep->_me_value; return ep; + } if (ep->me_key == dummy && freeslot == NULL) freeslot = ep; } @@ -456,6 +497,96 @@ return 0; } +/* Faster version of lookdict_unicode when it is known that no keys + * will be present. */ +static PyDictKeyEntry * +lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key, + Py_hash_t hash, PyObject ***value_addr) +{ + register size_t i; + register size_t perturb; + register size_t mask = (size_t)mp->ma_keys->dk_size-1; + PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0]; + register PyDictKeyEntry *ep; + + /* Make sure this function doesn't have to handle non-unicode keys, + including subclasses of str; e.g., one reason to subclass + unicodes is to override __eq__, and for speed we don't cater to + that here. */ + if (!PyUnicode_CheckExact(key)) { + mp->ma_keys->dk_lookup = lookdict; + return lookdict(mp, key, hash, value_addr); + } + i = (size_t)hash & mask; + ep = &ep0[i]; + assert(ep->me_key == NULL || PyUnicode_CheckExact(ep->me_key)); + if (ep->me_key == NULL || ep->me_key == key) { + *value_addr = &ep->_me_value; + return ep; + } + if (ep->me_hash == hash && unicode_eq(ep->me_key, key)) { + *value_addr = &ep->_me_value; + return ep; + } + for (perturb = hash; ; perturb >>= PERTURB_SHIFT) { + i = (i << 2) + i + perturb + 1; + ep = &ep0[i & mask]; + assert(ep->me_key == NULL || PyUnicode_CheckExact(ep->me_key)); + if (ep->me_key == NULL || ep->me_key == key) { + *value_addr = &ep->_me_value; + return ep; + } + if (ep->me_hash == hash && unicode_eq(ep->me_key, key)) { + *value_addr = &ep->_me_value; + return ep; + } + } + assert(0); /* NOT REACHED */ + return 0; +} + +/* Version of lookdict for split tables. */ +static PyDictKeyEntry * +lookdict_split(PyDictObject *mp, PyObject *key, + Py_hash_t hash, PyObject ***value_addr) +{ + register size_t i; + register size_t perturb; + register size_t mask = (size_t)mp->ma_keys->dk_size-1; + PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0]; + register PyDictKeyEntry *ep; + + if (!PyUnicode_CheckExact(key)) { + return lookdict(mp, key, hash, value_addr); + } + i = (size_t)hash & mask; + ep = &ep0[i]; + assert(ep->me_key == NULL || PyUnicode_CheckExact(ep->me_key)); + if (ep->me_key == NULL || ep->me_key == key) { + *value_addr = &mp->ma_values[i]; + return ep; + } + if (ep->me_hash == hash && unicode_eq(ep->me_key, key)) { + *value_addr = &mp->ma_values[i]; + return ep; + } + for (perturb = hash; ; perturb >>= PERTURB_SHIFT) { + i = (i << 2) + i + perturb + 1; + ep = &ep0[i & mask]; + assert(ep->me_key == NULL || PyUnicode_CheckExact(ep->me_key)); + if (ep->me_key == NULL || ep->me_key == key) { + *value_addr = &mp->ma_values[i & mask]; + return ep; + } + if (ep->me_hash == hash && unicode_eq(ep->me_key, key)) { + *value_addr = &mp->ma_values[i & mask]; + return ep; + } + } + assert(0); /* NOT REACHED */ + return 0; +} + int _PyDict_HasOnlyStringKeys(PyObject *dict) { @@ -463,7 +594,7 @@ PyObject *key, *value; assert(PyDict_Check(dict)); /* Shortcut */ - if (((PyDictObject *)dict)->ma_lookup == lookdict_unicode) + if (((PyDictObject *)dict)->ma_keys->dk_lookup != lookdict) return 1; while (PyDict_Next(dict, &pos, &key, &value)) if (!PyUnicode_Check(key)) @@ -471,15 +602,6 @@ return 1; } -#ifdef SHOW_TRACK_COUNT -#define INCREASE_TRACK_COUNT \ - (count_tracked++, count_untracked--); -#define DECREASE_TRACK_COUNT \ - (count_tracked--, count_untracked++); -#else -#define INCREASE_TRACK_COUNT -#define DECREASE_TRACK_COUNT -#endif #define MAINTAIN_TRACKING(mp, key, value) \ do { \ @@ -487,7 +609,6 @@ if (_PyObject_GC_MAY_BE_TRACKED(key) || \ _PyObject_GC_MAY_BE_TRACKED(value)) { \ _PyObject_GC_TRACK(mp); \ - INCREASE_TRACK_COUNT \ } \ } \ } while(0) @@ -497,26 +618,42 @@ { PyDictObject *mp; PyObject *value; - Py_ssize_t mask, i; - PyDictEntry *ep; + Py_ssize_t i, size; if (!PyDict_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op)) return; mp = (PyDictObject *) op; - ep = mp->ma_table; - mask = mp->ma_mask; - for (i = 0; i <= mask; i++) { - if ((value = ep[i].me_value) == NULL) - continue; - if (_PyObject_GC_MAY_BE_TRACKED(value) || - _PyObject_GC_MAY_BE_TRACKED(ep[i].me_key)) - return; + size = DK_SIZE(mp->ma_keys); + if (_PyDict_HasSplitTable(mp)) { + for (i = 0; i < size; i++) { + if ((value = mp->ma_values[i]) == NULL) + continue; + if (_PyObject_GC_MAY_BE_TRACKED(value)) { + assert(!_PyObject_GC_MAY_BE_TRACKED(mp->ma_keys->dk_entries[i].me_key)); + return; + } + } + } else { + PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0]; + for (i = 0; i < size; i++) { + if ((value = ep0[i]._me_value) == NULL) + continue; + if (_PyObject_GC_MAY_BE_TRACKED(value) || + _PyObject_GC_MAY_BE_TRACKED(ep0[i].me_key)) + return; + } } - DECREASE_TRACK_COUNT _PyObject_GC_UNTRACK(op); } +static int +insertion_resize(PyDictObject *mp) +{ + if (_PyDict_HasSplitTable(mp) || mp->ma_used > 50000) + return dictresize(mp, mp->ma_used * 2); + return dictresize(mp, mp->ma_used * 4); +} /* Internal routine to insert a new item into the table. @@ -525,38 +662,63 @@ Returns -1 if an error occurred, or 0 on success. */ static int -insertdict(register PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value) +insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value) { PyObject *old_value; - register PyDictEntry *ep; - typedef PyDictEntry *(*lookupfunc)(PyDictObject *, PyObject *, Py_hash_t); - - assert(mp->ma_lookup != NULL); - ep = mp->ma_lookup(mp, key, hash); + PyObject **value_addr; + PyDictKeyEntry *ep; + assert(key != dummy); + + if (mp->ma_values != NULL) { + if (!PyUnicode_CheckExact(key)) + if (insertion_resize(mp) < 0) + return -1; + } + + ep = mp->ma_keys->dk_lookup(mp, key, hash, &value_addr); if (ep == NULL) { Py_DECREF(key); Py_DECREF(value); return -1; } MAINTAIN_TRACKING(mp, key, value); - if (ep->me_value != NULL) { - old_value = ep->me_value; - ep->me_value = value; + old_value = *value_addr; + if (old_value != NULL) { + assert(ep->me_key != NULL && ep->me_key != dummy); + *value_addr = value; Py_DECREF(old_value); /* which **CAN** re-enter */ Py_DECREF(key); + } else { + if (ep->me_key == NULL) { + if (mp->ma_keys->dk_free <= 0) { + /* Need to resize. */ + if (insertion_resize(mp) < 0) + return -1; + ep = mp->ma_keys->dk_lookup(mp, key, hash, &value_addr); + if (ep == NULL) { + Py_DECREF(key); + Py_DECREF(value); + return -1; + } + } + mp->ma_keys->dk_free--; + assert(mp->ma_keys->dk_free >= 0); + ep->me_key = key; + ep->me_hash = hash; + } else { + if (ep->me_key == dummy) { + ep->me_key = key; + ep->me_hash = hash; + Py_DECREF(dummy); + } else { + Py_DECREF(key); + assert(_PyDict_HasSplitTable(mp)); + } + } + mp->ma_used++; + *value_addr = value; } - else { - if (ep->me_key == NULL) - mp->ma_fill++; - else { - assert(ep->me_key == dummy); - Py_DECREF(dummy); - } - ep->me_key = key; - ep->me_hash = hash; - ep->me_value = value; - mp->ma_used++; - } + assert(ep->me_key != NULL && ep->me_key != dummy); return 0; } @@ -567,30 +729,39 @@ using insertdict() in dictresize() is dangerous (SF bug #1456209). Note that no refcounts are changed by this routine; if needed, the caller is responsible for incref'ing `key` and `value`. +Neither mp->ma_used nor k->dk_free are modified by this routine; the caller +must set them correctly */ static void -insertdict_clean(register PyDictObject *mp, PyObject *key, Py_hash_t hash, +insertdict_clean(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value) { - register size_t i; - register size_t perturb; - register size_t mask = (size_t)mp->ma_mask; - PyDictEntry *ep0 = mp->ma_table; - register PyDictEntry *ep; - + size_t i; + size_t perturb; + PyDictKeysObject *k = mp->ma_keys; + size_t mask = (size_t)DK_SIZE(k)-1; + PyDictKeyEntry *ep0 = &k->dk_entries[0]; + PyDictKeyEntry *ep; + + assert(k->dk_lookup != NULL); + assert(value != NULL); + assert(key != NULL); + assert(key != dummy); + if (!PyUnicode_CheckExact(key)) { + assert(mp->ma_values == NULL); + k->dk_lookup = lookdict; + } MAINTAIN_TRACKING(mp, key, value); - i = (size_t)hash & mask; + i = hash & mask; ep = &ep0[i]; for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) { i = (i << 2) + i + perturb + 1; ep = &ep0[i & mask]; } - assert(ep->me_value == NULL); - mp->ma_fill++; + assert(ep->_me_value == NULL); ep->me_key = key; ep->me_hash = hash; - ep->me_value = value; - mp->ma_used++; + ep->_me_value = value; } /* @@ -602,14 +773,11 @@ dictresize(PyDictObject *mp, Py_ssize_t minused) { Py_ssize_t newsize; - PyDictEntry *oldtable, *newtable, *ep; - Py_ssize_t i; - int is_oldtable_malloced; - PyDictEntry small_copy[PyDict_MINSIZE]; - - assert(minused >= 0); - - /* Find the smallest table size > minused. */ + PyDictKeysObject *oldkeys; + PyObject **oldvalues; + Py_ssize_t i, size; + +/* Find the smallest table size > minused. */ for (newsize = PyDict_MINSIZE; newsize <= minused && newsize > 0; newsize <<= 1) @@ -618,72 +786,92 @@ PyErr_NoMemory(); return -1; } - - /* Get space for a new table. */ - oldtable = mp->ma_table; - assert(oldtable != NULL); - is_oldtable_malloced = oldtable != mp->ma_smalltable; - - if (newsize == PyDict_MINSIZE) { - /* A large table is shrinking, or we can't get any smaller. */ - newtable = mp->ma_smalltable; - if (newtable == oldtable) { - if (mp->ma_fill == mp->ma_used) { - /* No dummies, so no point doing anything. */ - return 0; + oldkeys = mp->ma_keys; + oldvalues = mp->ma_values; + /* Logic below assumes we can transfer refcount to new keys + * and that value is stored in _me_value. + * Increment ref-counts and copy values here to compensate + * This (resizing a split table) should be relatively rare */ + size = DK_SIZE(oldkeys); + if (oldvalues != NULL) { + for (i = 0; i < size; i++) + if (oldvalues[i] != NULL) { + Py_INCREF(oldkeys->dk_entries[i].me_key); + oldkeys->dk_entries[i]._me_value = oldvalues[i]; } - /* We're not going to resize it, but rebuild the - table anyway to purge old dummy entries. - Subtle: This is *necessary* if fill==size, - as lookdict needs at least one virgin slot to - terminate failing searches. If fill < size, it's - merely desirable, as dummies slow searches. */ - assert(mp->ma_fill > mp->ma_used); - memcpy(small_copy, oldtable, sizeof(small_copy)); - oldtable = small_copy; + } + /* Allocate a new table. */ + mp->ma_keys = new_keys_object(newsize); + if (mp->ma_keys == NULL) { + mp->ma_keys = oldkeys; + return -1; + } + mp->ma_values = NULL; + for (i = 0; i < size; i++) { + PyDictKeyEntry *ep = &oldkeys->dk_entries[i]; + if (ep->_me_value != NULL) { + assert(ep->me_key != dummy); + insertdict_clean(mp, ep->me_key, ep->me_hash, ep->_me_value); } } + mp->ma_keys->dk_free -= mp->ma_used; + if (oldvalues != NULL) { + for (i = 0; i < size; i++) + oldkeys->dk_entries[i]._me_value = NULL; + free_values(oldvalues); + DK_DECREF(oldkeys); + } else { - newtable = PyMem_NEW(PyDictEntry, newsize); - if (newtable == NULL) { - PyErr_NoMemory(); - return -1; + assert(oldkeys->dk_lookup != lookdict_split); + if (oldkeys->dk_lookup != lookdict_unicode_nodummy) { + PyDictKeyEntry *ep0 = &oldkeys->dk_entries[0]; + for (i = 0; i < size; i++) { + if (ep0[i].me_key == dummy) + Py_DECREF(dummy); + } } + assert(oldkeys->dk_refcnt == 1); + PyMem_DEL(oldkeys); } - - /* Make the dict empty, using the new table. */ - assert(newtable != oldtable); - mp->ma_table = newtable; - mp->ma_mask = newsize - 1; - memset(newtable, 0, sizeof(PyDictEntry) * newsize); - mp->ma_used = 0; - i = mp->ma_fill; - mp->ma_fill = 0; - - /* Copy the data over; this is refcount-neutral for active entries; - dummy entries aren't copied over, of course */ - for (ep = oldtable; i > 0; ep++) { - if (ep->me_value != NULL) { /* active entry */ - --i; - insertdict_clean(mp, ep->me_key, ep->me_hash, ep->me_value); - } - else if (ep->me_key != NULL) { /* dummy entry */ - --i; - assert(ep->me_key == dummy); - Py_DECREF(ep->me_key); - } - /* else key == value == NULL: nothing to do */ - } - - if (is_oldtable_malloced) - PyMem_DEL(oldtable); return 0; } -/* Create a new dictionary pre-sized to hold an estimated number of elements. - Underestimates are okay because the dictionary will resize as necessary. - Overestimates just mean the dictionary will be more sparse than usual. -*/ +static PyDictKeysObject * +make_keys_shared(PyObject *op) { + Py_ssize_t i; + Py_ssize_t size; + PyDictObject *mp = (PyDictObject *)op; + + assert(PyDict_CheckExact(op)); + mp = (PyDictObject *)op; + if (!_PyDict_HasSplitTable(mp)) { + PyDictKeyEntry *ep0; + PyObject **values; + assert(mp->ma_keys->dk_refcnt == 1); + if (mp->ma_keys->dk_lookup == lookdict) { + return NULL; + } else if (mp->ma_keys->dk_lookup == lookdict_unicode) { + /* Remove dummy keys */ + if (dictresize(mp, DK_SIZE(mp->ma_keys))) + return NULL; + } + assert(mp->ma_keys->dk_lookup == lookdict_unicode_nodummy); + /* Copy values into a new array */ + ep0 = &mp->ma_keys->dk_entries[0]; + values = new_values(DK_SIZE(mp->ma_keys)); + if (values == NULL) + return NULL; + size = DK_SIZE(mp->ma_keys); + for (i = 0; i < size; i++) { + values[i] = ep0[i]._me_value; + ep0[i]._me_value = NULL; + } + mp->ma_keys->dk_lookup = lookdict_split; + mp->ma_values = values; + } + DK_INCREF(mp->ma_keys); + return mp->ma_keys; +} PyObject * _PyDict_NewPresized(Py_ssize_t minused) @@ -712,8 +900,10 @@ { Py_hash_t hash; PyDictObject *mp = (PyDictObject *)op; - PyDictEntry *ep; + PyDictKeyEntry *ep; PyThreadState *tstate; + PyObject **value_addr; + if (!PyDict_Check(op)) return NULL; if (!PyUnicode_CheckExact(key) || @@ -737,20 +927,20 @@ /* preserve the existing exception */ PyObject *err_type, *err_value, *err_tb; PyErr_Fetch(&err_type, &err_value, &err_tb); - ep = (mp->ma_lookup)(mp, key, hash); + ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr); /* ignore errors */ PyErr_Restore(err_type, err_value, err_tb); if (ep == NULL) return NULL; } else { - ep = (mp->ma_lookup)(mp, key, hash); + ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr); if (ep == NULL) { PyErr_Clear(); return NULL; } } - return ep->me_value; + return *value_addr; } /* Variant of PyDict_GetItem() that doesn't suppress exceptions. @@ -762,7 +952,8 @@ { Py_hash_t hash; PyDictObject*mp = (PyDictObject *)op; - PyDictEntry *ep; + PyDictKeyEntry *ep; + PyObject **value_addr; if (!PyDict_Check(op)) { PyErr_BadInternalCall(); @@ -777,10 +968,10 @@ } } - ep = (mp->ma_lookup)(mp, key, hash); + ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr); if (ep == NULL) return NULL; - return ep->me_value; + return *value_addr; } /* CAUTION: PyDict_SetItem() must guarantee that it won't resize the @@ -790,12 +981,10 @@ * remove them. */ int -PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value) +PyDict_SetItem(PyObject *op, PyObject *key, PyObject *value) { - register PyDictObject *mp; - register Py_hash_t hash; - register Py_ssize_t n_used; - + PyDictObject *mp; + Py_hash_t hash; if (!PyDict_Check(op)) { PyErr_BadInternalCall(); return -1; @@ -810,38 +999,21 @@ if (hash == -1) return -1; } - assert(mp->ma_fill <= mp->ma_mask); /* at least one empty slot */ - n_used = mp->ma_used; Py_INCREF(value); Py_INCREF(key); - if (insertdict(mp, key, hash, value) != 0) - return -1; - /* If we added a key, we can safely resize. Otherwise just return! - * If fill >= 2/3 size, adjust size. Normally, this doubles or - * quaduples the size, but it's also possible for the dict to shrink - * (if ma_fill is much larger than ma_used, meaning a lot of dict - * keys have been * deleted). - * - * Quadrupling the size improves average dictionary sparseness - * (reducing collisions) at the cost of some memory and iteration - * speed (which loops over every possible entry). It also halves - * the number of expensive resize operations in a growing dictionary. - * - * Very large dictionaries (over 50K items) use doubling instead. - * This may help applications with severe memory constraints. - */ - if (!(mp->ma_used > n_used && mp->ma_fill*3 >= (mp->ma_mask+1)*2)) - return 0; - return dictresize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used); + + /* insertdict() handles any resizing that might be necessary */ + return insertdict(mp, key, hash, value); } int PyDict_DelItem(PyObject *op, PyObject *key) { - register PyDictObject *mp; - register Py_hash_t hash; - register PyDictEntry *ep; - PyObject *old_value, *old_key; + PyDictObject *mp; + Py_hash_t hash; + PyDictKeyEntry *ep; + PyObject *old_key, *old_value; + PyObject **value_addr; if (!PyDict_Check(op)) { PyErr_BadInternalCall(); @@ -855,91 +1027,97 @@ return -1; } mp = (PyDictObject *)op; - ep = (mp->ma_lookup)(mp, key, hash); + ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr); if (ep == NULL) return -1; - if (ep->me_value == NULL) { + if (*value_addr == NULL) { set_key_error(key); return -1; } - old_key = ep->me_key; - Py_INCREF(dummy); - ep->me_key = dummy; - old_value = ep->me_value; - ep->me_value = NULL; + old_value = *value_addr; + *value_addr = NULL; mp->ma_used--; + if (!_PyDict_HasSplitTable(mp)) { + ENSURE_DELETEABLE(mp); + old_key = ep->me_key; + Py_INCREF(dummy); + ep->me_key = dummy; + Py_DECREF(old_key); + } Py_DECREF(old_value); - Py_DECREF(old_key); return 0; } +/* This immutable, empty PyDictKeysObject is used for PyDict_Clear() + * (which cannot fail and thus can do no allocation). + */ +static PyDictKeysObject empty_keys_struct = { + 2, /* dk_refcnt 1 for this struct, 1 for dummy_struct */ + 1, /* dk_size */ + lookdict_unicode_nodummy, /* dk_lookup */ + 0, /* dk_free (immutable) */ + { + { 0, 0, 0 } /* dk_entries (empty) */ + } +}; + +#define Py_EMPTY_KEYS &empty_keys_struct + void PyDict_Clear(PyObject *op) { PyDictObject *mp; - PyDictEntry *ep, *table; - int table_is_malloced; - Py_ssize_t fill; - PyDictEntry small_copy[PyDict_MINSIZE]; -#ifdef Py_DEBUG + PyDictKeysObject *oldkeys; + PyObject **oldvalues; Py_ssize_t i, n; -#endif - + + mp = ((PyDictObject *)op); if (!PyDict_Check(op)) return; - mp = (PyDictObject *)op; -#ifdef Py_DEBUG - n = mp->ma_mask + 1; - i = 0; -#endif - - table = mp->ma_table; - assert(table != NULL); - table_is_malloced = table != mp->ma_smalltable; - - /* This is delicate. During the process of clearing the dict, - * decrefs can cause the dict to mutate. To avoid fatal confusion - * (voice of experience), we have to make the dict empty before - * clearing the slots, and never refer to anything via mp->xxx while - * clearing. - */ - fill = mp->ma_fill; - if (table_is_malloced) - EMPTY_TO_MINSIZE(mp); - - else if (fill > 0) { - /* It's a small table with something that needs to be cleared. - * Afraid the only safe way is to copy the dict entries into - * another small table first. - */ - memcpy(small_copy, table, sizeof(small_copy)); - table = small_copy; - EMPTY_TO_MINSIZE(mp); + oldkeys = mp->ma_keys; + oldvalues = mp->ma_values; + /* Empty the dict... */ + mp->ma_keys = Py_EMPTY_KEYS; + mp->ma_used = 0; + DK_INCREF(Py_EMPTY_KEYS); + mp->ma_values = NULL; + /* ...then clear the keys and values */ + n = DK_SIZE(oldkeys); + if (oldvalues != NULL) + for (i = 0; i < n; i++) + Py_CLEAR(oldvalues[i]); + if (n == 8 && oldvalues == NULL) { + /* Reuse table */ + Py_ssize_t i, n; + assert(oldkeys->dk_refcnt == 1); + for (i = 0, n = DK_SIZE(oldkeys); i < n; i++) { + Py_CLEAR(oldkeys->dk_entries[i].me_key); + Py_CLEAR(oldkeys->dk_entries[i]._me_value); + } + oldkeys->dk_free = USABLE_FRACTION(oldkeys->dk_size); + oldkeys->dk_lookup = lookdict_unicode_nodummy; + /* Check that dict is still empty */ + if (mp->ma_keys == Py_EMPTY_KEYS) { + mp->ma_keys = oldkeys; + mp->ma_values = NULL; + assert(mp->ma_used == 0); + } else { + free_keys_object(oldkeys); + } } - /* else it's a small table that's already empty */ - - /* Now we can finally clear things. If C had refcounts, we could - * assert that the refcount on table is 1 now, i.e. that this function - * has unique access to it, so decref side-effects can't alter it. - */ - for (ep = table; fill > 0; ++ep) { -#ifdef Py_DEBUG - assert(i < n); - ++i; -#endif - if (ep->me_key) { - --fill; - Py_DECREF(ep->me_key); - Py_XDECREF(ep->me_value); + else { + if (oldvalues != NULL) + free_values(oldvalues); + DK_DECREF(oldkeys); + if (mp->ma_keys == Py_EMPTY_KEYS) { + mp->ma_keys = new_keys_object(8); + if (mp->ma_keys == NULL) { + mp->ma_keys = Py_EMPTY_KEYS; + } + else + DK_DECREF(Py_EMPTY_KEYS); } -#ifdef Py_DEBUG - else - assert(ep->me_value == NULL); -#endif } - - if (table_is_malloced) - PyMem_DEL(table); } /* @@ -960,26 +1138,38 @@ int PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue) { - register Py_ssize_t i; - register Py_ssize_t mask; - register PyDictEntry *ep; + Py_ssize_t i; + Py_ssize_t mask, offset; + PyDictObject *mp; + PyObject **value_ptr; + if (!PyDict_Check(op)) return 0; + mp = (PyDictObject *)op; i = *ppos; if (i < 0) return 0; - ep = ((PyDictObject *)op)->ma_table; - mask = ((PyDictObject *)op)->ma_mask; - while (i <= mask && ep[i].me_value == NULL) + if (mp->ma_values) { + value_ptr = &mp->ma_values[i]; + offset = sizeof(PyObject *); + } + else { + value_ptr = &mp->ma_keys->dk_entries[i]._me_value; + offset = sizeof(PyDictKeyEntry); + } + mask = DK_SIZE(mp->ma_keys)-1; + while (i <= mask && *value_ptr == NULL) { + value_ptr = (PyObject **)(((char *)value_ptr) + offset); i++; + } *ppos = i+1; if (i > mask) return 0; if (pkey) - *pkey = ep[i].me_key; + *pkey = mp->ma_keys->dk_entries[i].me_key; if (pvalue) - *pvalue = ep[i].me_value; + *pvalue = *value_ptr; return 1; } @@ -987,48 +1177,61 @@ int _PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue, Py_hash_t *phash) { - register Py_ssize_t i; - register Py_ssize_t mask; - register PyDictEntry *ep; + Py_ssize_t i; + Py_ssize_t mask, offset; + PyDictObject *mp; + PyObject **value_ptr; if (!PyDict_Check(op)) return 0; + mp = (PyDictObject *)op; i = *ppos; if (i < 0) return 0; - ep = ((PyDictObject *)op)->ma_table; - mask = ((PyDictObject *)op)->ma_mask; - while (i <= mask && ep[i].me_value == NULL) + if (mp->ma_values) { + value_ptr = &mp->ma_values[i]; + offset = sizeof(PyObject *); + } + else { + value_ptr = &mp->ma_keys->dk_entries[i]._me_value; + offset = sizeof(PyDictKeyEntry); + } + mask = DK_SIZE(mp->ma_keys)-1; + while (i <= mask && *value_ptr == NULL) { + value_ptr = (PyObject **)(((char *)value_ptr) + offset); i++; + } *ppos = i+1; if (i > mask) return 0; - *phash = ep[i].me_hash; + *phash = mp->ma_keys->dk_entries[i].me_hash; if (pkey) - *pkey = ep[i].me_key; + *pkey = mp->ma_keys->dk_entries[i].me_key; if (pvalue) - *pvalue = ep[i].me_value; + *pvalue = *value_ptr; return 1; } /* Methods */ static void -dict_dealloc(register PyDictObject *mp) +dict_dealloc(PyDictObject *mp) { - register PyDictEntry *ep; - Py_ssize_t fill = mp->ma_fill; + PyObject **values = mp->ma_values; + PyDictKeysObject *keys = mp->ma_keys; + Py_ssize_t i, n; PyObject_GC_UnTrack(mp); Py_TRASHCAN_SAFE_BEGIN(mp) - for (ep = mp->ma_table; fill > 0; ep++) { - if (ep->me_key) { - --fill; - Py_DECREF(ep->me_key); - Py_XDECREF(ep->me_value); + if (values != NULL) { + for (i = 0, n = DK_SIZE(mp->ma_keys); i < n; i++) { + Py_XDECREF(values[i]); } + free_values(values); + DK_DECREF(keys); } - if (mp->ma_table != mp->ma_smalltable) - PyMem_DEL(mp->ma_table); + else { + free_keys_object(keys); + } if (numfree < PyDict_MAXFREELIST && Py_TYPE(mp) == &PyDict_Type) free_list[numfree++] = mp; else @@ -1036,6 +1239,7 @@ Py_TRASHCAN_SAFE_END(mp) } + static PyObject * dict_repr(PyDictObject *mp) { @@ -1068,10 +1272,12 @@ while (PyDict_Next((PyObject *)mp, &i, &key, &value)) { int status; /* Prevent repr from deleting value during key format. */ + Py_INCREF(key); Py_INCREF(value); s = PyObject_Repr(key); PyUnicode_Append(&s, colon); PyUnicode_AppendAndDel(&s, PyObject_Repr(value)); + Py_DECREF(key); Py_DECREF(value); if (s == NULL) goto Done; @@ -1126,18 +1332,19 @@ { PyObject *v; Py_hash_t hash; - PyDictEntry *ep; - assert(mp->ma_table != NULL); + PyDictKeyEntry *ep; + PyObject **value_addr; + if (!PyUnicode_CheckExact(key) || (hash = ((PyASCIIObject *) key)->hash) == -1) { hash = PyObject_Hash(key); if (hash == -1) return NULL; } - ep = (mp->ma_lookup)(mp, key, hash); + ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr); if (ep == NULL) return NULL; - v = ep->me_value; + v = *value_addr; if (v == NULL) { if (!PyDict_CheckExact(mp)) { /* Look up __missing__ method if we're a subclass. */ @@ -1181,8 +1388,9 @@ { register PyObject *v; register Py_ssize_t i, j; - PyDictEntry *ep; - Py_ssize_t mask, n; + PyDictKeyEntry *ep; + Py_ssize_t size, n, offset; + PyObject **value_ptr; again: n = mp->ma_used; @@ -1196,15 +1404,24 @@ Py_DECREF(v); goto again; } - ep = mp->ma_table; - mask = mp->ma_mask; - for (i = 0, j = 0; i <= mask; i++) { - if (ep[i].me_value != NULL) { + ep = &mp->ma_keys->dk_entries[0]; + size = DK_SIZE(mp->ma_keys); + if (mp->ma_values) { + value_ptr = mp->ma_values; + offset = sizeof(PyObject *); + } + else { + value_ptr = &ep[0]._me_value; + offset = sizeof(PyDictKeyEntry); + } + for (i = 0, j = 0; i < size; i++) { + if (*value_ptr != NULL) { PyObject *key = ep[i].me_key; Py_INCREF(key); PyList_SET_ITEM(v, j, key); j++; } + value_ptr = (PyObject **)(((char *)value_ptr) + offset); } assert(j == n); return v; @@ -1215,8 +1432,8 @@ { register PyObject *v; register Py_ssize_t i, j; - PyDictEntry *ep; - Py_ssize_t mask, n; + Py_ssize_t size, n, offset; + PyObject **value_ptr; again: n = mp->ma_used; @@ -1230,11 +1447,19 @@ Py_DECREF(v); goto again; } - ep = mp->ma_table; - mask = mp->ma_mask; - for (i = 0, j = 0; i <= mask; i++) { - if (ep[i].me_value != NULL) { - PyObject *value = ep[i].me_value; + size = DK_SIZE(mp->ma_keys); + if (mp->ma_values) { + value_ptr = mp->ma_values; + offset = sizeof(PyObject *); + } + else { + value_ptr = &mp->ma_keys->dk_entries[0]._me_value; + offset = sizeof(PyDictKeyEntry); + } + for (i = 0, j = 0; i < size; i++) { + PyObject *value = *value_ptr; + value_ptr = (PyObject **)(((char *)value_ptr) + offset); + if (value != NULL) { Py_INCREF(value); PyList_SET_ITEM(v, j, value); j++; @@ -1249,9 +1474,10 @@ { register PyObject *v; register Py_ssize_t i, j, n; - Py_ssize_t mask; - PyObject *item, *key, *value; - PyDictEntry *ep; + Py_ssize_t size, offset; + PyObject *item, *key; + PyDictKeyEntry *ep; + PyObject **value_ptr; /* Preallocate the list of tuples, to avoid allocations during * the loop over the items, which could trigger GC, which @@ -1278,10 +1504,20 @@ goto again; } /* Nothing we do below makes any function calls. */ - ep = mp->ma_table; - mask = mp->ma_mask; - for (i = 0, j = 0; i <= mask; i++) { - if ((value=ep[i].me_value) != NULL) { + ep = mp->ma_keys->dk_entries; + size = DK_SIZE(mp->ma_keys); + if (mp->ma_values) { + value_ptr = mp->ma_values; + offset = sizeof(PyObject *); + } + else { + value_ptr = &ep[0]._me_value; + offset = sizeof(PyDictKeyEntry); + } + for (i = 0, j = 0; i < size; i++) { + PyObject *value = *value_ptr; + value_ptr = (PyObject **)(((char *)value_ptr) + offset); + if (value != NULL) { key = ep[i].me_key; item = PyList_GET_ITEM(v, j); Py_INCREF(key); @@ -1323,7 +1559,6 @@ Py_DECREF(d); return NULL; } - while (_PyDict_Next(seq, &pos, &key, &oldvalue, &hash)) { Py_INCREF(key); Py_INCREF(value); @@ -1341,7 +1576,7 @@ PyObject *key; Py_hash_t hash; - if (dictresize(mp, PySet_GET_SIZE(seq))) { + if (dictresize(mp, PySet_GET_SIZE(seq))){ Py_DECREF(d); return NULL; } @@ -1349,7 +1584,7 @@ while (_PySet_NextEntry(seq, &pos, &key, &hash)) { Py_INCREF(key); Py_INCREF(value); - if (insertdict(mp, key, hash, value)) { + if (insertdict(mp, key, hash, value)){ Py_DECREF(d); return NULL; } @@ -1513,8 +1748,8 @@ PyDict_Merge(PyObject *a, PyObject *b, int override) { register PyDictObject *mp, *other; - register Py_ssize_t i; - PyDictEntry *entry; + register Py_ssize_t i, n; + PyDictKeyEntry *entry; /* We accept for the argument either a concrete dictionary object, * or an abstract "mapping" object. For the former, we can do @@ -1541,20 +1776,25 @@ * incrementally resizing as we insert new items. Expect * that there will be no (or few) overlapping keys. */ - if ((mp->ma_fill + other->ma_used)*3 >= (mp->ma_mask+1)*2) { - if (dictresize(mp, (mp->ma_used + other->ma_used)*2) != 0) + if (mp->ma_keys->dk_free * 3 < other->ma_used * 2) + if (dictresize(mp, (mp->ma_used + other->ma_used)*2) != 0) return -1; - } - for (i = 0; i <= other->ma_mask; i++) { - entry = &other->ma_table[i]; - if (entry->me_value != NULL && + for (i = 0, n = DK_SIZE(other->ma_keys); i < n; i++) { + PyObject *value; + entry = &other->ma_keys->dk_entries[i]; + if (other->ma_values) + value = other->ma_values[i]; + else + value = entry->_me_value; + + if (value != NULL && (override || PyDict_GetItem(a, entry->me_key) == NULL)) { Py_INCREF(entry->me_key); - Py_INCREF(entry->me_value); + Py_INCREF(value); if (insertdict(mp, entry->me_key, entry->me_hash, - entry->me_value) != 0) + value) != 0) return -1; } } @@ -1616,11 +1856,35 @@ PyDict_Copy(PyObject *o) { PyObject *copy; + PyDictObject *mp; + Py_ssize_t i, n; if (o == NULL || !PyDict_Check(o)) { PyErr_BadInternalCall(); return NULL; } + mp = (PyDictObject *)o; + if (_PyDict_HasSplitTable(mp)) { + PyDictObject *split_copy; + PyObject **newvalues = new_values(DK_SIZE(mp->ma_keys)); + if (newvalues == NULL) + return PyErr_NoMemory(); + split_copy = PyObject_GC_New(PyDictObject, &PyDict_Type); + if (split_copy == NULL) { + PyMem_DEL(newvalues); + return NULL; + } + split_copy->ma_values = newvalues; + split_copy->ma_keys = mp->ma_keys; + split_copy->ma_used = mp->ma_used; + DK_INCREF(mp->ma_keys); + for (i = 0, n = DK_SIZE(mp->ma_keys); i < n; i++) { + PyObject *value = mp->ma_values[i]; + Py_XINCREF(value); + split_copy->ma_values[i] = value; + } + return (PyObject *)split_copy; + } copy = PyDict_New(); if (copy == NULL) return NULL; @@ -1682,14 +1946,18 @@ if (a->ma_used != b->ma_used) /* can't be equal if # of entries differ */ return 0; - /* Same # of entries -- check all of 'em. Exit early on any diff. */ - for (i = 0; i <= a->ma_mask; i++) { - PyObject *aval = a->ma_table[i].me_value; + for (i = 0; i < DK_SIZE(a->ma_keys); i++) { + PyDictKeyEntry *ep = &a->ma_keys->dk_entries[i]; + PyObject *aval; + if (a->ma_values) + aval = a->ma_values[i]; + else + aval = ep->_me_value; if (aval != NULL) { int cmp; PyObject *bval; - PyObject *key = a->ma_table[i].me_key; + PyObject *key = ep->me_key; /* temporarily bump aval's refcount to ensure it stays alive until we're done with it */ Py_INCREF(aval); @@ -1710,7 +1978,7 @@ } } return 1; - } +} static PyObject * dict_richcompare(PyObject *v, PyObject *w, int op) @@ -1731,13 +1999,14 @@ res = Py_NotImplemented; Py_INCREF(res); return res; - } +} static PyObject * dict_contains(register PyDictObject *mp, PyObject *key) { Py_hash_t hash; - PyDictEntry *ep; + PyDictKeyEntry *ep; + PyObject **value_addr; if (!PyUnicode_CheckExact(key) || (hash = ((PyASCIIObject *) key)->hash) == -1) { @@ -1745,10 +2014,10 @@ if (hash == -1) return NULL; } - ep = (mp->ma_lookup)(mp, key, hash); + ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr); if (ep == NULL) return NULL; - return PyBool_FromLong(ep->me_value != NULL); + return PyBool_FromLong(*value_addr != NULL); } static PyObject * @@ -1758,7 +2027,8 @@ PyObject *failobj = Py_None; PyObject *val = NULL; Py_hash_t hash; - PyDictEntry *ep; + PyDictKeyEntry *ep; + PyObject **value_addr; if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &failobj)) return NULL; @@ -1769,17 +2039,16 @@ if (hash == -1) return NULL; } - ep = (mp->ma_lookup)(mp, key, hash); + ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr); if (ep == NULL) return NULL; - val = ep->me_value; + val = *value_addr; if (val == NULL) val = failobj; Py_INCREF(val); return val; } - static PyObject * dict_setdefault(register PyDictObject *mp, PyObject *args) { @@ -1787,7 +2056,8 @@ PyObject *failobj = Py_None; PyObject *val = NULL; Py_hash_t hash; - PyDictEntry *ep; + PyDictKeyEntry *ep; + PyObject **value_addr; if (!PyArg_UnpackTuple(args, "setdefault", 1, 2, &key, &failobj)) return NULL; @@ -1798,14 +2068,17 @@ if (hash == -1) return NULL; } - ep = (mp->ma_lookup)(mp, key, hash); + ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr); if (ep == NULL) return NULL; - val = ep->me_value; + val = *value_addr; if (val == NULL) { - val = failobj; - if (PyDict_SetItem((PyObject*)mp, key, failobj)) + Py_INCREF(failobj); + Py_INCREF(key); + if (insertdict(mp, key, hash, failobj)) val = NULL; + else + val = failobj; } Py_XINCREF(val); return val; @@ -1823,9 +2096,10 @@ dict_pop(PyDictObject *mp, PyObject *args) { Py_hash_t hash; - PyDictEntry *ep; PyObject *old_value, *old_key; PyObject *key, *deflt = NULL; + PyDictKeyEntry *ep; + PyObject **value_addr; if(!PyArg_UnpackTuple(args, "pop", 1, 2, &key, &deflt)) return NULL; @@ -1833,8 +2107,7 @@ if (deflt) { Py_INCREF(deflt); return deflt; - } - set_key_error(key); + } set_key_error(key); return NULL; } if (!PyUnicode_CheckExact(key) || @@ -1843,10 +2116,11 @@ if (hash == -1) return NULL; } - ep = (mp->ma_lookup)(mp, key, hash); + ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr); if (ep == NULL) return NULL; - if (ep->me_value == NULL) { + old_value = *value_addr; + if (old_value == NULL) { if (deflt) { Py_INCREF(deflt); return deflt; @@ -1854,13 +2128,15 @@ set_key_error(key); return NULL; } - old_key = ep->me_key; - Py_INCREF(dummy); - ep->me_key = dummy; - old_value = ep->me_value; - ep->me_value = NULL; + *value_addr = NULL; mp->ma_used--; - Py_DECREF(old_key); + if (!_PyDict_HasSplitTable(mp)) { + ENSURE_DELETEABLE(mp); + old_key = ep->me_key; + Py_INCREF(dummy); + ep->me_key = dummy; + Py_DECREF(old_key); + } return old_value; } @@ -1868,9 +2144,10 @@ dict_popitem(PyDictObject *mp) { Py_hash_t i = 0; - PyDictEntry *ep; + PyDictKeyEntry *ep; PyObject *res; + /* Allocate the result tuple before checking the size. Believe it * or not, this allocation could trigger a garbage collection which * could empty the dict, so if we checked the size first and that @@ -1889,49 +2166,71 @@ "popitem(): dictionary is empty"); return NULL; } + /* Convert split table to combined table */ + if (mp->ma_keys->dk_lookup == lookdict_split) { + if (dictresize(mp, DK_SIZE(mp->ma_keys))) { + Py_DECREF(res); + return NULL; + } + } + ENSURE_DELETEABLE(mp); /* Set ep to "the first" dict entry with a value. We abuse the hash * field of slot 0 to hold a search finger: * If slot 0 has a value, use slot 0. * Else slot 0 is being used to hold a search finger, * and we use its hash value as the first index to look. */ - ep = &mp->ma_table[0]; - if (ep->me_value == NULL) { + ep = &mp->ma_keys->dk_entries[0]; + if (ep->_me_value == NULL) { + Py_ssize_t mask = DK_SIZE(mp->ma_keys)-1; i = ep->me_hash; /* The hash field may be a real hash value, or it may be a * legit search finger, or it may be a once-legit search * finger that's out of bounds now because it wrapped around * or the table shrunk -- simply make sure it's in bounds now. */ - if (i > mp->ma_mask || i < 1) + if (i > mask || i < 1) i = 1; /* skip slot 0 */ - while ((ep = &mp->ma_table[i])->me_value == NULL) { + while ((ep = &mp->ma_keys->dk_entries[i])->_me_value == NULL) { i++; - if (i > mp->ma_mask) + if (i > mask) i = 1; } } PyTuple_SET_ITEM(res, 0, ep->me_key); - PyTuple_SET_ITEM(res, 1, ep->me_value); + PyTuple_SET_ITEM(res, 1, ep->_me_value); Py_INCREF(dummy); ep->me_key = dummy; - ep->me_value = NULL; + ep->_me_value = NULL; mp->ma_used--; - assert(mp->ma_table[0].me_value == NULL); - mp->ma_table[0].me_hash = i + 1; /* next place to start */ + assert(mp->ma_keys->dk_entries[0].me_value == NULL); + mp->ma_keys->dk_entries[0].me_hash = i + 1; /* next place to start */ return res; } static int dict_traverse(PyObject *op, visitproc visit, void *arg) { - Py_ssize_t i = 0; - PyObject *pk; - PyObject *pv; - - while (PyDict_Next(op, &i, &pk, &pv)) { - Py_VISIT(pk); - Py_VISIT(pv); + Py_ssize_t i, n; + PyDictObject *mp = (PyDictObject *)op; + if (mp->ma_keys->dk_lookup == lookdict) { + for (i = 0; i < DK_SIZE(mp->ma_keys); i++) { + if (mp->ma_keys->dk_entries[i]._me_value != NULL) { + Py_VISIT(mp->ma_keys->dk_entries[i]._me_value); + Py_VISIT(mp->ma_keys->dk_entries[i].me_key); + } + } + } else { + if (mp->ma_values != NULL) { + for (i = 0, n = DK_SIZE(mp->ma_keys); i < n; i++) { + Py_VISIT(mp->ma_values[i]); + } + } + else { + for (i = 0, n = DK_SIZE(mp->ma_keys); i < n; i++) { + Py_VISIT(mp->ma_keys->dk_entries[i]._me_value); + } + } } return 0; } @@ -1948,12 +2247,22 @@ static PyObject * dict_sizeof(PyDictObject *mp) { - Py_ssize_t res; - + Py_ssize_t size; + double res, keys_size; + + size = DK_SIZE(mp->ma_keys); res = sizeof(PyDictObject); - if (mp->ma_table != mp->ma_smalltable) - res = res + (mp->ma_mask + 1) * sizeof(PyDictEntry); - return PyLong_FromSsize_t(res); + if (mp->ma_values) + res += size * sizeof(PyObject*); + /* Count our share of the keys object -- with rounding errors. */ + keys_size = sizeof(PyDictKeysObject) + (size-1) * sizeof(PyDictKeyEntry); + /* If refcnt > 1, then one count is (probably) held by a type */ + /* XXX This is somewhat approximate :) */ + if (mp->ma_keys->dk_refcnt < 3) + res += keys_size; + else + res += keys_size / (mp->ma_keys->dk_refcnt - 1); + return PyFloat_FromDouble(res); } PyDoc_STRVAR(contains__doc__, @@ -2044,7 +2353,8 @@ { Py_hash_t hash; PyDictObject *mp = (PyDictObject *)op; - PyDictEntry *ep; + PyDictKeyEntry *ep; + PyObject **value_addr; if (!PyUnicode_CheckExact(key) || (hash = ((PyASCIIObject *) key)->hash) == -1) { @@ -2052,8 +2362,8 @@ if (hash == -1) return -1; } - ep = (mp->ma_lookup)(mp, key, hash); - return ep == NULL ? -1 : (ep->me_value != NULL); + ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr); + return (ep == NULL) ? -1 : (*value_addr != NULL); } /* Internal version of PyDict_Contains used when the hash value is already known */ @@ -2061,10 +2371,11 @@ _PyDict_Contains(PyObject *op, PyObject *key, Py_hash_t hash) { PyDictObject *mp = (PyDictObject *)op; - PyDictEntry *ep; - - ep = (mp->ma_lookup)(mp, key, hash); - return ep == NULL ? -1 : (ep->me_value != NULL); + PyDictKeyEntry *ep; + PyObject **value_addr; + + ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr); + return (ep == NULL) ? -1 : (*value_addr != NULL); } /* Hack to implement "key in dict" */ @@ -2081,6 +2392,7 @@ 0, /* sq_inplace_repeat */ }; + static PyObject * dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { @@ -2090,22 +2402,15 @@ self = type->tp_alloc(type, 0); if (self != NULL) { PyDictObject *d = (PyDictObject *)self; - /* It's guaranteed that tp->alloc zeroed out the struct. */ - assert(d->ma_table == NULL && d->ma_fill == 0 && d->ma_used == 0); - INIT_NONZERO_DICT_SLOTS(d); - d->ma_lookup = lookdict_unicode; + d->ma_keys = new_keys_object(8); + /* XXX - Should we raise a no-memory error? */ + if (d->ma_keys == NULL) { + DK_INCREF(Py_EMPTY_KEYS); + d->ma_keys = Py_EMPTY_KEYS; + } /* The object has been implicitly tracked by tp_alloc */ if (type == &PyDict_Type) _PyObject_GC_UNTRACK(d); -#ifdef SHOW_CONVERSION_COUNTS - ++created; -#endif -#ifdef SHOW_TRACK_COUNT - if (_PyObject_GC_IS_TRACKED(d)) - count_tracked++; - else - count_untracked++; -#endif } return self; } @@ -2278,6 +2583,7 @@ return PyLong_FromSize_t(len); } + PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it))."); @@ -2290,9 +2596,10 @@ static PyObject *dictiter_iternextkey(dictiterobject *di) { PyObject *key; - register Py_ssize_t i, mask; - register PyDictEntry *ep; + register Py_ssize_t i, mask, offset; + register PyDictKeysObject *k; PyDictObject *d = di->di_dict; + PyObject **value_ptr; if (d == NULL) return NULL; @@ -2308,15 +2615,25 @@ i = di->di_pos; if (i < 0) goto fail; - ep = d->ma_table; - mask = d->ma_mask; - while (i <= mask && ep[i].me_value == NULL) + k = d->ma_keys; + if (d->ma_values) { + value_ptr = &d->ma_values[i]; + offset = sizeof(PyObject *); + } + else { + value_ptr = &k->dk_entries[i]._me_value; + offset = sizeof(PyDictKeyEntry); + } + mask = DK_SIZE(k)-1; + while (i <= mask && *value_ptr == NULL) { + value_ptr = (PyObject **)(((char *)value_ptr) + offset); i++; + } di->di_pos = i+1; if (i > mask) goto fail; di->len--; - key = ep[i].me_key; + key = k->dk_entries[i].me_key; Py_INCREF(key); return key; @@ -2362,9 +2679,9 @@ static PyObject *dictiter_iternextvalue(dictiterobject *di) { PyObject *value; - register Py_ssize_t i, mask; - register PyDictEntry *ep; + register Py_ssize_t i, mask, offset; PyDictObject *d = di->di_dict; + PyObject **value_ptr; if (d == NULL) return NULL; @@ -2378,17 +2695,26 @@ } i = di->di_pos; - mask = d->ma_mask; + mask = DK_SIZE(d->ma_keys)-1; if (i < 0 || i > mask) goto fail; - ep = d->ma_table; - while ((value=ep[i].me_value) == NULL) { + if (d->ma_values) { + value_ptr = &d->ma_values[i]; + offset = sizeof(PyObject *); + } + else { + value_ptr = &d->ma_keys->dk_entries[i]._me_value; + offset = sizeof(PyDictKeyEntry); + } + while (i <= mask && *value_ptr == NULL) { + value_ptr = (PyObject **)(((char *)value_ptr) + offset); i++; if (i > mask) goto fail; } di->di_pos = i+1; di->len--; + value = *value_ptr; Py_INCREF(value); return value; @@ -2434,9 +2760,9 @@ static PyObject *dictiter_iternextitem(dictiterobject *di) { PyObject *key, *value, *result = di->di_result; - register Py_ssize_t i, mask; - register PyDictEntry *ep; + register Py_ssize_t i, mask, offset; PyDictObject *d = di->di_dict; + PyObject **value_ptr; if (d == NULL) return NULL; @@ -2452,10 +2778,19 @@ i = di->di_pos; if (i < 0) goto fail; - ep = d->ma_table; - mask = d->ma_mask; - while (i <= mask && ep[i].me_value == NULL) + mask = DK_SIZE(d->ma_keys)-1; + if (d->ma_values) { + value_ptr = &d->ma_values[i]; + offset = sizeof(PyObject *); + } + else { + value_ptr = &d->ma_keys->dk_entries[i]._me_value; + offset = sizeof(PyDictKeyEntry); + } + while (i <= mask && *value_ptr == NULL) { + value_ptr = (PyObject **)(((char *)value_ptr) + offset); i++; + } di->di_pos = i+1; if (i > mask) goto fail; @@ -2470,8 +2805,8 @@ return NULL; } di->len--; - key = ep[i].me_key; - value = ep[i].me_value; + key = d->ma_keys->dk_entries[i].me_key; + value = *value_ptr; Py_INCREF(key); Py_INCREF(value); PyTuple_SET_ITEM(result, 0, key); @@ -2585,6 +2920,7 @@ - if public then they should probably be in builtins */ + /* Return 1 if self is a subset of other, iterating over self; 0 if not; -1 if an error occurred. */ static int @@ -2611,6 +2947,7 @@ return ok; } + static PyObject * dictview_richcompare(PyObject *self, PyObject *other, int op) { @@ -2722,7 +3059,6 @@ PyObject *result = PySet_New(self); PyObject *tmp; _Py_IDENTIFIER(difference_update); - if (result == NULL) return NULL; @@ -2742,7 +3078,6 @@ PyObject *result = PySet_New(self); PyObject *tmp; _Py_IDENTIFIER(intersection_update); - if (result == NULL) return NULL; @@ -2762,7 +3097,6 @@ PyObject *result = PySet_New(self); PyObject *tmp; _Py_IDENTIFIER(update); - if (result == NULL) return NULL; @@ -2782,7 +3116,6 @@ PyObject *result = PySet_New(self); PyObject *tmp; _Py_IDENTIFIER(symmetric_difference_update); - if (result == NULL) return NULL; @@ -2915,6 +3248,7 @@ return dictview_new(dict, &PyDictKeys_Type); } + /*** dict_items ***/ static PyObject * @@ -3001,6 +3335,8 @@ return dictview_new(dict, &PyDictItems_Type); } + + /*** dict_values ***/ static PyObject * @@ -3065,3 +3401,113 @@ { return dictview_new(dict, &PyDictValues_Type); } + +int +_PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, + PyObject *key, PyObject *value) { + PyObject *dict; + int res; + PyDictKeysObject *cached; + + assert(dictptr != NULL); + if ((tp->tp_flags & Py_TPFLAGS_HEAPTYPE) && (cached = CACHED_KEYS(tp))) { + assert(dictptr != NULL); + dict = *dictptr; + if (dict == NULL) { + DK_INCREF(cached); + dict = new_dict_with_shared_keys(cached); + if (dict == NULL) + return -1; + } + *dictptr = dict; + if (value == NULL) { + res = PyDict_DelItem(dict, key); + if (cached != ((PyDictObject *)dict)->ma_keys) { + CACHED_KEYS(tp) = NULL; + DK_DECREF(cached); + } + } else { + res = PyDict_SetItem(dict, key, value); + if (cached != ((PyDictObject *)dict)->ma_keys) { + /* Either update tp->ht_cached_keys or delete it */ + if (cached->dk_refcnt == 1) { + CACHED_KEYS(tp) = make_keys_shared(dict); + } else { + CACHED_KEYS(tp) = NULL; + } + DK_DECREF(cached); + } + } + } else { + dict = *dictptr; + if (dict == NULL) { + dict = PyDict_New(); + if (dict == NULL) + return -1; + *dictptr = dict; + } + if (value == NULL) { + res = PyDict_DelItem(dict, key); + } else { + res = PyDict_SetItem(dict, key, value); + } + } + return res; +} + +void +_PyDictKeys_DecRef(PyDictKeysObject *keys) +{ + DK_DECREF(keys); +} + + +/* ARGSUSED */ +static PyObject * +dummy_repr(PyObject *op) +{ + return PyUnicode_FromString(""); +} + +/* ARGUSED */ +static void +dummy_dealloc(PyObject* ignore) +{ + /* This should never get called, but we also don't want to SEGV if + * we accidentally decref dummy-key out of existence. + */ + Py_FatalError("deallocating "); +} + +static PyTypeObject PyDictDummy_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + " type", + 0, + 0, + dummy_dealloc, /*tp_dealloc*/ /*never called*/ + 0, /*tp_print*/ + 0, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_reserved*/ + dummy_repr, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash */ + 0, /*tp_call */ + 0, /*tp_str */ + 0, /*tp_getattro */ + 0, /*tp_setattro */ + 0, /*tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /*tp_flags */ +}; + +static PyObject _dummy_struct = { + _PyObject_EXTRA_INIT + 2, &PyDictDummy_Type +}; + + + + + diff -r 58bd6a58365d -r bc286099ce9a Objects/object.c --- a/Objects/object.c Wed Feb 08 04:09:37 2012 +0100 +++ b/Objects/object.c Wed Feb 08 16:33:32 2012 +0000 @@ -1163,13 +1163,10 @@ if (dict == NULL) { dictptr = _PyObject_GetDictPtr(obj); if (dictptr != NULL) { - dict = *dictptr; - if (dict == NULL && value != NULL) { - dict = PyDict_New(); - if (dict == NULL) - goto done; - *dictptr = dict; - } + res = _PyObjectDict_SetItem(Py_TYPE(obj), dictptr, name, value); + if (res < 0 && PyErr_ExceptionMatches(PyExc_KeyError)) + PyErr_SetObject(PyExc_AttributeError, name); + goto done; } } if (dict != NULL) { diff -r 58bd6a58365d -r bc286099ce9a Objects/typeobject.c --- a/Objects/typeobject.c Wed Feb 08 04:09:37 2012 +0100 +++ b/Objects/typeobject.c Wed Feb 08 16:33:32 2012 +0000 @@ -1787,7 +1787,7 @@ } dict = *dictptr; if (dict == NULL) - *dictptr = dict = PyDict_New(); + *dictptr = dict = PyDict_NewForInstance(Py_TYPE(obj)); Py_XINCREF(dict); return dict; } @@ -2335,6 +2335,9 @@ type->tp_dictoffset = slotoffset; slotoffset += sizeof(PyObject *); } + if (type->tp_dictoffset) { + et->ht_cached_keys = _PyDict_NewKeysForClass(); + } if (add_weak) { assert(!base->tp_itemsize); type->tp_weaklistoffset = slotoffset; @@ -2434,6 +2437,9 @@ res->ht_type.tp_doc = tp_doc; } } + if (res->ht_type.tp_dictoffset) { + res->ht_cached_keys = _PyDict_NewKeysForClass(); + } if (PyType_Ready(&res->ht_type) < 0) goto fail; diff -r 58bd6a58365d -r bc286099ce9a Python/ceval.c --- a/Python/ceval.c Wed Feb 08 04:09:37 2012 +0100 +++ b/Python/ceval.c Wed Feb 08 16:33:32 2012 +0000 @@ -2102,29 +2102,30 @@ /* Inline the PyDict_GetItem() calls. WARNING: this is an extreme speed hack. Do not try this at home. */ + PyObject **value_addr; Py_hash_t hash = ((PyASCIIObject *)w)->hash; if (hash != -1) { PyDictObject *d; - PyDictEntry *e; + PyDictKeyEntry *e; d = (PyDictObject *)(f->f_globals); - e = d->ma_lookup(d, w, hash); + e = d->ma_keys->dk_lookup(d, w, hash, &value_addr); if (e == NULL) { x = NULL; break; } - x = e->me_value; + x = *value_addr; if (x != NULL) { Py_INCREF(x); PUSH(x); DISPATCH(); } d = (PyDictObject *)(f->f_builtins); - e = d->ma_lookup(d, w, hash); + e = d->ma_keys->dk_lookup(d, w, hash, &value_addr); if (e == NULL) { x = NULL; break; } - x = e->me_value; + x = *value_addr; if (x != NULL) { Py_INCREF(x); PUSH(x); diff -r 58bd6a58365d -r bc286099ce9a Tools/gdb/libpython.py --- a/Tools/gdb/libpython.py Wed Feb 08 04:09:37 2012 +0100 +++ b/Tools/gdb/libpython.py Wed Feb 08 16:33:32 2012 +0000 @@ -634,9 +634,14 @@ Yields a sequence of (PyObjectPtr key, PyObjectPtr value) pairs, analagous to dict.iteritems() ''' - for i in safe_range(self.field('ma_mask') + 1): - ep = self.field('ma_table') + i - pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value']) + keys = self.field('ma_keys') + values = self.field('ma_values') + for i in safe_range(keys['dk_size']): + ep = keys['dk_entries'].address + i + if long(values): + pyop_value = PyObjectPtr.from_pyobject_ptr(values[i]) + else: + pyop_value = PyObjectPtr.from_pyobject_ptr(ep['_me_value']) if not pyop_value.is_null(): pyop_key = PyObjectPtr.from_pyobject_ptr(ep['me_key']) yield (pyop_key, pyop_value)