diff -r 58bd6a58365d -r bc286099ce9a Include/dictobject.h
--- a/Include/dictobject.h	Wed Feb 08 04:09:37 2012 +0100
+++ b/Include/dictobject.h	Wed Feb 08 16:33:32 2012 +0000
@@ -14,7 +14,7 @@
 */
 
 /*
-There are three kinds of slots in the table:
+There are four kinds of slots in the table:
 
 1. Unused.  me_key == me_value == NULL
    Does not hold an active (key, value) pair now and never did.  Unused can
@@ -22,8 +22,9 @@
    me_key is NULL, and is each slot's initial state.
 
 2. Active.  me_key != NULL and me_key != dummy and me_value != NULL
-   Holds an active (key, value) pair.  Active can transition to Dummy upon
-   key deletion.  This is the only case in which me_value != NULL.
+   Holds an active (key, value) pair.  Active can transition to Dummy or
+   Pending upon key deletion (for combined and split tables respectively).
+   This is the only case in which me_value != NULL.
 
 3. Dummy.  me_key == dummy and me_value == NULL
    Previously held an active (key, value) pair, but that was deleted and an
@@ -32,59 +33,78 @@
    (cannot have me_key set to NULL), else the probe sequence in case of
    collision would have no way to know they were once active.
 
+4. Pending. Not yet inserted or deleted from a split-table.
+   key != NULL, key != dummy and value == NULL
+
+The DictObject can be in one of two forms.
+Either:
+  A combined table:
+    ma_values == NULL, dk_refcnt == 1.
+    Values are stored in the _me_value field of the PyDictKeysObject.
+    Slot kind 4 is not allowed i.e.
+        key != NULL, key != dummy and value == NULL is illegal.
+Or:
+  A split table:
+    ma_values != NULL, dk_refcnt >= 1
+    Values are stored in the ma_values array.
+    Only string (unicode) keys are allowed, no <dummy> keys are present.
+
 Note: .popitem() abuses the me_hash field of an Unused or Dummy slot to
 hold a search finger.  The me_hash field of Unused or Dummy slots has no
-meaning otherwise.
+meaning otherwise. As a consequence, popitem always converts the dict
+to the combined-table form.
 */
 
-/* PyDict_MINSIZE is the minimum size of a dictionary.  This many slots are
- * allocated directly in the dict object (in the ma_smalltable member).
+/* PyDict_MINSIZE is the minimum size of a dictionary.
  * It must be a power of 2, and at least 4.  8 allows dicts with no more
- * than 5 active entries to live in ma_smalltable (and so avoid an
- * additional malloc); instrumentation suggested this suffices for the
+ * than 5 active entries; experiments suggested this suffices for the
  * majority of dicts (consisting mostly of usually-small instance dicts and
  * usually-small dicts created to pass keyword arguments).
+ * Note that PyDict_MINSIZE itself is now 4; 8 is the size PyDict_New() uses.
  */
 #ifndef Py_LIMITED_API
-#define PyDict_MINSIZE 8
+#define PyDict_MINSIZE 4
 
 typedef struct {
     /* Cached hash code of me_key. */
     Py_hash_t me_hash;
     PyObject *me_key;
-    PyObject *me_value;
-} PyDictEntry;
+    PyObject *_me_value; /* This field is only meaningful for combined tables */
+} PyDictKeyEntry;
 
 /*
 To ensure the lookup algorithm terminates, there must be at least one Unused
 slot (NULL key) in the table.
-The value ma_fill is the number of non-NULL keys (sum of Active and Dummy);
-ma_used is the number of non-NULL, non-dummy keys (== the number of non-NULL
-values == the number of Active items).
 To avoid slowing down lookups on a near-full table, we resize the table when
-it's two-thirds full.
+it's USABLE_FRACTION (currently two-thirds) full.
 */
+
+/* Note that if the dk_free slot of a PyDictKeysObject is less than zero
+   then that PyDictKeysObject is *immutable* */
+typedef struct _dictkeysobject PyDictKeysObject;
 typedef struct _dictobject PyDictObject;
+
+/* The ma_values pointer is NULL for a combined table
+ * or points to an array of PyObject* for a split table
+ */
 struct _dictobject {
     PyObject_HEAD
-    Py_ssize_t ma_fill;  /* # Active + # Dummy */
-    Py_ssize_t ma_used;  /* # Active */
+    Py_ssize_t ma_used;
+    struct _dictkeysobject *ma_keys;
+    PyObject **ma_values;
+};
 
-    /* The table contains ma_mask + 1 slots, and that's a power of 2.
-     * We store the mask instead of the size because the mask is more
-     * frequently needed.
-     */
-    Py_ssize_t ma_mask;
+typedef PyDictKeyEntry *(*dict_lookup_func)
+(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject ***value_addr);
 
-    /* ma_table points to ma_smalltable for small tables, else to
-     * additional malloc'ed memory.  ma_table is never NULL!  This rule
-     * saves repeated runtime null-tests in the workhorse getitem and
-     * setitem calls.
-     */
-    PyDictEntry *ma_table;
-    PyDictEntry *(*ma_lookup)(PyDictObject *mp, PyObject *key, Py_hash_t hash);
-    PyDictEntry ma_smalltable[PyDict_MINSIZE];
+struct _dictkeysobject {
+    Py_ssize_t dk_refcnt;
+    Py_ssize_t dk_size;
+    dict_lookup_func dk_lookup;
+    Py_ssize_t dk_free;
+    PyDictKeyEntry dk_entries[1];
 };
+
 #endif /* Py_LIMITED_API */
 
 PyAPI_DATA(PyTypeObject) PyDict_Type;
@@ -115,6 +135,8 @@
 PyAPI_FUNC(int) PyDict_Next(
     PyObject *mp, Py_ssize_t *pos, PyObject **key, PyObject **value);
 #ifndef Py_LIMITED_API
+PyDictKeysObject *_PyDict_NewKeysForClass(void);
+PyAPI_FUNC(PyObject *) PyDict_NewForInstance(PyTypeObject *tp);
 PyAPI_FUNC(int) _PyDict_Next(
     PyObject *mp, Py_ssize_t *pos, PyObject **key, PyObject **value, Py_hash_t *hash);
 #endif
@@ -129,6 +151,7 @@
 PyAPI_FUNC(PyObject *) _PyDict_NewPresized(Py_ssize_t minused);
 PyAPI_FUNC(void) _PyDict_MaybeUntrack(PyObject *mp);
 PyAPI_FUNC(int) _PyDict_HasOnlyStringKeys(PyObject *mp);
+#define _PyDict_HasSplitTable(d) ((d)->ma_values != NULL)
 
 PyAPI_FUNC(int) PyDict_ClearFreeList(void);
 #endif
@@ -158,6 +181,10 @@
 PyAPI_FUNC(int) PyDict_SetItemString(PyObject *dp, const char *key, PyObject *item);
 PyAPI_FUNC(int) PyDict_DelItemString(PyObject *dp, const char *key);
 
+#ifndef Py_LIMITED_API
+int _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, PyObject *name, PyObject *value);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff -r 58bd6a58365d -r bc286099ce9a Include/object.h
--- a/Include/object.h	Wed Feb 08 04:09:37 2012 +0100
+++ b/Include/object.h	Wed Feb 08 16:33:32 2012 +0000
@@ -448,6 +448,7 @@
                                       see add_operators() in typeobject.c . */
     PyBufferProcs as_buffer;
     PyObject *ht_name, *ht_slots, *ht_qualname;
+    struct _dictkeysobject *ht_cached_keys;
     /* here are optional user slots, followed by the members. */
 } PyHeapTypeObject;
 
diff -r 58bd6a58365d -r bc286099ce9a Lib/test/test_sys.py
--- a/Lib/test/test_sys.py	Wed Feb 08 04:09:37 2012 +0100
+++ b/Lib/test/test_sys.py	Wed Feb 08 16:33:32 2012 +0000
@@ -687,9 +687,9 @@
         # method-wrapper (descriptor object)
         check({}.__iter__, size(h + '2P'))
         # dict
-        check({}, size(h + '3P2P' + 8*'P2P'))
+        check({}, size(h + '3P' + '4P' + 8*'P2P'))
         longdict = {1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8}
-        check(longdict, size(h + '3P2P' + 8*'P2P') + 16*size('P2P'))
+        check(longdict, size(h + '3P' + '4P') + 16*size('P2P'))
         # dictionary-keyiterator
         check({}.keys(), size(h + 'P'))
         # dictionary-valueiterator
@@ -831,7 +831,7 @@
         # type
         # (PyTypeObject + PyNumberMethods + PyMappingMethods +
         #  PySequenceMethods + PyBufferProcs)
-        s = size(vh + 'P2P15Pl4PP9PP11PI') + size('16Pi17P 3P 10P 2P 3P')
+        s = size(vh + 'P2P15Pl4PP9PP11PIP') + size('16Pi17P 3P 10P 2P 3P')
         check(int, s)
         # class
         class newstyleclass(object): pass
diff -r 58bd6a58365d -r bc286099ce9a Objects/dictnotes.txt
--- a/Objects/dictnotes.txt	Wed Feb 08 04:09:37 2012 +0100
+++ b/Objects/dictnotes.txt	Wed Feb 08 16:33:32 2012 +0000
@@ -1,7 +1,6 @@
-NOTES ON OPTIMIZING DICTIONARIES
+NOTES ON DICTIONARIES
 ================================
 
-
 Principal Use Cases for Dictionaries
 ------------------------------------
 
@@ -21,7 +20,6 @@
 
 Builtins
     Frequent reads.  Almost never written.
-    Size 126 interned strings (as of Py2.3b1).
     A few keys are accessed much more frequently than others.
 
 Uniquification
@@ -59,26 +57,22 @@
     Characterized by deletions interspersed with adds and replacements.
     Performance benefits greatly from the re-use of dummy entries.
 
-
-Data Layout (assuming a 32-bit box with 64 bytes per cache line)
+Data Layout
 ----------------------------------------------------------------
 
-Smalldicts (8 entries) are attached to the dictobject structure
-and the whole group nearly fills two consecutive cache lines.
-
-Larger dicts use the first half of the dictobject structure (one cache
-line) and a separate, continuous block of entries (at 12 bytes each
-for a total of 5.333 entries per cache line).
+Dictionaries are composed of 3 components:
+The dictobject struct itself
+A dict-keys object (keys & hashes)
+A values array (split tables only; combined tables keep values with the keys)
 
 
 Tunable Dictionary Parameters
 -----------------------------
 
-* PyDict_MINSIZE.  Currently set to 8.
+* PyDict_MINSIZE.  Currently set to 4 (to keep instance dicts small).
     Must be a power of two.  New dicts have to zero-out every cell.
-    Each additional 8 consumes 1.5 cache lines.  Increasing improves
-    the sparseness of small dictionaries but costs time to read in
-    the additional cache lines if they are not already in cache.
+    Increasing improves the sparseness of small dictionaries but costs time
+    to read in the additional cache lines if they are not already in cache.
     That case is common when keyword arguments are passed.
 
 * Maximum dictionary load in PyDict_SetItem.  Currently set to 2/3.
@@ -126,8 +120,8 @@
 Also, every dictionary iterates at least twice, once for the memset()
 when it is created and once by dealloc().
 
-Dictionary operations involving only a single key can be O(1) unless 
-resizing is possible.  By checking for a resize only when the 
+Dictionary operations involving only a single key can be O(1) unless
+resizing is possible.  By checking for a resize only when the
 dictionary can grow (and may *require* resizing), other operations
 remain O(1), and the odds of resize thrashing or memory fragmentation
 are reduced. In particular, an algorithm that empties a dictionary
@@ -135,136 +129,21 @@
 not be necessary at all because the dictionary is eventually
 discarded entirely.
 
+The key differences between this implementation and earlier versions are:
+    1. The table can be split into two parts, the keys and the values.
 
-Results of Cache Locality Experiments
--------------------------------------
+    2. There is an additional key-value combination: (key, NULL).
+       Unlike (<dummy>, NULL), which represents a deleted value, (key, NULL)
+       represents a yet-to-be-inserted value. This combination can only
+       occur when the table is split.
 
-When an entry is retrieved from memory, 4.333 adjacent entries are also
-retrieved into a cache line.  Since accessing items in cache is *much*
-cheaper than a cache miss, an enticing idea is to probe the adjacent
-entries as a first step in collision resolution.  Unfortunately, the
-introduction of any regularity into collision searches results in more
-collisions than the current random chaining approach.
+    3. No small table embedded in the dict,
+       as this would make sharing of key-tables impossible.
 
-Exploiting cache locality at the expense of additional collisions fails
-to payoff when the entries are already loaded in cache (the expense
-is paid with no compensating benefit).  This occurs in small dictionaries
-where the whole dictionary fits into a pair of cache lines.  It also
-occurs frequently in large dictionaries which have a common access pattern
-where some keys are accessed much more frequently than others.  The
-more popular entries *and* their collision chains tend to remain in cache.
 
-To exploit cache locality, change the collision resolution section
-in lookdict() and lookdict_string().  Set i^=1 at the top of the
-loop and move the  i = (i << 2) + i + perturb + 1 to an unrolled
-version of the loop.
+These changes have the following consequences.
+   1. General dictionaries are slightly larger.
 
-This optimization strategy can be leveraged in several ways:
+   2. All object dictionaries of a single class can share a single key-table,
+      saving about 60% memory for such cases.
 
-* If the dictionary is kept sparse (through the tunable parameters),
-then the occurrence of additional collisions is lessened.
-
-* If lookdict() and lookdict_string() are specialized for small dicts
-and for largedicts, then the versions for large_dicts can be given
-an alternate search strategy without increasing collisions in small dicts
-which already have the maximum benefit of cache locality.
-
-* If the use case for a dictionary is known to have a random key
-access pattern (as opposed to a more common pattern with a Zipf's law
-distribution), then there will be more benefit for large dictionaries
-because any given key is no more likely than another to already be
-in cache.
-
-* In use cases with paired accesses to the same key, the second access
-is always in cache and gets no benefit from efforts to further improve
-cache locality.
-
-Optimizing the Search of Small Dictionaries
--------------------------------------------
-
-If lookdict() and lookdict_string() are specialized for smaller dictionaries,
-then a custom search approach can be implemented that exploits the small
-search space and cache locality.
-
-* The simplest example is a linear search of contiguous entries.  This is
-  simple to implement, guaranteed to terminate rapidly, never searches
-  the same entry twice, and precludes the need to check for dummy entries.
-
-* A more advanced example is a self-organizing search so that the most
-  frequently accessed entries get probed first.  The organization
-  adapts if the access pattern changes over time.  Treaps are ideally
-  suited for self-organization with the most common entries at the
-  top of the heap and a rapid binary search pattern.  Most probes and
-  results are all located at the top of the tree allowing them all to
-  be located in one or two cache lines.
-
-* Also, small dictionaries may be made more dense, perhaps filling all
-  eight cells to take the maximum advantage of two cache lines.
-
-
-Strategy Pattern
-----------------
-
-Consider allowing the user to set the tunable parameters or to select a
-particular search method.  Since some dictionary use cases have known
-sizes and access patterns, the user may be able to provide useful hints.
-
-1) For example, if membership testing or lookups dominate runtime and memory
-   is not at a premium, the user may benefit from setting the maximum load
-   ratio at 5% or 10% instead of the usual 66.7%.  This will sharply
-   curtail the number of collisions but will increase iteration time.
-   The builtin namespace is a prime example of a dictionary that can
-   benefit from being highly sparse.
-
-2) Dictionary creation time can be shortened in cases where the ultimate
-   size of the dictionary is known in advance.  The dictionary can be
-   pre-sized so that no resize operations are required during creation.
-   Not only does this save resizes, but the key insertion will go
-   more quickly because the first half of the keys will be inserted into
-   a more sparse environment than before.  The preconditions for this
-   strategy arise whenever a dictionary is created from a key or item
-   sequence and the number of *unique* keys is known.
-
-3) If the key space is large and the access pattern is known to be random,
-   then search strategies exploiting cache locality can be fruitful.
-   The preconditions for this strategy arise in simulations and
-   numerical analysis.
-
-4) If the keys are fixed and the access pattern strongly favors some of
-   the keys, then the entries can be stored contiguously and accessed
-   with a linear search or treap.  This exploits knowledge of the data,
-   cache locality, and a simplified search routine.  It also eliminates
-   the need to test for dummy entries on each probe.  The preconditions
-   for this strategy arise in symbol tables and in the builtin dictionary.
-
-
-Readonly Dictionaries
----------------------
-Some dictionary use cases pass through a build stage and then move to a
-more heavily exercised lookup stage with no further changes to the
-dictionary.
-
-An idea that emerged on python-dev is to be able to convert a dictionary
-to a read-only state.  This can help prevent programming errors and also
-provide knowledge that can be exploited for lookup optimization.
-
-The dictionary can be immediately rebuilt (eliminating dummy entries),
-resized (to an appropriate level of sparseness), and the keys can be
-jostled (to minimize collisions).  The lookdict() routine can then
-eliminate the test for dummy entries (saving about 1/4 of the time
-spent in the collision resolution loop).
-
-An additional possibility is to insert links into the empty spaces
-so that dictionary iteration can proceed in len(d) steps instead of
-(mp->mask + 1) steps.  Alternatively, a separate tuple of keys can be
-kept just for iteration.
-
-
-Caching Lookups
----------------
-The idea is to exploit key access patterns by anticipating future lookups
-based on previous lookups.
-
-The simplest incarnation is to save the most recently accessed entry.
-This gives optimal performance for use cases where every get is followed
-by a set or del to the same key.
diff -r 58bd6a58365d -r bc286099ce9a Objects/dictobject.c
--- a/Objects/dictobject.c	Wed Feb 08 04:09:37 2012 +0100
+++ b/Objects/dictobject.c	Wed Feb 08 16:33:32 2012 +0000
@@ -10,7 +10,6 @@
 #include "Python.h"
 #include "stringlib/eq.h"
 
-
 /* Set a key error with the specified argument, wrapping it in a
  * tuple automatically so that tuple keys are not unpacked as the
  * exception arguments. */
@@ -25,10 +24,6 @@
     Py_DECREF(tup);
 }
 
-/* Define this out if you don't want conversion statistics on exit. */
-#undef SHOW_CONVERSION_COUNTS
-
-/* See large comment block below.  This must be >= 1. */
 #define PERTURB_SHIFT 5
 
 /*
@@ -126,8 +121,13 @@
 
 */
 
-/* Object used as dummy key to fill deleted entries */
-static PyObject *dummy = NULL; /* Initialized by first call to newPyDictObject() */
+/* Object used as dummy key to fill deleted entries
+ * This could be any unique object,
+ * use a custom type in order to minimise coupling.
+*/
+static PyObject _dummy_struct;
+
+#define dummy (&_dummy_struct)
 
 #ifdef Py_REF_DEBUG
 PyObject *
@@ -138,77 +138,18 @@
 #endif
 
 /* forward declarations */
-static PyDictEntry *
-lookdict_unicode(PyDictObject *mp, PyObject *key, Py_hash_t hash);
-
-#ifdef SHOW_CONVERSION_COUNTS
-static long created = 0L;
-static long converted = 0L;
-
-static void
-show_counts(void)
-{
-    fprintf(stderr, "created %ld string dicts\n", created);
-    fprintf(stderr, "converted %ld to normal dicts\n", converted);
-    fprintf(stderr, "%.2f%% conversion rate\n", (100.0*converted)/created);
-}
-#endif
-
-/* Debug statistic to compare allocations with reuse through the free list */
-#undef SHOW_ALLOC_COUNT
-#ifdef SHOW_ALLOC_COUNT
-static size_t count_alloc = 0;
-static size_t count_reuse = 0;
-
-static void
-show_alloc(void)
-{
-    fprintf(stderr, "Dict allocations: %" PY_FORMAT_SIZE_T "d\n",
-        count_alloc);
-    fprintf(stderr, "Dict reuse through freelist: %" PY_FORMAT_SIZE_T
-        "d\n", count_reuse);
-    fprintf(stderr, "%.2f%% reuse rate\n\n",
-        (100.0*count_reuse/(count_alloc+count_reuse)));
-}
-#endif
-
-/* Debug statistic to count GC tracking of dicts */
-#ifdef SHOW_TRACK_COUNT
-static Py_ssize_t count_untracked = 0;
-static Py_ssize_t count_tracked = 0;
-
-static void
-show_track(void)
-{
-    fprintf(stderr, "Dicts created: %" PY_FORMAT_SIZE_T "d\n",
-        count_tracked + count_untracked);
-    fprintf(stderr, "Dicts tracked by the GC: %" PY_FORMAT_SIZE_T
-        "d\n", count_tracked);
-    fprintf(stderr, "%.2f%% dict tracking rate\n\n",
-        (100.0*count_tracked/(count_untracked+count_tracked)));
-}
-#endif
-
-
-/* Initialization macros.
-   There are two ways to create a dict:  PyDict_New() is the main C API
-   function, and the tp_new slot maps to dict_new().  In the latter case we
-   can save a little time over what PyDict_New does because it's guaranteed
-   that the PyDictObject struct is already zeroed out.
-   Everyone except dict_new() should use EMPTY_TO_MINSIZE (unless they have
-   an excellent reason not to).
-*/
-
-#define INIT_NONZERO_DICT_SLOTS(mp) do {                                \
-    (mp)->ma_table = (mp)->ma_smalltable;                               \
-    (mp)->ma_mask = PyDict_MINSIZE - 1;                                 \
-    } while(0)
-
-#define EMPTY_TO_MINSIZE(mp) do {                                       \
-    memset((mp)->ma_smalltable, 0, sizeof((mp)->ma_smalltable));        \
-    (mp)->ma_used = (mp)->ma_fill = 0;                                  \
-    INIT_NONZERO_DICT_SLOTS(mp);                                        \
-    } while(0)
+static PyDictKeyEntry *lookdict(PyDictObject *mp, PyObject *key,
+                                Py_hash_t hash, PyObject ***value_addr);
+static PyDictKeyEntry *lookdict_unicode(PyDictObject *mp, PyObject *key,
+                                        Py_hash_t hash, PyObject ***value_addr);
+static PyDictKeyEntry *
+lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
+                         Py_hash_t hash, PyObject ***value_addr);
+static PyDictKeyEntry *lookdict_split(PyDictObject *mp, PyObject *key,
+                                      Py_hash_t hash, PyObject ***value_addr);
+
+static int
+dictresize(PyDictObject *mp, Py_ssize_t minused);
 
 /* Dictionary reuse scheme to save calls to malloc, free, and memset */
 #ifndef PyDict_MAXFREELIST
@@ -236,61 +177,144 @@
     PyDict_ClearFreeList();
 }
 
-PyObject *
-PyDict_New(void)
+#define DK_INCREF(dk) (++(dk)->dk_refcnt)
+#define DK_DECREF(dk) if ((--(dk)->dk_refcnt) == 0) free_keys_object(dk)
+#define DK_SIZE(dk) ((dk)->dk_size)
+#define IS_POWER_OF_2(x) (((x) & ((x)-1)) == 0)  /* NB: true for 0 too */
+
+/* USABLE_FRACTION must obey the following:
+ * USABLE_FRACTION(n) < n for n >= PyDict_MINSIZE
+ * USABLE_FRACTION(n) > 0 for n >= PyDict_MINSIZE
+ * 2n/3, (2n+1)/3 and n/2+n/4-n/8 (f(4)==3 otherwise f(n) == 5n/8)
+ * seem to work well in practice.
+ */
+/* Use (2n+1)/3 rather than 2n/3 because: it makes little difference for
+ * combined tables (the two fractions round to the same or adjacent numbers),
+ * but 2*4/3 is 2 whereas (2*4+1)/3 is 3 which potentially saves quite
+ * a lot of space for small, split tables */
+#define USABLE_FRACTION(n) ((((n) << 1)+1)/3)
+
+/* Alternative fraction that is otherwise close enough to (2n+1)/3 to make
+ * little difference. 8 * 2/3 == 8 * 5/8 == 5. 16 * 2/3 == 16 * 5/8 == 10.
+ * 32 * 2/3 = 21, 32 * 5/8 = 20.
+ * Its advantage is that it is faster to compute on machines with slow division.
+ * #define USABLE_FRACTION(n) (((n) >> 1) + ((n) >> 2) - ((n) >> 3))
+*/
+
+#define ENSURE_DELETEABLE(d) \
+    if ((d)->ma_keys->dk_lookup == lookdict_unicode_nodummy) { \
+        (d)->ma_keys->dk_lookup = lookdict_unicode; \
+    }
+
+/* Allocate a keys table with `size` slots (a power of 2, >= PyDict_MINSIZE),
+ * refcount 1, every slot Unused.  Returns NULL with MemoryError set on OOM. */
+static PyDictKeysObject *new_keys_object(Py_ssize_t size) {
+    PyDictKeysObject *keys;
+    PyDictKeyEntry *slot, *end;
+
+    assert(size >= PyDict_MINSIZE);
+    assert(IS_POWER_OF_2(size));
+    /* dk_entries[1] already provides the first slot, hence size-1 extra. */
+    keys = PyMem_Malloc(sizeof(PyDictKeysObject) +
+                        sizeof(PyDictKeyEntry) * (size-1));
+    if (keys == NULL) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+    keys->dk_refcnt = 1;
+    keys->dk_size = size;
+    keys->dk_free = USABLE_FRACTION(size);
+    keys->dk_lookup = lookdict_unicode_nodummy;
+    end = &keys->dk_entries[0] + size;
+    for (slot = &keys->dk_entries[0]; slot < end; slot++)
+        slot->me_key = slot->_me_value = NULL;
+    return keys;
+}
+
+PyDictKeysObject *
+_PyDict_NewKeysForClass(void)
 {
-    register PyDictObject *mp;
-    if (dummy == NULL) { /* Auto-initialize dummy */
-        dummy = PyUnicode_FromString("<dummy key>");
-        if (dummy == NULL)
-            return NULL;
-#ifdef SHOW_CONVERSION_COUNTS
-        Py_AtExit(show_counts);
-#endif
-#ifdef SHOW_ALLOC_COUNT
-        Py_AtExit(show_alloc);
-#endif
-#ifdef SHOW_TRACK_COUNT
-        Py_AtExit(show_track);
-#endif
+    PyDictKeysObject *keys = new_keys_object(4);
+    if (keys != NULL)
+        keys->dk_lookup = lookdict_split;
+    return keys;
+}
+
+static void
+free_keys_object(PyDictKeysObject *keys)
+{
+    PyDictKeyEntry *entries = &keys->dk_entries[0];
+    Py_ssize_t i, n;
+    for (i = 0, n = DK_SIZE(keys); i < n; i++) {
+        Py_XDECREF(entries[i].me_key);
+        Py_XDECREF(entries[i]._me_value);
     }
+    PyMem_DEL(keys);
+}
+
+/* Consumes a reference to the keys object */
+static PyObject *
+new_dict(PyDictKeysObject *keys, PyObject **values)
+{
+    PyDictObject *mp;
     if (numfree) {
         mp = free_list[--numfree];
         assert (mp != NULL);
         assert (Py_TYPE(mp) == &PyDict_Type);
         _Py_NewReference((PyObject *)mp);
-        if (mp->ma_fill) {
-            EMPTY_TO_MINSIZE(mp);
-        } else {
-            /* At least set ma_table and ma_mask; these are wrong
-               if an empty but presized dict is added to freelist */
-            INIT_NONZERO_DICT_SLOTS(mp);
-        }
-        assert (mp->ma_used == 0);
-        assert (mp->ma_table == mp->ma_smalltable);
-        assert (mp->ma_mask == PyDict_MINSIZE - 1);
-#ifdef SHOW_ALLOC_COUNT
-        count_reuse++;
-#endif
-    } else {
+    }
+    else {
         mp = PyObject_GC_New(PyDictObject, &PyDict_Type);
         if (mp == NULL)
             return NULL;
-        EMPTY_TO_MINSIZE(mp);
-#ifdef SHOW_ALLOC_COUNT
-        count_alloc++;
-#endif
+
     }
-    mp->ma_lookup = lookdict_unicode;
-#ifdef SHOW_TRACK_COUNT
-    count_untracked++;
-#endif
-#ifdef SHOW_CONVERSION_COUNTS
-    ++created;
-#endif
+    mp->ma_keys = keys;
+    mp->ma_values = (void *)values;
+    mp->ma_used = 0;
     return (PyObject *)mp;
 }
 
+#define new_values(size) PyMem_New(PyObject *, size)
+
+#define free_values(values) PyMem_DEL(values)
+
+/* Build a split-table dict over `keys`; consumes the caller's reference to
+ * `keys`, even when allocating the values array fails. */
+static PyObject *
+new_dict_with_shared_keys(PyDictKeysObject *keys) {
+    PyObject **values;
+    Py_ssize_t i, size = DK_SIZE(keys);
+
+    values = new_values(size);
+    if (values == NULL) {
+        DK_DECREF(keys);   /* honour "consumes a reference" on failure */
+        return PyErr_NoMemory();
+    }
+    for (i = 0; i < size; i++)
+        values[i] = NULL;
+    return new_dict(keys, values);
+}
+
+PyObject *
+PyDict_New(void) {
+    PyDictKeysObject *keys = new_keys_object(8);   /* NULL + error on OOM */
+    return keys == NULL ? NULL : new_dict(keys, NULL);
+}
+#define CACHED_KEYS(tp) (((PyHeapTypeObject*)tp)->ht_cached_keys)
+
+PyObject *
+PyDict_NewForInstance(PyTypeObject *tp) {
+    if ((tp->tp_flags & Py_TPFLAGS_HEAPTYPE) && CACHED_KEYS(tp)) {
+        DK_INCREF(CACHED_KEYS(tp));
+        return new_dict_with_shared_keys(CACHED_KEYS(tp));
+    } else {
+        return PyDict_New();
+    }
+}
+
+
+
 /*
 The basic lookup function used by all operations.
 This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
@@ -309,29 +333,29 @@
 lookdict() is general-purpose, and may return NULL if (and only if) a
 comparison raises an exception (this was new in Python 2.5).
 lookdict_unicode() below is specialized to string keys, comparison of which can
-never raise an exception; that function can never return NULL.  For both, when
-the key isn't found a PyDictEntry* is returned for which the me_value field is
-NULL; this is the slot in the dict at which the key would have been found, and
-the caller can (if it wishes) add the <key, value> pair to the returned
-PyDictEntry*.
+never raise an exception; that function can never return NULL.
+lookdict_unicode_nodummy is further specialized for string keys that cannot be
+the <dummy> value.
 */
-static PyDictEntry *
-lookdict(PyDictObject *mp, PyObject *key, register Py_hash_t hash)
+static PyDictKeyEntry *
+lookdict(PyDictObject *mp, PyObject *key,
+         Py_hash_t hash, PyObject ***value_addr)
 {
     register size_t i;
     register size_t perturb;
-    register PyDictEntry *freeslot;
-    register size_t mask = (size_t)mp->ma_mask;
-    PyDictEntry *ep0 = mp->ma_table;
-    register PyDictEntry *ep;
+    register PyDictKeyEntry *freeslot;
+    register size_t mask = (size_t)mp->ma_keys->dk_size-1;
+    PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0];
+    register PyDictKeyEntry *ep;
     register int cmp;
     PyObject *startkey;
 
     i = (size_t)hash & mask;
     ep = &ep0[i];
-    if (ep->me_key == NULL || ep->me_key == key)
+    if (ep->me_key == NULL || ep->me_key == key) {
+        *value_addr = &ep->_me_value;
         return ep;
-
+    }
     if (ep->me_key == dummy)
         freeslot = ep;
     else {
@@ -342,9 +366,11 @@
             Py_DECREF(startkey);
             if (cmp < 0)
                 return NULL;
-            if (ep0 == mp->ma_table && ep->me_key == startkey) {
-                if (cmp > 0)
+            if (ep0 == mp->ma_keys->dk_entries && ep->me_key == startkey) {
+                if (cmp > 0) {
+                    *value_addr = &ep->_me_value;
                     return ep;
+                }
             }
             else {
                 /* The compare did major nasty stuff to the
@@ -352,7 +378,7 @@
                  * XXX A clever adversary could prevent this
                  * XXX from terminating.
                  */
-                return lookdict(mp, key, hash);
+                return lookdict(mp, key, hash, value_addr);
             }
         }
         freeslot = NULL;
@@ -363,20 +389,33 @@
     for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
         i = (i << 2) + i + perturb + 1;
         ep = &ep0[i & mask];
-        if (ep->me_key == NULL)
-            return freeslot == NULL ? ep : freeslot;
-        if (ep->me_key == key)
+        if (ep->me_key == NULL) {
+            if (freeslot == NULL) {
+                *value_addr = &ep->_me_value;
+                return ep;
+            } else {
+                *value_addr = &freeslot->_me_value;
+                return freeslot;
+            }
+        }
+        if (ep->me_key == key) {
+            *value_addr = &ep->_me_value;
             return ep;
+        }
         if (ep->me_hash == hash && ep->me_key != dummy) {
             startkey = ep->me_key;
             Py_INCREF(startkey);
             cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
             Py_DECREF(startkey);
-            if (cmp < 0)
+            if (cmp < 0) {
+                *value_addr = NULL;
                 return NULL;
-            if (ep0 == mp->ma_table && ep->me_key == startkey) {
-                if (cmp > 0)
+            }
+            if (ep0 == mp->ma_keys->dk_entries && ep->me_key == startkey) {
+                if (cmp > 0) {
+                    *value_addr = &ep->_me_value;
                     return ep;
+                }
             }
             else {
                 /* The compare did major nasty stuff to the
@@ -384,7 +423,7 @@
                  * XXX A clever adversary could prevent this
                  * XXX from terminating.
                  */
-                return lookdict(mp, key, hash);
+                return lookdict(mp, key, hash, value_addr);
             }
         }
         else if (ep->me_key == dummy && freeslot == NULL)
@@ -394,46 +433,39 @@
     return 0;
 }
 
-/*
- * Hacked up version of lookdict which can assume keys are always
- * unicodes; this assumption allows testing for errors during
- * PyObject_RichCompareBool() to be dropped; unicode-unicode
- * comparisons never raise exceptions.  This also means we don't need
- * to go through PyObject_RichCompareBool(); we can always use
- * unicode_eq() directly.
- *
- * This is valuable because dicts with only unicode keys are very common.
- */
-static PyDictEntry *
-lookdict_unicode(PyDictObject *mp, PyObject *key, register Py_hash_t hash)
+/* Specialized version for string-only keys */
+static PyDictKeyEntry *
+lookdict_unicode(PyDictObject *mp, PyObject *key,
+                 Py_hash_t hash, PyObject ***value_addr)
 {
     register size_t i;
     register size_t perturb;
-    register PyDictEntry *freeslot;
-    register size_t mask = (size_t)mp->ma_mask;
-    PyDictEntry *ep0 = mp->ma_table;
-    register PyDictEntry *ep;
+    register PyDictKeyEntry *freeslot;
+    register size_t mask = (size_t)mp->ma_keys->dk_size-1;
+    PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0];
+    register PyDictKeyEntry *ep;
 
     /* Make sure this function doesn't have to handle non-unicode keys,
        including subclasses of str; e.g., one reason to subclass
        unicodes is to override __eq__, and for speed we don't cater to
        that here. */
     if (!PyUnicode_CheckExact(key)) {
-#ifdef SHOW_CONVERSION_COUNTS
-        ++converted;
-#endif
-        mp->ma_lookup = lookdict;
-        return lookdict(mp, key, hash);
+        mp->ma_keys->dk_lookup = lookdict;
+        return lookdict(mp, key, hash, value_addr);
     }
     i = (size_t)hash & mask;
     ep = &ep0[i];
-    if (ep->me_key == NULL || ep->me_key == key)
+    if (ep->me_key == NULL || ep->me_key == key) {
+        *value_addr = &ep->_me_value;
         return ep;
+    }
     if (ep->me_key == dummy)
         freeslot = ep;
     else {
-        if (ep->me_hash == hash && unicode_eq(ep->me_key, key))
+        if (ep->me_hash == hash && unicode_eq(ep->me_key, key)) {
+            *value_addr = &ep->_me_value;
             return ep;
+        }
         freeslot = NULL;
     }
 
@@ -442,13 +474,22 @@
     for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
         i = (i << 2) + i + perturb + 1;
         ep = &ep0[i & mask];
-        if (ep->me_key == NULL)
-            return freeslot == NULL ? ep : freeslot;
+        if (ep->me_key == NULL) {
+            if (freeslot == NULL) {
+                *value_addr = &ep->_me_value;
+                return ep;
+            } else {
+                *value_addr = &freeslot->_me_value;
+                return freeslot;
+            }
+        }
         if (ep->me_key == key
             || (ep->me_hash == hash
             && ep->me_key != dummy
-            && unicode_eq(ep->me_key, key)))
+            && unicode_eq(ep->me_key, key))) {
+            *value_addr = &ep->_me_value;
             return ep;
+        }
         if (ep->me_key == dummy && freeslot == NULL)
             freeslot = ep;
     }
@@ -456,6 +497,104 @@
     return 0;
 }
 
+/* Faster version of lookdict_unicode when it is known that no <dummy> keys
+ * will be present. */
+static PyDictKeyEntry *
+lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
+                         Py_hash_t hash, PyObject ***value_addr)
+{
+    register size_t i;
+    register size_t perturb;
+    register size_t mask = (size_t)mp->ma_keys->dk_size-1;
+    PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0];
+    register PyDictKeyEntry *ep;
+
+    /* Make sure this function doesn't have to handle non-unicode keys,
+       including subclasses of str; e.g., one reason to subclass
+       unicodes is to override __eq__, and for speed we don't cater to
+       that here. */
+    if (!PyUnicode_CheckExact(key)) {
+        mp->ma_keys->dk_lookup = lookdict;
+        return lookdict(mp, key, hash, value_addr);
+    }
+    i = (size_t)hash & mask;
+    ep = &ep0[i];
+    assert(ep->me_key == NULL || PyUnicode_CheckExact(ep->me_key));
+    if (ep->me_key == NULL || ep->me_key == key) {
+        *value_addr = &ep->_me_value;
+        return ep;
+    }
+    if (ep->me_hash == hash && unicode_eq(ep->me_key, key)) {
+        *value_addr = &ep->_me_value;
+        return ep;
+    }
+    for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
+        i = (i << 2) + i + perturb + 1;
+        ep = &ep0[i & mask];
+        assert(ep->me_key == NULL || PyUnicode_CheckExact(ep->me_key));
+        if (ep->me_key == NULL || ep->me_key == key) {
+            *value_addr = &ep->_me_value;
+            return ep;
+        }
+        if (ep->me_hash == hash && unicode_eq(ep->me_key, key)) {
+            *value_addr = &ep->_me_value;
+            return ep;
+        }
+    }
+    assert(0);          /* NOT REACHED */
+    return 0;
+}
+
+/* Version of lookdict for split tables.
+ * For a split table *value_addr is set to the slot in mp->ma_values that
+ * has the same index as the returned key entry. */
+static PyDictKeyEntry *
+lookdict_split(PyDictObject *mp, PyObject *key,
+               Py_hash_t hash, PyObject ***value_addr)
+{
+    register size_t i;
+    register size_t perturb;
+    register size_t mask = (size_t)mp->ma_keys->dk_size-1;
+    PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0];
+    register PyDictKeyEntry *ep;
+
+    if (!PyUnicode_CheckExact(key)) {
+        ep = lookdict(mp, key, hash, value_addr);
+        /* lookdict assumes a combined table and points *value_addr at
+         * ep->_me_value.  While the dict is still split, redirect it to
+         * the ma_values slot with the same index as the entry. */
+        if (ep != NULL && mp->ma_values != NULL)
+            *value_addr = &mp->ma_values[ep - &mp->ma_keys->dk_entries[0]];
+        return ep;
+    }
+    i = (size_t)hash & mask;
+    ep = &ep0[i];
+    assert(ep->me_key == NULL || PyUnicode_CheckExact(ep->me_key));
+    if (ep->me_key == NULL || ep->me_key == key) {
+        *value_addr = &mp->ma_values[i];
+        return ep;
+    }
+    if (ep->me_hash == hash && unicode_eq(ep->me_key, key)) {
+        *value_addr = &mp->ma_values[i];
+        return ep;
+    }
+    for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
+        i = (i << 2) + i + perturb + 1;
+        ep = &ep0[i & mask];
+        assert(ep->me_key == NULL || PyUnicode_CheckExact(ep->me_key));
+        if (ep->me_key == NULL || ep->me_key == key) {
+            *value_addr = &mp->ma_values[i & mask];
+            return ep;
+        }
+        if (ep->me_hash == hash && unicode_eq(ep->me_key, key)) {
+            *value_addr = &mp->ma_values[i & mask];
+            return ep;
+        }
+    }
+    assert(0);          /* NOT REACHED */
+    return 0;
+}
+
 int
 _PyDict_HasOnlyStringKeys(PyObject *dict)
 {
@@ -463,7 +594,7 @@
     PyObject *key, *value;
     assert(PyDict_Check(dict));
     /* Shortcut */
-    if (((PyDictObject *)dict)->ma_lookup == lookdict_unicode)
+    if (((PyDictObject *)dict)->ma_keys->dk_lookup != lookdict)
         return 1;
     while (PyDict_Next(dict, &pos, &key, &value))
         if (!PyUnicode_Check(key))
@@ -471,15 +602,6 @@
     return 1;
 }
 
-#ifdef SHOW_TRACK_COUNT
-#define INCREASE_TRACK_COUNT \
-    (count_tracked++, count_untracked--);
-#define DECREASE_TRACK_COUNT \
-    (count_tracked--, count_untracked++);
-#else
-#define INCREASE_TRACK_COUNT
-#define DECREASE_TRACK_COUNT
-#endif
 
 #define MAINTAIN_TRACKING(mp, key, value) \
     do { \
@@ -487,7 +609,6 @@
             if (_PyObject_GC_MAY_BE_TRACKED(key) || \
                 _PyObject_GC_MAY_BE_TRACKED(value)) { \
                 _PyObject_GC_TRACK(mp); \
-                INCREASE_TRACK_COUNT \
             } \
         } \
     } while(0)
@@ -497,26 +618,42 @@
 {
     PyDictObject *mp;
     PyObject *value;
-    Py_ssize_t mask, i;
-    PyDictEntry *ep;
+    Py_ssize_t i, size;
 
     if (!PyDict_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op))
         return;
 
     mp = (PyDictObject *) op;
-    ep = mp->ma_table;
-    mask = mp->ma_mask;
-    for (i = 0; i <= mask; i++) {
-        if ((value = ep[i].me_value) == NULL)
-            continue;
-        if (_PyObject_GC_MAY_BE_TRACKED(value) ||
-            _PyObject_GC_MAY_BE_TRACKED(ep[i].me_key))
-            return;
+    size = DK_SIZE(mp->ma_keys);
+    if (_PyDict_HasSplitTable(mp)) {
+        for (i = 0; i < size; i++) {
+            if ((value = mp->ma_values[i]) == NULL)
+                continue;
+            if (_PyObject_GC_MAY_BE_TRACKED(value)) {
+                assert(!_PyObject_GC_MAY_BE_TRACKED(mp->ma_keys->dk_entries[i].me_key));
+                return;
+            }
+        }
+    } else {
+        PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0];
+        for (i = 0; i < size; i++) {
+            if ((value = ep0[i]._me_value) == NULL)
+                continue;
+            if (_PyObject_GC_MAY_BE_TRACKED(value) ||
+                _PyObject_GC_MAY_BE_TRACKED(ep0[i].me_key))
+                return;
+        }
     }
-    DECREASE_TRACK_COUNT
     _PyObject_GC_UNTRACK(op);
 }
 
+static int
+insertion_resize(PyDictObject *mp)
+{
+    if (_PyDict_HasSplitTable(mp) || mp->ma_used > 50000)
+        return dictresize(mp, mp->ma_used * 2);
+    return dictresize(mp, mp->ma_used * 4);
+}
 
 /*
 Internal routine to insert a new item into the table.
@@ -525,38 +662,74 @@
 Returns -1 if an error occurred, or 0 on success.
 */
 static int
-insertdict(register PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
+insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
 {
     PyObject *old_value;
-    register PyDictEntry *ep;
-    typedef PyDictEntry *(*lookupfunc)(PyDictObject *, PyObject *, Py_hash_t);
-
-    assert(mp->ma_lookup != NULL);
-    ep = mp->ma_lookup(mp, key, hash);
+    PyObject **value_addr;
+    PyDictKeyEntry *ep;
+    assert(key != dummy);
+
+    if (mp->ma_values != NULL && !PyUnicode_CheckExact(key)) {
+        /* Split tables only hold exact-str keys; resize first (dictresize
+         * leaves ma_values NULL, i.e. a combined table). */
+        if (insertion_resize(mp) < 0) {
+            /* We own the references to key and value: drop them on
+             * every error path. */
+            Py_DECREF(key);
+            Py_DECREF(value);
+            return -1;
+        }
+    }
+
+    ep = mp->ma_keys->dk_lookup(mp, key, hash, &value_addr);
     if (ep == NULL) {
         Py_DECREF(key);
         Py_DECREF(value);
         return -1;
     }
     MAINTAIN_TRACKING(mp, key, value);
-    if (ep->me_value != NULL) {
-        old_value = ep->me_value;
-        ep->me_value = value;
+    old_value = *value_addr;
+    if (old_value != NULL) {
+        assert(ep->me_key != NULL && ep->me_key != dummy);
+        *value_addr = value;
         Py_DECREF(old_value); /* which **CAN** re-enter */
         Py_DECREF(key);
+    } else {
+        if (ep->me_key == NULL) {
+            /* Unused slot: takes one of the table's free entries. */
+            if (mp->ma_keys->dk_free <= 0) {
+                /* Need to resize. */
+                if (insertion_resize(mp) < 0) {
+                    Py_DECREF(key);
+                    Py_DECREF(value);
+                    return -1;
+                }
+                ep = mp->ma_keys->dk_lookup(mp, key, hash, &value_addr);
+                if (ep == NULL) {
+                    Py_DECREF(key);
+                    Py_DECREF(value);
+                    return -1;
+                }
+            }
+            mp->ma_keys->dk_free--;
+            assert(mp->ma_keys->dk_free >= 0);
+            ep->me_key = key;
+            ep->me_hash = hash;
+        } else {
+            if (ep->me_key == dummy) {
+                ep->me_key = key;
+                ep->me_hash = hash;
+                Py_DECREF(dummy);
+            } else {
+                /* Pending slot: the key is already stored in the table. */
+                Py_DECREF(key);
+                assert(_PyDict_HasSplitTable(mp));
+            }
+        }
+        mp->ma_used++;
+        *value_addr = value;
     }
-    else {
-        if (ep->me_key == NULL)
-            mp->ma_fill++;
-        else {
-            assert(ep->me_key == dummy);
-            Py_DECREF(dummy);
-        }
-        ep->me_key = key;
-        ep->me_hash = hash;
-        ep->me_value = value;
-        mp->ma_used++;
-    }
+    assert(ep->me_key != NULL && ep->me_key != dummy);
     return 0;
 }
 
@@ -567,30 +729,39 @@
 using insertdict() in dictresize() is dangerous (SF bug #1456209).
 Note that no refcounts are changed by this routine; if needed, the caller
 is responsible for incref'ing `key` and `value`.
+Neither mp->ma_used nor k->dk_free is modified by this routine; the caller
+must set them correctly.
 */
 static void
-insertdict_clean(register PyDictObject *mp, PyObject *key, Py_hash_t hash,
+insertdict_clean(PyDictObject *mp, PyObject *key, Py_hash_t hash,
                  PyObject *value)
 {
-    register size_t i;
-    register size_t perturb;
-    register size_t mask = (size_t)mp->ma_mask;
-    PyDictEntry *ep0 = mp->ma_table;
-    register PyDictEntry *ep;
-
+    size_t i;
+    size_t perturb;
+    PyDictKeysObject *k = mp->ma_keys;
+    size_t mask = (size_t)DK_SIZE(k)-1;
+    PyDictKeyEntry *ep0 = &k->dk_entries[0];
+    PyDictKeyEntry *ep;
+
+    assert(k->dk_lookup != NULL);
+    assert(value != NULL);
+    assert(key != NULL);
+    assert(key != dummy);
+    if (!PyUnicode_CheckExact(key)) {
+        assert(mp->ma_values == NULL);
+        k->dk_lookup = lookdict;
+    }
     MAINTAIN_TRACKING(mp, key, value);
-    i = (size_t)hash & mask;
+    i = hash & mask;
     ep = &ep0[i];
     for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) {
         i = (i << 2) + i + perturb + 1;
         ep = &ep0[i & mask];
     }
-    assert(ep->me_value == NULL);
-    mp->ma_fill++;
+    assert(ep->_me_value == NULL);
     ep->me_key = key;
     ep->me_hash = hash;
-    ep->me_value = value;
-    mp->ma_used++;
+    ep->_me_value = value;
 }
 
 /*
@@ -602,14 +773,11 @@
 dictresize(PyDictObject *mp, Py_ssize_t minused)
 {
     Py_ssize_t newsize;
-    PyDictEntry *oldtable, *newtable, *ep;
-    Py_ssize_t i;
-    int is_oldtable_malloced;
-    PyDictEntry small_copy[PyDict_MINSIZE];
-
-    assert(minused >= 0);
-
-    /* Find the smallest table size > minused. */
+    PyDictKeysObject *oldkeys;
+    PyObject **oldvalues;
+    Py_ssize_t i, size;
+
+    /* Find the smallest table size > minused. */
     for (newsize = PyDict_MINSIZE;
          newsize <= minused && newsize > 0;
          newsize <<= 1)
@@ -618,72 +786,92 @@
         PyErr_NoMemory();
         return -1;
     }
-
-    /* Get space for a new table. */
-    oldtable = mp->ma_table;
-    assert(oldtable != NULL);
-    is_oldtable_malloced = oldtable != mp->ma_smalltable;
-
-    if (newsize == PyDict_MINSIZE) {
-        /* A large table is shrinking, or we can't get any smaller. */
-        newtable = mp->ma_smalltable;
-        if (newtable == oldtable) {
-            if (mp->ma_fill == mp->ma_used) {
-                /* No dummies, so no point doing anything. */
-                return 0;
+    oldkeys = mp->ma_keys;
+    oldvalues = mp->ma_values;
+    /* Allocate the new table first, so that failure leaves mp untouched. */
+    mp->ma_keys = new_keys_object(newsize);
+    if (mp->ma_keys == NULL) {
+        mp->ma_keys = oldkeys;
+        return -1;
+    }
+    /* Logic below assumes we can transfer refcount to new keys
+     * and that value is stored in _me_value.
+     * Increment ref-counts and copy values here to compensate
+     * This (resizing a split table) should be relatively rare */
+    size = DK_SIZE(oldkeys);
+    if (oldvalues != NULL) {
+        for (i = 0; i < size; i++)
+            if (oldvalues[i] != NULL) {
+                Py_INCREF(oldkeys->dk_entries[i].me_key);
+                oldkeys->dk_entries[i]._me_value = oldvalues[i];
             }
-            /* We're not going to resize it, but rebuild the
-               table anyway to purge old dummy entries.
-               Subtle:  This is *necessary* if fill==size,
-               as lookdict needs at least one virgin slot to
-               terminate failing searches.  If fill < size, it's
-               merely desirable, as dummies slow searches. */
-            assert(mp->ma_fill > mp->ma_used);
-            memcpy(small_copy, oldtable, sizeof(small_copy));
-            oldtable = small_copy;
+    }
+    mp->ma_values = NULL;
+    for (i = 0; i < size; i++) {
+        PyDictKeyEntry *ep = &oldkeys->dk_entries[i];
+        if (ep->_me_value != NULL) {
+            assert(ep->me_key != dummy);
+            insertdict_clean(mp, ep->me_key, ep->me_hash, ep->_me_value);
         }
     }
+    mp->ma_keys->dk_free -= mp->ma_used;
+    if (oldvalues != NULL) {
+        for (i = 0; i < size; i++)
+            oldkeys->dk_entries[i]._me_value = NULL;
+        free_values(oldvalues);
+        DK_DECREF(oldkeys);
+    }
     else {
-        newtable = PyMem_NEW(PyDictEntry, newsize);
-        if (newtable == NULL) {
-            PyErr_NoMemory();
-            return -1;
+        assert(oldkeys->dk_lookup != lookdict_split);
+        if (oldkeys->dk_lookup != lookdict_unicode_nodummy) {
+            PyDictKeyEntry *ep0 = &oldkeys->dk_entries[0];
+            for (i = 0; i < size; i++) {
+                if (ep0[i].me_key == dummy)
+                    Py_DECREF(dummy);
+            }
         }
+        assert(oldkeys->dk_refcnt == 1);
+        PyMem_DEL(oldkeys);
     }
-
-    /* Make the dict empty, using the new table. */
-    assert(newtable != oldtable);
-    mp->ma_table = newtable;
-    mp->ma_mask = newsize - 1;
-    memset(newtable, 0, sizeof(PyDictEntry) * newsize);
-    mp->ma_used = 0;
-    i = mp->ma_fill;
-    mp->ma_fill = 0;
-
-    /* Copy the data over; this is refcount-neutral for active entries;
-       dummy entries aren't copied over, of course */
-    for (ep = oldtable; i > 0; ep++) {
-        if (ep->me_value != NULL) {             /* active entry */
-            --i;
-            insertdict_clean(mp, ep->me_key, ep->me_hash, ep->me_value);
-        }
-        else if (ep->me_key != NULL) {          /* dummy entry */
-            --i;
-            assert(ep->me_key == dummy);
-            Py_DECREF(ep->me_key);
-        }
-        /* else key == value == NULL:  nothing to do */
-    }
-
-    if (is_oldtable_malloced)
-        PyMem_DEL(oldtable);
     return 0;
 }
 
-/* Create a new dictionary pre-sized to hold an estimated number of elements.
-   Underestimates are okay because the dictionary will resize as necessary.
-   Overestimates just mean the dictionary will be more sparse than usual.
-*/
+static PyDictKeysObject *
+make_keys_shared(PyObject *op) {
+    Py_ssize_t i;
+    Py_ssize_t size;
+    PyDictObject *mp = (PyDictObject *)op;
+
+    assert(PyDict_CheckExact(op));
+    mp = (PyDictObject *)op;
+    if (!_PyDict_HasSplitTable(mp)) {
+        PyDictKeyEntry *ep0;
+        PyObject **values;
+        assert(mp->ma_keys->dk_refcnt == 1);
+        if (mp->ma_keys->dk_lookup == lookdict) {
+            return NULL;
+        } else if (mp->ma_keys->dk_lookup == lookdict_unicode) {
+            /* Remove dummy keys */
+            if (dictresize(mp, DK_SIZE(mp->ma_keys)))
+                return NULL;
+        }
+        assert(mp->ma_keys->dk_lookup == lookdict_unicode_nodummy);
+        /* Copy values into a new array */
+        ep0 = &mp->ma_keys->dk_entries[0];
+        values = new_values(DK_SIZE(mp->ma_keys));
+        if (values == NULL)
+            return NULL;
+        size = DK_SIZE(mp->ma_keys);
+        for (i = 0; i < size; i++) {
+            values[i] = ep0[i]._me_value;
+            ep0[i]._me_value = NULL;
+        }
+        mp->ma_keys->dk_lookup = lookdict_split;
+        mp->ma_values = values;
+    }
+    DK_INCREF(mp->ma_keys);
+    return mp->ma_keys;
+}
 
 PyObject *
 _PyDict_NewPresized(Py_ssize_t minused)
@@ -712,8 +900,10 @@
 {
     Py_hash_t hash;
     PyDictObject *mp = (PyDictObject *)op;
-    PyDictEntry *ep;
+    PyDictKeyEntry *ep;
     PyThreadState *tstate;
+    PyObject **value_addr;
+
     if (!PyDict_Check(op))
         return NULL;
     if (!PyUnicode_CheckExact(key) ||
@@ -737,20 +927,20 @@
         /* preserve the existing exception */
         PyObject *err_type, *err_value, *err_tb;
         PyErr_Fetch(&err_type, &err_value, &err_tb);
-        ep = (mp->ma_lookup)(mp, key, hash);
+        ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
         /* ignore errors */
         PyErr_Restore(err_type, err_value, err_tb);
         if (ep == NULL)
             return NULL;
     }
     else {
-        ep = (mp->ma_lookup)(mp, key, hash);
+        ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
         if (ep == NULL) {
             PyErr_Clear();
             return NULL;
         }
     }
-    return ep->me_value;
+    return *value_addr;
 }
 
 /* Variant of PyDict_GetItem() that doesn't suppress exceptions.
@@ -762,7 +952,8 @@
 {
     Py_hash_t hash;
     PyDictObject*mp = (PyDictObject *)op;
-    PyDictEntry *ep;
+    PyDictKeyEntry *ep;
+    PyObject **value_addr;
 
     if (!PyDict_Check(op)) {
         PyErr_BadInternalCall();
@@ -777,10 +968,10 @@
         }
     }
 
-    ep = (mp->ma_lookup)(mp, key, hash);
+    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
     if (ep == NULL)
         return NULL;
-    return ep->me_value;
+    return *value_addr;
 }
 
 /* CAUTION: PyDict_SetItem() must guarantee that it won't resize the
@@ -790,12 +981,10 @@
  * remove them.
  */
 int
-PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
+PyDict_SetItem(PyObject *op, PyObject *key, PyObject *value)
 {
-    register PyDictObject *mp;
-    register Py_hash_t hash;
-    register Py_ssize_t n_used;
-
+    PyDictObject *mp;
+    Py_hash_t hash;
     if (!PyDict_Check(op)) {
         PyErr_BadInternalCall();
         return -1;
@@ -810,38 +999,21 @@
         if (hash == -1)
             return -1;
     }
-    assert(mp->ma_fill <= mp->ma_mask);  /* at least one empty slot */
-    n_used = mp->ma_used;
     Py_INCREF(value);
     Py_INCREF(key);
-    if (insertdict(mp, key, hash, value) != 0)
-        return -1;
-    /* If we added a key, we can safely resize.  Otherwise just return!
-     * If fill >= 2/3 size, adjust size.  Normally, this doubles or
-     * quaduples the size, but it's also possible for the dict to shrink
-     * (if ma_fill is much larger than ma_used, meaning a lot of dict
-     * keys have been * deleted).
-     *
-     * Quadrupling the size improves average dictionary sparseness
-     * (reducing collisions) at the cost of some memory and iteration
-     * speed (which loops over every possible entry).  It also halves
-     * the number of expensive resize operations in a growing dictionary.
-     *
-     * Very large dictionaries (over 50K items) use doubling instead.
-     * This may help applications with severe memory constraints.
-     */
-    if (!(mp->ma_used > n_used && mp->ma_fill*3 >= (mp->ma_mask+1)*2))
-        return 0;
-    return dictresize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used);
+
+    /* insertdict() handles any resizing that might be necessary */
+    return insertdict(mp, key, hash, value);
 }
 
 int
 PyDict_DelItem(PyObject *op, PyObject *key)
 {
-    register PyDictObject *mp;
-    register Py_hash_t hash;
-    register PyDictEntry *ep;
-    PyObject *old_value, *old_key;
+    PyDictObject *mp;
+    Py_hash_t hash;
+    PyDictKeyEntry *ep;
+    PyObject *old_key, *old_value;
+    PyObject **value_addr;
 
     if (!PyDict_Check(op)) {
         PyErr_BadInternalCall();
@@ -855,91 +1027,97 @@
             return -1;
     }
     mp = (PyDictObject *)op;
-    ep = (mp->ma_lookup)(mp, key, hash);
+    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
     if (ep == NULL)
         return -1;
-    if (ep->me_value == NULL) {
+    if (*value_addr == NULL) {
         set_key_error(key);
         return -1;
     }
-    old_key = ep->me_key;
-    Py_INCREF(dummy);
-    ep->me_key = dummy;
-    old_value = ep->me_value;
-    ep->me_value = NULL;
+    old_value = *value_addr;
+    *value_addr = NULL;
     mp->ma_used--;
+    if (!_PyDict_HasSplitTable(mp)) {
+        ENSURE_DELETEABLE(mp);
+        old_key = ep->me_key;
+        Py_INCREF(dummy);
+        ep->me_key = dummy;
+        Py_DECREF(old_key);
+    }
     Py_DECREF(old_value);
-    Py_DECREF(old_key);
     return 0;
 }
 
+/* This immutable, empty PyDictKeysObject is used for PyDict_Clear()
+ * (which cannot fail and thus can do no allocation).
+ */
+static PyDictKeysObject empty_keys_struct = {
+        2, /* dk_refcnt 1 for this struct, 1 for dummy_struct */
+        1, /* dk_size */
+        lookdict_unicode_nodummy, /* dk_lookup */
+        0, /* dk_free (immutable) */
+        {
+            { 0, 0, 0 } /* dk_entries (empty) */
+        }
+};
+
+#define Py_EMPTY_KEYS &empty_keys_struct
+
 void
 PyDict_Clear(PyObject *op)
 {
     PyDictObject *mp;
-    PyDictEntry *ep, *table;
-    int table_is_malloced;
-    Py_ssize_t fill;
-    PyDictEntry small_copy[PyDict_MINSIZE];
-#ifdef Py_DEBUG
+    PyDictKeysObject *oldkeys;
+    PyObject **oldvalues;
     Py_ssize_t i, n;
-#endif
-
+
+    mp = ((PyDictObject *)op);
     if (!PyDict_Check(op))
         return;
-    mp = (PyDictObject *)op;
-#ifdef Py_DEBUG
-    n = mp->ma_mask + 1;
-    i = 0;
-#endif
-
-    table = mp->ma_table;
-    assert(table != NULL);
-    table_is_malloced = table != mp->ma_smalltable;
-
-    /* This is delicate.  During the process of clearing the dict,
-     * decrefs can cause the dict to mutate.  To avoid fatal confusion
-     * (voice of experience), we have to make the dict empty before
-     * clearing the slots, and never refer to anything via mp->xxx while
-     * clearing.
-     */
-    fill = mp->ma_fill;
-    if (table_is_malloced)
-        EMPTY_TO_MINSIZE(mp);
-
-    else if (fill > 0) {
-        /* It's a small table with something that needs to be cleared.
-         * Afraid the only safe way is to copy the dict entries into
-         * another small table first.
-         */
-        memcpy(small_copy, table, sizeof(small_copy));
-        table = small_copy;
-        EMPTY_TO_MINSIZE(mp);
+    oldkeys = mp->ma_keys;
+    oldvalues = mp->ma_values;
+    /* Empty the dict... */
+    mp->ma_keys = Py_EMPTY_KEYS;
+    mp->ma_used = 0;
+    DK_INCREF(Py_EMPTY_KEYS);
+    mp->ma_values = NULL;
+    /* ...then clear the keys and values */
+    n = DK_SIZE(oldkeys);
+    if (oldvalues != NULL)
+        for (i = 0; i < n; i++)
+            Py_CLEAR(oldvalues[i]);
+    if (n == 8 && oldvalues == NULL) {
+        /* Reuse table */
+        Py_ssize_t i, n;
+        assert(oldkeys->dk_refcnt == 1);
+        for (i = 0, n = DK_SIZE(oldkeys); i < n; i++) {
+            Py_CLEAR(oldkeys->dk_entries[i].me_key);
+            Py_CLEAR(oldkeys->dk_entries[i]._me_value);
+        }
+        oldkeys->dk_free = USABLE_FRACTION(oldkeys->dk_size);
+        oldkeys->dk_lookup = lookdict_unicode_nodummy;
+        /* Check that dict is still empty */
+        if (mp->ma_keys == Py_EMPTY_KEYS) {
+            mp->ma_keys = oldkeys;
+            mp->ma_values = NULL;
+            assert(mp->ma_used == 0);
+        } else {
+            free_keys_object(oldkeys);
+        }
     }
-    /* else it's a small table that's already empty */
-
-    /* Now we can finally clear things.  If C had refcounts, we could
-     * assert that the refcount on table is 1 now, i.e. that this function
-     * has unique access to it, so decref side-effects can't alter it.
-     */
-    for (ep = table; fill > 0; ++ep) {
-#ifdef Py_DEBUG
-        assert(i < n);
-        ++i;
-#endif
-        if (ep->me_key) {
-            --fill;
-            Py_DECREF(ep->me_key);
-            Py_XDECREF(ep->me_value);
+    else {
+        if (oldvalues != NULL)
+            free_values(oldvalues);
+        DK_DECREF(oldkeys);
+        if (mp->ma_keys == Py_EMPTY_KEYS) {
+            mp->ma_keys = new_keys_object(8);
+            if (mp->ma_keys == NULL) {
+                mp->ma_keys = Py_EMPTY_KEYS;
+            }
+            else
+                DK_DECREF(Py_EMPTY_KEYS);
         }
-#ifdef Py_DEBUG
-        else
-            assert(ep->me_value == NULL);
-#endif
     }
-
-    if (table_is_malloced)
-        PyMem_DEL(table);
 }
 
 /*
@@ -960,26 +1138,38 @@
 int
 PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue)
 {
-    register Py_ssize_t i;
-    register Py_ssize_t mask;
-    register PyDictEntry *ep;
+    Py_ssize_t i;
+    Py_ssize_t mask, offset;
+    PyDictObject *mp;
+    PyObject **value_ptr;
+
 
     if (!PyDict_Check(op))
         return 0;
+    mp = (PyDictObject *)op;
     i = *ppos;
     if (i < 0)
         return 0;
-    ep = ((PyDictObject *)op)->ma_table;
-    mask = ((PyDictObject *)op)->ma_mask;
-    while (i <= mask && ep[i].me_value == NULL)
+    if (mp->ma_values) {
+        value_ptr = &mp->ma_values[i];
+        offset = sizeof(PyObject *);
+    }
+    else {
+        value_ptr = &mp->ma_keys->dk_entries[i]._me_value;
+        offset = sizeof(PyDictKeyEntry);
+    }
+    mask = DK_SIZE(mp->ma_keys)-1;
+    while (i <= mask && *value_ptr == NULL) {
+        value_ptr = (PyObject **)(((char *)value_ptr) + offset);
         i++;
+    }
     *ppos = i+1;
     if (i > mask)
         return 0;
     if (pkey)
-        *pkey = ep[i].me_key;
+        *pkey = mp->ma_keys->dk_entries[i].me_key;
     if (pvalue)
-        *pvalue = ep[i].me_value;
+        *pvalue = *value_ptr;
     return 1;
 }
 
@@ -987,48 +1177,61 @@
 int
 _PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue, Py_hash_t *phash)
 {
-    register Py_ssize_t i;
-    register Py_ssize_t mask;
-    register PyDictEntry *ep;
+    Py_ssize_t i;
+    Py_ssize_t mask, offset;
+    PyDictObject *mp;
+    PyObject **value_ptr;
 
     if (!PyDict_Check(op))
         return 0;
+    mp = (PyDictObject *)op;
     i = *ppos;
     if (i < 0)
         return 0;
-    ep = ((PyDictObject *)op)->ma_table;
-    mask = ((PyDictObject *)op)->ma_mask;
-    while (i <= mask && ep[i].me_value == NULL)
+    if (mp->ma_values) {
+        value_ptr = &mp->ma_values[i];
+        offset = sizeof(PyObject *);
+    }
+    else {
+        value_ptr = &mp->ma_keys->dk_entries[i]._me_value;
+        offset = sizeof(PyDictKeyEntry);
+    }
+    mask = DK_SIZE(mp->ma_keys)-1;
+    while (i <= mask && *value_ptr == NULL) {
+        value_ptr = (PyObject **)(((char *)value_ptr) + offset);
         i++;
+    }
     *ppos = i+1;
     if (i > mask)
         return 0;
-    *phash = ep[i].me_hash;
+    *phash = mp->ma_keys->dk_entries[i].me_hash;
     if (pkey)
-        *pkey = ep[i].me_key;
+        *pkey = mp->ma_keys->dk_entries[i].me_key;
     if (pvalue)
-        *pvalue = ep[i].me_value;
+        *pvalue = *value_ptr;
     return 1;
 }
 
 /* Methods */
 
 static void
-dict_dealloc(register PyDictObject *mp)
+dict_dealloc(PyDictObject *mp)
 {
-    register PyDictEntry *ep;
-    Py_ssize_t fill = mp->ma_fill;
+    PyObject **values = mp->ma_values;
+    PyDictKeysObject *keys = mp->ma_keys;
+    Py_ssize_t i, n;
     PyObject_GC_UnTrack(mp);
     Py_TRASHCAN_SAFE_BEGIN(mp)
-    for (ep = mp->ma_table; fill > 0; ep++) {
-        if (ep->me_key) {
-            --fill;
-            Py_DECREF(ep->me_key);
-            Py_XDECREF(ep->me_value);
+    if (values != NULL) {
+        for (i = 0, n = DK_SIZE(mp->ma_keys); i < n; i++) {
+            Py_XDECREF(values[i]);
         }
+        free_values(values);
+        DK_DECREF(keys);
     }
-    if (mp->ma_table != mp->ma_smalltable)
-        PyMem_DEL(mp->ma_table);
+    else {
+        free_keys_object(keys);
+    }
     if (numfree < PyDict_MAXFREELIST && Py_TYPE(mp) == &PyDict_Type)
         free_list[numfree++] = mp;
     else
@@ -1036,6 +1239,7 @@
     Py_TRASHCAN_SAFE_END(mp)
 }
 
+
 static PyObject *
 dict_repr(PyDictObject *mp)
 {
@@ -1068,10 +1272,12 @@
     while (PyDict_Next((PyObject *)mp, &i, &key, &value)) {
         int status;
         /* Prevent repr from deleting value during key format. */
+        Py_INCREF(key);
         Py_INCREF(value);
         s = PyObject_Repr(key);
         PyUnicode_Append(&s, colon);
         PyUnicode_AppendAndDel(&s, PyObject_Repr(value));
+        Py_DECREF(key);
         Py_DECREF(value);
         if (s == NULL)
             goto Done;
@@ -1126,18 +1332,19 @@
 {
     PyObject *v;
     Py_hash_t hash;
-    PyDictEntry *ep;
-    assert(mp->ma_table != NULL);
+    PyDictKeyEntry *ep;
+    PyObject **value_addr;
+
     if (!PyUnicode_CheckExact(key) ||
         (hash = ((PyASCIIObject *) key)->hash) == -1) {
         hash = PyObject_Hash(key);
         if (hash == -1)
             return NULL;
     }
-    ep = (mp->ma_lookup)(mp, key, hash);
+    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
     if (ep == NULL)
         return NULL;
-    v = ep->me_value;
+    v = *value_addr;
     if (v == NULL) {
         if (!PyDict_CheckExact(mp)) {
             /* Look up __missing__ method if we're a subclass. */
@@ -1181,8 +1388,9 @@
 {
     register PyObject *v;
     register Py_ssize_t i, j;
-    PyDictEntry *ep;
-    Py_ssize_t mask, n;
+    PyDictKeyEntry *ep;
+    Py_ssize_t size, n, offset;
+    PyObject **value_ptr;
 
   again:
     n = mp->ma_used;
@@ -1196,15 +1404,24 @@
         Py_DECREF(v);
         goto again;
     }
-    ep = mp->ma_table;
-    mask = mp->ma_mask;
-    for (i = 0, j = 0; i <= mask; i++) {
-        if (ep[i].me_value != NULL) {
+    ep = &mp->ma_keys->dk_entries[0];
+    size = DK_SIZE(mp->ma_keys);
+    if (mp->ma_values) {
+        value_ptr = mp->ma_values;
+        offset = sizeof(PyObject *);
+    }
+    else {
+        value_ptr = &ep[0]._me_value;
+        offset = sizeof(PyDictKeyEntry);
+    }
+    for (i = 0, j = 0; i < size; i++) {
+        if (*value_ptr != NULL) {
             PyObject *key = ep[i].me_key;
             Py_INCREF(key);
             PyList_SET_ITEM(v, j, key);
             j++;
         }
+        value_ptr = (PyObject **)(((char *)value_ptr) + offset);
     }
     assert(j == n);
     return v;
@@ -1215,8 +1432,8 @@
 {
     register PyObject *v;
     register Py_ssize_t i, j;
-    PyDictEntry *ep;
-    Py_ssize_t mask, n;
+    Py_ssize_t size, n, offset;
+    PyObject **value_ptr;
 
   again:
     n = mp->ma_used;
@@ -1230,11 +1447,19 @@
         Py_DECREF(v);
         goto again;
     }
-    ep = mp->ma_table;
-    mask = mp->ma_mask;
-    for (i = 0, j = 0; i <= mask; i++) {
-        if (ep[i].me_value != NULL) {
-            PyObject *value = ep[i].me_value;
+    size = DK_SIZE(mp->ma_keys);
+    if (mp->ma_values) {
+        value_ptr = mp->ma_values;
+        offset = sizeof(PyObject *);
+    }
+    else {
+        value_ptr = &mp->ma_keys->dk_entries[0]._me_value;
+        offset = sizeof(PyDictKeyEntry);
+    }
+    for (i = 0, j = 0; i < size; i++) {
+        PyObject *value = *value_ptr;
+        value_ptr = (PyObject **)(((char *)value_ptr) + offset);
+        if (value != NULL) {
             Py_INCREF(value);
             PyList_SET_ITEM(v, j, value);
             j++;
@@ -1249,9 +1474,10 @@
 {
     register PyObject *v;
     register Py_ssize_t i, j, n;
-    Py_ssize_t mask;
-    PyObject *item, *key, *value;
-    PyDictEntry *ep;
+    Py_ssize_t size, offset;
+    PyObject *item, *key;
+    PyDictKeyEntry *ep;
+    PyObject **value_ptr;
 
     /* Preallocate the list of tuples, to avoid allocations during
      * the loop over the items, which could trigger GC, which
@@ -1278,10 +1504,20 @@
         goto again;
     }
     /* Nothing we do below makes any function calls. */
-    ep = mp->ma_table;
-    mask = mp->ma_mask;
-    for (i = 0, j = 0; i <= mask; i++) {
-        if ((value=ep[i].me_value) != NULL) {
+    ep = mp->ma_keys->dk_entries;
+    size = DK_SIZE(mp->ma_keys);
+    if (mp->ma_values) {
+        value_ptr = mp->ma_values;
+        offset = sizeof(PyObject *);
+    }
+    else {
+        value_ptr = &ep[0]._me_value;
+        offset = sizeof(PyDictKeyEntry);
+    }
+    for (i = 0, j = 0; i < size; i++) {
+        PyObject *value = *value_ptr;
+        value_ptr = (PyObject **)(((char *)value_ptr) + offset);
+        if (value != NULL) {
             key = ep[i].me_key;
             item = PyList_GET_ITEM(v, j);
             Py_INCREF(key);
@@ -1323,7 +1559,6 @@
             Py_DECREF(d);
             return NULL;
         }
-
         while (_PyDict_Next(seq, &pos, &key, &oldvalue, &hash)) {
             Py_INCREF(key);
             Py_INCREF(value);
@@ -1341,7 +1576,7 @@
         PyObject *key;
         Py_hash_t hash;
 
-        if (dictresize(mp, PySet_GET_SIZE(seq))) {
+        if (dictresize(mp, PySet_GET_SIZE(seq))){
             Py_DECREF(d);
             return NULL;
         }
@@ -1349,7 +1584,7 @@
         while (_PySet_NextEntry(seq, &pos, &key, &hash)) {
             Py_INCREF(key);
             Py_INCREF(value);
-            if (insertdict(mp, key, hash, value)) {
+            if (insertdict(mp, key, hash, value)){
                 Py_DECREF(d);
                 return NULL;
             }
@@ -1513,8 +1748,8 @@
 PyDict_Merge(PyObject *a, PyObject *b, int override)
 {
     register PyDictObject *mp, *other;
-    register Py_ssize_t i;
-    PyDictEntry *entry;
+    register Py_ssize_t i, n;
+    PyDictKeyEntry *entry;
 
     /* We accept for the argument either a concrete dictionary object,
      * or an abstract "mapping" object.  For the former, we can do
@@ -1541,20 +1776,25 @@
          * incrementally resizing as we insert new items.  Expect
          * that there will be no (or few) overlapping keys.
          */
-        if ((mp->ma_fill + other->ma_used)*3 >= (mp->ma_mask+1)*2) {
-           if (dictresize(mp, (mp->ma_used + other->ma_used)*2) != 0)
+        if (mp->ma_keys->dk_free * 3 < other->ma_used * 2)
+            if (dictresize(mp, (mp->ma_used + other->ma_used)*2) != 0)
                return -1;
-        }
-        for (i = 0; i <= other->ma_mask; i++) {
-            entry = &other->ma_table[i];
-            if (entry->me_value != NULL &&
+        for (i = 0, n = DK_SIZE(other->ma_keys); i < n; i++) {
+            PyObject *value;
+            entry = &other->ma_keys->dk_entries[i];
+            if (other->ma_values)
+                value = other->ma_values[i];
+            else
+                value = entry->_me_value;
+
+            if (value != NULL &&
                 (override ||
                  PyDict_GetItem(a, entry->me_key) == NULL)) {
                 Py_INCREF(entry->me_key);
-                Py_INCREF(entry->me_value);
+                Py_INCREF(value);
                 if (insertdict(mp, entry->me_key,
                                entry->me_hash,
-                               entry->me_value) != 0)
+                               value) != 0)
                     return -1;
             }
         }
@@ -1616,11 +1856,35 @@
 PyDict_Copy(PyObject *o)
 {
     PyObject *copy;
+    PyDictObject *mp = (PyDictObject *)o;   /* only used after the check */
+    Py_ssize_t i, n;
 
     if (o == NULL || !PyDict_Check(o)) {
         PyErr_BadInternalCall();
         return NULL;
     }
+    if (_PyDict_HasSplitTable(mp)) {    /* share keys, copy only the values */
+        PyDictObject *split_copy;
+        PyObject **newvalues = new_values(DK_SIZE(mp->ma_keys));
+        if (newvalues == NULL)
+            return PyErr_NoMemory();
+        split_copy = PyObject_GC_New(PyDictObject, &PyDict_Type);
+        if (split_copy == NULL) {
+            free_values(newvalues);     /* pairs with new_values() */
+            return NULL;
+        }
+        split_copy->ma_values = newvalues;
+        split_copy->ma_keys = mp->ma_keys;
+        split_copy->ma_used = mp->ma_used;
+        DK_INCREF(mp->ma_keys);
+        for (i = 0, n = DK_SIZE(mp->ma_keys); i < n; i++) {
+            newvalues[i] = mp->ma_values[i];
+            Py_XINCREF(newvalues[i]);   /* empty slots stay NULL */
+        }
+        if (_PyObject_GC_IS_TRACKED(mp))    /* keep the copy visible to GC */
+            _PyObject_GC_TRACK(split_copy);
+        return (PyObject *)split_copy;
+    }
     copy = PyDict_New();
     if (copy == NULL)
         return NULL;
@@ -1682,14 +1946,18 @@
     if (a->ma_used != b->ma_used)
         /* can't be equal if # of entries differ */
         return 0;
-
     /* Same # of entries -- check all of 'em.  Exit early on any diff. */
-    for (i = 0; i <= a->ma_mask; i++) {
-        PyObject *aval = a->ma_table[i].me_value;
+    for (i = 0; i < DK_SIZE(a->ma_keys); i++) {
+        PyDictKeyEntry *ep = &a->ma_keys->dk_entries[i];
+        PyObject *aval;
+        if (a->ma_values)
+            aval = a->ma_values[i];
+        else
+            aval = ep->_me_value;
         if (aval != NULL) {
             int cmp;
             PyObject *bval;
-            PyObject *key = a->ma_table[i].me_key;
+            PyObject *key = ep->me_key;
             /* temporarily bump aval's refcount to ensure it stays
                alive until we're done with it */
             Py_INCREF(aval);
@@ -1710,7 +1978,7 @@
         }
     }
     return 1;
- }
+}
 
 static PyObject *
 dict_richcompare(PyObject *v, PyObject *w, int op)
@@ -1731,13 +1999,14 @@
         res = Py_NotImplemented;
     Py_INCREF(res);
     return res;
- }
+}
 
 static PyObject *
 dict_contains(register PyDictObject *mp, PyObject *key)
 {
     Py_hash_t hash;
-    PyDictEntry *ep;
+    PyDictKeyEntry *ep;
+    PyObject **value_addr;
 
     if (!PyUnicode_CheckExact(key) ||
         (hash = ((PyASCIIObject *) key)->hash) == -1) {
@@ -1745,10 +2014,10 @@
         if (hash == -1)
             return NULL;
     }
-    ep = (mp->ma_lookup)(mp, key, hash);
+    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
     if (ep == NULL)
         return NULL;
-    return PyBool_FromLong(ep->me_value != NULL);
+    return PyBool_FromLong(*value_addr != NULL);
 }
 
 static PyObject *
@@ -1758,7 +2027,8 @@
     PyObject *failobj = Py_None;
     PyObject *val = NULL;
     Py_hash_t hash;
-    PyDictEntry *ep;
+    PyDictKeyEntry *ep;
+    PyObject **value_addr;
 
     if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &failobj))
         return NULL;
@@ -1769,17 +2039,16 @@
         if (hash == -1)
             return NULL;
     }
-    ep = (mp->ma_lookup)(mp, key, hash);
+    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
     if (ep == NULL)
         return NULL;
-    val = ep->me_value;
+    val = *value_addr;
     if (val == NULL)
         val = failobj;
     Py_INCREF(val);
     return val;
 }
 
-
 static PyObject *
 dict_setdefault(register PyDictObject *mp, PyObject *args)
 {
@@ -1787,7 +2056,8 @@
     PyObject *failobj = Py_None;
     PyObject *val = NULL;
     Py_hash_t hash;
-    PyDictEntry *ep;
+    PyDictKeyEntry *ep;
+    PyObject **value_addr;
 
     if (!PyArg_UnpackTuple(args, "setdefault", 1, 2, &key, &failobj))
         return NULL;
@@ -1798,14 +2068,17 @@
         if (hash == -1)
             return NULL;
     }
-    ep = (mp->ma_lookup)(mp, key, hash);
+    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
     if (ep == NULL)
         return NULL;
-    val = ep->me_value;
+    val = *value_addr;
     if (val == NULL) {
-        val = failobj;
-        if (PyDict_SetItem((PyObject*)mp, key, failobj))
+        Py_INCREF(failobj);
+        Py_INCREF(key);
+        if (insertdict(mp, key, hash, failobj))
             val = NULL;
+        else
+            val = failobj;
     }
     Py_XINCREF(val);
     return val;
@@ -1823,9 +2096,10 @@
 dict_pop(PyDictObject *mp, PyObject *args)
 {
     Py_hash_t hash;
-    PyDictEntry *ep;
     PyObject *old_value, *old_key;
     PyObject *key, *deflt = NULL;
+    PyDictKeyEntry *ep;
+    PyObject **value_addr;
 
     if(!PyArg_UnpackTuple(args, "pop", 1, 2, &key, &deflt))
         return NULL;
@@ -1833,8 +2107,7 @@
         if (deflt) {
             Py_INCREF(deflt);
             return deflt;
-        }
-        set_key_error(key);
+        }        set_key_error(key);
         return NULL;
     }
     if (!PyUnicode_CheckExact(key) ||
@@ -1843,10 +2116,11 @@
         if (hash == -1)
             return NULL;
     }
-    ep = (mp->ma_lookup)(mp, key, hash);
+    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
     if (ep == NULL)
         return NULL;
-    if (ep->me_value == NULL) {
+    old_value = *value_addr;
+    if (old_value == NULL) {
         if (deflt) {
             Py_INCREF(deflt);
             return deflt;
@@ -1854,13 +2128,15 @@
         set_key_error(key);
         return NULL;
     }
-    old_key = ep->me_key;
-    Py_INCREF(dummy);
-    ep->me_key = dummy;
-    old_value = ep->me_value;
-    ep->me_value = NULL;
+    *value_addr = NULL;
     mp->ma_used--;
-    Py_DECREF(old_key);
+    if (!_PyDict_HasSplitTable(mp)) {
+        ENSURE_DELETEABLE(mp);
+        old_key = ep->me_key;
+        Py_INCREF(dummy);
+        ep->me_key = dummy;
+        Py_DECREF(old_key);
+    }
     return old_value;
 }
 
@@ -1868,9 +2144,10 @@
 dict_popitem(PyDictObject *mp)
 {
     Py_hash_t i = 0;
-    PyDictEntry *ep;
+    PyDictKeyEntry *ep;
     PyObject *res;
 
+
     /* Allocate the result tuple before checking the size.  Believe it
      * or not, this allocation could trigger a garbage collection which
      * could empty the dict, so if we checked the size first and that
@@ -1889,49 +2166,71 @@
                         "popitem(): dictionary is empty");
         return NULL;
     }
+    /* Convert split table to combined table */
+    if (mp->ma_keys->dk_lookup == lookdict_split) {
+        if (dictresize(mp, DK_SIZE(mp->ma_keys))) {
+            Py_DECREF(res);
+            return NULL;
+        }
+    }
+    ENSURE_DELETEABLE(mp);
     /* Set ep to "the first" dict entry with a value.  We abuse the hash
      * field of slot 0 to hold a search finger:
      * If slot 0 has a value, use slot 0.
      * Else slot 0 is being used to hold a search finger,
      * and we use its hash value as the first index to look.
      */
-    ep = &mp->ma_table[0];
-    if (ep->me_value == NULL) {
+    ep = &mp->ma_keys->dk_entries[0];
+    if (ep->_me_value == NULL) {
+        Py_ssize_t mask = DK_SIZE(mp->ma_keys)-1;
         i = ep->me_hash;
         /* The hash field may be a real hash value, or it may be a
          * legit search finger, or it may be a once-legit search
          * finger that's out of bounds now because it wrapped around
          * or the table shrunk -- simply make sure it's in bounds now.
          */
-        if (i > mp->ma_mask || i < 1)
+        if (i > mask || i < 1)
             i = 1;              /* skip slot 0 */
-        while ((ep = &mp->ma_table[i])->me_value == NULL) {
+        while ((ep = &mp->ma_keys->dk_entries[i])->_me_value == NULL) {
             i++;
-            if (i > mp->ma_mask)
+            if (i > mask)
                 i = 1;
         }
     }
     PyTuple_SET_ITEM(res, 0, ep->me_key);
-    PyTuple_SET_ITEM(res, 1, ep->me_value);
+    PyTuple_SET_ITEM(res, 1, ep->_me_value);   /* refs move into the tuple */
     Py_INCREF(dummy);
     ep->me_key = dummy;
-    ep->me_value = NULL;
+    ep->_me_value = NULL;
     mp->ma_used--;
-    assert(mp->ma_table[0].me_value == NULL);
-    mp->ma_table[0].me_hash = i + 1;  /* next place to start */
+    assert(mp->ma_keys->dk_entries[0]._me_value == NULL);
+    mp->ma_keys->dk_entries[0].me_hash = i + 1;  /* next place to start */
     return res;
 }
 
 static int
 dict_traverse(PyObject *op, visitproc visit, void *arg)
 {
-    Py_ssize_t i = 0;
-    PyObject *pk;
-    PyObject *pv;
-
-    while (PyDict_Next(op, &i, &pk, &pv)) {
-        Py_VISIT(pk);
-        Py_VISIT(pv);
+    PyDictObject *mp = (PyDictObject *)op;
+    PyDictKeysObject *keys = mp->ma_keys;
+    Py_ssize_t i, n = DK_SIZE(keys);
+
+    if (keys->dk_lookup == lookdict) {
+        /* General table: report each live entry's value and key. */
+        for (i = 0; i < n; i++) {
+            PyDictKeyEntry *ep = &keys->dk_entries[i];
+            if (ep->_me_value != NULL) {
+                Py_VISIT(ep->_me_value);
+                Py_VISIT(ep->me_key);
+            }
+        }
+    }
+    else {
+        /* Other lookups: only values are reported (keys presumably str). */
+        PyObject **split = mp->ma_values;
+        for (i = 0; i < n; i++) {
+            Py_VISIT(split != NULL ? split[i] : keys->dk_entries[i]._me_value);
+        }
     }
     return 0;
 }
@@ -1948,12 +2247,22 @@
 static PyObject *
 dict_sizeof(PyDictObject *mp)
 {
-    Py_ssize_t res;
-
+    Py_ssize_t size;            /* number of slots in the keys table */
+    double res, keys_size;      /* fractional while the shared part is split */
+
+    size = DK_SIZE(mp->ma_keys);
     res = sizeof(PyDictObject);
-    if (mp->ma_table != mp->ma_smalltable)
-        res = res + (mp->ma_mask + 1) * sizeof(PyDictEntry);
-    return PyLong_FromSsize_t(res);
+    if (mp->ma_values)
+        res += size * sizeof(PyObject*);    /* split table: own values array */
+    /* Count our share of the keys object -- with rounding errors. */
+    keys_size = sizeof(PyDictKeysObject) + (size-1) * sizeof(PyDictKeyEntry);
+    /* If refcnt > 1, then one count is (probably) held by a type */
+    /* XXX Approximate; truncated because __sizeof__ must return an int. */
+    if (mp->ma_keys->dk_refcnt < 3)
+        res += keys_size;
+    else
+        res += keys_size / (mp->ma_keys->dk_refcnt - 1);
+    return PyLong_FromSsize_t((Py_ssize_t)res);
 }
 
 PyDoc_STRVAR(contains__doc__,
@@ -2044,7 +2353,8 @@
 {
     Py_hash_t hash;
     PyDictObject *mp = (PyDictObject *)op;
-    PyDictEntry *ep;
+    PyDictKeyEntry *ep;
+    PyObject **value_addr;
 
     if (!PyUnicode_CheckExact(key) ||
         (hash = ((PyASCIIObject *) key)->hash) == -1) {
@@ -2052,8 +2362,8 @@
         if (hash == -1)
             return -1;
     }
-    ep = (mp->ma_lookup)(mp, key, hash);
-    return ep == NULL ? -1 : (ep->me_value != NULL);
+    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
+    return (ep == NULL) ? -1 : (*value_addr != NULL);
 }
 
 /* Internal version of PyDict_Contains used when the hash value is already known */
@@ -2061,10 +2371,11 @@
 _PyDict_Contains(PyObject *op, PyObject *key, Py_hash_t hash)
 {
     PyDictObject *mp = (PyDictObject *)op;
-    PyDictEntry *ep;
-
-    ep = (mp->ma_lookup)(mp, key, hash);
-    return ep == NULL ? -1 : (ep->me_value != NULL);
+    PyDictKeyEntry *ep;
+    PyObject **value_addr;
+
+    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
+    return (ep == NULL) ? -1 : (*value_addr != NULL);
 }
 
 /* Hack to implement "key in dict" */
@@ -2081,6 +2392,7 @@
     0,                          /* sq_inplace_repeat */
 };
 
+
 static PyObject *
 dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 {
@@ -2090,22 +2402,15 @@
     self = type->tp_alloc(type, 0);
     if (self != NULL) {
         PyDictObject *d = (PyDictObject *)self;
-        /* It's guaranteed that tp->alloc zeroed out the struct. */
-        assert(d->ma_table == NULL && d->ma_fill == 0 && d->ma_used == 0);
-        INIT_NONZERO_DICT_SLOTS(d);
-        d->ma_lookup = lookdict_unicode;
+        d->ma_keys = new_keys_object(8);
+        /* XXX - Should we raise a no-memory error? */
+        if (d->ma_keys == NULL) {
+            DK_INCREF(Py_EMPTY_KEYS);
+            d->ma_keys = Py_EMPTY_KEYS;
+        }
         /* The object has been implicitly tracked by tp_alloc */
         if (type == &PyDict_Type)
             _PyObject_GC_UNTRACK(d);
-#ifdef SHOW_CONVERSION_COUNTS
-        ++created;
-#endif
-#ifdef SHOW_TRACK_COUNT
-        if (_PyObject_GC_IS_TRACKED(d))
-            count_tracked++;
-        else
-            count_untracked++;
-#endif
     }
     return self;
 }
@@ -2278,6 +2583,7 @@
     return PyLong_FromSize_t(len);
 }
 
+
 PyDoc_STRVAR(length_hint_doc,
              "Private method returning an estimate of len(list(it)).");
 
@@ -2290,9 +2596,10 @@
 static PyObject *dictiter_iternextkey(dictiterobject *di)
 {
     PyObject *key;
-    register Py_ssize_t i, mask;
-    register PyDictEntry *ep;
+    register Py_ssize_t i, mask, offset;
+    register PyDictKeysObject *k;
     PyDictObject *d = di->di_dict;
+    PyObject **value_ptr;
 
     if (d == NULL)
         return NULL;
@@ -2308,15 +2615,25 @@
     i = di->di_pos;
     if (i < 0)
         goto fail;
-    ep = d->ma_table;
-    mask = d->ma_mask;
-    while (i <= mask && ep[i].me_value == NULL)
+    k = d->ma_keys;
+    if (d->ma_values) {
+        value_ptr = &d->ma_values[i];
+        offset = sizeof(PyObject *);
+    }
+    else {
+        value_ptr = &k->dk_entries[i]._me_value;
+        offset = sizeof(PyDictKeyEntry);
+    }
+    mask = DK_SIZE(k)-1;
+    while (i <= mask && *value_ptr == NULL) {
+        value_ptr = (PyObject **)(((char *)value_ptr) + offset);
         i++;
+    }
     di->di_pos = i+1;
     if (i > mask)
         goto fail;
     di->len--;
-    key = ep[i].me_key;
+    key = k->dk_entries[i].me_key;
     Py_INCREF(key);
     return key;
 
@@ -2362,9 +2679,9 @@
 static PyObject *dictiter_iternextvalue(dictiterobject *di)
 {
     PyObject *value;
-    register Py_ssize_t i, mask;
-    register PyDictEntry *ep;
+    register Py_ssize_t i, mask, offset;
     PyDictObject *d = di->di_dict;
+    PyObject **value_ptr;
 
     if (d == NULL)
         return NULL;
@@ -2378,17 +2695,26 @@
     }
 
     i = di->di_pos;
-    mask = d->ma_mask;
+    mask = DK_SIZE(d->ma_keys)-1;
     if (i < 0 || i > mask)
         goto fail;
-    ep = d->ma_table;
-    while ((value=ep[i].me_value) == NULL) {
+    if (d->ma_values) {
+        value_ptr = &d->ma_values[i];
+        offset = sizeof(PyObject *);
+    }
+    else {
+        value_ptr = &d->ma_keys->dk_entries[i]._me_value;
+        offset = sizeof(PyDictKeyEntry);
+    }
+    while (i <= mask && *value_ptr == NULL) {
+        value_ptr = (PyObject **)(((char *)value_ptr) + offset);
         i++;
         if (i > mask)
             goto fail;
     }
     di->di_pos = i+1;
     di->len--;
+    value = *value_ptr;
     Py_INCREF(value);
     return value;
 
@@ -2434,9 +2760,9 @@
 static PyObject *dictiter_iternextitem(dictiterobject *di)
 {
     PyObject *key, *value, *result = di->di_result;
-    register Py_ssize_t i, mask;
-    register PyDictEntry *ep;
+    register Py_ssize_t i, mask, offset;
     PyDictObject *d = di->di_dict;
+    PyObject **value_ptr;
 
     if (d == NULL)
         return NULL;
@@ -2452,10 +2778,19 @@
     i = di->di_pos;
     if (i < 0)
         goto fail;
-    ep = d->ma_table;
-    mask = d->ma_mask;
-    while (i <= mask && ep[i].me_value == NULL)
+    mask = DK_SIZE(d->ma_keys)-1;
+    if (d->ma_values) {
+        value_ptr = &d->ma_values[i];
+        offset = sizeof(PyObject *);
+    }
+    else {
+        value_ptr = &d->ma_keys->dk_entries[i]._me_value;
+        offset = sizeof(PyDictKeyEntry);
+    }
+    while (i <= mask && *value_ptr == NULL) {
+        value_ptr = (PyObject **)(((char *)value_ptr) + offset);
         i++;
+    }
     di->di_pos = i+1;
     if (i > mask)
         goto fail;
@@ -2470,8 +2805,8 @@
             return NULL;
     }
     di->len--;
-    key = ep[i].me_key;
-    value = ep[i].me_value;
+    key = d->ma_keys->dk_entries[i].me_key;
+    value = *value_ptr;
     Py_INCREF(key);
     Py_INCREF(value);
     PyTuple_SET_ITEM(result, 0, key);
@@ -2585,6 +2920,7 @@
    - if public then they should probably be in builtins
 */
 
+
 /* Return 1 if self is a subset of other, iterating over self;
    0 if not; -1 if an error occurred. */
 static int
@@ -2611,6 +2947,7 @@
     return ok;
 }
 
+
 static PyObject *
 dictview_richcompare(PyObject *self, PyObject *other, int op)
 {
@@ -2722,7 +3059,6 @@
     PyObject *result = PySet_New(self);
     PyObject *tmp;
     _Py_IDENTIFIER(difference_update);
-
     if (result == NULL)
         return NULL;
 
@@ -2742,7 +3078,6 @@
     PyObject *result = PySet_New(self);
     PyObject *tmp;
     _Py_IDENTIFIER(intersection_update);
-
     if (result == NULL)
         return NULL;
 
@@ -2762,7 +3097,6 @@
     PyObject *result = PySet_New(self);
     PyObject *tmp;
     _Py_IDENTIFIER(update);
-
     if (result == NULL)
         return NULL;
 
@@ -2782,7 +3116,6 @@
     PyObject *result = PySet_New(self);
     PyObject *tmp;
     _Py_IDENTIFIER(symmetric_difference_update);
-
     if (result == NULL)
         return NULL;
 
@@ -2915,6 +3248,7 @@
     return dictview_new(dict, &PyDictKeys_Type);
 }
 
+
 /*** dict_items ***/
 
 static PyObject *
@@ -3001,6 +3335,8 @@
     return dictview_new(dict, &PyDictItems_Type);
 }
 
+
+
 /*** dict_values ***/
 
 static PyObject *
@@ -3065,3 +3401,113 @@
 {
     return dictview_new(dict, &PyDictValues_Type);
 }
+
+int
+_PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr,
+                     PyObject *key, PyObject *value) {
+    PyObject *dict;             /* the dict stored at *dictptr */
+    int res;                    /* status of the PyDict_* call below */
+    PyDictKeysObject *cached;   /* CACHED_KEYS(tp), when non-NULL */
+    /* Set key in the dict at *dictptr (delete it when value is NULL). */
+    assert(dictptr != NULL);
+    if ((tp->tp_flags & Py_TPFLAGS_HEAPTYPE) && (cached = CACHED_KEYS(tp))) {
+        assert(dictptr != NULL);
+        dict = *dictptr;
+        if (dict == NULL) {     /* no dict yet: build one on `cached` */
+            DK_INCREF(cached);  /* presumably owned by the new dict - confirm */
+            dict = new_dict_with_shared_keys(cached);
+            if (dict == NULL)
+                return -1;
+        }
+        *dictptr = dict;
+        if (value == NULL) {    /* NULL value requests deletion */
+            res = PyDict_DelItem(dict, key);
+            if (cached != ((PyDictObject *)dict)->ma_keys) {
+                CACHED_KEYS(tp) = NULL;     /* dict no longer uses `cached` */
+                DK_DECREF(cached);
+            }
+        } else {
+            res = PyDict_SetItem(dict, key, value);
+            if (cached != ((PyDictObject *)dict)->ma_keys) {
+                /* Either update tp->ht_cached_keys or delete it */
+                if (cached->dk_refcnt == 1) {   /* presumably tp's own ref */
+                    CACHED_KEYS(tp) = make_keys_shared(dict);
+                } else {
+                    CACHED_KEYS(tp) = NULL;
+                }
+                DK_DECREF(cached);
+            }
+        }
+    } else {    /* not a heap type, or no cached keys: ordinary dict */
+        dict = *dictptr;
+        if (dict == NULL) {
+            dict = PyDict_New();
+            if (dict == NULL)
+                return -1;
+            *dictptr = dict;
+        }
+        if (value == NULL) {    /* NULL value requests deletion */
+            res = PyDict_DelItem(dict, key);
+        } else {
+            res = PyDict_SetItem(dict, key, value);
+        }
+    }
+    return res;     /* -1 above only when no dict could be created */
+}
+
+void
+_PyDictKeys_DecRef(PyDictKeysObject *keys)
+{
+    DK_DECREF(keys);    /* non-static wrapper, presumably for other files */
+}
+
+
+/* ARGSUSED -- tp_repr of the dummy key type; `op` is ignored. */
+static PyObject *
+dummy_repr(PyObject *op)
+{
+    return PyUnicode_FromString("<dummy key>");
+}
+
+/* ARGSUSED -- tp_dealloc of the dummy key type; reports a fatal error. */
+static void
+dummy_dealloc(PyObject* ignore)
+{
+    /* This should never get called, but we also don't want to SEGV if
+     * we accidentally decref dummy-key out of existence.
+     */
+    Py_FatalError("deallocating <dummy key>");
+}
+
+static PyTypeObject PyDictDummy_Type = {    /* type of _dummy_struct */
+    PyVarObject_HEAD_INIT(&PyType_Type, 0)
+    "<dummy key> type", /*tp_name*/
+    0,                  /*tp_basicsize*/
+    0,                  /*tp_itemsize*/
+    dummy_dealloc,      /*tp_dealloc*/ /*never called*/
+    0,                  /*tp_print*/
+    0,                  /*tp_getattr*/
+    0,                  /*tp_setattr*/
+    0,                  /*tp_reserved*/
+    dummy_repr,         /*tp_repr*/
+    0,                  /*tp_as_number*/
+    0,                  /*tp_as_sequence*/
+    0,                  /*tp_as_mapping*/
+    0,                  /*tp_hash */
+    0,                  /*tp_call */
+    0,                  /*tp_str */
+    0,                  /*tp_getattro */
+    0,                  /*tp_setattro */
+    0,                  /*tp_as_buffer */
+    Py_TPFLAGS_DEFAULT, /*tp_flags */
+};
+
+static PyObject _dummy_struct = {   /* static dummy-key object */
+  _PyObject_EXTRA_INIT
+  2, &PyDictDummy_Type  /* refcount 2: presumably so it is never freed */
+};
+
+
+
+
+
diff -r 58bd6a58365d -r bc286099ce9a Objects/object.c
--- a/Objects/object.c	Wed Feb 08 04:09:37 2012 +0100
+++ b/Objects/object.c	Wed Feb 08 16:33:32 2012 +0000
@@ -1163,13 +1163,10 @@
     if (dict == NULL) {
         dictptr = _PyObject_GetDictPtr(obj);
         if (dictptr != NULL) {
-            dict = *dictptr;
-            if (dict == NULL && value != NULL) {
-                dict = PyDict_New();
-                if (dict == NULL)
-                    goto done;
-                *dictptr = dict;
-            }
+            res = _PyObjectDict_SetItem(Py_TYPE(obj), dictptr, name, value);
+            if (res < 0 && PyErr_ExceptionMatches(PyExc_KeyError))
+                PyErr_SetObject(PyExc_AttributeError, name);
+            goto done;
         }
     }
     if (dict != NULL) {
diff -r 58bd6a58365d -r bc286099ce9a Objects/typeobject.c
--- a/Objects/typeobject.c	Wed Feb 08 04:09:37 2012 +0100
+++ b/Objects/typeobject.c	Wed Feb 08 16:33:32 2012 +0000
@@ -1787,7 +1787,7 @@
     }
     dict = *dictptr;
     if (dict == NULL)
-        *dictptr = dict = PyDict_New();
+        *dictptr = dict = PyDict_NewForInstance(Py_TYPE(obj));
     Py_XINCREF(dict);
     return dict;
 }
@@ -2335,6 +2335,9 @@
             type->tp_dictoffset = slotoffset;
         slotoffset += sizeof(PyObject *);
     }
+    if (type->tp_dictoffset) {
+        et->ht_cached_keys = _PyDict_NewKeysForClass();
+    }
     if (add_weak) {
         assert(!base->tp_itemsize);
         type->tp_weaklistoffset = slotoffset;
@@ -2434,6 +2437,9 @@
             res->ht_type.tp_doc = tp_doc;
         }
     }
+    if (res->ht_type.tp_dictoffset) {
+        res->ht_cached_keys = _PyDict_NewKeysForClass();
+    }
 
     if (PyType_Ready(&res->ht_type) < 0)
         goto fail;
diff -r 58bd6a58365d -r bc286099ce9a Python/ceval.c
--- a/Python/ceval.c	Wed Feb 08 04:09:37 2012 +0100
+++ b/Python/ceval.c	Wed Feb 08 16:33:32 2012 +0000
@@ -2102,29 +2102,30 @@
                 /* Inline the PyDict_GetItem() calls.
                    WARNING: this is an extreme speed hack.
                    Do not try this at home. */
+                PyObject **value_addr;
                 Py_hash_t hash = ((PyASCIIObject *)w)->hash;
                 if (hash != -1) {
                     PyDictObject *d;
-                    PyDictEntry *e;
+                    PyDictKeyEntry *e;
                     d = (PyDictObject *)(f->f_globals);
-                    e = d->ma_lookup(d, w, hash);
+                    e = d->ma_keys->dk_lookup(d, w, hash, &value_addr);
                     if (e == NULL) {
                         x = NULL;
                         break;
                     }
-                    x = e->me_value;
+                    x = *value_addr;
                     if (x != NULL) {
                         Py_INCREF(x);
                         PUSH(x);
                         DISPATCH();
                     }
                     d = (PyDictObject *)(f->f_builtins);
-                    e = d->ma_lookup(d, w, hash);
+                    e = d->ma_keys->dk_lookup(d, w, hash, &value_addr);
                     if (e == NULL) {
                         x = NULL;
                         break;
                     }
-                    x = e->me_value;
+                    x = *value_addr;
                     if (x != NULL) {
                         Py_INCREF(x);
                         PUSH(x);
diff -r 58bd6a58365d -r bc286099ce9a Tools/gdb/libpython.py
--- a/Tools/gdb/libpython.py	Wed Feb 08 04:09:37 2012 +0100
+++ b/Tools/gdb/libpython.py	Wed Feb 08 16:33:32 2012 +0000
@@ -634,9 +634,14 @@
         Yields a sequence of (PyObjectPtr key, PyObjectPtr value) pairs,
         analagous to dict.iteritems()
         '''
-        for i in safe_range(self.field('ma_mask') + 1):
-            ep = self.field('ma_table') + i
-            pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value'])
+        keys = self.field('ma_keys')
+        values = self.field('ma_values')
+        for i in safe_range(keys['dk_size']):
+            ep = keys['dk_entries'].address + i
+            if long(values):
+                pyop_value = PyObjectPtr.from_pyobject_ptr(values[i])
+            else:
+                pyop_value = PyObjectPtr.from_pyobject_ptr(ep['_me_value'])
             if not pyop_value.is_null():
                 pyop_key = PyObjectPtr.from_pyobject_ptr(ep['me_key'])
                 yield (pyop_key, pyop_value)