diff -r 58bd6a58365d -r a9138aba7896 Include/dictobject.h
--- a/Include/dictobject.h	Wed Feb 08 04:09:37 2012 +0100
+++ b/Include/dictobject.h	Wed Feb 08 13:38:20 2012 +0000
@@ -14,7 +14,7 @@
 */
 
 /*
-There are three kinds of slots in the table:
+There are four kinds of slots in the table:
 
 1. Unused.  me_key == me_value == NULL
    Does not hold an active (key, value) pair now and never did.  Unused can
@@ -32,59 +32,81 @@
    (cannot have me_key set to NULL), else the probe sequence in case of
    collision would have no way to know they were once active.
 
+4. Not yet inserted.  key != NULL, key != dummy and value == NULL
+   When sharing key-tables.
+
+The DictObject can be in one of two forms.
+Either:
+  A combined table:
+    ma_values == NULL, dk_refcnt == 1.
+    Values are stored in the _me_value field of the PyDictKeysObject.
+    Slot kind 4 is not allowed i.e.
+        key != NULL, key != dummy and value == NULL is illegal.
+Or:
+  A split table:
+    ma_values != NULL, dk_refcnt >= 1
+    Values are stored in the ma_values array.
+    Only string (unicode) keys are allowed, no <dummy> keys are present.
+
+This means that insertion of a non-string key or any deletion
+(including pop & popitem) causes a split table to become a combined table.
+
+
 Note: .popitem() abuses the me_hash field of an Unused or Dummy slot to
 hold a search finger.  The me_hash field of Unused or Dummy slots has no
 meaning otherwise.
 */
 
-/* PyDict_MINSIZE is the minimum size of a dictionary.  This many slots are
- * allocated directly in the dict object (in the ma_smalltable member).
+/* PyDict_MINSIZE is the minimum size of a dictionary.
  * It must be a power of 2, and at least 4.  8 allows dicts with no more
- * than 5 active entries to live in ma_smalltable (and so avoid an
- * additional malloc); instrumentation suggested this suffices for the
+ * than 5 active entries; experiments suggested this suffices for the
  * majority of dicts (consisting mostly of usually-small instance dicts and
  * usually-small dicts created to pass keyword arguments).
+ *
  */
 #ifndef Py_LIMITED_API
-#define PyDict_MINSIZE 8
+#define PyDict_MINSIZE 4
 
 typedef struct {
     /* Cached hash code of me_key. */
     Py_hash_t me_hash;
     PyObject *me_key;
-    PyObject *me_value;
-} PyDictEntry;
+    PyObject *_me_value; /* This field is only meaningful for combined tables */
+} PyDictKeyEntry;
 
 /*
 To ensure the lookup algorithm terminates, there must be at least one Unused
 slot (NULL key) in the table.
-The value ma_fill is the number of non-NULL keys (sum of Active and Dummy);
-ma_used is the number of non-NULL, non-dummy keys (== the number of non-NULL
-values == the number of Active items).
 To avoid slowing down lookups on a near-full table, we resize the table when
-it's two-thirds full.
+it's USABLE_FRACTION (currently five-eighths) full.
 */
+
+/* Note that if the dk_free slot of a PyDictKeysObject is less than zero
+   then that PyDictKeysObject is *immutable* */
+typedef struct _dictkeysobject PyDictKeysObject;
 typedef struct _dictobject PyDictObject;
+
+/* The ma_values pointer is NULL for a combined table
+ * or points to an array of PyObject* for a split table
+ */
 struct _dictobject {
     PyObject_HEAD
-    Py_ssize_t ma_fill;  /* # Active + # Dummy */
-    Py_ssize_t ma_used;  /* # Active */
+    Py_ssize_t ma_used;
+    struct _dictkeysobject *ma_keys;
+    PyObject **ma_values;
+};
 
-    /* The table contains ma_mask + 1 slots, and that's a power of 2.
-     * We store the mask instead of the size because the mask is more
-     * frequently needed.
-     */
-    Py_ssize_t ma_mask;
+typedef PyDictKeyEntry *(*dict_lookup_func)
+(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject ***value_addr);
 
-    /* ma_table points to ma_smalltable for small tables, else to
-     * additional malloc'ed memory.  ma_table is never NULL!  This rule
-     * saves repeated runtime null-tests in the workhorse getitem and
-     * setitem calls.
-     */
-    PyDictEntry *ma_table;
-    PyDictEntry *(*ma_lookup)(PyDictObject *mp, PyObject *key, Py_hash_t hash);
-    PyDictEntry ma_smalltable[PyDict_MINSIZE];
+struct _dictkeysobject {
+    Py_ssize_t dk_refcnt;
+    Py_ssize_t dk_size;
+    dict_lookup_func dk_lookup;
+    Py_ssize_t dk_free;
+    PyDictKeyEntry dk_entries[1];
 };
+
 #endif /* Py_LIMITED_API */
 
 PyAPI_DATA(PyTypeObject) PyDict_Type;
@@ -115,6 +137,8 @@
 PyAPI_FUNC(int) PyDict_Next(
     PyObject *mp, Py_ssize_t *pos, PyObject **key, PyObject **value);
 #ifndef Py_LIMITED_API
+PyDictKeysObject *_PyDict_NewKeysForClass(void);
+PyAPI_FUNC(PyObject *) PyDict_NewForInstance(PyTypeObject *tp);
 PyAPI_FUNC(int) _PyDict_Next(
     PyObject *mp, Py_ssize_t *pos, PyObject **key, PyObject **value, Py_hash_t *hash);
 #endif
@@ -129,6 +153,7 @@
 PyAPI_FUNC(PyObject *) _PyDict_NewPresized(Py_ssize_t minused);
 PyAPI_FUNC(void) _PyDict_MaybeUntrack(PyObject *mp);
 PyAPI_FUNC(int) _PyDict_HasOnlyStringKeys(PyObject *mp);
+#define _PyDict_HasSplitTable(d) ((d)->ma_values != NULL)
 
 PyAPI_FUNC(int) PyDict_ClearFreeList(void);
 #endif
@@ -158,6 +183,10 @@
 PyAPI_FUNC(int) PyDict_SetItemString(PyObject *dp, const char *key, PyObject *item);
 PyAPI_FUNC(int) PyDict_DelItemString(PyObject *dp, const char *key);
 
+#ifndef Py_LIMITED_API
+int _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, PyObject *name, PyObject *value);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
diff -r 58bd6a58365d -r a9138aba7896 Include/object.h
--- a/Include/object.h	Wed Feb 08 04:09:37 2012 +0100
+++ b/Include/object.h	Wed Feb 08 13:38:20 2012 +0000
@@ -448,6 +448,7 @@
                                       see add_operators() in typeobject.c . */
     PyBufferProcs as_buffer;
     PyObject *ht_name, *ht_slots, *ht_qualname;
+    struct _dictkeysobject *ht_cached_keys;
     /* here are optional user slots, followed by the members. */
 } PyHeapTypeObject;
 
diff -r 58bd6a58365d -r a9138aba7896 Lib/test/test_sys.py
--- a/Lib/test/test_sys.py	Wed Feb 08 04:09:37 2012 +0100
+++ b/Lib/test/test_sys.py	Wed Feb 08 13:38:20 2012 +0000
@@ -687,9 +687,9 @@
         # method-wrapper (descriptor object)
         check({}.__iter__, size(h + '2P'))
         # dict
-        check({}, size(h + '3P2P' + 8*'P2P'))
+        check({}, size(h + '3P' + '4P' + 8*'P2P'))
         longdict = {1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8}
-        check(longdict, size(h + '3P2P' + 8*'P2P') + 16*size('P2P'))
+        check(longdict, size(h + '3P' + '4P') + 16*size('P2P'))
         # dictionary-keyiterator
         check({}.keys(), size(h + 'P'))
         # dictionary-valueiterator
@@ -831,7 +831,7 @@
         # type
         # (PyTypeObject + PyNumberMethods + PyMappingMethods +
         #  PySequenceMethods + PyBufferProcs)
-        s = size(vh + 'P2P15Pl4PP9PP11PI') + size('16Pi17P 3P 10P 2P 3P')
+        s = size(vh + 'P2P15Pl4PP9PP11PIP') + size('16Pi17P 3P 10P 2P 3P')
         check(int, s)
         # class
         class newstyleclass(object): pass
diff -r 58bd6a58365d -r a9138aba7896 Objects/dictnotes.txt
--- a/Objects/dictnotes.txt	Wed Feb 08 04:09:37 2012 +0100
+++ b/Objects/dictnotes.txt	Wed Feb 08 13:38:20 2012 +0000
@@ -1,7 +1,6 @@
-NOTES ON OPTIMIZING DICTIONARIES
+NOTES ON DICTIONARIES
 ================================
 
-
 Principal Use Cases for Dictionaries
 ------------------------------------
 
@@ -21,7 +20,6 @@
 
 Builtins
     Frequent reads.  Almost never written.
-    Size 126 interned strings (as of Py2.3b1).
     A few keys are accessed much more frequently than others.
 
 Uniquification
@@ -59,34 +57,33 @@
     Characterized by deletions interspersed with adds and replacements.
     Performance benefits greatly from the re-use of dummy entries.
 
-
-Data Layout (assuming a 32-bit box with 64 bytes per cache line)
+Data Layout
 ----------------------------------------------------------------
 
-Smalldicts (8 entries) are attached to the dictobject structure
-and the whole group nearly fills two consecutive cache lines.
-
-Larger dicts use the first half of the dictobject structure (one cache
-line) and a separate, continuous block of entries (at 12 bytes each
-for a total of 5.333 entries per cache line).
+Dictionaries are composed of 3 components:
+The dictobject struct itself
+A dict-keys object (keys & hashes)
+A values array
 
 
 Tunable Dictionary Parameters
 -----------------------------
 
-* PyDict_MINSIZE.  Currently set to 8.
+* PyDict_MINSIZE.  Currently set to 4 (to keep instance dicts small).
     Must be a power of two.  New dicts have to zero-out every cell.
-    Each additional 8 consumes 1.5 cache lines.  Increasing improves
-    the sparseness of small dictionaries but costs time to read in
-    the additional cache lines if they are not already in cache.
+    Increasing improves the sparseness of small dictionaries but costs time
+    to read in the additional cache lines if they are not already in cache.
     That case is common when keyword arguments are passed.
 
-* Maximum dictionary load in PyDict_SetItem.  Currently set to 2/3.
+* Maximum dictionary load in PyDict_SetItem.  Currently set to 5/8.
     Increasing this ratio makes dictionaries more dense resulting
     in more collisions.  Decreasing it improves sparseness at the
     expense of spreading entries over more cache lines and at the
     cost of total memory consumed.
 
+    The load is designed to be as close to the historical optimised
+    value of 2/3, but 5/8 does not require a division, enabling it to
+    be cheaply recomputed during resizing.
     The load test occurs in highly time sensitive code.  Efforts
     to make the test more complex (for example, varying the load
     for different sizes) have degraded performance.
@@ -126,8 +123,8 @@
 Also, every dictionary iterates at least twice, once for the memset()
 when it is created and once by dealloc().
 
-Dictionary operations involving only a single key can be O(1) unless 
-resizing is possible.  By checking for a resize only when the 
+Dictionary operations involving only a single key can be O(1) unless
+resizing is possible.  By checking for a resize only when the
 dictionary can grow (and may *require* resizing), other operations
 remain O(1), and the odds of resize thrashing or memory fragmentation
 are reduced. In particular, an algorithm that empties a dictionary
@@ -135,136 +132,21 @@
 not be necessary at all because the dictionary is eventually
 discarded entirely.
 
+The key differences between this implementation and earlier versions are:
+    1. The table can be split into two parts, the keys and the values.
 
-Results of Cache Locality Experiments
--------------------------------------
+    2. There is an additional key-value combination: (key, NULL).
+       Unlike (<dummy>, NULL) which represents a deleted value, (key, NULL)
+       represented a yet to be inserted value. This combination can only occur
+       when the table is split.
 
-When an entry is retrieved from memory, 4.333 adjacent entries are also
-retrieved into a cache line.  Since accessing items in cache is *much*
-cheaper than a cache miss, an enticing idea is to probe the adjacent
-entries as a first step in collision resolution.  Unfortunately, the
-introduction of any regularity into collision searches results in more
-collisions than the current random chaining approach.
+    3. No small table embedded in the dict,
+       as this would make sharing of key-tables impossible.
 
-Exploiting cache locality at the expense of additional collisions fails
-to payoff when the entries are already loaded in cache (the expense
-is paid with no compensating benefit).  This occurs in small dictionaries
-where the whole dictionary fits into a pair of cache lines.  It also
-occurs frequently in large dictionaries which have a common access pattern
-where some keys are accessed much more frequently than others.  The
-more popular entries *and* their collision chains tend to remain in cache.
 
-To exploit cache locality, change the collision resolution section
-in lookdict() and lookdict_string().  Set i^=1 at the top of the
-loop and move the  i = (i << 2) + i + perturb + 1 to an unrolled
-version of the loop.
+These changes have the following consequences.
+   1. General dictionaries are slightly larger.
 
-This optimization strategy can be leveraged in several ways:
+   2. All object dictionaries of a single class can share a single key-table,
+      saving about 60% memory for such cases.
 
-* If the dictionary is kept sparse (through the tunable parameters),
-then the occurrence of additional collisions is lessened.
-
-* If lookdict() and lookdict_string() are specialized for small dicts
-and for largedicts, then the versions for large_dicts can be given
-an alternate search strategy without increasing collisions in small dicts
-which already have the maximum benefit of cache locality.
-
-* If the use case for a dictionary is known to have a random key
-access pattern (as opposed to a more common pattern with a Zipf's law
-distribution), then there will be more benefit for large dictionaries
-because any given key is no more likely than another to already be
-in cache.
-
-* In use cases with paired accesses to the same key, the second access
-is always in cache and gets no benefit from efforts to further improve
-cache locality.
-
-Optimizing the Search of Small Dictionaries
--------------------------------------------
-
-If lookdict() and lookdict_string() are specialized for smaller dictionaries,
-then a custom search approach can be implemented that exploits the small
-search space and cache locality.
-
-* The simplest example is a linear search of contiguous entries.  This is
-  simple to implement, guaranteed to terminate rapidly, never searches
-  the same entry twice, and precludes the need to check for dummy entries.
-
-* A more advanced example is a self-organizing search so that the most
-  frequently accessed entries get probed first.  The organization
-  adapts if the access pattern changes over time.  Treaps are ideally
-  suited for self-organization with the most common entries at the
-  top of the heap and a rapid binary search pattern.  Most probes and
-  results are all located at the top of the tree allowing them all to
-  be located in one or two cache lines.
-
-* Also, small dictionaries may be made more dense, perhaps filling all
-  eight cells to take the maximum advantage of two cache lines.
-
-
-Strategy Pattern
-----------------
-
-Consider allowing the user to set the tunable parameters or to select a
-particular search method.  Since some dictionary use cases have known
-sizes and access patterns, the user may be able to provide useful hints.
-
-1) For example, if membership testing or lookups dominate runtime and memory
-   is not at a premium, the user may benefit from setting the maximum load
-   ratio at 5% or 10% instead of the usual 66.7%.  This will sharply
-   curtail the number of collisions but will increase iteration time.
-   The builtin namespace is a prime example of a dictionary that can
-   benefit from being highly sparse.
-
-2) Dictionary creation time can be shortened in cases where the ultimate
-   size of the dictionary is known in advance.  The dictionary can be
-   pre-sized so that no resize operations are required during creation.
-   Not only does this save resizes, but the key insertion will go
-   more quickly because the first half of the keys will be inserted into
-   a more sparse environment than before.  The preconditions for this
-   strategy arise whenever a dictionary is created from a key or item
-   sequence and the number of *unique* keys is known.
-
-3) If the key space is large and the access pattern is known to be random,
-   then search strategies exploiting cache locality can be fruitful.
-   The preconditions for this strategy arise in simulations and
-   numerical analysis.
-
-4) If the keys are fixed and the access pattern strongly favors some of
-   the keys, then the entries can be stored contiguously and accessed
-   with a linear search or treap.  This exploits knowledge of the data,
-   cache locality, and a simplified search routine.  It also eliminates
-   the need to test for dummy entries on each probe.  The preconditions
-   for this strategy arise in symbol tables and in the builtin dictionary.
-
-
-Readonly Dictionaries
----------------------
-Some dictionary use cases pass through a build stage and then move to a
-more heavily exercised lookup stage with no further changes to the
-dictionary.
-
-An idea that emerged on python-dev is to be able to convert a dictionary
-to a read-only state.  This can help prevent programming errors and also
-provide knowledge that can be exploited for lookup optimization.
-
-The dictionary can be immediately rebuilt (eliminating dummy entries),
-resized (to an appropriate level of sparseness), and the keys can be
-jostled (to minimize collisions).  The lookdict() routine can then
-eliminate the test for dummy entries (saving about 1/4 of the time
-spent in the collision resolution loop).
-
-An additional possibility is to insert links into the empty spaces
-so that dictionary iteration can proceed in len(d) steps instead of
-(mp->mask + 1) steps.  Alternatively, a separate tuple of keys can be
-kept just for iteration.
-
-
-Caching Lookups
----------------
-The idea is to exploit key access patterns by anticipating future lookups
-based on previous lookups.
-
-The simplest incarnation is to save the most recently accessed entry.
-This gives optimal performance for use cases where every get is followed
-by a set or del to the same key.
diff -r 58bd6a58365d -r a9138aba7896 Objects/dictobject.c
--- a/Objects/dictobject.c	Wed Feb 08 04:09:37 2012 +0100
+++ b/Objects/dictobject.c	Wed Feb 08 13:38:20 2012 +0000
@@ -10,7 +10,6 @@
 #include "Python.h"
 #include "stringlib/eq.h"
 
-
 /* Set a key error with the specified argument, wrapping it in a
  * tuple automatically so that tuple keys are not unpacked as the
  * exception arguments. */
@@ -25,10 +24,6 @@
     Py_DECREF(tup);
 }
 
-/* Define this out if you don't want conversion statistics on exit. */
-#undef SHOW_CONVERSION_COUNTS
-
-/* See large comment block below.  This must be >= 1. */
 #define PERTURB_SHIFT 5
 
 /*
@@ -126,8 +121,13 @@
 
 */
 
-/* Object used as dummy key to fill deleted entries */
-static PyObject *dummy = NULL; /* Initialized by first call to newPyDictObject() */
+/* Object used as dummy key to fill deleted entries
+ * This could be any unique object,
+ * use a custom type dict in order to minimise coupling.
+*/
+static PyObject _dummy_struct;
+
+#define dummy (&_dummy_struct)
 
 #ifdef Py_REF_DEBUG
 PyObject *
@@ -138,77 +138,18 @@
 #endif
 
 /* forward declarations */
-static PyDictEntry *
-lookdict_unicode(PyDictObject *mp, PyObject *key, Py_hash_t hash);
-
-#ifdef SHOW_CONVERSION_COUNTS
-static long created = 0L;
-static long converted = 0L;
-
-static void
-show_counts(void)
-{
-    fprintf(stderr, "created %ld string dicts\n", created);
-    fprintf(stderr, "converted %ld to normal dicts\n", converted);
-    fprintf(stderr, "%.2f%% conversion rate\n", (100.0*converted)/created);
-}
-#endif
-
-/* Debug statistic to compare allocations with reuse through the free list */
-#undef SHOW_ALLOC_COUNT
-#ifdef SHOW_ALLOC_COUNT
-static size_t count_alloc = 0;
-static size_t count_reuse = 0;
-
-static void
-show_alloc(void)
-{
-    fprintf(stderr, "Dict allocations: %" PY_FORMAT_SIZE_T "d\n",
-        count_alloc);
-    fprintf(stderr, "Dict reuse through freelist: %" PY_FORMAT_SIZE_T
-        "d\n", count_reuse);
-    fprintf(stderr, "%.2f%% reuse rate\n\n",
-        (100.0*count_reuse/(count_alloc+count_reuse)));
-}
-#endif
-
-/* Debug statistic to count GC tracking of dicts */
-#ifdef SHOW_TRACK_COUNT
-static Py_ssize_t count_untracked = 0;
-static Py_ssize_t count_tracked = 0;
-
-static void
-show_track(void)
-{
-    fprintf(stderr, "Dicts created: %" PY_FORMAT_SIZE_T "d\n",
-        count_tracked + count_untracked);
-    fprintf(stderr, "Dicts tracked by the GC: %" PY_FORMAT_SIZE_T
-        "d\n", count_tracked);
-    fprintf(stderr, "%.2f%% dict tracking rate\n\n",
-        (100.0*count_tracked/(count_untracked+count_tracked)));
-}
-#endif
-
-
-/* Initialization macros.
-   There are two ways to create a dict:  PyDict_New() is the main C API
-   function, and the tp_new slot maps to dict_new().  In the latter case we
-   can save a little time over what PyDict_New does because it's guaranteed
-   that the PyDictObject struct is already zeroed out.
-   Everyone except dict_new() should use EMPTY_TO_MINSIZE (unless they have
-   an excellent reason not to).
-*/
-
-#define INIT_NONZERO_DICT_SLOTS(mp) do {                                \
-    (mp)->ma_table = (mp)->ma_smalltable;                               \
-    (mp)->ma_mask = PyDict_MINSIZE - 1;                                 \
-    } while(0)
-
-#define EMPTY_TO_MINSIZE(mp) do {                                       \
-    memset((mp)->ma_smalltable, 0, sizeof((mp)->ma_smalltable));        \
-    (mp)->ma_used = (mp)->ma_fill = 0;                                  \
-    INIT_NONZERO_DICT_SLOTS(mp);                                        \
-    } while(0)
+static PyDictKeyEntry *lookdict(PyDictObject *mp, PyObject *key,
+                                Py_hash_t hash, PyObject ***value_addr);
+static PyDictKeyEntry *lookdict_unicode(PyDictObject *mp, PyObject *key,
+                                        Py_hash_t hash, PyObject ***value_addr);
+static PyDictKeyEntry *
+lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
+                         Py_hash_t hash, PyObject ***value_addr);
+static PyDictKeyEntry *lookdict_split(PyDictObject *mp, PyObject *key,
+                                      Py_hash_t hash, PyObject ***value_addr);
+
+static int
+dictresize(PyDictObject *mp, Py_ssize_t minused);
 
 /* Dictionary reuse scheme to save calls to malloc, free, and memset */
 #ifndef PyDict_MAXFREELIST
@@ -236,61 +177,148 @@
     PyDict_ClearFreeList();
 }
 
-PyObject *
-PyDict_New(void)
+#define DK_INCREF(dk) (++(dk)->dk_refcnt)
+#define DK_DECREF(dk) if ((--(dk)->dk_refcnt) == 0) free_keys_object(dk)
+#define DK_SIZE(dk) ((dk)->dk_size)
+#define IS_POWER_OF_2(x) (((x) & (x-1)) == 0)
+
+/* USABLE_FRACTION must obey the following:
+ * USABLE_FRACTION(n) < n for n >= PyDict_MINSIZE
+ * USABLE_FRACTION(n) > 0 for n >= PyDict_MINSIZE
+ * 2n/3, (2n+1)/3 and n/2+n/4-n/8 (f(4)==3 otherwise f(n) == 5n/8)
+ * seem to work well in practice.
+ */
+/* Use (2n+1)/3 rather than 2n+3 because: it makes no difference for
+ * combined tables (the two fractions round to the same number n < ),
+ * but 2*4/3 is 2 whereas (2*4+1)/3 is 3 which potentially saves quite
+ * a lot of space for small, split tables */
+#define USABLE_FRACTION(n) ((((n) << 1)+1)/3)
+
+/* Alternative fraction that is otherwise close enough to (2n+1)/3 to make
+ * little difference. 8 * 2/3 == 8 * 5/8 == 5. 16 * 2/3 == 16 * 5/8 == 10.
+ * 32 * 2/3 = 21, 32 * 5/8 = 20.
+ * Its advantage is that it is faster to compute on machines with slow division.
+ * #define USABLE_FRACTION(n) (((n) >> 1) + ((n) >> 2) - ((n) >> 3))
+*/
+
+#define ENSURE_DELETEABLE(d, fail) \
+    if ((d)->ma_keys->dk_lookup == lookdict_unicode_nodummy) { \
+        (d)->ma_keys->dk_lookup = lookdict_unicode; \
+    } else if ((d)->ma_keys->dk_lookup == lookdict_split) { \
+        if (dictresize(d, DK_SIZE((d)->ma_keys))) \
+            return (fail); \
+        (d)->ma_keys->dk_lookup = lookdict_unicode; \
+    }
+
+static PyDictKeysObject *new_keys_object(Py_ssize_t size) {
+    PyDictKeysObject *dk;
+    Py_ssize_t i;
+    PyDictKeyEntry *ep0;
+
+    assert(size >= PyDict_MINSIZE);
+    assert(IS_POWER_OF_2(size));
+    dk = PyMem_Malloc(sizeof(PyDictKeysObject) +
+                      sizeof(PyDictKeyEntry) * (size-1));
+    if (dk == NULL) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+    dk->dk_refcnt = 1;
+    dk->dk_size = size;
+    dk->dk_free = USABLE_FRACTION(size);
+    ep0 = &dk->dk_entries[0];
+    for (i = 0; i < size; i++) {
+        ep0[i].me_key = NULL;
+        ep0[i]._me_value = NULL;
+    }
+    dk->dk_lookup = lookdict_unicode_nodummy;
+    return dk;
+}
+
+PyDictKeysObject *
+_PyDict_NewKeysForClass(void)
 {
-    register PyDictObject *mp;
-    if (dummy == NULL) { /* Auto-initialize dummy */
-        dummy = PyUnicode_FromString("<dummy key>");
-        if (dummy == NULL)
-            return NULL;
-#ifdef SHOW_CONVERSION_COUNTS
-        Py_AtExit(show_counts);
-#endif
-#ifdef SHOW_ALLOC_COUNT
-        Py_AtExit(show_alloc);
-#endif
-#ifdef SHOW_TRACK_COUNT
-        Py_AtExit(show_track);
-#endif
+    PyDictKeysObject *keys = new_keys_object(4);
+    if (keys != NULL)
+        keys->dk_lookup = lookdict_split;
+    return keys;
+}
+
+static void
+free_keys_object(PyDictKeysObject *keys)
+{
+    PyDictKeyEntry *entries = &keys->dk_entries[0];
+    Py_ssize_t i, n;
+    for (i = 0, n = DK_SIZE(keys); i < n; i++) {
+        Py_XDECREF(entries[i].me_key);
+        Py_XDECREF(entries[i]._me_value);
     }
+    PyMem_DEL(keys);
+}
+
+/* Consumes a reference to the keys object */
+static PyObject *
+new_dict(PyDictKeysObject *keys, PyObject **values)
+{
+    PyDictObject *mp;
     if (numfree) {
         mp = free_list[--numfree];
         assert (mp != NULL);
         assert (Py_TYPE(mp) == &PyDict_Type);
         _Py_NewReference((PyObject *)mp);
-        if (mp->ma_fill) {
-            EMPTY_TO_MINSIZE(mp);
-        } else {
-            /* At least set ma_table and ma_mask; these are wrong
-               if an empty but presized dict is added to freelist */
-            INIT_NONZERO_DICT_SLOTS(mp);
-        }
-        assert (mp->ma_used == 0);
-        assert (mp->ma_table == mp->ma_smalltable);
-        assert (mp->ma_mask == PyDict_MINSIZE - 1);
-#ifdef SHOW_ALLOC_COUNT
-        count_reuse++;
-#endif
-    } else {
+    }
+    else {
         mp = PyObject_GC_New(PyDictObject, &PyDict_Type);
         if (mp == NULL)
             return NULL;
-        EMPTY_TO_MINSIZE(mp);
-#ifdef SHOW_ALLOC_COUNT
-        count_alloc++;
-#endif
+
     }
-    mp->ma_lookup = lookdict_unicode;
-#ifdef SHOW_TRACK_COUNT
-    count_untracked++;
-#endif
-#ifdef SHOW_CONVERSION_COUNTS
-    ++created;
-#endif
+    mp->ma_keys = keys;
+    mp->ma_values = (void *)values;
+    mp->ma_used = 0;
     return (PyObject *)mp;
 }
 
+#define new_values(size) PyMem_New(PyObject *, size)
+
+#define free_values(values) PyMem_DEL(values)
+
+/* Consumes a reference to the keys object */
+static PyObject *
+new_dict_with_shared_keys(PyDictKeysObject *keys) {
+    PyObject **values;
+    Py_ssize_t i, size;
+
+    size = DK_SIZE(keys);
+    values = new_values(size);
+    if (values == NULL) {
+        return PyErr_NoMemory();
+    }
+    for (i = 0; i < size; i++) {
+        values[i] = NULL;
+    }
+    return new_dict(keys, values);
+}
+
+PyObject *
+PyDict_New(void) {
+    return new_dict(new_keys_object(8), NULL);
+}
+
+#define CACHED_KEYS(tp) (((PyHeapTypeObject*)tp)->ht_cached_keys)
+
+PyObject *
+PyDict_NewForInstance(PyTypeObject *tp) {
+    if ((tp->tp_flags & Py_TPFLAGS_HEAPTYPE) && CACHED_KEYS(tp)) {
+        DK_INCREF(CACHED_KEYS(tp));
+        return new_dict_with_shared_keys(CACHED_KEYS(tp));
+    } else {
+        return PyDict_New();
+    }
+}
+
+
+
 /*
 The basic lookup function used by all operations.
 This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
@@ -309,29 +337,29 @@
 lookdict() is general-purpose, and may return NULL if (and only if) a
 comparison raises an exception (this was new in Python 2.5).
 lookdict_unicode() below is specialized to string keys, comparison of which can
-never raise an exception; that function can never return NULL.  For both, when
-the key isn't found a PyDictEntry* is returned for which the me_value field is
-NULL; this is the slot in the dict at which the key would have been found, and
-the caller can (if it wishes) add the <key, value> pair to the returned
-PyDictEntry*.
+never raise an exception; that function can never return NULL.
+lookdict_unicode_nodummy is further specialized for string keys that cannot be
+the <dummy> value.
 */
-static PyDictEntry *
-lookdict(PyDictObject *mp, PyObject *key, register Py_hash_t hash)
+static PyDictKeyEntry *
+lookdict(PyDictObject *mp, PyObject *key,
+         Py_hash_t hash, PyObject ***value_addr)
 {
     register size_t i;
     register size_t perturb;
-    register PyDictEntry *freeslot;
-    register size_t mask = (size_t)mp->ma_mask;
-    PyDictEntry *ep0 = mp->ma_table;
-    register PyDictEntry *ep;
+    register PyDictKeyEntry *freeslot;
+    register size_t mask = (size_t)mp->ma_keys->dk_size-1;
+    PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0];
+    register PyDictKeyEntry *ep;
     register int cmp;
     PyObject *startkey;
 
     i = (size_t)hash & mask;
     ep = &ep0[i];
-    if (ep->me_key == NULL || ep->me_key == key)
+    if (ep->me_key == NULL || ep->me_key == key) {
+        *value_addr = &ep->_me_value;
         return ep;
-
+    }
     if (ep->me_key == dummy)
         freeslot = ep;
     else {
@@ -342,9 +370,11 @@
             Py_DECREF(startkey);
             if (cmp < 0)
                 return NULL;
-            if (ep0 == mp->ma_table && ep->me_key == startkey) {
-                if (cmp > 0)
+            if (ep0 == mp->ma_keys->dk_entries && ep->me_key == startkey) {
+                if (cmp > 0) {
+                    *value_addr = &ep->_me_value;
                     return ep;
+                }
             }
             else {
                 /* The compare did major nasty stuff to the
@@ -352,7 +382,7 @@
                  * XXX A clever adversary could prevent this
                  * XXX from terminating.
                  */
-                return lookdict(mp, key, hash);
+                return lookdict(mp, key, hash, value_addr);
             }
         }
         freeslot = NULL;
@@ -363,20 +393,33 @@
     for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
         i = (i << 2) + i + perturb + 1;
         ep = &ep0[i & mask];
-        if (ep->me_key == NULL)
-            return freeslot == NULL ? ep : freeslot;
-        if (ep->me_key == key)
+        if (ep->me_key == NULL) {
+            if (freeslot == NULL) {
+                *value_addr = &ep->_me_value;
+                return ep;
+            } else {
+                *value_addr = &freeslot->_me_value;
+                return freeslot;
+            }
+        }
+        if (ep->me_key == key) {
+            *value_addr = &ep->_me_value;
             return ep;
+        }
         if (ep->me_hash == hash && ep->me_key != dummy) {
             startkey = ep->me_key;
             Py_INCREF(startkey);
             cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
             Py_DECREF(startkey);
-            if (cmp < 0)
+            if (cmp < 0) {
+                *value_addr = NULL;
                 return NULL;
-            if (ep0 == mp->ma_table && ep->me_key == startkey) {
-                if (cmp > 0)
+            }
+            if (ep0 == mp->ma_keys->dk_entries && ep->me_key == startkey) {
+                if (cmp > 0) {
+                    *value_addr = &ep->_me_value;
                     return ep;
+                }
             }
             else {
                 /* The compare did major nasty stuff to the
@@ -384,7 +427,7 @@
                  * XXX A clever adversary could prevent this
                  * XXX from terminating.
                  */
-                return lookdict(mp, key, hash);
+                return lookdict(mp, key, hash, value_addr);
             }
         }
         else if (ep->me_key == dummy && freeslot == NULL)
@@ -394,46 +437,39 @@
     return 0;
 }
 
-/*
- * Hacked up version of lookdict which can assume keys are always
- * unicodes; this assumption allows testing for errors during
- * PyObject_RichCompareBool() to be dropped; unicode-unicode
- * comparisons never raise exceptions.  This also means we don't need
- * to go through PyObject_RichCompareBool(); we can always use
- * unicode_eq() directly.
- *
- * This is valuable because dicts with only unicode keys are very common.
- */
-static PyDictEntry *
-lookdict_unicode(PyDictObject *mp, PyObject *key, register Py_hash_t hash)
+/* Specialized version for string-only keys */
+static PyDictKeyEntry *
+lookdict_unicode(PyDictObject *mp, PyObject *key,
+                 Py_hash_t hash, PyObject ***value_addr)
 {
     register size_t i;
     register size_t perturb;
-    register PyDictEntry *freeslot;
-    register size_t mask = (size_t)mp->ma_mask;
-    PyDictEntry *ep0 = mp->ma_table;
-    register PyDictEntry *ep;
+    register PyDictKeyEntry *freeslot;
+    register size_t mask = (size_t)mp->ma_keys->dk_size-1;
+    PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0];
+    register PyDictKeyEntry *ep;
 
     /* Make sure this function doesn't have to handle non-unicode keys,
        including subclasses of str; e.g., one reason to subclass
        unicodes is to override __eq__, and for speed we don't cater to
        that here. */
     if (!PyUnicode_CheckExact(key)) {
-#ifdef SHOW_CONVERSION_COUNTS
-        ++converted;
-#endif
-        mp->ma_lookup = lookdict;
-        return lookdict(mp, key, hash);
+        mp->ma_keys->dk_lookup = lookdict;
+        return lookdict(mp, key, hash, value_addr);
     }
     i = (size_t)hash & mask;
     ep = &ep0[i];
-    if (ep->me_key == NULL || ep->me_key == key)
+    if (ep->me_key == NULL || ep->me_key == key) {
+        *value_addr = &ep->_me_value;
         return ep;
+    }
     if (ep->me_key == dummy)
         freeslot = ep;
     else {
-        if (ep->me_hash == hash && unicode_eq(ep->me_key, key))
+        if (ep->me_hash == hash && unicode_eq(ep->me_key, key)) {
+            *value_addr = &ep->_me_value;
             return ep;
+        }
         freeslot = NULL;
     }
 
@@ -442,13 +478,22 @@
     for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
         i = (i << 2) + i + perturb + 1;
         ep = &ep0[i & mask];
-        if (ep->me_key == NULL)
-            return freeslot == NULL ? ep : freeslot;
+        if (ep->me_key == NULL) {
+            if (freeslot == NULL) {
+                *value_addr = &ep->_me_value;
+                return ep;
+            } else {
+                *value_addr = &freeslot->_me_value;
+                return freeslot;
+            }
+        }
         if (ep->me_key == key
             || (ep->me_hash == hash
             && ep->me_key != dummy
-            && unicode_eq(ep->me_key, key)))
+            && unicode_eq(ep->me_key, key))) {
+            *value_addr = &ep->_me_value;
             return ep;
+        }
         if (ep->me_key == dummy && freeslot == NULL)
             freeslot = ep;
     }
@@ -456,6 +501,96 @@
     return 0;
 }
 
+/* Faster version of lookdict_unicode when it is known that no <dummy> keys
+ * will be present. */
+static PyDictKeyEntry *
+lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
+                         Py_hash_t hash, PyObject ***value_addr)
+{
+    register size_t i;
+    register size_t perturb;
+    register size_t mask = (size_t)mp->ma_keys->dk_size-1;
+    PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0];
+    register PyDictKeyEntry *ep;
+
+    /* Make sure this function doesn't have to handle non-unicode keys,
+       including subclasses of str; e.g., one reason to subclass
+       unicodes is to override __eq__, and for speed we don't cater to
+       that here. */
+    if (!PyUnicode_CheckExact(key)) {
+        mp->ma_keys->dk_lookup = lookdict;
+        return lookdict(mp, key, hash, value_addr);
+    }
+    i = (size_t)hash & mask;
+    ep = &ep0[i];
+    assert(ep->me_key == NULL || PyUnicode_CheckExact(ep->me_key));
+    if (ep->me_key == NULL || ep->me_key == key) {
+        *value_addr = &ep->_me_value;
+        return ep;
+    }
+    if (ep->me_hash == hash && unicode_eq(ep->me_key, key)) {
+        *value_addr = &ep->_me_value;
+        return ep;
+    }
+    for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
+        i = (i << 2) + i + perturb + 1;
+        ep = &ep0[i & mask];
+        assert(ep->me_key == NULL || PyUnicode_CheckExact(ep->me_key));
+        if (ep->me_key == NULL || ep->me_key == key) {
+            *value_addr = &ep->_me_value;
+            return ep;
+        }
+        if (ep->me_hash == hash && unicode_eq(ep->me_key, key)) {
+            *value_addr = &ep->_me_value;
+            return ep;
+        }
+    }
+    assert(0);          /* NOT REACHED */
+    return 0;
+}
+
+/* Version of lookdict for split tables. */
+static PyDictKeyEntry *
+lookdict_split(PyDictObject *mp, PyObject *key,
+                         Py_hash_t hash, PyObject ***value_addr)
+{
+    register size_t i;
+    register size_t perturb;
+    register size_t mask = (size_t)mp->ma_keys->dk_size-1;
+    PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0];
+    register PyDictKeyEntry *ep;
+
+    if (!PyUnicode_CheckExact(key)) {
+        return lookdict(mp, key, hash, value_addr);
+    }
+    i = (size_t)hash & mask;
+    ep = &ep0[i];
+    assert(ep->me_key == NULL || PyUnicode_CheckExact(ep->me_key));
+    if (ep->me_key == NULL || ep->me_key == key) {
+        *value_addr = &mp->ma_values[i];
+        return ep;
+    }
+    if (ep->me_hash == hash && unicode_eq(ep->me_key, key)) {
+        *value_addr = &mp->ma_values[i];
+        return ep;
+    }
+    for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
+        i = (i << 2) + i + perturb + 1;
+        ep = &ep0[i & mask];
+        assert(ep->me_key == NULL || PyUnicode_CheckExact(ep->me_key));
+        if (ep->me_key == NULL || ep->me_key == key) {
+            *value_addr = &mp->ma_values[i & mask];
+            return ep;
+        }
+        if (ep->me_hash == hash && unicode_eq(ep->me_key, key)) {
+            *value_addr = &mp->ma_values[i & mask];
+            return ep;
+        }
+    }
+    assert(0);          /* NOT REACHED */
+    return 0;
+}
+
 int
 _PyDict_HasOnlyStringKeys(PyObject *dict)
 {
@@ -463,7 +598,7 @@
     PyObject *key, *value;
     assert(PyDict_Check(dict));
     /* Shortcut */
-    if (((PyDictObject *)dict)->ma_lookup == lookdict_unicode)
+    if (((PyDictObject *)dict)->ma_keys->dk_lookup != lookdict)
         return 1;
     while (PyDict_Next(dict, &pos, &key, &value))
         if (!PyUnicode_Check(key))
@@ -471,15 +606,6 @@
     return 1;
 }
 
-#ifdef SHOW_TRACK_COUNT
-#define INCREASE_TRACK_COUNT \
-    (count_tracked++, count_untracked--);
-#define DECREASE_TRACK_COUNT \
-    (count_tracked--, count_untracked++);
-#else
-#define INCREASE_TRACK_COUNT
-#define DECREASE_TRACK_COUNT
-#endif
 
 #define MAINTAIN_TRACKING(mp, key, value) \
     do { \
@@ -487,7 +613,6 @@
             if (_PyObject_GC_MAY_BE_TRACKED(key) || \
                 _PyObject_GC_MAY_BE_TRACKED(value)) { \
                 _PyObject_GC_TRACK(mp); \
-                INCREASE_TRACK_COUNT \
             } \
         } \
     } while(0)
@@ -497,26 +622,42 @@
 {
     PyDictObject *mp;
     PyObject *value;
-    Py_ssize_t mask, i;
-    PyDictEntry *ep;
+    Py_ssize_t i, size;
 
     if (!PyDict_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op))
         return;
 
     mp = (PyDictObject *) op;
-    ep = mp->ma_table;
-    mask = mp->ma_mask;
-    for (i = 0; i <= mask; i++) {
-        if ((value = ep[i].me_value) == NULL)
-            continue;
-        if (_PyObject_GC_MAY_BE_TRACKED(value) ||
-            _PyObject_GC_MAY_BE_TRACKED(ep[i].me_key))
-            return;
+    size = DK_SIZE(mp->ma_keys);
+    if (_PyDict_HasSplitTable(mp)) {
+        for (i = 0; i < size; i++) {
+            if ((value = mp->ma_values[i]) == NULL)
+                continue;
+            if (_PyObject_GC_MAY_BE_TRACKED(value)) {
+                assert(!_PyObject_GC_MAY_BE_TRACKED(mp->ma_keys->dk_entries[i].me_key));
+                return;
+            }
+        }
+    } else {
+        PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0];
+        for (i = 0; i < size; i++) {
+            if ((value = ep0[i]._me_value) == NULL)
+                continue;
+            if (_PyObject_GC_MAY_BE_TRACKED(value) ||
+                _PyObject_GC_MAY_BE_TRACKED(ep0[i].me_key))
+                return;
+        }
     }
-    DECREASE_TRACK_COUNT
     _PyObject_GC_UNTRACK(op);
 }
 
+static int
+insertion_resize(PyDictObject *mp)
+{
+    if (_PyDict_HasSplitTable(mp) || mp->ma_used > 50000)
+        return dictresize(mp, mp->ma_used * 2);
+    return dictresize(mp, mp->ma_used * 4);
+}
 
 /*
 Internal routine to insert a new item into the table.
@@ -525,38 +666,63 @@
 Returns -1 if an error occurred, or 0 on success.
 */
 static int
-insertdict(register PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
+insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
 {
     PyObject *old_value;
-    register PyDictEntry *ep;
-    typedef PyDictEntry *(*lookupfunc)(PyDictObject *, PyObject *, Py_hash_t);
-
-    assert(mp->ma_lookup != NULL);
-    ep = mp->ma_lookup(mp, key, hash);
+    PyObject **value_addr;
+    PyDictKeyEntry *ep;
+    assert(key != dummy);
+
+    if (mp->ma_values != NULL) {
+        if (!PyUnicode_CheckExact(key))
+            if (insertion_resize(mp) < 0)
+                return -1;
+    }
+
+    ep = mp->ma_keys->dk_lookup(mp, key, hash, &value_addr);
     if (ep == NULL) {
         Py_DECREF(key);
         Py_DECREF(value);
         return -1;
     }
     MAINTAIN_TRACKING(mp, key, value);
-    if (ep->me_value != NULL) {
-        old_value = ep->me_value;
-        ep->me_value = value;
+    old_value = *value_addr;
+    if (old_value != NULL) {
+        assert(ep->me_key != NULL && ep->me_key != dummy);
+        *value_addr = value;
         Py_DECREF(old_value); /* which **CAN** re-enter */
         Py_DECREF(key);
+    } else {
+        if (ep->me_key == NULL) {
+            if (mp->ma_keys->dk_free <= 0) {
+                /* Need to resize. */
+                if (insertion_resize(mp) < 0)
+                    return -1;
+                ep = mp->ma_keys->dk_lookup(mp, key, hash, &value_addr);
+                if (ep == NULL) {
+                    Py_DECREF(key);
+                    Py_DECREF(value);
+                    return -1;
+                }
+            }
+            mp->ma_keys->dk_free--;
+            assert(mp->ma_keys->dk_free >= 0);
+            ep->me_key = key;
+            ep->me_hash = hash;
+        } else {
+            if (ep->me_key == dummy) {
+                ep->me_key = key;
+                ep->me_hash = hash;
+                Py_DECREF(dummy);
+            } else {
+                Py_DECREF(key);
+                assert(_PyDict_HasSplitTable(mp));
+            }
+        }
+        mp->ma_used++;
+        *value_addr = value;
     }
-    else {
-        if (ep->me_key == NULL)
-            mp->ma_fill++;
-        else {
-            assert(ep->me_key == dummy);
-            Py_DECREF(dummy);
-        }
-        ep->me_key = key;
-        ep->me_hash = hash;
-        ep->me_value = value;
-        mp->ma_used++;
-    }
+    assert(ep->me_key != NULL && ep->me_key != dummy);
     return 0;
 }
 
@@ -567,30 +733,39 @@
 using insertdict() in dictresize() is dangerous (SF bug #1456209).
 Note that no refcounts are changed by this routine; if needed, the caller
 is responsible for incref'ing `key` and `value`.
+Neither mp->ma_used nor k->dk_free are modified by this routine; the caller
+must set them correctly
 */
 static void
-insertdict_clean(register PyDictObject *mp, PyObject *key, Py_hash_t hash,
+insertdict_clean(PyDictObject *mp, PyObject *key, Py_hash_t hash,
                  PyObject *value)
 {
-    register size_t i;
-    register size_t perturb;
-    register size_t mask = (size_t)mp->ma_mask;
-    PyDictEntry *ep0 = mp->ma_table;
-    register PyDictEntry *ep;
-
+    size_t i;
+    size_t perturb;
+    PyDictKeysObject *k = mp->ma_keys;
+    size_t mask = (size_t)DK_SIZE(k)-1;
+    PyDictKeyEntry *ep0 = &k->dk_entries[0];
+    PyDictKeyEntry *ep;
+
+    assert(k->dk_lookup != NULL);
+    assert(value != NULL);
+    assert(key != NULL);
+    assert(key != dummy);
+    if (!PyUnicode_CheckExact(key)) {
+        assert(mp->ma_values == NULL);
+        k->dk_lookup = lookdict;
+    }
     MAINTAIN_TRACKING(mp, key, value);
-    i = (size_t)hash & mask;
+    i = hash & mask;
     ep = &ep0[i];
     for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) {
         i = (i << 2) + i + perturb + 1;
         ep = &ep0[i & mask];
     }
-    assert(ep->me_value == NULL);
-    mp->ma_fill++;
+    assert(ep->_me_value == NULL);
     ep->me_key = key;
     ep->me_hash = hash;
-    ep->me_value = value;
-    mp->ma_used++;
+    ep->_me_value = value;
 }
 
 /*
@@ -602,14 +777,11 @@
 dictresize(PyDictObject *mp, Py_ssize_t minused)
 {
     Py_ssize_t newsize;
-    PyDictEntry *oldtable, *newtable, *ep;
-    Py_ssize_t i;
-    int is_oldtable_malloced;
-    PyDictEntry small_copy[PyDict_MINSIZE];
-
-    assert(minused >= 0);
-
-    /* Find the smallest table size > minused. */
+    PyDictKeysObject *oldkeys;
+    PyObject **oldvalues;
+    Py_ssize_t i, size;
+
+/* Find the smallest table size > minused. */
     for (newsize = PyDict_MINSIZE;
          newsize <= minused && newsize > 0;
          newsize <<= 1)
@@ -618,72 +790,92 @@
         PyErr_NoMemory();
         return -1;
     }
-
-    /* Get space for a new table. */
-    oldtable = mp->ma_table;
-    assert(oldtable != NULL);
-    is_oldtable_malloced = oldtable != mp->ma_smalltable;
-
-    if (newsize == PyDict_MINSIZE) {
-        /* A large table is shrinking, or we can't get any smaller. */
-        newtable = mp->ma_smalltable;
-        if (newtable == oldtable) {
-            if (mp->ma_fill == mp->ma_used) {
-                /* No dummies, so no point doing anything. */
-                return 0;
+    oldkeys = mp->ma_keys;
+    oldvalues = mp->ma_values;
+    /* Logic below assumes we can transfer refcount to new keys
+     * and that value is stored in _me_value.
+     * Increment ref-counts and copy values here to compensate
+     * This (resizing a split table) should be relatively rare */
+    size = DK_SIZE(oldkeys);
+    if (oldvalues != NULL) {
+        for (i = 0; i < size; i++)
+            if (oldvalues[i] != NULL) {
+                Py_INCREF(oldkeys->dk_entries[i].me_key);
+                oldkeys->dk_entries[i]._me_value = oldvalues[i];
             }
-            /* We're not going to resize it, but rebuild the
-               table anyway to purge old dummy entries.
-               Subtle:  This is *necessary* if fill==size,
-               as lookdict needs at least one virgin slot to
-               terminate failing searches.  If fill < size, it's
-               merely desirable, as dummies slow searches. */
-            assert(mp->ma_fill > mp->ma_used);
-            memcpy(small_copy, oldtable, sizeof(small_copy));
-            oldtable = small_copy;
+    }
+    /* Allocate a new table. */
+    mp->ma_keys = new_keys_object(newsize);
+    if (mp->ma_keys == NULL) {
+        mp->ma_keys = oldkeys;
+        return -1;
+    }
+    mp->ma_values = NULL;
+    for (i = 0; i < size; i++) {
+        PyDictKeyEntry *ep = &oldkeys->dk_entries[i];
+        if (ep->_me_value != NULL) {
+            assert(ep->me_key != dummy);
+            insertdict_clean(mp, ep->me_key, ep->me_hash, ep->_me_value);
         }
     }
+    mp->ma_keys->dk_free -= mp->ma_used;
+    if (oldvalues != NULL) {
+        for (i = 0; i < size; i++)
+            oldkeys->dk_entries[i]._me_value = NULL;
+        free_values(oldvalues);
+        DK_DECREF(oldkeys);
+    }
     else {
-        newtable = PyMem_NEW(PyDictEntry, newsize);
-        if (newtable == NULL) {
-            PyErr_NoMemory();
-            return -1;
+        assert(oldkeys->dk_lookup != lookdict_split);
+        if (oldkeys->dk_lookup != lookdict_unicode_nodummy) {
+            PyDictKeyEntry *ep0 = &oldkeys->dk_entries[0];
+            for (i = 0; i < size; i++) {
+                if (ep0[i].me_key == dummy)
+                    Py_DECREF(dummy);
+            }
         }
+        assert(oldkeys->dk_refcnt == 1);
+        PyMem_DEL(oldkeys);
     }
-
-    /* Make the dict empty, using the new table. */
-    assert(newtable != oldtable);
-    mp->ma_table = newtable;
-    mp->ma_mask = newsize - 1;
-    memset(newtable, 0, sizeof(PyDictEntry) * newsize);
-    mp->ma_used = 0;
-    i = mp->ma_fill;
-    mp->ma_fill = 0;
-
-    /* Copy the data over; this is refcount-neutral for active entries;
-       dummy entries aren't copied over, of course */
-    for (ep = oldtable; i > 0; ep++) {
-        if (ep->me_value != NULL) {             /* active entry */
-            --i;
-            insertdict_clean(mp, ep->me_key, ep->me_hash, ep->me_value);
-        }
-        else if (ep->me_key != NULL) {          /* dummy entry */
-            --i;
-            assert(ep->me_key == dummy);
-            Py_DECREF(ep->me_key);
-        }
-        /* else key == value == NULL:  nothing to do */
-    }
-
-    if (is_oldtable_malloced)
-        PyMem_DEL(oldtable);
     return 0;
 }
 
-/* Create a new dictionary pre-sized to hold an estimated number of elements.
-   Underestimates are okay because the dictionary will resize as necessary.
-   Overestimates just mean the dictionary will be more sparse than usual.
-*/
+static PyDictKeysObject *
+make_keys_shared(PyObject *op) {
+    Py_ssize_t i;
+    Py_ssize_t size;
+    PyDictObject *mp = (PyDictObject *)op;
+
+    assert(PyDict_CheckExact(op));
+    mp = (PyDictObject *)op;
+    if (!_PyDict_HasSplitTable(mp)) {
+        PyDictKeyEntry *ep0;
+        PyObject **values;
+        assert(mp->ma_keys->dk_refcnt == 1);
+        if (mp->ma_keys->dk_lookup == lookdict) {
+            return NULL;
+        } else if (mp->ma_keys->dk_lookup == lookdict_unicode) {
+            /* Remove dummy keys */
+            if (dictresize(mp, DK_SIZE(mp->ma_keys)))
+                return NULL;
+        }
+        assert(mp->ma_keys->dk_lookup == lookdict_unicode_nodummy);
+        /* Copy values into a new array */
+        ep0 = &mp->ma_keys->dk_entries[0];
+        values = new_values(DK_SIZE(mp->ma_keys));
+        if (values == NULL)
+            return NULL;
+        size = DK_SIZE(mp->ma_keys);
+        for (i = 0; i < size; i++) {
+            values[i] = ep0[i]._me_value;
+            ep0[i]._me_value = NULL;
+        }
+        mp->ma_keys->dk_lookup = lookdict_split;
+        mp->ma_values = values;
+    }
+    DK_INCREF(mp->ma_keys);
+    return mp->ma_keys;
+}
 
 PyObject *
 _PyDict_NewPresized(Py_ssize_t minused)
@@ -712,8 +904,10 @@
 {
     Py_hash_t hash;
     PyDictObject *mp = (PyDictObject *)op;
-    PyDictEntry *ep;
+    PyDictKeyEntry *ep;
     PyThreadState *tstate;
+    PyObject **value_addr;
+
     if (!PyDict_Check(op))
         return NULL;
     if (!PyUnicode_CheckExact(key) ||
@@ -737,20 +931,20 @@
         /* preserve the existing exception */
         PyObject *err_type, *err_value, *err_tb;
         PyErr_Fetch(&err_type, &err_value, &err_tb);
-        ep = (mp->ma_lookup)(mp, key, hash);
+        ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
         /* ignore errors */
         PyErr_Restore(err_type, err_value, err_tb);
         if (ep == NULL)
             return NULL;
     }
     else {
-        ep = (mp->ma_lookup)(mp, key, hash);
+        ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
         if (ep == NULL) {
             PyErr_Clear();
             return NULL;
         }
     }
-    return ep->me_value;
+    return *value_addr;
 }
 
 /* Variant of PyDict_GetItem() that doesn't suppress exceptions.
@@ -762,7 +956,8 @@
 {
     Py_hash_t hash;
     PyDictObject*mp = (PyDictObject *)op;
-    PyDictEntry *ep;
+    PyDictKeyEntry *ep;
+    PyObject **value_addr;
 
     if (!PyDict_Check(op)) {
         PyErr_BadInternalCall();
@@ -777,10 +972,10 @@
         }
     }
 
-    ep = (mp->ma_lookup)(mp, key, hash);
+    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
     if (ep == NULL)
         return NULL;
-    return ep->me_value;
+    return *value_addr;
 }
 
 /* CAUTION: PyDict_SetItem() must guarantee that it won't resize the
@@ -790,12 +985,10 @@
  * remove them.
  */
 int
-PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
+PyDict_SetItem(PyObject *op, PyObject *key, PyObject *value)
 {
-    register PyDictObject *mp;
-    register Py_hash_t hash;
-    register Py_ssize_t n_used;
-
+    PyDictObject *mp;
+    Py_hash_t hash;
     if (!PyDict_Check(op)) {
         PyErr_BadInternalCall();
         return -1;
@@ -810,38 +1003,21 @@
         if (hash == -1)
             return -1;
     }
-    assert(mp->ma_fill <= mp->ma_mask);  /* at least one empty slot */
-    n_used = mp->ma_used;
     Py_INCREF(value);
     Py_INCREF(key);
-    if (insertdict(mp, key, hash, value) != 0)
-        return -1;
-    /* If we added a key, we can safely resize.  Otherwise just return!
-     * If fill >= 2/3 size, adjust size.  Normally, this doubles or
-     * quaduples the size, but it's also possible for the dict to shrink
-     * (if ma_fill is much larger than ma_used, meaning a lot of dict
-     * keys have been * deleted).
-     *
-     * Quadrupling the size improves average dictionary sparseness
-     * (reducing collisions) at the cost of some memory and iteration
-     * speed (which loops over every possible entry).  It also halves
-     * the number of expensive resize operations in a growing dictionary.
-     *
-     * Very large dictionaries (over 50K items) use doubling instead.
-     * This may help applications with severe memory constraints.
-     */
-    if (!(mp->ma_used > n_used && mp->ma_fill*3 >= (mp->ma_mask+1)*2))
-        return 0;
-    return dictresize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used);
+
+    /* insertdict() handles any resizing that might be necessary */
+    return insertdict(mp, key, hash, value);
 }
 
 int
 PyDict_DelItem(PyObject *op, PyObject *key)
 {
-    register PyDictObject *mp;
-    register Py_hash_t hash;
-    register PyDictEntry *ep;
-    PyObject *old_value, *old_key;
+    PyDictObject *mp;
+    Py_hash_t hash;
+    PyDictKeyEntry *ep;
+    PyObject *old_key, *old_value;
+    PyObject **value_addr;
 
     if (!PyDict_Check(op)) {
         PyErr_BadInternalCall();
@@ -855,91 +1031,95 @@
             return -1;
     }
     mp = (PyDictObject *)op;
-    ep = (mp->ma_lookup)(mp, key, hash);
+    ENSURE_DELETEABLE(mp, -1);
+    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
     if (ep == NULL)
         return -1;
-    if (ep->me_value == NULL) {
+    if (*value_addr == NULL) {
         set_key_error(key);
         return -1;
     }
     old_key = ep->me_key;
     Py_INCREF(dummy);
     ep->me_key = dummy;
-    old_value = ep->me_value;
-    ep->me_value = NULL;
+    old_value = *value_addr;
+    *value_addr = NULL;
     mp->ma_used--;
     Py_DECREF(old_value);
     Py_DECREF(old_key);
     return 0;
 }
 
+/* This immutable, empty PyDictKeysObject is used for PyDict_Clear()
+ * (which cannot fail and thus can do no allocation).
+ */
+static PyDictKeysObject empty_keys_struct = {
+        2, /* dk_refcnt 1 for this struct, 1 for dummy_struct */
+        1, /* dk_size */
+        lookdict_unicode_nodummy, /* dk_lookup */
+        0, /* dk_free (immutable) */
+        {
+            { 0, 0, 0 } /* dk_entries (empty) */
+        }
+};
+
+#define Py_EMPTY_KEYS &empty_keys_struct
+
 void
 PyDict_Clear(PyObject *op)
 {
     PyDictObject *mp;
-    PyDictEntry *ep, *table;
-    int table_is_malloced;
-    Py_ssize_t fill;
-    PyDictEntry small_copy[PyDict_MINSIZE];
-#ifdef Py_DEBUG
+    PyDictKeysObject *oldkeys;
+    PyObject **oldvalues;
     Py_ssize_t i, n;
-#endif
-
+
+    mp = ((PyDictObject *)op);
     if (!PyDict_Check(op))
         return;
-    mp = (PyDictObject *)op;
-#ifdef Py_DEBUG
-    n = mp->ma_mask + 1;
-    i = 0;
-#endif
-
-    table = mp->ma_table;
-    assert(table != NULL);
-    table_is_malloced = table != mp->ma_smalltable;
-
-    /* This is delicate.  During the process of clearing the dict,
-     * decrefs can cause the dict to mutate.  To avoid fatal confusion
-     * (voice of experience), we have to make the dict empty before
-     * clearing the slots, and never refer to anything via mp->xxx while
-     * clearing.
-     */
-    fill = mp->ma_fill;
-    if (table_is_malloced)
-        EMPTY_TO_MINSIZE(mp);
-
-    else if (fill > 0) {
-        /* It's a small table with something that needs to be cleared.
-         * Afraid the only safe way is to copy the dict entries into
-         * another small table first.
-         */
-        memcpy(small_copy, table, sizeof(small_copy));
-        table = small_copy;
-        EMPTY_TO_MINSIZE(mp);
+    oldkeys = mp->ma_keys;
+    oldvalues = mp->ma_values;
+    /* Empty the dict... */
+    mp->ma_keys = Py_EMPTY_KEYS;
+    mp->ma_used = 0;
+    DK_INCREF(Py_EMPTY_KEYS);
+    mp->ma_values = NULL;
+    /* ...then clear the keys and values */
+    n = DK_SIZE(oldkeys);
+    if (oldvalues != NULL)
+        for (i = 0; i < n; i++)
+            Py_CLEAR(oldvalues[i]);
+    if (n == 8 && oldvalues == NULL) {
+        /* Reuse table */
+        Py_ssize_t i, n;
+        assert(oldkeys->dk_refcnt == 1);
+        for (i = 0, n = DK_SIZE(oldkeys); i < n; i++) {
+            Py_CLEAR(oldkeys->dk_entries[i].me_key);
+            Py_CLEAR(oldkeys->dk_entries[i]._me_value);
+        }
+        oldkeys->dk_free = USABLE_FRACTION(oldkeys->dk_size);
+        oldkeys->dk_lookup = lookdict_unicode_nodummy;
+        /* Check that dict is still empty */
+        if (mp->ma_keys == Py_EMPTY_KEYS) {
+            mp->ma_keys = oldkeys;
+            mp->ma_values = NULL;
+            assert(mp->ma_used == 0);
+        } else {
+            free_keys_object(oldkeys);
+        }
     }
-    /* else it's a small table that's already empty */
-
-    /* Now we can finally clear things.  If C had refcounts, we could
-     * assert that the refcount on table is 1 now, i.e. that this function
-     * has unique access to it, so decref side-effects can't alter it.
-     */
-    for (ep = table; fill > 0; ++ep) {
-#ifdef Py_DEBUG
-        assert(i < n);
-        ++i;
-#endif
-        if (ep->me_key) {
-            --fill;
-            Py_DECREF(ep->me_key);
-            Py_XDECREF(ep->me_value);
+    else {
+        if (oldvalues != NULL)
+            free_values(oldvalues);
+        DK_DECREF(oldkeys);
+        if (mp->ma_keys == Py_EMPTY_KEYS) {
+            mp->ma_keys = new_keys_object(8);
+            if (mp->ma_keys == NULL) {
+                mp->ma_keys = Py_EMPTY_KEYS;
+            }
+            else
+                DK_DECREF(Py_EMPTY_KEYS);
         }
-#ifdef Py_DEBUG
-        else
-            assert(ep->me_value == NULL);
-#endif
     }
-
-    if (table_is_malloced)
-        PyMem_DEL(table);
 }
 
 /*
@@ -960,26 +1140,38 @@
 int
 PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue)
 {
-    register Py_ssize_t i;
-    register Py_ssize_t mask;
-    register PyDictEntry *ep;
+    Py_ssize_t i;
+    Py_ssize_t mask, offset;
+    PyDictObject *mp;
+    PyObject **value_ptr;
+
 
     if (!PyDict_Check(op))
         return 0;
+    mp = (PyDictObject *)op;
     i = *ppos;
     if (i < 0)
         return 0;
-    ep = ((PyDictObject *)op)->ma_table;
-    mask = ((PyDictObject *)op)->ma_mask;
-    while (i <= mask && ep[i].me_value == NULL)
+    if (mp->ma_values) {
+        value_ptr = &mp->ma_values[i];
+        offset = sizeof(PyObject *);
+    }
+    else {
+        value_ptr = &mp->ma_keys->dk_entries[i]._me_value;
+        offset = sizeof(PyDictKeyEntry);
+    }
+    mask = DK_SIZE(mp->ma_keys)-1;
+    while (i <= mask && *value_ptr == NULL) {
+        value_ptr = (PyObject **)(((char *)value_ptr) + offset);
         i++;
+    }
     *ppos = i+1;
     if (i > mask)
         return 0;
     if (pkey)
-        *pkey = ep[i].me_key;
+        *pkey = mp->ma_keys->dk_entries[i].me_key;
     if (pvalue)
-        *pvalue = ep[i].me_value;
+        *pvalue = *value_ptr;
     return 1;
 }
 
@@ -987,48 +1179,61 @@
 int
 _PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue, Py_hash_t *phash)
 {
-    register Py_ssize_t i;
-    register Py_ssize_t mask;
-    register PyDictEntry *ep;
+    Py_ssize_t i;
+    Py_ssize_t mask, offset;
+    PyDictObject *mp;
+    PyObject **value_ptr;
 
     if (!PyDict_Check(op))
         return 0;
+    mp = (PyDictObject *)op;
     i = *ppos;
     if (i < 0)
         return 0;
-    ep = ((PyDictObject *)op)->ma_table;
-    mask = ((PyDictObject *)op)->ma_mask;
-    while (i <= mask && ep[i].me_value == NULL)
+    if (mp->ma_values) {
+        value_ptr = &mp->ma_values[i];
+        offset = sizeof(PyObject *);
+    }
+    else {
+        value_ptr = &mp->ma_keys->dk_entries[i]._me_value;
+        offset = sizeof(PyDictKeyEntry);
+    }
+    mask = DK_SIZE(mp->ma_keys)-1;
+    while (i <= mask && *value_ptr == NULL) {
+        value_ptr = (PyObject **)(((char *)value_ptr) + offset);
         i++;
+    }
     *ppos = i+1;
     if (i > mask)
         return 0;
-    *phash = ep[i].me_hash;
+    *phash = mp->ma_keys->dk_entries[i].me_hash;
     if (pkey)
-        *pkey = ep[i].me_key;
+        *pkey = mp->ma_keys->dk_entries[i].me_key;
     if (pvalue)
-        *pvalue = ep[i].me_value;
+        *pvalue = *value_ptr;
     return 1;
 }
 
 /* Methods */
 
 static void
-dict_dealloc(register PyDictObject *mp)
+dict_dealloc(PyDictObject *mp)
 {
-    register PyDictEntry *ep;
-    Py_ssize_t fill = mp->ma_fill;
+    PyObject **values = mp->ma_values;
+    PyDictKeysObject *keys = mp->ma_keys;
+    Py_ssize_t i, n;
     PyObject_GC_UnTrack(mp);
     Py_TRASHCAN_SAFE_BEGIN(mp)
-    for (ep = mp->ma_table; fill > 0; ep++) {
-        if (ep->me_key) {
-            --fill;
-            Py_DECREF(ep->me_key);
-            Py_XDECREF(ep->me_value);
+    if (values != NULL) {
+        for (i = 0, n = DK_SIZE(mp->ma_keys); i < n; i++) {
+            Py_XDECREF(values[i]);
         }
+        free_values(values);
+        DK_DECREF(keys);
     }
-    if (mp->ma_table != mp->ma_smalltable)
-        PyMem_DEL(mp->ma_table);
+    else {
+        free_keys_object(keys);
+    }
     if (numfree < PyDict_MAXFREELIST && Py_TYPE(mp) == &PyDict_Type)
         free_list[numfree++] = mp;
     else
@@ -1036,6 +1241,7 @@
     Py_TRASHCAN_SAFE_END(mp)
 }
 
+
 static PyObject *
 dict_repr(PyDictObject *mp)
 {
@@ -1068,10 +1274,12 @@
     while (PyDict_Next((PyObject *)mp, &i, &key, &value)) {
         int status;
         /* Prevent repr from deleting value during key format. */
+        Py_INCREF(key);
         Py_INCREF(value);
         s = PyObject_Repr(key);
         PyUnicode_Append(&s, colon);
         PyUnicode_AppendAndDel(&s, PyObject_Repr(value));
+        Py_DECREF(key);
         Py_DECREF(value);
         if (s == NULL)
             goto Done;
@@ -1126,18 +1334,19 @@
 {
     PyObject *v;
     Py_hash_t hash;
-    PyDictEntry *ep;
-    assert(mp->ma_table != NULL);
+    PyDictKeyEntry *ep;
+    PyObject **value_addr;
+
     if (!PyUnicode_CheckExact(key) ||
         (hash = ((PyASCIIObject *) key)->hash) == -1) {
         hash = PyObject_Hash(key);
         if (hash == -1)
             return NULL;
     }
-    ep = (mp->ma_lookup)(mp, key, hash);
+    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
     if (ep == NULL)
         return NULL;
-    v = ep->me_value;
+    v = *value_addr;
     if (v == NULL) {
         if (!PyDict_CheckExact(mp)) {
             /* Look up __missing__ method if we're a subclass. */
@@ -1181,8 +1390,9 @@
 {
     register PyObject *v;
     register Py_ssize_t i, j;
-    PyDictEntry *ep;
-    Py_ssize_t mask, n;
+    PyDictKeyEntry *ep;
+    Py_ssize_t size, n, offset;
+    PyObject **value_ptr;
 
   again:
     n = mp->ma_used;
@@ -1196,15 +1406,24 @@
         Py_DECREF(v);
         goto again;
     }
-    ep = mp->ma_table;
-    mask = mp->ma_mask;
-    for (i = 0, j = 0; i <= mask; i++) {
-        if (ep[i].me_value != NULL) {
+    ep = &mp->ma_keys->dk_entries[0];
+    size = DK_SIZE(mp->ma_keys);
+    if (mp->ma_values) {
+        value_ptr = mp->ma_values;
+        offset = sizeof(PyObject *);
+    }
+    else {
+        value_ptr = &ep[0]._me_value;
+        offset = sizeof(PyDictKeyEntry);
+    }
+    for (i = 0, j = 0; i < size; i++) {
+        if (*value_ptr != NULL) {
             PyObject *key = ep[i].me_key;
             Py_INCREF(key);
             PyList_SET_ITEM(v, j, key);
             j++;
         }
+        value_ptr = (PyObject **)(((char *)value_ptr) + offset);
     }
     assert(j == n);
     return v;
@@ -1215,8 +1434,8 @@
 {
     register PyObject *v;
     register Py_ssize_t i, j;
-    PyDictEntry *ep;
-    Py_ssize_t mask, n;
+    Py_ssize_t size, n, offset;
+    PyObject **value_ptr;
 
   again:
     n = mp->ma_used;
@@ -1230,11 +1449,19 @@
         Py_DECREF(v);
         goto again;
     }
-    ep = mp->ma_table;
-    mask = mp->ma_mask;
-    for (i = 0, j = 0; i <= mask; i++) {
-        if (ep[i].me_value != NULL) {
-            PyObject *value = ep[i].me_value;
+    size = DK_SIZE(mp->ma_keys);
+    if (mp->ma_values) {
+        value_ptr = mp->ma_values;
+        offset = sizeof(PyObject *);
+    }
+    else {
+        value_ptr = &mp->ma_keys->dk_entries[0]._me_value;
+        offset = sizeof(PyDictKeyEntry);
+    }
+    for (i = 0, j = 0; i < size; i++) {
+        PyObject *value = *value_ptr;
+        value_ptr = (PyObject **)(((char *)value_ptr) + offset);
+        if (value != NULL) {
             Py_INCREF(value);
             PyList_SET_ITEM(v, j, value);
             j++;
@@ -1249,9 +1476,10 @@
 {
     register PyObject *v;
     register Py_ssize_t i, j, n;
-    Py_ssize_t mask;
-    PyObject *item, *key, *value;
-    PyDictEntry *ep;
+    Py_ssize_t size, offset;
+    PyObject *item, *key;
+    PyDictKeyEntry *ep;
+    PyObject **value_ptr;
 
     /* Preallocate the list of tuples, to avoid allocations during
      * the loop over the items, which could trigger GC, which
@@ -1278,10 +1506,20 @@
         goto again;
     }
     /* Nothing we do below makes any function calls. */
-    ep = mp->ma_table;
-    mask = mp->ma_mask;
-    for (i = 0, j = 0; i <= mask; i++) {
-        if ((value=ep[i].me_value) != NULL) {
+    ep = mp->ma_keys->dk_entries;
+    size = DK_SIZE(mp->ma_keys);
+    if (mp->ma_values) {
+        value_ptr = mp->ma_values;
+        offset = sizeof(PyObject *);
+    }
+    else {
+        value_ptr = &ep[0]._me_value;
+        offset = sizeof(PyDictKeyEntry);
+    }
+    for (i = 0, j = 0; i < size; i++) {
+        PyObject *value = *value_ptr;
+        value_ptr = (PyObject **)(((char *)value_ptr) + offset);
+        if (value != NULL) {
             key = ep[i].me_key;
             item = PyList_GET_ITEM(v, j);
             Py_INCREF(key);
@@ -1323,7 +1561,6 @@
             Py_DECREF(d);
             return NULL;
         }
-
         while (_PyDict_Next(seq, &pos, &key, &oldvalue, &hash)) {
             Py_INCREF(key);
             Py_INCREF(value);
@@ -1341,7 +1578,7 @@
         PyObject *key;
         Py_hash_t hash;
 
-        if (dictresize(mp, PySet_GET_SIZE(seq))) {
+        if (dictresize(mp, PySet_GET_SIZE(seq))){
             Py_DECREF(d);
             return NULL;
         }
@@ -1349,7 +1586,7 @@
         while (_PySet_NextEntry(seq, &pos, &key, &hash)) {
             Py_INCREF(key);
             Py_INCREF(value);
-            if (insertdict(mp, key, hash, value)) {
+            if (insertdict(mp, key, hash, value)){
                 Py_DECREF(d);
                 return NULL;
             }
@@ -1513,8 +1750,8 @@
 PyDict_Merge(PyObject *a, PyObject *b, int override)
 {
     register PyDictObject *mp, *other;
-    register Py_ssize_t i;
-    PyDictEntry *entry;
+    register Py_ssize_t i, n;
+    PyDictKeyEntry *entry;
 
     /* We accept for the argument either a concrete dictionary object,
      * or an abstract "mapping" object.  For the former, we can do
@@ -1541,20 +1778,25 @@
          * incrementally resizing as we insert new items.  Expect
          * that there will be no (or few) overlapping keys.
          */
-        if ((mp->ma_fill + other->ma_used)*3 >= (mp->ma_mask+1)*2) {
-           if (dictresize(mp, (mp->ma_used + other->ma_used)*2) != 0)
+        if (mp->ma_keys->dk_free * 3 < other->ma_used * 2)
+            if (dictresize(mp, (mp->ma_used + other->ma_used)*2) != 0)
                return -1;
-        }
-        for (i = 0; i <= other->ma_mask; i++) {
-            entry = &other->ma_table[i];
-            if (entry->me_value != NULL &&
+        for (i = 0, n = DK_SIZE(other->ma_keys); i < n; i++) {
+            PyObject *value;
+            entry = &other->ma_keys->dk_entries[i];
+            if (other->ma_values)
+                value = other->ma_values[i];
+            else
+                value = entry->_me_value;
+
+            if (value != NULL &&
                 (override ||
                  PyDict_GetItem(a, entry->me_key) == NULL)) {
                 Py_INCREF(entry->me_key);
-                Py_INCREF(entry->me_value);
+                Py_INCREF(value);
                 if (insertdict(mp, entry->me_key,
                                entry->me_hash,
-                               entry->me_value) != 0)
+                               value) != 0)
                     return -1;
             }
         }
@@ -1616,11 +1858,35 @@
 PyDict_Copy(PyObject *o)
 {
     PyObject *copy;
+    PyDictObject *mp;
+    Py_ssize_t i, n;
 
     if (o == NULL || !PyDict_Check(o)) {
         PyErr_BadInternalCall();
         return NULL;
     }
+    mp = (PyDictObject *)o;
+    if (_PyDict_HasSplitTable(mp)) {
+        PyDictObject *split_copy;
+        PyObject **newvalues = new_values(DK_SIZE(mp->ma_keys));
+        if (newvalues == NULL)
+            return PyErr_NoMemory();
+        split_copy = PyObject_GC_New(PyDictObject, &PyDict_Type);
+        if (split_copy == NULL) {
+            PyMem_DEL(newvalues);
+            return NULL;
+        }
+        split_copy->ma_values = newvalues;
+        split_copy->ma_keys = mp->ma_keys;
+        split_copy->ma_used = mp->ma_used;
+        DK_INCREF(mp->ma_keys);
+        for (i = 0, n = DK_SIZE(mp->ma_keys); i < n; i++) {
+            PyObject *value = mp->ma_values[i];
+            Py_XINCREF(value);
+            split_copy->ma_values[i] = value;
+        }
+        return (PyObject *)split_copy;
+    }
     copy = PyDict_New();
     if (copy == NULL)
         return NULL;
@@ -1682,14 +1948,18 @@
     if (a->ma_used != b->ma_used)
         /* can't be equal if # of entries differ */
         return 0;
-
     /* Same # of entries -- check all of 'em.  Exit early on any diff. */
-    for (i = 0; i <= a->ma_mask; i++) {
-        PyObject *aval = a->ma_table[i].me_value;
+    for (i = 0; i < DK_SIZE(a->ma_keys); i++) {
+        PyDictKeyEntry *ep = &a->ma_keys->dk_entries[i];
+        PyObject *aval;
+        if (a->ma_values)
+            aval = a->ma_values[i];
+        else
+            aval = ep->_me_value;
         if (aval != NULL) {
             int cmp;
             PyObject *bval;
-            PyObject *key = a->ma_table[i].me_key;
+            PyObject *key = ep->me_key;
             /* temporarily bump aval's refcount to ensure it stays
                alive until we're done with it */
             Py_INCREF(aval);
@@ -1710,7 +1980,7 @@
         }
     }
     return 1;
- }
+}
 
 static PyObject *
 dict_richcompare(PyObject *v, PyObject *w, int op)
@@ -1731,13 +2001,14 @@
         res = Py_NotImplemented;
     Py_INCREF(res);
     return res;
- }
+}
 
 static PyObject *
 dict_contains(register PyDictObject *mp, PyObject *key)
 {
     Py_hash_t hash;
-    PyDictEntry *ep;
+    PyDictKeyEntry *ep;
+    PyObject **value_addr;
 
     if (!PyUnicode_CheckExact(key) ||
         (hash = ((PyASCIIObject *) key)->hash) == -1) {
@@ -1745,10 +2016,10 @@
         if (hash == -1)
             return NULL;
     }
-    ep = (mp->ma_lookup)(mp, key, hash);
+    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
     if (ep == NULL)
         return NULL;
-    return PyBool_FromLong(ep->me_value != NULL);
+    return PyBool_FromLong(*value_addr != NULL);
 }
 
 static PyObject *
@@ -1758,7 +2029,8 @@
     PyObject *failobj = Py_None;
     PyObject *val = NULL;
     Py_hash_t hash;
-    PyDictEntry *ep;
+    PyDictKeyEntry *ep;
+    PyObject **value_addr;
 
     if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &failobj))
         return NULL;
@@ -1769,17 +2041,16 @@
         if (hash == -1)
             return NULL;
     }
-    ep = (mp->ma_lookup)(mp, key, hash);
+    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
     if (ep == NULL)
         return NULL;
-    val = ep->me_value;
+    val = *value_addr;
     if (val == NULL)
         val = failobj;
     Py_INCREF(val);
     return val;
 }
 
-
 static PyObject *
 dict_setdefault(register PyDictObject *mp, PyObject *args)
 {
@@ -1787,7 +2058,8 @@
     PyObject *failobj = Py_None;
     PyObject *val = NULL;
     Py_hash_t hash;
-    PyDictEntry *ep;
+    PyDictKeyEntry *ep;
+    PyObject **value_addr;
 
     if (!PyArg_UnpackTuple(args, "setdefault", 1, 2, &key, &failobj))
         return NULL;
@@ -1798,14 +2070,17 @@
         if (hash == -1)
             return NULL;
     }
-    ep = (mp->ma_lookup)(mp, key, hash);
+    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
     if (ep == NULL)
         return NULL;
-    val = ep->me_value;
+    val = *value_addr;
     if (val == NULL) {
-        val = failobj;
-        if (PyDict_SetItem((PyObject*)mp, key, failobj))
+        Py_INCREF(failobj);
+        Py_INCREF(key);
+        if (insertdict(mp, key, hash, failobj))
             val = NULL;
+        else
+            val = failobj;
     }
     Py_XINCREF(val);
     return val;
@@ -1823,9 +2098,10 @@
 dict_pop(PyDictObject *mp, PyObject *args)
 {
     Py_hash_t hash;
-    PyDictEntry *ep;
     PyObject *old_value, *old_key;
     PyObject *key, *deflt = NULL;
+    PyDictKeyEntry *ep;
+    PyObject **value_addr;
 
     if(!PyArg_UnpackTuple(args, "pop", 1, 2, &key, &deflt))
         return NULL;
@@ -1833,20 +2109,20 @@
         if (deflt) {
             Py_INCREF(deflt);
             return deflt;
-        }
-        set_key_error(key);
+        }        set_key_error(key);
         return NULL;
     }
+    ENSURE_DELETEABLE(mp, NULL);
     if (!PyUnicode_CheckExact(key) ||
         (hash = ((PyASCIIObject *) key)->hash) == -1) {
         hash = PyObject_Hash(key);
         if (hash == -1)
             return NULL;
     }
-    ep = (mp->ma_lookup)(mp, key, hash);
+    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
     if (ep == NULL)
         return NULL;
-    if (ep->me_value == NULL) {
+    if (ep->_me_value == NULL) {
         if (deflt) {
             Py_INCREF(deflt);
             return deflt;
@@ -1857,8 +2133,8 @@
     old_key = ep->me_key;
     Py_INCREF(dummy);
     ep->me_key = dummy;
-    old_value = ep->me_value;
-    ep->me_value = NULL;
+    old_value = *value_addr;
+    *value_addr = NULL;
     mp->ma_used--;
     Py_DECREF(old_key);
     return old_value;
@@ -1868,9 +2144,10 @@
 dict_popitem(PyDictObject *mp)
 {
     Py_hash_t i = 0;
-    PyDictEntry *ep;
+    PyDictKeyEntry *ep;
     PyObject *res;
 
+
     /* Allocate the result tuple before checking the size.  Believe it
      * or not, this allocation could trigger a garbage collection which
      * could empty the dict, so if we checked the size first and that
@@ -1889,49 +2166,64 @@
                         "popitem(): dictionary is empty");
         return NULL;
     }
+    ENSURE_DELETEABLE(mp, NULL);
     /* Set ep to "the first" dict entry with a value.  We abuse the hash
      * field of slot 0 to hold a search finger:
      * If slot 0 has a value, use slot 0.
      * Else slot 0 is being used to hold a search finger,
      * and we use its hash value as the first index to look.
      */
-    ep = &mp->ma_table[0];
-    if (ep->me_value == NULL) {
+    ep = &mp->ma_keys->dk_entries[0];
+    if (ep->_me_value == NULL) {
+        Py_ssize_t mask = DK_SIZE(mp->ma_keys)-1;
         i = ep->me_hash;
         /* The hash field may be a real hash value, or it may be a
          * legit search finger, or it may be a once-legit search
          * finger that's out of bounds now because it wrapped around
          * or the table shrunk -- simply make sure it's in bounds now.
          */
-        if (i > mp->ma_mask || i < 1)
+        if (i > mask || i < 1)
             i = 1;              /* skip slot 0 */
-        while ((ep = &mp->ma_table[i])->me_value == NULL) {
+        while ((ep = &mp->ma_keys->dk_entries[i])->_me_value == NULL) {
             i++;
-            if (i > mp->ma_mask)
+            if (i > mask)
                 i = 1;
         }
     }
     PyTuple_SET_ITEM(res, 0, ep->me_key);
-    PyTuple_SET_ITEM(res, 1, ep->me_value);
+    PyTuple_SET_ITEM(res, 1, ep->_me_value);
     Py_INCREF(dummy);
     ep->me_key = dummy;
-    ep->me_value = NULL;
+    ep->_me_value = NULL;
     mp->ma_used--;
-    assert(mp->ma_table[0].me_value == NULL);
-    mp->ma_table[0].me_hash = i + 1;  /* next place to start */
+    assert(mp->ma_keys->dk_entries[0].me_value == NULL);
+    mp->ma_keys->dk_entries[0].me_hash = i + 1;  /* next place to start */
     return res;
 }
 
 static int
 dict_traverse(PyObject *op, visitproc visit, void *arg)
 {
-    Py_ssize_t i = 0;
-    PyObject *pk;
-    PyObject *pv;
-
-    while (PyDict_Next(op, &i, &pk, &pv)) {
-        Py_VISIT(pk);
-        Py_VISIT(pv);
+    Py_ssize_t i, n;
+    PyDictObject *mp = (PyDictObject *)op;
+    if (mp->ma_keys->dk_lookup == lookdict) {
+        for (i = 0; i < DK_SIZE(mp->ma_keys); i++) {
+            if (mp->ma_keys->dk_entries[i]._me_value != NULL) {
+                Py_VISIT(mp->ma_keys->dk_entries[i]._me_value);
+                Py_VISIT(mp->ma_keys->dk_entries[i].me_key);
+            }
+        }
+    } else {
+        if (mp->ma_values != NULL) {
+            for (i = 0, n = DK_SIZE(mp->ma_keys); i < n; i++) {
+                Py_VISIT(mp->ma_values[i]);
+            }
+        }
+        else {
+            for (i = 0, n = DK_SIZE(mp->ma_keys); i < n; i++) {
+                Py_VISIT(mp->ma_keys->dk_entries[i]._me_value);
+            }
+        }
     }
     return 0;
 }
@@ -1948,12 +2240,22 @@
 static PyObject *
 dict_sizeof(PyDictObject *mp)
 {
-    Py_ssize_t res;
-
+    Py_ssize_t size;
+    double res, keys_size;
+
+    size = DK_SIZE(mp->ma_keys);
     res = sizeof(PyDictObject);
-    if (mp->ma_table != mp->ma_smalltable)
-        res = res + (mp->ma_mask + 1) * sizeof(PyDictEntry);
-    return PyLong_FromSsize_t(res);
+    if (mp->ma_values)
+        res += size * sizeof(PyObject*);
+    /* Count our share of the keys object -- with rounding errors. */
+    keys_size = sizeof(PyDictKeysObject) + (size-1) * sizeof(PyDictKeyEntry);
+    /* If refcnt > 1, then one count is (probably) held by a type */
+    /* XXX  This is somewhat approximate :) */
+    if (mp->ma_keys->dk_refcnt < 3)
+        res += keys_size;
+    else
+        res += keys_size / (mp->ma_keys->dk_refcnt - 1);
+    return PyFloat_FromDouble(res);
 }
 
 PyDoc_STRVAR(contains__doc__,
@@ -2044,7 +2346,8 @@
 {
     Py_hash_t hash;
     PyDictObject *mp = (PyDictObject *)op;
-    PyDictEntry *ep;
+    PyDictKeyEntry *ep;
+    PyObject **value_addr;
 
     if (!PyUnicode_CheckExact(key) ||
         (hash = ((PyASCIIObject *) key)->hash) == -1) {
@@ -2052,8 +2355,8 @@
         if (hash == -1)
             return -1;
     }
-    ep = (mp->ma_lookup)(mp, key, hash);
-    return ep == NULL ? -1 : (ep->me_value != NULL);
+    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
+    return (ep == NULL) ? -1 : (*value_addr != NULL);
 }
 
 /* Internal version of PyDict_Contains used when the hash value is already known */
@@ -2061,10 +2364,11 @@
 _PyDict_Contains(PyObject *op, PyObject *key, Py_hash_t hash)
 {
     PyDictObject *mp = (PyDictObject *)op;
-    PyDictEntry *ep;
-
-    ep = (mp->ma_lookup)(mp, key, hash);
-    return ep == NULL ? -1 : (ep->me_value != NULL);
+    PyDictKeyEntry *ep;
+    PyObject **value_addr;
+
+    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
+    return (ep == NULL) ? -1 : (*value_addr != NULL);
 }
 
 /* Hack to implement "key in dict" */
@@ -2081,6 +2385,7 @@
     0,                          /* sq_inplace_repeat */
 };
 
+
 static PyObject *
 dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 {
@@ -2090,22 +2395,15 @@
     self = type->tp_alloc(type, 0);
     if (self != NULL) {
         PyDictObject *d = (PyDictObject *)self;
-        /* It's guaranteed that tp->alloc zeroed out the struct. */
-        assert(d->ma_table == NULL && d->ma_fill == 0 && d->ma_used == 0);
-        INIT_NONZERO_DICT_SLOTS(d);
-        d->ma_lookup = lookdict_unicode;
+        d->ma_keys = new_keys_object(8);
+        /* XXX - Should we raise a no-memory error? */
+        if (d->ma_keys == NULL) {
+            DK_INCREF(Py_EMPTY_KEYS);
+            d->ma_keys = Py_EMPTY_KEYS;
+        }
         /* The object has been implicitly tracked by tp_alloc */
         if (type == &PyDict_Type)
             _PyObject_GC_UNTRACK(d);
-#ifdef SHOW_CONVERSION_COUNTS
-        ++created;
-#endif
-#ifdef SHOW_TRACK_COUNT
-        if (_PyObject_GC_IS_TRACKED(d))
-            count_tracked++;
-        else
-            count_untracked++;
-#endif
     }
     return self;
 }
@@ -2278,6 +2576,7 @@
     return PyLong_FromSize_t(len);
 }
 
+
 PyDoc_STRVAR(length_hint_doc,
              "Private method returning an estimate of len(list(it)).");
 
@@ -2290,9 +2589,10 @@
 static PyObject *dictiter_iternextkey(dictiterobject *di)
 {
     PyObject *key;
-    register Py_ssize_t i, mask;
-    register PyDictEntry *ep;
+    register Py_ssize_t i, mask, offset;
+    register PyDictKeysObject *k;
     PyDictObject *d = di->di_dict;
+    PyObject **value_ptr;
 
     if (d == NULL)
         return NULL;
@@ -2308,15 +2608,25 @@
     i = di->di_pos;
     if (i < 0)
         goto fail;
-    ep = d->ma_table;
-    mask = d->ma_mask;
-    while (i <= mask && ep[i].me_value == NULL)
+    k = d->ma_keys;
+    if (d->ma_values) {
+        value_ptr = &d->ma_values[i];
+        offset = sizeof(PyObject *);
+    }
+    else {
+        value_ptr = &k->dk_entries[i]._me_value;
+        offset = sizeof(PyDictKeyEntry);
+    }
+    mask = DK_SIZE(k)-1;
+    while (i <= mask && *value_ptr == NULL) {
+        value_ptr = (PyObject **)(((char *)value_ptr) + offset);
         i++;
+    }
     di->di_pos = i+1;
     if (i > mask)
         goto fail;
     di->len--;
-    key = ep[i].me_key;
+    key = k->dk_entries[i].me_key;
     Py_INCREF(key);
     return key;
 
@@ -2362,9 +2672,9 @@
 static PyObject *dictiter_iternextvalue(dictiterobject *di)
 {
     PyObject *value;
-    register Py_ssize_t i, mask;
-    register PyDictEntry *ep;
+    register Py_ssize_t i, mask, offset;
     PyDictObject *d = di->di_dict;
+    PyObject **value_ptr;
 
     if (d == NULL)
         return NULL;
@@ -2378,17 +2688,26 @@
     }
 
     i = di->di_pos;
-    mask = d->ma_mask;
+    mask = DK_SIZE(d->ma_keys)-1;
     if (i < 0 || i > mask)
         goto fail;
-    ep = d->ma_table;
-    while ((value=ep[i].me_value) == NULL) {
+    if (d->ma_values) {
+        value_ptr = &d->ma_values[i];
+        offset = sizeof(PyObject *);
+    }
+    else {
+        value_ptr = &d->ma_keys->dk_entries[i]._me_value;
+        offset = sizeof(PyDictKeyEntry);
+    }
+    while (i <= mask && *value_ptr == NULL) {
+        value_ptr = (PyObject **)(((char *)value_ptr) + offset);
         i++;
         if (i > mask)
             goto fail;
     }
     di->di_pos = i+1;
     di->len--;
+    value = *value_ptr;
     Py_INCREF(value);
     return value;
 
@@ -2434,9 +2753,9 @@
 static PyObject *dictiter_iternextitem(dictiterobject *di)
 {
     PyObject *key, *value, *result = di->di_result;
-    register Py_ssize_t i, mask;
-    register PyDictEntry *ep;
+    register Py_ssize_t i, mask, offset;
     PyDictObject *d = di->di_dict;
+    PyObject **value_ptr;
 
     if (d == NULL)
         return NULL;
@@ -2452,10 +2771,19 @@
     i = di->di_pos;
     if (i < 0)
         goto fail;
-    ep = d->ma_table;
-    mask = d->ma_mask;
-    while (i <= mask && ep[i].me_value == NULL)
+    mask = DK_SIZE(d->ma_keys)-1;
+    if (d->ma_values) {
+        value_ptr = &d->ma_values[i];
+        offset = sizeof(PyObject *);
+    }
+    else {
+        value_ptr = &d->ma_keys->dk_entries[i]._me_value;
+        offset = sizeof(PyDictKeyEntry);
+    }
+    while (i <= mask && *value_ptr == NULL) {
+        value_ptr = (PyObject **)(((char *)value_ptr) + offset);
         i++;
+    }
     di->di_pos = i+1;
     if (i > mask)
         goto fail;
@@ -2470,8 +2798,8 @@
             return NULL;
     }
     di->len--;
-    key = ep[i].me_key;
-    value = ep[i].me_value;
+    key = d->ma_keys->dk_entries[i].me_key;
+    value = *value_ptr;
     Py_INCREF(key);
     Py_INCREF(value);
     PyTuple_SET_ITEM(result, 0, key);
@@ -2585,6 +2913,7 @@
    - if public then they should probably be in builtins
 */
 
+
 /* Return 1 if self is a subset of other, iterating over self;
    0 if not; -1 if an error occurred. */
 static int
@@ -2611,6 +2940,7 @@
     return ok;
 }
 
+
 static PyObject *
 dictview_richcompare(PyObject *self, PyObject *other, int op)
 {
@@ -2722,7 +3052,6 @@
     PyObject *result = PySet_New(self);
     PyObject *tmp;
     _Py_IDENTIFIER(difference_update);
-
     if (result == NULL)
         return NULL;
 
@@ -2742,7 +3071,6 @@
     PyObject *result = PySet_New(self);
     PyObject *tmp;
     _Py_IDENTIFIER(intersection_update);
-
     if (result == NULL)
         return NULL;
 
@@ -2762,7 +3090,6 @@
     PyObject *result = PySet_New(self);
     PyObject *tmp;
     _Py_IDENTIFIER(update);
-
     if (result == NULL)
         return NULL;
 
@@ -2782,7 +3109,6 @@
     PyObject *result = PySet_New(self);
     PyObject *tmp;
     _Py_IDENTIFIER(symmetric_difference_update);
-
     if (result == NULL)
         return NULL;
 
@@ -2915,6 +3241,7 @@
     return dictview_new(dict, &PyDictKeys_Type);
 }
 
+
 /*** dict_items ***/
 
 static PyObject *
@@ -3001,6 +3328,8 @@
     return dictview_new(dict, &PyDictItems_Type);
 }
 
+
+
 /*** dict_values ***/
 
 static PyObject *
@@ -3065,3 +3394,113 @@
 {
     return dictview_new(dict, &PyDictValues_Type);
 }
+
+int
+_PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr,
+                     PyObject *key, PyObject *value) {
+    PyObject *dict;
+    int res;
+    PyDictKeysObject *cached;
+
+    assert(dictptr != NULL);
+    if ((tp->tp_flags & Py_TPFLAGS_HEAPTYPE) && (cached = CACHED_KEYS(tp))) {
+        assert(dictptr != NULL);
+        dict = *dictptr;
+        if (dict == NULL) {
+            DK_INCREF(cached);
+            dict = new_dict_with_shared_keys(cached);
+            if (dict == NULL)
+                return -1;
+        }
+        *dictptr = dict;
+        if (value == NULL) {
+            res = PyDict_DelItem(dict, key);
+            if (cached != ((PyDictObject *)dict)->ma_keys) {
+                CACHED_KEYS(tp) = NULL;
+                DK_DECREF(cached);
+            }
+        } else {
+            res = PyDict_SetItem(dict, key, value);
+            if (cached != ((PyDictObject *)dict)->ma_keys) {
+                /* Either update tp->ht_cached_keys or delete it */
+                if (cached->dk_refcnt == 1) {
+                    CACHED_KEYS(tp) = make_keys_shared(dict);
+                } else {
+                    CACHED_KEYS(tp) = NULL;
+                }
+                DK_DECREF(cached);
+            }
+        }
+    } else {
+        dict = *dictptr;
+        if (dict == NULL) {
+            dict = PyDict_New();
+            if (dict == NULL)
+                return -1;
+            *dictptr = dict;
+        }
+        if (value == NULL) {
+            res = PyDict_DelItem(dict, key);
+        } else {
+            res = PyDict_SetItem(dict, key, value);
+        }
+    }
+    return res;
+}
+
+void
+_PyDictKeys_DecRef(PyDictKeysObject *keys)
+{
+    DK_DECREF(keys);
+}
+
+
+/* ARGSUSED */
+static PyObject *
+dummy_repr(PyObject *op)
+{
+    return PyUnicode_FromString("<dummy key>");
+}
+
+/* ARGUSED */
+static void
+dummy_dealloc(PyObject* ignore)
+{
+    /* This should never get called, but we also don't want to SEGV if
+     * we accidentally decref dummy-key out of existence.
+     */
+    Py_FatalError("deallocating <dummy key>");
+}
+
+static PyTypeObject PyDictDummy_Type = {
+    PyVarObject_HEAD_INIT(&PyType_Type, 0)
+    "NoneType",
+    0,
+    0,
+    dummy_dealloc,      /*tp_dealloc*/ /*never called*/
+    0,                  /*tp_print*/
+    0,                  /*tp_getattr*/
+    0,                  /*tp_setattr*/
+    0,                  /*tp_reserved*/
+    dummy_repr,         /*tp_repr*/
+    0,                  /*tp_as_number*/
+    0,                  /*tp_as_sequence*/
+    0,                  /*tp_as_mapping*/
+    0,                  /*tp_hash */
+    0,                  /*tp_call */
+    0,                  /*tp_str */
+    0,                  /*tp_getattro */
+    0,                  /*tp_setattro */
+    0,                  /*tp_as_buffer */
+    Py_TPFLAGS_DEFAULT, /*tp_flags */
+};
+
+static PyObject _dummy_struct = {
+  _PyObject_EXTRA_INIT
+  2, &PyDictDummy_Type
+};
+
+
+
+
+
diff -r 58bd6a58365d -r a9138aba7896 Objects/object.c
--- a/Objects/object.c	Wed Feb 08 04:09:37 2012 +0100
+++ b/Objects/object.c	Wed Feb 08 13:38:20 2012 +0000
@@ -1163,13 +1163,10 @@
     if (dict == NULL) {
         dictptr = _PyObject_GetDictPtr(obj);
         if (dictptr != NULL) {
-            dict = *dictptr;
-            if (dict == NULL && value != NULL) {
-                dict = PyDict_New();
-                if (dict == NULL)
-                    goto done;
-                *dictptr = dict;
-            }
+            res = _PyObjectDict_SetItem(Py_TYPE(obj), dictptr, name, value);
+            if (res < 0 && PyErr_ExceptionMatches(PyExc_KeyError))
+                PyErr_SetObject(PyExc_AttributeError, name);
+            goto done;
         }
     }
     if (dict != NULL) {
diff -r 58bd6a58365d -r a9138aba7896 Objects/typeobject.c
--- a/Objects/typeobject.c	Wed Feb 08 04:09:37 2012 +0100
+++ b/Objects/typeobject.c	Wed Feb 08 13:38:20 2012 +0000
@@ -1787,7 +1787,7 @@
     }
     dict = *dictptr;
     if (dict == NULL)
-        *dictptr = dict = PyDict_New();
+        *dictptr = dict = PyDict_NewForInstance(Py_TYPE(obj));
     Py_XINCREF(dict);
     return dict;
 }
@@ -2335,6 +2335,9 @@
             type->tp_dictoffset = slotoffset;
         slotoffset += sizeof(PyObject *);
     }
+    if (type->tp_dictoffset) {
+        et->ht_cached_keys = _PyDict_NewKeysForClass();
+    }
     if (add_weak) {
         assert(!base->tp_itemsize);
         type->tp_weaklistoffset = slotoffset;
@@ -2434,6 +2437,9 @@
             res->ht_type.tp_doc = tp_doc;
         }
     }
+    if (res->ht_type.tp_dictoffset) {
+        res->ht_cached_keys = _PyDict_NewKeysForClass();
+    }
 
     if (PyType_Ready(&res->ht_type) < 0)
         goto fail;
diff -r 58bd6a58365d -r a9138aba7896 Python/ceval.c
--- a/Python/ceval.c	Wed Feb 08 04:09:37 2012 +0100
+++ b/Python/ceval.c	Wed Feb 08 13:38:20 2012 +0000
@@ -2102,29 +2102,30 @@
                 /* Inline the PyDict_GetItem() calls.
                    WARNING: this is an extreme speed hack.
                    Do not try this at home. */
+                PyObject **value_addr;
                 Py_hash_t hash = ((PyASCIIObject *)w)->hash;
                 if (hash != -1) {
                     PyDictObject *d;
-                    PyDictEntry *e;
+                    PyDictKeyEntry *e;
                     d = (PyDictObject *)(f->f_globals);
-                    e = d->ma_lookup(d, w, hash);
+                    e = d->ma_keys->dk_lookup(d, w, hash, &value_addr);
                     if (e == NULL) {
                         x = NULL;
                         break;
                     }
-                    x = e->me_value;
+                    x = *value_addr;
                     if (x != NULL) {
                         Py_INCREF(x);
                         PUSH(x);
                         DISPATCH();
                     }
                     d = (PyDictObject *)(f->f_builtins);
-                    e = d->ma_lookup(d, w, hash);
+                    e = d->ma_keys->dk_lookup(d, w, hash, &value_addr);
                     if (e == NULL) {
                         x = NULL;
                         break;
                     }
-                    x = e->me_value;
+                    x = *value_addr;
                     if (x != NULL) {
                         Py_INCREF(x);
                         PUSH(x);
diff -r 58bd6a58365d -r a9138aba7896 Tools/gdb/libpython.py
--- a/Tools/gdb/libpython.py	Wed Feb 08 04:09:37 2012 +0100
+++ b/Tools/gdb/libpython.py	Wed Feb 08 13:38:20 2012 +0000
@@ -634,9 +634,14 @@
         Yields a sequence of (PyObjectPtr key, PyObjectPtr value) pairs,
         analagous to dict.iteritems()
         '''
-        for i in safe_range(self.field('ma_mask') + 1):
-            ep = self.field('ma_table') + i
-            pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value'])
+        keys = self.field('ma_keys')
+        values = self.field('ma_values')
+        for i in safe_range(keys['dk_size']):
+            ep = keys['dk_entries'].address + i
+            if long(values):
+                pyop_value = PyObjectPtr.from_pyobject_ptr(values[i])
+            else:
+                pyop_value = PyObjectPtr.from_pyobject_ptr(ep['_me_value'])
             if not pyop_value.is_null():
                 pyop_key = PyObjectPtr.from_pyobject_ptr(ep['me_key'])
                 yield (pyop_key, pyop_value)