diff -r 54c8d785bd39 Include/object.h
--- a/Include/object.h	Sun Aug 14 16:10:31 2016 -0400
+++ b/Include/object.h	Mon Aug 15 08:12:05 2016 +0900
@@ -703,17 +703,16 @@
  * Trust me <wink>:  while painful, this is 20x easier to understand than,
  * e.g, defining _Py_NewReference five different times in a maze of nested
  * #ifdefs (we used to do that -- it was impenetrable).
  */
 #ifdef Py_REF_DEBUG
 PyAPI_DATA(Py_ssize_t) _Py_RefTotal;
 PyAPI_FUNC(void) _Py_NegativeRefcount(const char *fname,
                                             int lineno, PyObject *op);
-PyAPI_FUNC(PyObject *) _PyDict_Dummy(void);
 PyAPI_FUNC(Py_ssize_t) _Py_GetRefTotal(void);
 #define _Py_INC_REFTOTAL        _Py_RefTotal++
 #define _Py_DEC_REFTOTAL        _Py_RefTotal--
 #define _Py_REF_DEBUG_COMMA     ,
 #define _Py_CHECK_REFCNT(OP)                                    \
 {       if (((PyObject*)OP)->ob_refcnt < 0)                             \
                 _Py_NegativeRefcount(__FILE__, __LINE__,        \
                                      (PyObject *)(OP));         \
diff -r 54c8d785bd39 Include/pyport.h
--- a/Include/pyport.h	Sun Aug 14 16:10:31 2016 -0400
+++ b/Include/pyport.h	Mon Aug 15 08:12:05 2016 +0900
@@ -82,16 +82,26 @@
 
 /* a build with 30-bit digits for Python integers needs an exact-width
  * 32-bit unsigned integer type to store those digits.  (We could just use
  * type 'unsigned long', but that would be wasteful on a system where longs
  * are 64-bits.)  On Unix systems, the autoconf macro AC_TYPE_UINT32_T defines
  * uint32_t to be such a type unless stdint.h or inttypes.h defines uint32_t.
  * However, it doesn't set HAVE_UINT32_T, so we do that here.
  */
+#ifdef uint16_t
+#define HAVE_UINT16_T 1
+#endif
+
+#ifdef HAVE_UINT16_T
+#ifndef PY_UINT16_T
+#define PY_UINT16_T uint16_t
+#endif
+#endif
+
 #ifdef uint32_t
 #define HAVE_UINT32_T 1
 #endif
 
 #ifdef HAVE_UINT32_T
 #ifndef PY_UINT32_T
 #define PY_UINT32_T uint32_t
 #endif
@@ -106,16 +116,26 @@
 
 #ifdef HAVE_UINT64_T
 #ifndef PY_UINT64_T
 #define PY_UINT64_T uint64_t
 #endif
 #endif
 
 /* Signed variants of the above */
+#ifdef int16_t
+#define HAVE_INT16_T 1
+#endif
+
+#ifdef HAVE_INT16_T
+#ifndef PY_INT16_T
+#define PY_INT16_T int16_t
+#endif
+#endif
+
 #ifdef int32_t
 #define HAVE_INT32_T 1
 #endif
 
 #ifdef HAVE_INT32_T
 #ifndef PY_INT32_T
 #define PY_INT32_T int32_t
 #endif
diff -r 54c8d785bd39 Lib/test/test_descr.py
--- a/Lib/test/test_descr.py	Sun Aug 14 16:10:31 2016 -0400
+++ b/Lib/test/test_descr.py	Mon Aug 15 08:12:05 2016 +0900
@@ -5111,22 +5111,24 @@
         class A:
             pass
         class B(A):
             pass
 
         a, b = A(), B()
         self.assertEqual(sys.getsizeof(vars(a)), sys.getsizeof(vars(b)))
         self.assertLess(sys.getsizeof(vars(a)), sys.getsizeof({}))
-        a.x, a.y, a.z, a.w = range(4)
+        # Initial hash table can contain at most 5 elements.
+        # Set 6 attributes to cause internal resizing.
+        a.x, a.y, a.z, a.w, a.v, a.u = range(6)
         self.assertNotEqual(sys.getsizeof(vars(a)), sys.getsizeof(vars(b)))
         a2 = A()
         self.assertEqual(sys.getsizeof(vars(a)), sys.getsizeof(vars(a2)))
         self.assertLess(sys.getsizeof(vars(a)), sys.getsizeof({}))
-        b.u, b.v, b.w, b.t = range(4)
+        b.u, b.v, b.w, b.t, b.s, b.r = range(6)
         self.assertLess(sys.getsizeof(vars(b)), sys.getsizeof({}))
 
 
 class DebugHelperMeta(type):
     """
     Sets default __doc__ and simplifies repr() output.
     """
     def __new__(mcls, name, bases, attrs):
diff -r 54c8d785bd39 Lib/test/test_ordered_dict.py
--- a/Lib/test/test_ordered_dict.py	Sun Aug 14 16:10:31 2016 -0400
+++ b/Lib/test/test_ordered_dict.py	Mon Aug 15 08:12:05 2016 +0900
@@ -1,8 +1,9 @@
+import builtins
 import contextlib
 import copy
 import gc
 import pickle
 from random import randrange, shuffle
 import struct
 import sys
 import unittest
@@ -616,42 +617,62 @@
 
 
 class PurePythonOrderedDictTests(OrderedDictTests, unittest.TestCase):
 
     module = py_coll
     OrderedDict = py_coll.OrderedDict
 
 
+class CPythonBuiltinDictTests(unittest.TestCase):
+    """Check that the builtin dict preserves insertion order.
+
+    Only a selected subset of the OrderedDict tests is reused here.
+    """
+
+    module = builtins
+    OrderedDict = dict
+
+for method in (
+    "test_init", "test_update", "test_abc", "test_clear", "test_delitem",
+    "test_setitem", "test_detect_deletion_during_iteration",
+    "test_popitem", "test_reinsert", "test_override_update",
+    "test_highly_nested", "test_highly_nested_subclass",
+    "test_delitem_hash_collision"):
+    setattr(CPythonBuiltinDictTests, method, getattr(OrderedDictTests, method))
+del method
+
+
 @unittest.skipUnless(c_coll, 'requires the C version of the collections module')
 class CPythonOrderedDictTests(OrderedDictTests, unittest.TestCase):
 
     module = c_coll
     OrderedDict = c_coll.OrderedDict
     check_sizeof = support.check_sizeof
 
     @support.cpython_only
     def test_sizeof_exact(self):
         OrderedDict = self.OrderedDict
         calcsize = struct.calcsize
         size = support.calcobjsize
         check = self.check_sizeof
 
-        basicsize = size('n2P' + '3PnPn2P') + calcsize('2nPn')
-        entrysize = calcsize('n2P') + calcsize('P')
+        basicsize = size('n2P' + '3PnPn2P') + calcsize('2nP2n')
+        entrysize = calcsize('n2P')
+        p = calcsize('P')
         nodesize = calcsize('Pn2P')
 
         od = OrderedDict()
-        check(od, basicsize + 8*entrysize)
+        check(od, basicsize + 8*p + 8 + 5*entrysize)  # 8byte indices + 8*2//3 * entry table
         od.x = 1
-        check(od, basicsize + 8*entrysize)
+        check(od, basicsize + 8*p + 8 + 5*entrysize)
         od.update([(i, i) for i in range(3)])
-        check(od, basicsize + 8*entrysize + 3*nodesize)
+        check(od, basicsize + 8*p + 8 + 5*entrysize + 3*nodesize)
         od.update([(i, i) for i in range(3, 10)])
-        check(od, basicsize + 16*entrysize + 10*nodesize)
+        check(od, basicsize + 16*p + 16 + 10*entrysize + 10*nodesize)
 
         check(od.keys(), size('P'))
         check(od.items(), size('P'))
         check(od.values(), size('P'))
 
         itersize = size('iP2n2P')
         check(iter(od), itersize)
         check(iter(od.keys()), itersize)
diff -r 54c8d785bd39 Lib/test/test_sys.py
--- a/Lib/test/test_sys.py	Sun Aug 14 16:10:31 2016 -0400
+++ b/Lib/test/test_sys.py	Mon Aug 15 08:12:05 2016 +0900
@@ -931,19 +931,19 @@
         # getset_descriptor (descriptor object)
         import collections
         check(collections.defaultdict.default_factory, size('3PP'))
         # wrapper_descriptor (descriptor object)
         check(int.__add__, size('3P2P'))
         # method-wrapper (descriptor object)
         check({}.__iter__, size('2P'))
         # dict
-        check({}, size('n2P') + calcsize('2nPn') + 8*calcsize('n2P'))
+        check({}, size('n2P') + calcsize('2nP2n') + 8 + (8*2//3)*calcsize('n2P'))
         longdict = {1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8}
-        check(longdict, size('n2P') + calcsize('2nPn') + 16*calcsize('n2P'))
+        check(longdict, size('n2P') + calcsize('2nP2n') + 16 + (16*2//3)*calcsize('n2P'))
         # dictionary-keyview
         check({}.keys(), size('P'))
         # dictionary-valueview
         check({}.values(), size('P'))
         # dictionary-itemview
         check({}.items(), size('P'))
         # dictionary iterator
         check(iter({}), size('P2nPn'))
@@ -1091,23 +1091,23 @@
         check(int, s)
         s = vsize(fmt +                 # PyTypeObject
                   '3P'                  # PyAsyncMethods
                   '36P'                 # PyNumberMethods
                   '3P'                  # PyMappingMethods
                   '10P'                 # PySequenceMethods
                   '2P'                  # PyBufferProcs
                   '4P')
-        # Separate block for PyDictKeysObject with 4 entries
-        s += calcsize("2nPn") + 4*calcsize("n2P")
+        # Separate block for PyDictKeysObject with 8 keys and 5 entries
+        s += calcsize("2nP2n") + 8 + 5*calcsize("n2P")
         # class
         class newstyleclass(object): pass
         check(newstyleclass, s)
         # dict with shared keys
-        check(newstyleclass().__dict__, size('n2P' + '2nPn'))
+        check(newstyleclass().__dict__, size('n2P' + '2nP2n'))
         # unicode
         # each tuple contains a string and its expected character size
         # don't put any static strings here, as they may contain
         # wchar_t or UTF-8 representations
         samples = ['1'*100, '\xff'*50,
                    '\u0100'*40, '\uffff'*100,
                    '\U00010000'*30, '\U0010ffff'*100]
         asciifields = "nnbP"
diff -r 54c8d785bd39 Lib/test/test_weakref.py
--- a/Lib/test/test_weakref.py	Sun Aug 14 16:10:31 2016 -0400
+++ b/Lib/test/test_weakref.py	Mon Aug 15 08:12:05 2016 +0900
@@ -1320,23 +1320,26 @@
         # Check that len() works when both iterating and removing keys
         # explicitly through various means (.pop(), .clear()...), while
         # implicit mutation is deferred because an iterator is alive.
         # (each call to testcontext() should schedule one item for removal
         #  for this test to work properly)
         o = Object(123456)
         with testcontext():
             n = len(dict)
-            dict.popitem()
+            # Since the underlying dict is ordered, the first item is popped
+            dict.pop(next(dict.keys()))
             self.assertEqual(len(dict), n - 1)
             dict[o] = o
             self.assertEqual(len(dict), n)
+        # last item in objects is removed from dict in context shutdown
         with testcontext():
             self.assertEqual(len(dict), n - 1)
-            dict.pop(next(dict.keys()))
+            # Then, (o, o) is popped
+            dict.popitem()
             self.assertEqual(len(dict), n - 2)
         with testcontext():
             self.assertEqual(len(dict), n - 3)
             del dict[next(dict.keys())]
             self.assertEqual(len(dict), n - 4)
         with testcontext():
             self.assertEqual(len(dict), n - 5)
             dict.popitem()
diff -r 54c8d785bd39 Objects/dict-common.h
--- a/Objects/dict-common.h	Sun Aug 14 16:10:31 2016 -0400
+++ b/Objects/dict-common.h	Mon Aug 15 08:12:05 2016 +0900
@@ -3,20 +3,30 @@
 
 typedef struct {
     /* Cached hash code of me_key. */
     Py_hash_t me_hash;
     PyObject *me_key;
     PyObject *me_value; /* This field is only meaningful for combined tables */
 } PyDictKeyEntry;
 
-typedef PyDictKeyEntry *(*dict_lookup_func)
-(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject ***value_addr);
+/* dict_lookup_func() returns the index of the entry, usable as DK_ENTRIES(dk)[index],
+ * or -1 when no entry is found, or -3 when a comparison raises an error.
+ */
+typedef Py_ssize_t (*dict_lookup_func)
+(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject ***value_addr,
+ Py_ssize_t *hashpos);
 
+#define DKIX_EMPTY (-1)
+#define DKIX_DUMMY (-2)  /* Used internally */
+#define DKIX_ERROR (-3)
+
+/* See dictobject.c for actual layout of DictKeysObject */
 struct _dictkeysobject {
     Py_ssize_t dk_refcnt;
     Py_ssize_t dk_size;
     dict_lookup_func dk_lookup;
     Py_ssize_t dk_usable;
-    PyDictKeyEntry dk_entries[1];
+    Py_ssize_t dk_nentries;  /* How many entries are used. */
+    char dk_indices[8];      /* dynamically sized. 8 is minimum. */
 };
 
 #endif
diff -r 54c8d785bd39 Objects/dictobject.c
--- a/Objects/dictobject.c	Sun Aug 14 16:10:31 2016 -0400
+++ b/Objects/dictobject.c	Mon Aug 15 08:12:05 2016 +0900
@@ -1,75 +1,108 @@
-
 /* Dictionary object implementation using a hash table */
 
 /* The distribution includes a separate file, Objects/dictnotes.txt,
    describing explorations into dictionary design and optimization.
    It covers typical dictionary use patterns, the parameters for
    tuning dictionaries, and several ideas for possible optimizations.
 */
 
+/* PyDictKeysObject
+
+This implements the dictionary's hashtable.
+
+As of Python 3.6, this is compact and ordered.  The basic idea is described here:
+https://morepypy.blogspot.jp/2015/01/faster-more-memory-efficient-and-more.html
+
+layout:
+
++---------------+
+| dk_refcnt     |
+| dk_size       |
+| dk_lookup     |
+| dk_usable     |
+| dk_nentries   |
++---------------+
+| dk_indices    |
+|               |
++---------------+
+| dk_entries    |
+|               |
++---------------+
+
+dk_indices is the actual hash table.  Each slot holds an index into
+dk_entries, or DKIX_EMPTY (-1) or DKIX_DUMMY (-2).
+It has dk_size slots.  The integer type of each slot depends on dk_size:
+
+* int8  for          dk_size <= 128
+* int16 for 256   <= dk_size <= 2**15
+* int32 for 2**16 <= dk_size <= 2**31
+* int64 for 2**32 <= dk_size
+
+dk_entries is an array of PyDictKeyEntry.  Its size is USABLE_FRACTION(dk_size).
+DK_ENTRIES(dk) can be used to get pointer to entries.
+*/
+
 
 /*
-There are four kinds of slots in the table:
-
-1. Unused.  me_key == me_value == NULL
-   Does not hold an active (key, value) pair now and never did.  Unused can
-   transition to Active upon key insertion.  This is the only case in which
-   me_key is NULL, and is each slot's initial state.
-
-2. Active.  me_key != NULL and me_key != dummy and me_value != NULL
-   Holds an active (key, value) pair.  Active can transition to Dummy or
-   Pending upon key deletion (for combined and split tables respectively).
-   This is the only case in which me_value != NULL.
-
-3. Dummy.  me_key == dummy and me_value == NULL
-   Previously held an active (key, value) pair, but that was deleted and an
-   active pair has not yet overwritten the slot.  Dummy can transition to
-   Active upon key insertion.  Dummy slots cannot be made Unused again
-   (cannot have me_key set to NULL), else the probe sequence in case of
-   collision would have no way to know they were once active.
-
-4. Pending. Not yet inserted or deleted from a split-table.
-   key != NULL, key != dummy and value == NULL
-
 The DictObject can be in one of two forms.
+
 Either:
   A combined table:
     ma_values == NULL, dk_refcnt == 1.
     Values are stored in the me_value field of the PyDictKeysObject.
-    Slot kind 4 is not allowed i.e.
-        key != NULL, key != dummy and value == NULL is illegal.
 Or:
   A split table:
     ma_values != NULL, dk_refcnt >= 1
     Values are stored in the ma_values array.
-    Only string (unicode) keys are allowed, no <dummy> keys are present.
-
-Note: .popitem() abuses the me_hash field of an Unused or Dummy slot to
-hold a search finger.  The me_hash field of Unused or Dummy slots has no
-meaning otherwise. As a consequence of this popitem always converts the dict
-to the combined-table form.
+    Only string (unicode) keys are allowed.
+
+There are four kinds of slots in the table (a slot is an index, together
+with the entry DK_ENTRIES(keys)[index] when index >= 0):
+
+1. Unused.  index == DKIX_EMPTY
+   Does not hold an active (key, value) pair now and never did.  Unused can
+   transition to Active upon key insertion.  This is each slot's initial state.
+
+2. Active.  index >= 0, me_key != NULL and me_value != NULL
+   Holds an active (key, value) pair.  Active can transition to Dummy or
+   Pending upon key deletion (for combined and split tables respectively).
+   This is the only case in which me_value != NULL.
+
+3. Dummy.  index == DKIX_DUMMY  (combined only)
+   Previously held an active (key, value) pair, but that was deleted and an
+   active pair has not yet overwritten the slot.  Dummy can transition to
+   Active upon key insertion.  Dummy slots cannot be made Unused again
+   else the probe sequence in case of collision would have no way to know
+   they were once active.
+
+4. Pending. index >= 0, key != NULL, and value == NULL  (split only)
+   Not yet inserted in split-table.
 */
 
-/* PyDict_MINSIZE_SPLIT is the minimum size of a split dictionary.
- * It must be a power of 2, and at least 4.
- * Resizing of split dictionaries is very rare, so the saving memory is more
- * important than the cost of resizing.
+/* Preserving insertion order
+ *
+ * In a combined table, it's simple: dk_entries is mostly append-only.
+ * The one exception is .popitem(), which deletes the last item in dk_entries
+ * and decrements dk_nentries to achieve O(1).
+ *
+ * In a split table, inserting into a pending entry is allowed only for
+ * dk_entries[ix] where ix == mp->ma_used.  Inserting at any other index, or
+ * deleting an item, converts the dict to the combined-table form.
  */
-#define PyDict_MINSIZE_SPLIT 4
-
-/* PyDict_MINSIZE_COMBINED is the starting size for any new, non-split dict.
+
+/* PyDict_MINSIZE is the starting size for any new dict.
  * 8 allows dicts with no more than 5 active entries; experiments suggested
  * this suffices for the majority of dicts (consisting mostly of usually-small
  * dicts created to pass keyword arguments).
  * Making this 8, rather than 4 reduces the number of resizes for most
  * dictionaries, without any significant extra memory use.
  */
-#define PyDict_MINSIZE_COMBINED 8
+#define PyDict_MINSIZE 8
 
 #include "Python.h"
 #include "dict-common.h"
 #include "stringlib/eq.h"
 
 /*[clinic input]
 class dict "PyDictObject *" "&PyDict_Type"
 [clinic start generated code]*/
@@ -172,64 +205,53 @@
 (e.g., computing 5*j can go on at the same time as computing 1+perturb in the
 above, and then shifting perturb can be done while the table index is being
 masked); and the PyDictObject struct required a member to hold the table's
 polynomial.  In Tim's experiments the current scheme ran faster, produced
 equally good collision statistics, needed less code & used less memory.
 
 */
 
-/* Object used as dummy key to fill deleted entries
- * This could be any unique object,
- * use a custom type in order to minimise coupling.
-*/
-static PyObject _dummy_struct;
-
-#define dummy (&_dummy_struct)
-
-#ifdef Py_REF_DEBUG
-PyObject *
-_PyDict_Dummy(void)
-{
-    return dummy;
-}
-#endif
-
 /* forward declarations */
-static PyDictKeyEntry *lookdict(PyDictObject *mp, PyObject *key,
-                                Py_hash_t hash, PyObject ***value_addr);
-static PyDictKeyEntry *lookdict_unicode(PyDictObject *mp, PyObject *key,
-                                        Py_hash_t hash, PyObject ***value_addr);
-static PyDictKeyEntry *
+static Py_ssize_t lookdict(PyDictObject *mp, PyObject *key,
+                           Py_hash_t hash, PyObject ***value_addr, Py_ssize_t *hashpos);
+static Py_ssize_t lookdict_unicode(PyDictObject *mp, PyObject *key,
+                                   Py_hash_t hash, PyObject ***value_addr, Py_ssize_t *hashpos);
+static Py_ssize_t
 lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
-                         Py_hash_t hash, PyObject ***value_addr);
-static PyDictKeyEntry *lookdict_split(PyDictObject *mp, PyObject *key,
-                                      Py_hash_t hash, PyObject ***value_addr);
+                         Py_hash_t hash, PyObject ***value_addr, Py_ssize_t *hashpos);
+static Py_ssize_t lookdict_split(PyDictObject *mp, PyObject *key,
+                                 Py_hash_t hash, PyObject ***value_addr, Py_ssize_t *hashpos);
 
 static int dictresize(PyDictObject *mp, Py_ssize_t minused);
 
-/* Dictionary reuse scheme to save calls to malloc, free, and memset */
+/* Dictionary reuse scheme to save calls to malloc and free */
 #ifndef PyDict_MAXFREELIST
 #define PyDict_MAXFREELIST 80
 #endif
 static PyDictObject *free_list[PyDict_MAXFREELIST];
 static int numfree = 0;
+static PyDictKeysObject *keys_free_list[PyDict_MAXFREELIST];
+static int numfreekeys = 0;
 
 #include "clinic/dictobject.c.h"
 
 int
 PyDict_ClearFreeList(void)
 {
     PyDictObject *op;
-    int ret = numfree;
+    int ret = numfree + numfreekeys;
     while (numfree) {
         op = free_list[--numfree];
         assert(PyDict_CheckExact(op));
         PyObject_GC_Del(op);
     }
+    while (numfreekeys) {
+        PyObject_FREE(keys_free_list[--numfreekeys]);
+    }
     return ret;
 }
 
 /* Print summary info about the state of the optimized allocator */
 void
 _PyDict_DebugMallocStats(FILE *out)
 {
     _PyDebugAllocatorStats(out,
@@ -238,50 +260,104 @@
 
 
 void
 PyDict_Fini(void)
 {
     PyDict_ClearFreeList();
 }
 
+#define DK_SIZE(dk) ((dk)->dk_size)
+#if SIZEOF_VOID_P > 4
+#define DK_IXSIZE(dk) (DK_SIZE(dk) <= 0xff ? 1 : DK_SIZE(dk) <= 0xffff ? 2 : \
+                       DK_SIZE(dk) <= 0xffffffff ? 4 : sizeof(Py_ssize_t))
+#else
+#define DK_IXSIZE(dk) (DK_SIZE(dk) <= 0xff ? 1 : DK_SIZE(dk) <= 0xffff ? 2 : \
+                       sizeof(Py_ssize_t))
+#endif
+#define DK_ENTRIES(dk) ((PyDictKeyEntry*)(&(dk)->dk_indices[DK_SIZE(dk) * \
+                        DK_IXSIZE(dk)]))
+
 #define DK_DEBUG_INCREF _Py_INC_REFTOTAL _Py_REF_DEBUG_COMMA
 #define DK_DEBUG_DECREF _Py_DEC_REFTOTAL _Py_REF_DEBUG_COMMA
 
 #define DK_INCREF(dk) (DK_DEBUG_INCREF ++(dk)->dk_refcnt)
 #define DK_DECREF(dk) if (DK_DEBUG_DECREF (--(dk)->dk_refcnt) == 0) free_keys_object(dk)
-#define DK_SIZE(dk) ((dk)->dk_size)
 #define DK_MASK(dk) (((dk)->dk_size)-1)
 #define IS_POWER_OF_2(x) (((x) & (x-1)) == 0)
 
+
+/* Read slot i of dk_indices: DKIX_EMPTY, DKIX_DUMMY, or an entry index >= 0.
+ * The slot width is chosen from dk_size. */
+Py_LOCAL_INLINE(Py_ssize_t)
+dk_get_index(PyDictKeysObject *keys, Py_ssize_t i)
+{
+    Py_ssize_t s = DK_SIZE(keys);
+    if (s <= 0xff) {
+        /* signed char: plain char may be unsigned, turning -1 into 255. */
+        return ((signed char*) &keys->dk_indices[0])[i];
+    }
+    if (s <= 0xffff) {
+        return ((PY_INT16_T*)&keys->dk_indices[0])[i];
+    }
+#if SIZEOF_VOID_P > 4
+    if (s <= 0xffffffff) {
+        return ((PY_INT32_T*)&keys->dk_indices[0])[i];
+    }
+#endif
+    return ((Py_ssize_t*)&keys->dk_indices[0])[i];
+}
+
+/* Store ix (DKIX_EMPTY, DKIX_DUMMY or an entry index) in slot i of dk_indices.
+ * signed char, not plain char (may be unsigned) or int8_t (optional in C99). */
+Py_LOCAL_INLINE(void)
+dk_set_index(PyDictKeysObject *keys, Py_ssize_t i, Py_ssize_t ix)
+{
+    if (DK_SIZE(keys) <= 0xff) {
+        ((signed char*) &keys->dk_indices[0])[i] = (signed char)ix;
+    }
+    else if (DK_SIZE(keys) <= 0xffff) {
+        ((PY_INT16_T*) &keys->dk_indices[0])[i] = (PY_INT16_T)ix;
+    }
+#if SIZEOF_VOID_P > 4
+    else if (DK_SIZE(keys) <= 0xffffffff) {
+        ((PY_INT32_T*) &keys->dk_indices[0])[i] = (PY_INT32_T)ix;
+    }
+#endif
+    else {
+        ((Py_ssize_t*) &keys->dk_indices[0])[i] = ix;
+    }
+}
+
+
 /* USABLE_FRACTION is the maximum dictionary load.
- * Currently set to (2n+1)/3. Increasing this ratio makes dictionaries more
- * dense resulting in more collisions.  Decreasing it improves sparseness
- * at the expense of spreading entries over more cache lines and at the
- * cost of total memory consumed.
+ * Increasing this ratio makes dictionaries more dense resulting in more
+ * collisions.  Decreasing it improves sparseness at the expense of spreading
+ * indices over more cache lines and at the cost of total memory consumed.
  *
  * USABLE_FRACTION must obey the following:
  *     (0 < USABLE_FRACTION(n) < n) for all n >= 2
  *
- * USABLE_FRACTION should be very quick to calculate.
- * Fractions around 5/8 to 2/3 seem to work well in practice.
+ * USABLE_FRACTION should be quick to calculate.
+ * Fractions around 1/2 to 2/3 seem to work well in practice.
  */
-
-/* Use (2n+1)/3 rather than 2n+3 because: it makes no difference for
- * combined tables (the two fractions round to the same number n < ),
- * but 2*4/3 is 2 whereas (2*4+1)/3 is 3 which potentially saves quite
- * a lot of space for small, split tables */
-#define USABLE_FRACTION(n) ((((n) << 1)+1)/3)
-
-/* Alternative fraction that is otherwise close enough to (2n+1)/3 to make
+#define USABLE_FRACTION(n) (((n) << 1)/3)
+
+/* ESTIMATE_SIZE is the inverse function of USABLE_FRACTION.
+ * This can be used to reserve enough size to insert n entries without
+ * resizing.
+ */
+#define ESTIMATE_SIZE(n)  (((n)*3) >> 1)
+
+/* Alternative fraction that is otherwise close enough to 2n/3 to make
  * little difference. 8 * 2/3 == 8 * 5/8 == 5. 16 * 2/3 == 16 * 5/8 == 10.
  * 32 * 2/3 = 21, 32 * 5/8 = 20.
  * Its advantage is that it is faster to compute on machines with slow division.
  * #define USABLE_FRACTION(n) (((n) >> 1) + ((n) >> 2) - ((n) >> 3))
-*/
+ */
 
 /* GROWTH_RATE. Growth rate upon hitting maximum load.
  * Currently set to used*2 + capacity/2.
  * This means that dicts double in size when growing without deletions,
  * but have more head room when the number of deletions is on a par with the
  * number of insertions.
  * Raising this to used*4 doubles memory consumption depending on the size of
  * the dictionary, but results in half the number of resizes, less effort to
@@ -299,67 +375,88 @@
 /* This immutable, empty PyDictKeysObject is used for PyDict_Clear()
  * (which cannot fail and thus can do no allocation).
  */
 static PyDictKeysObject empty_keys_struct = {
         2, /* dk_refcnt 1 for this struct, 1 for dummy_struct */
         1, /* dk_size */
         lookdict_split, /* dk_lookup */
         0, /* dk_usable (immutable) */
-        {
-            { 0, 0, 0 } /* dk_entries (empty) */
-        }
+        0, /* dk_nentries */
+        {DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY,
+         DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY, DKIX_EMPTY}, /* dk_indices */
 };
 
 static PyObject *empty_values[1] = { NULL };
 
 #define Py_EMPTY_KEYS &empty_keys_struct
 
 static PyDictKeysObject *new_keys_object(Py_ssize_t size)
 {
     PyDictKeysObject *dk;
-    Py_ssize_t i;
-    PyDictKeyEntry *ep0;
-
-    assert(size >= PyDict_MINSIZE_SPLIT);
+    Py_ssize_t es, usable;
+
+    assert(size >= PyDict_MINSIZE);
     assert(IS_POWER_OF_2(size));
-    dk = PyObject_MALLOC(sizeof(PyDictKeysObject) +
-                      sizeof(PyDictKeyEntry) * (size-1));
-    if (dk == NULL) {
-        PyErr_NoMemory();
-        return NULL;
+
+    usable = USABLE_FRACTION(size);
+    if (size <= 0xff) {
+        es = 1;
+    }
+    else if (size <= 0xffff) {
+        es = 2;
+    }
+#if SIZEOF_VOID_P > 4
+    else if (size <= 0xffffffff) {
+        es = 4;
+    }
+#endif
+    else {
+        es = sizeof(Py_ssize_t);
+    }
+
+    if (size == PyDict_MINSIZE && numfreekeys > 0) {
+        dk = keys_free_list[--numfreekeys];
+    }
+    else {
+        dk = PyObject_MALLOC(sizeof(PyDictKeysObject) - 8 +
+                             es * size +
+                             sizeof(PyDictKeyEntry) * usable);
+        if (dk == NULL) {
+            PyErr_NoMemory();
+            return NULL;
+        }
     }
     DK_DEBUG_INCREF dk->dk_refcnt = 1;
     dk->dk_size = size;
-    dk->dk_usable = USABLE_FRACTION(size);
-    ep0 = &dk->dk_entries[0];
-    /* Hash value of slot 0 is used by popitem, so it must be initialized */
-    ep0->me_hash = 0;
-    for (i = 0; i < size; i++) {
-        ep0[i].me_key = NULL;
-        ep0[i].me_value = NULL;
-    }
+    dk->dk_usable = usable;
     dk->dk_lookup = lookdict_unicode_nodummy;
+    dk->dk_nentries = 0;
+    memset(&dk->dk_indices[0], 0xff, es * size);
+    memset(DK_ENTRIES(dk), 0, sizeof(PyDictKeyEntry) * usable);
     return dk;
 }
 
 static void
 free_keys_object(PyDictKeysObject *keys)
 {
-    PyDictKeyEntry *entries = &keys->dk_entries[0];
+    PyDictKeyEntry *entries = DK_ENTRIES(keys);
     Py_ssize_t i, n;
-    for (i = 0, n = DK_SIZE(keys); i < n; i++) {
+    for (i = 0, n = keys->dk_nentries; i < n; i++) {
         Py_XDECREF(entries[i].me_key);
         Py_XDECREF(entries[i].me_value);
     }
+    if (keys->dk_size == PyDict_MINSIZE && numfreekeys < PyDict_MAXFREELIST) {
+        keys_free_list[numfreekeys++] = keys;
+        return;
+    }
     PyObject_FREE(keys);
 }
 
 #define new_values(size) PyMem_NEW(PyObject *, size)
-
 #define free_values(values) PyMem_FREE(values)
 
 /* Consumes a reference to the keys object */
 static PyObject *
 new_dict(PyDictKeysObject *keys, PyObject **values)
 {
     PyDictObject *mp;
     assert(keys != NULL);
@@ -385,297 +482,397 @@
 
 /* Consumes a reference to the keys object */
 static PyObject *
 new_dict_with_shared_keys(PyDictKeysObject *keys)
 {
     PyObject **values;
     Py_ssize_t i, size;
 
-    size = DK_SIZE(keys);
+    size = USABLE_FRACTION(DK_SIZE(keys));
     values = new_values(size);
     if (values == NULL) {
         DK_DECREF(keys);
         return PyErr_NoMemory();
     }
     for (i = 0; i < size; i++) {
         values[i] = NULL;
     }
     return new_dict(keys, values);
 }
 
 PyObject *
 PyDict_New(void)
 {
-    PyDictKeysObject *keys = new_keys_object(PyDict_MINSIZE_COMBINED);
+    PyDictKeysObject *keys = new_keys_object(PyDict_MINSIZE);
     if (keys == NULL)
         return NULL;
     return new_dict(keys, NULL);
 }
 
+/* Return the hash table slot that refers to entry `index`, or DKIX_EMPTY */
+static Py_ssize_t
+lookdict_index(PyDictKeysObject *k, Py_hash_t hash, Py_ssize_t index)
+{
+    size_t i, perturb;
+    size_t mask = DK_MASK(k);
+    Py_ssize_t ix;
+
+    i = (size_t)hash & mask;
+    ix = dk_get_index(k, i);
+    if (ix == index) {
+        return i;
+    }
+    if (ix == DKIX_EMPTY) {
+        return DKIX_EMPTY;
+    }
+
+    for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
+        i = mask & ((i << 2) + i + perturb + 1);
+        ix = dk_get_index(k, i);
+        if (ix == index) {
+            return i;
+        }
+        if (ix == DKIX_EMPTY) {
+            return DKIX_EMPTY;
+        }
+    }
+    assert(0);          /* NOT REACHED */
+    return DKIX_ERROR;
+}
+
 /*
 The basic lookup function used by all operations.
 This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4.
 Open addressing is preferred over chaining since the link overhead for
 chaining would be substantial (100% with typical malloc overhead).
 
 The initial probe index is computed as hash mod the table size. Subsequent
 probe indices are computed as explained earlier.
 
 All arithmetic on hash should ignore overflow.
 
 The details in this version are due to Tim Peters, building on many past
 contributions by Reimer Behrends, Jyrki Alakuijala, Vladimir Marangozov and
 Christian Tismer.
 
-lookdict() is general-purpose, and may return NULL if (and only if) a
+lookdict() is general-purpose, and may return DKIX_ERROR if (and only if) a
 comparison raises an exception (this was new in Python 2.5).
 lookdict_unicode() below is specialized to string keys, comparison of which can
-never raise an exception; that function can never return NULL.
+never raise an exception; that function can never return DKIX_ERROR.
 lookdict_unicode_nodummy is further specialized for string keys that cannot be
 the <dummy> value.
-For both, when the key isn't found a PyDictEntry* is returned
-where the key would have been found, *value_addr points to the matching value
-slot.
+For both, when the key isn't found DKIX_EMPTY is returned, *value_addr is set to
+NULL and *hashpos (if hashpos != NULL) is the slot where the index should go.
 */
-static PyDictKeyEntry *
+static Py_ssize_t
 lookdict(PyDictObject *mp, PyObject *key,
-         Py_hash_t hash, PyObject ***value_addr)
+         Py_hash_t hash, PyObject ***value_addr, Py_ssize_t *hashpos)
 {
-    size_t i;
-    size_t perturb;
-    PyDictKeyEntry *freeslot;
-    size_t mask;
-    PyDictKeyEntry *ep0;
-    PyDictKeyEntry *ep;
+    size_t i, perturb, mask;
+    Py_ssize_t ix, freeslot;
     int cmp;
+    PyDictKeysObject *dk;
+    PyDictKeyEntry *ep0, *ep;
     PyObject *startkey;
 
 top:
-    mask = DK_MASK(mp->ma_keys);
-    ep0 = &mp->ma_keys->dk_entries[0];
+    dk = mp->ma_keys;
+    mask = DK_MASK(dk);
+    ep0 = DK_ENTRIES(dk);
     i = (size_t)hash & mask;
-    ep = &ep0[i];
-    if (ep->me_key == NULL || ep->me_key == key) {
-        *value_addr = &ep->me_value;
-        return ep;
+
+    ix = dk_get_index(dk, i);
+    if (ix == DKIX_EMPTY) {
+        if (hashpos != NULL)
+            *hashpos = i;
+        *value_addr = NULL;
+        return DKIX_EMPTY;
     }
-    if (ep->me_key == dummy)
-        freeslot = ep;
+    if (ix == DKIX_DUMMY) {
+        freeslot = i;
+    }
     else {
-        if (ep->me_hash == hash) {
+        ep = &ep0[ix];
+        if (ep->me_key == key) {
+            *value_addr = &ep->me_value;
+            if (hashpos != NULL)
+                *hashpos = i;
+            return ix;
+        }
+        if (ep->me_key != NULL && ep->me_hash == hash) {
             startkey = ep->me_key;
             Py_INCREF(startkey);
             cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
             Py_DECREF(startkey);
             if (cmp < 0)
-                return NULL;
-            if (ep0 == mp->ma_keys->dk_entries && ep->me_key == startkey) {
+                return DKIX_ERROR;
+            if (dk == mp->ma_keys && ep->me_key == startkey) {
                 if (cmp > 0) {
                     *value_addr = &ep->me_value;
-                    return ep;
+                    if (hashpos != NULL)
+                        *hashpos = i;
+                    return ix;
                 }
             }
             else {
                 /* The dict was mutated, restart */
                 goto top;
             }
         }
-        freeslot = NULL;
+        freeslot = -1;
     }
 
-    /* In the loop, me_key == dummy is by far (factor of 100s) the
-       least likely outcome, so test for that last. */
     for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
-        i = (i << 2) + i + perturb + 1;
-        ep = &ep0[i & mask];
-        if (ep->me_key == NULL) {
-            if (freeslot == NULL) {
-                *value_addr = &ep->me_value;
-                return ep;
-            } else {
-                *value_addr = &freeslot->me_value;
-                return freeslot;
+        i = ((i << 2) + i + perturb + 1) & mask;
+        ix = dk_get_index(dk, i);
+        if (ix == DKIX_EMPTY) {
+            if (hashpos != NULL) {
+                *hashpos = (freeslot == -1) ? (Py_ssize_t)i : freeslot;
             }
+            *value_addr = NULL;
+            return ix;
         }
+        if (ix == DKIX_DUMMY) {
+            if (freeslot == -1)
+                freeslot = i;
+            continue;
+        }
+        ep = &ep0[ix];
         if (ep->me_key == key) {
+            if (hashpos != NULL) {
+                *hashpos = i;
+            }
             *value_addr = &ep->me_value;
-            return ep;
+            return ix;
         }
-        if (ep->me_hash == hash && ep->me_key != dummy) {
+        if (ep->me_hash == hash && ep->me_key != NULL) {
             startkey = ep->me_key;
             Py_INCREF(startkey);
             cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
             Py_DECREF(startkey);
             if (cmp < 0) {
                 *value_addr = NULL;
-                return NULL;
+                return DKIX_ERROR;
             }
-            if (ep0 == mp->ma_keys->dk_entries && ep->me_key == startkey) {
+            if (dk == mp->ma_keys && ep->me_key == startkey) {
                 if (cmp > 0) {
+                    if (hashpos != NULL) {
+                        *hashpos = i;
+                    }
                     *value_addr = &ep->me_value;
-                    return ep;
+                    return ix;
                 }
             }
             else {
                 /* The dict was mutated, restart */
                 goto top;
             }
         }
-        else if (ep->me_key == dummy && freeslot == NULL)
-            freeslot = ep;
     }
     assert(0);          /* NOT REACHED */
     return 0;
 }
 
 /* Specialized version for string-only keys */
-static PyDictKeyEntry *
+static Py_ssize_t
 lookdict_unicode(PyDictObject *mp, PyObject *key,
-                 Py_hash_t hash, PyObject ***value_addr)
+                 Py_hash_t hash, PyObject ***value_addr, Py_ssize_t *hashpos)
 {
-    size_t i;
-    size_t perturb;
-    PyDictKeyEntry *freeslot;
+    size_t i, perturb;
     size_t mask = DK_MASK(mp->ma_keys);
-    PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0];
-    PyDictKeyEntry *ep;
-
+    Py_ssize_t ix, freeslot;
+    PyDictKeyEntry *ep, *ep0 = DK_ENTRIES(mp->ma_keys);
+
+    assert(mp->ma_values == NULL);
     /* Make sure this function doesn't have to handle non-unicode keys,
        including subclasses of str; e.g., one reason to subclass
        unicodes is to override __eq__, and for speed we don't cater to
        that here. */
     if (!PyUnicode_CheckExact(key)) {
         mp->ma_keys->dk_lookup = lookdict;
-        return lookdict(mp, key, hash, value_addr);
+        return lookdict(mp, key, hash, value_addr, hashpos);
     }
     i = (size_t)hash & mask;
-    ep = &ep0[i];
-    if (ep->me_key == NULL || ep->me_key == key) {
-        *value_addr = &ep->me_value;
-        return ep;
+    ix = dk_get_index(mp->ma_keys, i);
+    if (ix == DKIX_EMPTY) {
+        if (hashpos != NULL)
+            *hashpos = i;
+        *value_addr = NULL;
+        return DKIX_EMPTY;
     }
-    if (ep->me_key == dummy)
-        freeslot = ep;
+    if (ix == DKIX_DUMMY) {
+        freeslot = i;
+    }
     else {
-        if (ep->me_hash == hash && unicode_eq(ep->me_key, key)) {
+        ep = &ep0[ix];
+        /* only split tables can have a live index with me_key == NULL */
+        assert(ep->me_key != NULL);
+        if (ep->me_key == key || (ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
+            if (hashpos != NULL)
+                *hashpos = i;
             *value_addr = &ep->me_value;
-            return ep;
+            return ix;
         }
-        freeslot = NULL;
+        freeslot = -1;
     }
 
-    /* In the loop, me_key == dummy is by far (factor of 100s) the
-       least likely outcome, so test for that last. */
     for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
-        i = (i << 2) + i + perturb + 1;
-        ep = &ep0[i & mask];
-        if (ep->me_key == NULL) {
-            if (freeslot == NULL) {
-                *value_addr = &ep->me_value;
-                return ep;
-            } else {
-                *value_addr = &freeslot->me_value;
-                return freeslot;
+        i = mask & ((i << 2) + i + perturb + 1);
+        ix = dk_get_index(mp->ma_keys, i);
+        if (ix == DKIX_EMPTY) {
+            if (hashpos != NULL) {
+                *hashpos = (freeslot == -1) ? (Py_ssize_t)i : freeslot;
             }
+            *value_addr = NULL;
+            return DKIX_EMPTY;
         }
+        if (ix == DKIX_DUMMY) {
+            if (freeslot == -1)
+                freeslot = i;
+            continue;
+        }
+        ep = &ep0[ix];
         if (ep->me_key == key
             || (ep->me_hash == hash
-            && ep->me_key != dummy
-            && unicode_eq(ep->me_key, key))) {
+                && ep->me_key != NULL
+                && unicode_eq(ep->me_key, key))) {
             *value_addr = &ep->me_value;
-            return ep;
+            if (hashpos != NULL) {
+                *hashpos = i;
+            }
+            return ix;
         }
-        if (ep->me_key == dummy && freeslot == NULL)
-            freeslot = ep;
     }
     assert(0);          /* NOT REACHED */
     return 0;
 }
 
 /* Faster version of lookdict_unicode when it is known that no <dummy> keys
  * will be present. */
-static PyDictKeyEntry *
+static Py_ssize_t
 lookdict_unicode_nodummy(PyDictObject *mp, PyObject *key,
-                         Py_hash_t hash, PyObject ***value_addr)
+                         Py_hash_t hash, PyObject ***value_addr, Py_ssize_t *hashpos)
 {
-    size_t i;
-    size_t perturb;
+    size_t i, perturb;
     size_t mask = DK_MASK(mp->ma_keys);
-    PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0];
-    PyDictKeyEntry *ep;
-
+    Py_ssize_t ix;
+    PyDictKeyEntry *ep, *ep0 = DK_ENTRIES(mp->ma_keys);
+
+    assert(mp->ma_values == NULL);
     /* Make sure this function doesn't have to handle non-unicode keys,
        including subclasses of str; e.g., one reason to subclass
        unicodes is to override __eq__, and for speed we don't cater to
        that here. */
     if (!PyUnicode_CheckExact(key)) {
         mp->ma_keys->dk_lookup = lookdict;
-        return lookdict(mp, key, hash, value_addr);
+        return lookdict(mp, key, hash, value_addr, hashpos);
     }
     i = (size_t)hash & mask;
-    ep = &ep0[i];
-    assert(ep->me_key == NULL || PyUnicode_CheckExact(ep->me_key));
-    if (ep->me_key == NULL || ep->me_key == key ||
+    ix = dk_get_index(mp->ma_keys, i);
+    assert (ix != DKIX_DUMMY);
+    if (ix == DKIX_EMPTY) {
+        if (hashpos != NULL)
+            *hashpos = i;
+        *value_addr = NULL;
+        return DKIX_EMPTY;
+    }
+    ep = &ep0[ix];
+    assert(ep->me_key != NULL && PyUnicode_CheckExact(ep->me_key));
+    if (ep->me_key == key ||
         (ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
+        if (hashpos != NULL)
+            *hashpos = i;
         *value_addr = &ep->me_value;
-        return ep;
+        return ix;
     }
     for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
-        i = (i << 2) + i + perturb + 1;
-        ep = &ep0[i & mask];
-        assert(ep->me_key == NULL || PyUnicode_CheckExact(ep->me_key));
-        if (ep->me_key == NULL || ep->me_key == key ||
+        i = mask & ((i << 2) + i + perturb + 1);
+        ix = dk_get_index(mp->ma_keys, i);
+        assert (ix != DKIX_DUMMY);
+        if (ix == DKIX_EMPTY) {
+            if (hashpos != NULL)
+                *hashpos = i;
+            *value_addr = NULL;
+            return DKIX_EMPTY;
+        }
+        ep = &ep0[ix];
+        assert(ep->me_key != NULL && PyUnicode_CheckExact(ep->me_key));
+        if (ep->me_key == key ||
             (ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
+            if (hashpos != NULL)
+                *hashpos = i;
             *value_addr = &ep->me_value;
-            return ep;
+            return ix;
         }
     }
     assert(0);          /* NOT REACHED */
     return 0;
 }
 
 /* Version of lookdict for split tables.
  * All split tables and only split tables use this lookup function.
  * Split tables only contain unicode keys and no dummy keys,
  * so algorithm is the same as lookdict_unicode_nodummy.
  */
-static PyDictKeyEntry *
+static Py_ssize_t
 lookdict_split(PyDictObject *mp, PyObject *key,
-               Py_hash_t hash, PyObject ***value_addr)
+               Py_hash_t hash, PyObject ***value_addr, Py_ssize_t *hashpos)
 {
-    size_t i;
-    size_t perturb;
+    size_t i, perturb;
     size_t mask = DK_MASK(mp->ma_keys);
-    PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0];
-    PyDictKeyEntry *ep;
-
+    Py_ssize_t ix;
+    PyDictKeyEntry *ep, *ep0 = DK_ENTRIES(mp->ma_keys);
+
+    /* mp must be a split table */
+    assert(mp->ma_values != NULL);
     if (!PyUnicode_CheckExact(key)) {
-        ep = lookdict(mp, key, hash, value_addr);
-        /* lookdict expects a combined-table, so fix value_addr */
-        i = ep - ep0;
-        *value_addr = &mp->ma_values[i];
-        return ep;
+        ix = lookdict(mp, key, hash, value_addr, hashpos);
+        if (ix >= 0) {
+            *value_addr = &mp->ma_values[ix];
+        }
+        return ix;
     }
+
     i = (size_t)hash & mask;
-    ep = &ep0[i];
+    ix = dk_get_index(mp->ma_keys, i);
+    if (ix == DKIX_EMPTY) {
+        if (hashpos != NULL)
+            *hashpos = i;
+        *value_addr = NULL;
+        return DKIX_EMPTY;
+    }
+    assert(ix >= 0);
+    ep = &ep0[ix];
     assert(ep->me_key == NULL || PyUnicode_CheckExact(ep->me_key));
-    if (ep->me_key == NULL || ep->me_key == key ||
+    if (ep->me_key == key ||
         (ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
-        *value_addr = &mp->ma_values[i];
-        return ep;
+        if (hashpos != NULL)
+            *hashpos = i;
+        *value_addr = &mp->ma_values[ix];
+        return ix;
     }
     for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
-        i = (i << 2) + i + perturb + 1;
-        ep = &ep0[i & mask];
+        i = mask & ((i << 2) + i + perturb + 1);
+        ix = dk_get_index(mp->ma_keys, i);
+        if (ix == DKIX_EMPTY) {
+            if (hashpos != NULL)
+                *hashpos = i;
+            *value_addr = NULL;
+            return DKIX_EMPTY;
+        }
+        assert(ix >= 0);
+        ep = &ep0[ix];
         assert(ep->me_key == NULL || PyUnicode_CheckExact(ep->me_key));
-        if (ep->me_key == NULL || ep->me_key == key ||
+        if (ep->me_key == key ||
             (ep->me_hash == hash && unicode_eq(ep->me_key, key))) {
-            *value_addr = &mp->ma_values[i & mask];
-            return ep;
+            if (hashpos != NULL)
+                *hashpos = i;
+            *value_addr = &mp->ma_values[ix];
+            return ix;
         }
     }
     assert(0);          /* NOT REACHED */
     return 0;
 }
 
 int
 _PyDict_HasOnlyStringKeys(PyObject *dict)
@@ -702,75 +899,77 @@
         } \
     } while(0)
 
 void
 _PyDict_MaybeUntrack(PyObject *op)
 {
     PyDictObject *mp;
     PyObject *value;
-    Py_ssize_t i, size;
+    Py_ssize_t i, numentries;
+    PyDictKeyEntry *ep0;
 
     if (!PyDict_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op))
         return;
 
     mp = (PyDictObject *) op;
-    size = DK_SIZE(mp->ma_keys);
+    ep0 = DK_ENTRIES(mp->ma_keys);
+    numentries = mp->ma_keys->dk_nentries;
     if (_PyDict_HasSplitTable(mp)) {
-        for (i = 0; i < size; i++) {
+        for (i = 0; i < numentries; i++) {
             if ((value = mp->ma_values[i]) == NULL)
                 continue;
             if (_PyObject_GC_MAY_BE_TRACKED(value)) {
-                assert(!_PyObject_GC_MAY_BE_TRACKED(
-                    mp->ma_keys->dk_entries[i].me_key));
+                assert(!_PyObject_GC_MAY_BE_TRACKED(ep0[i].me_key));
                 return;
             }
         }
     }
     else {
-        PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0];
-        for (i = 0; i < size; i++) {
+        for (i = 0; i < numentries; i++) {
             if ((value = ep0[i].me_value) == NULL)
                 continue;
             if (_PyObject_GC_MAY_BE_TRACKED(value) ||
                 _PyObject_GC_MAY_BE_TRACKED(ep0[i].me_key))
                 return;
         }
     }
     _PyObject_GC_UNTRACK(op);
 }
 
 /* Internal function to find slot for an item from its hash
  * when it is known that the key is not present in the dict.
  */
-static PyDictKeyEntry *
+static Py_ssize_t
 find_empty_slot(PyDictObject *mp, PyObject *key, Py_hash_t hash,
-                PyObject ***value_addr)
+                PyObject ***value_addr, Py_ssize_t *hashpos)
 {
-    size_t i;
-    size_t perturb;
+    size_t i, perturb;
     size_t mask = DK_MASK(mp->ma_keys);
-    PyDictKeyEntry *ep0 = &mp->ma_keys->dk_entries[0];
-    PyDictKeyEntry *ep;
-
+    Py_ssize_t ix;
+    PyDictKeyEntry *ep, *ep0 = DK_ENTRIES(mp->ma_keys);
+
+    assert(hashpos != NULL);
     assert(key != NULL);
     if (!PyUnicode_CheckExact(key))
         mp->ma_keys->dk_lookup = lookdict;
     i = hash & mask;
-    ep = &ep0[i];
-    for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) {
+    ix = dk_get_index(mp->ma_keys, i);
+    for (perturb = hash; ix != DKIX_EMPTY; perturb >>= PERTURB_SHIFT) {
         i = (i << 2) + i + perturb + 1;
-        ep = &ep0[i & mask];
+        ix = dk_get_index(mp->ma_keys, i & mask);
     }
+    ep = &ep0[mp->ma_keys->dk_nentries];
+    *hashpos = i & mask;
     assert(ep->me_value == NULL);
     if (mp->ma_values)
-        *value_addr = &mp->ma_values[i & mask];
+        *value_addr = &mp->ma_values[ix];
     else
         *value_addr = &ep->me_value;
-    return ep;
+    return ix;
 }
 
 static int
 insertion_resize(PyDictObject *mp)
 {
     return dictresize(mp, GROWTH_RATE(mp));
 }
 
@@ -779,68 +978,89 @@
 Used both by the internal resize routine and by the public insert routine.
 Returns -1 if an error occurred, or 0 on success.
 */
 static int
 insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
 {
     PyObject *old_value;
     PyObject **value_addr;
-    PyDictKeyEntry *ep;
-    assert(key != dummy);
+    PyDictKeyEntry *ep, *ep0;
+    Py_ssize_t hashpos, ix;
 
     if (mp->ma_values != NULL && !PyUnicode_CheckExact(key)) {
         if (insertion_resize(mp) < 0)
             return -1;
     }
 
-    ep = mp->ma_keys->dk_lookup(mp, key, hash, &value_addr);
-    if (ep == NULL) {
+    ix = mp->ma_keys->dk_lookup(mp, key, hash, &value_addr, &hashpos);
+    if (ix == DKIX_ERROR) {
         return -1;
     }
+
     assert(PyUnicode_CheckExact(key) || mp->ma_keys->dk_lookup == lookdict);
     Py_INCREF(value);
     MAINTAIN_TRACKING(mp, key, value);
+
+    /* Insertion order differs from shared keys' order: convert to combined */
+    if (_PyDict_HasSplitTable(mp) &&
+        ((ix >= 0 && *value_addr == NULL && mp->ma_used != ix) ||
+         (ix == DKIX_EMPTY && mp->ma_used != mp->ma_keys->dk_nentries))) {
+        if (insertion_resize(mp) < 0) {
+            Py_DECREF(value);
+            return -1;
+        }
+        find_empty_slot(mp, key, hash, &value_addr, &hashpos);
+        ix = DKIX_EMPTY;
+    }
+
+    if (ix == DKIX_EMPTY) {
+        /* Insert into new slot. */
+        if (mp->ma_keys->dk_usable <= 0) {
+            /* Need to resize. */
+            if (insertion_resize(mp) < 0) {
+                Py_DECREF(value);
+                return -1;
+            }
+            find_empty_slot(mp, key, hash, &value_addr, &hashpos);
+        }
+        ep0 = DK_ENTRIES(mp->ma_keys);
+        ep = &ep0[mp->ma_keys->dk_nentries];
+        dk_set_index(mp->ma_keys, hashpos, mp->ma_keys->dk_nentries);
+        Py_INCREF(key);
+        ep->me_key = key;
+        ep->me_hash = hash;
+        if (mp->ma_values) {
+            assert (mp->ma_values[mp->ma_keys->dk_nentries] == NULL);
+            mp->ma_values[mp->ma_keys->dk_nentries] = value;
+        }
+        else {
+            ep->me_value = value;
+        }
+        mp->ma_used++;
+        mp->ma_keys->dk_usable--;
+        mp->ma_keys->dk_nentries++;
+        assert(mp->ma_keys->dk_usable >= 0);
+        return 0;
+    }
+
+    assert(value_addr != NULL);
+
     old_value = *value_addr;
     if (old_value != NULL) {
-        assert(ep->me_key != NULL && ep->me_key != dummy);
         *value_addr = value;
         Py_DECREF(old_value); /* which **CAN** re-enter (see issue #22653) */
+        return 0;
     }
-    else {
-        if (ep->me_key == NULL) {
-            Py_INCREF(key);
-            if (mp->ma_keys->dk_usable <= 0) {
-                /* Need to resize. */
-                if (insertion_resize(mp) < 0) {
-                    Py_DECREF(key);
-                    Py_DECREF(value);
-                    return -1;
-                }
-                ep = find_empty_slot(mp, key, hash, &value_addr);
-            }
-            mp->ma_keys->dk_usable--;
-            assert(mp->ma_keys->dk_usable >= 0);
-            ep->me_key = key;
-            ep->me_hash = hash;
-        }
-        else {
-            if (ep->me_key == dummy) {
-                Py_INCREF(key);
-                ep->me_key = key;
-                ep->me_hash = hash;
-                Py_DECREF(dummy);
-            } else {
-                assert(_PyDict_HasSplitTable(mp));
-            }
-        }
-        mp->ma_used++;
-        *value_addr = value;
-        assert(ep->me_key != NULL && ep->me_key != dummy);
-    }
+
+    /* pending state: shared key exists but this dict has no value for it */
+    assert(_PyDict_HasSplitTable(mp));
+    assert(ix == mp->ma_used);
+    *value_addr = value;
+    mp->ma_used++;
     return 0;
 }
 
 /*
 Internal routine used by dictresize() to insert an item which is
 known to be absent from the dict.  This routine also assumes that
 the dict contains no deleted entries.  Besides the performance benefit,
 using insertdict() in dictresize() is dangerous (SF bug #1456209).
@@ -848,35 +1068,35 @@
 is responsible for incref'ing `key` and `value`.
 Neither mp->ma_used nor k->dk_usable are modified by this routine; the caller
 must set them correctly
 */
 static void
 insertdict_clean(PyDictObject *mp, PyObject *key, Py_hash_t hash,
                  PyObject *value)
 {
-    size_t i;
-    size_t perturb;
+    size_t i, perturb;
     PyDictKeysObject *k = mp->ma_keys;
     size_t mask = (size_t)DK_SIZE(k)-1;
-    PyDictKeyEntry *ep0 = &k->dk_entries[0];
+    PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys);
     PyDictKeyEntry *ep;
 
     assert(k->dk_lookup != NULL);
     assert(value != NULL);
     assert(key != NULL);
-    assert(key != dummy);
     assert(PyUnicode_CheckExact(key) || k->dk_lookup == lookdict);
     i = hash & mask;
-    ep = &ep0[i];
-    for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) {
-        i = (i << 2) + i + perturb + 1;
-        ep = &ep0[i & mask];
+    for (perturb = hash; dk_get_index(k, i) != DKIX_EMPTY;
+         perturb >>= PERTURB_SHIFT) {
+        i = mask & ((i << 2) + i + perturb + 1);
     }
+    ep = &ep0[k->dk_nentries];
     assert(ep->me_value == NULL);
+    dk_set_index(k, i, k->dk_nentries);
+    k->dk_nentries++;
     ep->me_key = key;
     ep->me_hash = hash;
     ep->me_value = value;
 }
 
 /*
 Restructure the table by allocating a new table and reinserting all
 items again.  When entries have been deleted, the new table may
@@ -885,23 +1105,23 @@
 then the values are temporarily copied into the table, it is resized as
 a combined table, then the me_value slots in the old table are NULLed out.
 After resizing a table is always combined,
 but can be resplit by make_keys_shared().
 */
 static int
 dictresize(PyDictObject *mp, Py_ssize_t minused)
 {
-    Py_ssize_t newsize;
+    Py_ssize_t i, newsize;
     PyDictKeysObject *oldkeys;
     PyObject **oldvalues;
-    Py_ssize_t i, oldsize;
-
-/* Find the smallest table size > minused. */
-    for (newsize = PyDict_MINSIZE_COMBINED;
+    PyDictKeyEntry *ep0;
+
+    /* Find the smallest table size > minused. */
+    for (newsize = PyDict_MINSIZE;
          newsize <= minused && newsize > 0;
          newsize <<= 1)
         ;
     if (newsize <= 0) {
         PyErr_NoMemory();
         return -1;
     }
     oldkeys = mp->ma_keys;
@@ -909,62 +1129,49 @@
     /* Allocate a new table. */
     mp->ma_keys = new_keys_object(newsize);
     if (mp->ma_keys == NULL) {
         mp->ma_keys = oldkeys;
         return -1;
     }
     if (oldkeys->dk_lookup == lookdict)
         mp->ma_keys->dk_lookup = lookdict;
-    oldsize = DK_SIZE(oldkeys);
     mp->ma_values = NULL;
-    /* If empty then nothing to copy so just return */
-    if (oldsize == 1) {
-        assert(oldkeys == Py_EMPTY_KEYS);
-        DK_DECREF(oldkeys);
-        return 0;
-    }
+    ep0 = DK_ENTRIES(oldkeys);
     /* Main loop below assumes we can transfer refcount to new keys
      * and that value is stored in me_value.
      * Increment ref-counts and copy values here to compensate
      * This (resizing a split table) should be relatively rare */
     if (oldvalues != NULL) {
-        for (i = 0; i < oldsize; i++) {
+        for (i = 0; i < oldkeys->dk_nentries; i++) {
             if (oldvalues[i] != NULL) {
-                Py_INCREF(oldkeys->dk_entries[i].me_key);
-                oldkeys->dk_entries[i].me_value = oldvalues[i];
+                Py_INCREF(ep0[i].me_key);
+                ep0[i].me_value = oldvalues[i];
             }
         }
     }
     /* Main loop */
-    for (i = 0; i < oldsize; i++) {
-        PyDictKeyEntry *ep = &oldkeys->dk_entries[i];
+    for (i = 0; i < oldkeys->dk_nentries; i++) {
+        PyDictKeyEntry *ep = &ep0[i];
         if (ep->me_value != NULL) {
-            assert(ep->me_key != dummy);
             insertdict_clean(mp, ep->me_key, ep->me_hash, ep->me_value);
         }
     }
     mp->ma_keys->dk_usable -= mp->ma_used;
     if (oldvalues != NULL) {
         /* NULL out me_value slot in oldkeys, in case it was shared */
-        for (i = 0; i < oldsize; i++)
-            oldkeys->dk_entries[i].me_value = NULL;
-        assert(oldvalues != empty_values);
-        free_values(oldvalues);
+        for (i = 0; i < oldkeys->dk_nentries; i++)
+            ep0[i].me_value = NULL;
         DK_DECREF(oldkeys);
+        if (oldvalues != empty_values) {
+            free_values(oldvalues);
+        }
     }
     else {
         assert(oldkeys->dk_lookup != lookdict_split);
-        if (oldkeys->dk_lookup != lookdict_unicode_nodummy) {
-            PyDictKeyEntry *ep0 = &oldkeys->dk_entries[0];
-            for (i = 0; i < oldsize; i++) {
-                if (ep0[i].me_key == dummy)
-                    Py_DECREF(dummy);
-            }
-        }
         assert(oldkeys->dk_refcnt == 1);
         DK_DEBUG_DECREF PyObject_FREE(oldkeys);
     }
     return 0;
 }
 
 /* Returns NULL if unable to split table.
  * A NULL return does not necessarily indicate an error */
@@ -986,18 +1193,18 @@
         }
         else if (mp->ma_keys->dk_lookup == lookdict_unicode) {
             /* Remove dummy keys */
             if (dictresize(mp, DK_SIZE(mp->ma_keys)))
                 return NULL;
         }
         assert(mp->ma_keys->dk_lookup == lookdict_unicode_nodummy);
         /* Copy values into a new array */
-        ep0 = &mp->ma_keys->dk_entries[0];
-        size = DK_SIZE(mp->ma_keys);
+        ep0 = DK_ENTRIES(mp->ma_keys);
+        size = USABLE_FRACTION(DK_SIZE(mp->ma_keys));
         values = new_values(size);
         if (values == NULL) {
             PyErr_SetString(PyExc_MemoryError,
                 "Not enough memory to allocate new values array");
             return NULL;
         }
         for (i = 0; i < size; i++) {
             values[i] = ep0[i].me_value;
@@ -1010,17 +1217,17 @@
     return mp->ma_keys;
 }
 
 PyObject *
 _PyDict_NewPresized(Py_ssize_t minused)
 {
     Py_ssize_t newsize;
     PyDictKeysObject *new_keys;
-    for (newsize = PyDict_MINSIZE_COMBINED;
+    for (newsize = PyDict_MINSIZE;
          newsize <= minused && newsize > 0;
          newsize <<= 1)
         ;
     new_keys = new_keys_object(newsize);
     if (new_keys == NULL)
         return NULL;
     return new_dict(new_keys, NULL);
 }
@@ -1034,18 +1241,18 @@
  * sequence.  A nasty example of the latter is when a Python-coded comparison
  * function hits a stack-depth error, which can cause this to return NULL
  * even if the key is present.
  */
 PyObject *
 PyDict_GetItem(PyObject *op, PyObject *key)
 {
     Py_hash_t hash;
+    Py_ssize_t ix;
     PyDictObject *mp = (PyDictObject *)op;
-    PyDictKeyEntry *ep;
     PyThreadState *tstate;
     PyObject **value_addr;
 
     if (!PyDict_Check(op))
         return NULL;
     if (!PyUnicode_CheckExact(key) ||
         (hash = ((PyASCIIObject *) key)->hash) == -1)
     {
@@ -1061,96 +1268,96 @@
        Let's just hope that no exception occurs then...  This must be
        _PyThreadState_Current and not PyThreadState_GET() because in debug
        mode, the latter complains if tstate is NULL. */
     tstate = _PyThreadState_UncheckedGet();
     if (tstate != NULL && tstate->curexc_type != NULL) {
         /* preserve the existing exception */
         PyObject *err_type, *err_value, *err_tb;
         PyErr_Fetch(&err_type, &err_value, &err_tb);
-        ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
+        ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
         /* ignore errors */
         PyErr_Restore(err_type, err_value, err_tb);
-        if (ep == NULL)
+        if (ix < 0)
             return NULL;
     }
     else {
-        ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
-        if (ep == NULL) {
+        ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
+        if (ix < 0) {
             PyErr_Clear();
             return NULL;
         }
     }
     return *value_addr;
 }
 
 PyObject *
 _PyDict_GetItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
 {
+    Py_ssize_t ix;
     PyDictObject *mp = (PyDictObject *)op;
-    PyDictKeyEntry *ep;
     PyThreadState *tstate;
     PyObject **value_addr;
 
     if (!PyDict_Check(op))
         return NULL;
 
     /* We can arrive here with a NULL tstate during initialization: try
        running "python -Wi" for an example related to string interning.
        Let's just hope that no exception occurs then...  This must be
        _PyThreadState_Current and not PyThreadState_GET() because in debug
        mode, the latter complains if tstate is NULL. */
     tstate = _PyThreadState_UncheckedGet();
     if (tstate != NULL && tstate->curexc_type != NULL) {
         /* preserve the existing exception */
         PyObject *err_type, *err_value, *err_tb;
         PyErr_Fetch(&err_type, &err_value, &err_tb);
-        ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
+        ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
         /* ignore errors */
         PyErr_Restore(err_type, err_value, err_tb);
-        if (ep == NULL)
+        if (ix < 0)  /* missing key or lookup error */
             return NULL;
     }
     else {
-        ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
-        if (ep == NULL) {
+        ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
+        if (ix < 0) {  /* missing key or lookup error: suppress it */
             PyErr_Clear();
             return NULL;
         }
     }
     return *value_addr;
 }
 
 /* Variant of PyDict_GetItem() that doesn't suppress exceptions.
    This returns NULL *with* an exception set if an exception occurred.
    It returns NULL *without* an exception set if the key wasn't present.
 */
 PyObject *
 PyDict_GetItemWithError(PyObject *op, PyObject *key)
 {
+    Py_ssize_t ix;
     Py_hash_t hash;
     PyDictObject*mp = (PyDictObject *)op;
-    PyDictKeyEntry *ep;
     PyObject **value_addr;
 
     if (!PyDict_Check(op)) {
         PyErr_BadInternalCall();
         return NULL;
     }
     if (!PyUnicode_CheckExact(key) ||
         (hash = ((PyASCIIObject *) key)->hash) == -1)
     {
         hash = PyObject_Hash(key);
         if (hash == -1) {
             return NULL;
         }
     }
 
-    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
-    if (ep == NULL)
+    ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
+    if (ix < 0)
         return NULL;
     return *value_addr;
 }
 
 PyObject *
 _PyDict_GetItemIdWithError(PyObject *dp, struct _Py_Identifier *key)
 {
     PyObject *kv;
@@ -1165,40 +1372,38 @@
  *
  * Raise an exception and return NULL if an error occurred (ex: computing the
  * key hash failed, key comparison failed, ...). Return NULL if the key doesn't
  * exist. Return the value if the key exists.
  */
 PyObject *
 _PyDict_LoadGlobal(PyDictObject *globals, PyDictObject *builtins, PyObject *key)
 {
+    Py_ssize_t ix;
     Py_hash_t hash;
-    PyDictKeyEntry *entry;
     PyObject **value_addr;
-    PyObject *value;
 
     if (!PyUnicode_CheckExact(key) ||
         (hash = ((PyASCIIObject *) key)->hash) == -1)
     {
         hash = PyObject_Hash(key);
         if (hash == -1)
             return NULL;
     }
 
     /* namespace 1: globals */
-    entry = globals->ma_keys->dk_lookup(globals, key, hash, &value_addr);
-    if (entry == NULL)
+    ix = globals->ma_keys->dk_lookup(globals, key, hash, &value_addr, NULL);
+    if (ix == DKIX_ERROR)
         return NULL;
-    value = *value_addr;
-    if (value != NULL)
-        return value;
+    if (ix != DKIX_EMPTY && *value_addr != NULL)
+        return *value_addr;
 
     /* namespace 2: builtins */
-    entry = builtins->ma_keys->dk_lookup(builtins, key, hash, &value_addr);
-    if (entry == NULL)
+    ix = builtins->ma_keys->dk_lookup(builtins, key, hash, &value_addr, NULL);
+    if (ix < 0)
         return NULL;
     return *value_addr;
 }
 
 /* CAUTION: PyDict_SetItem() must guarantee that it won't resize the
  * dictionary if it's merely replacing the value for an existing key.
  * This means that it's safe to loop over a dictionary with PyDict_Next()
  * and occasionally replace a value -- but you can't insert new keys or
@@ -1245,85 +1450,65 @@
 
     /* insertdict() handles any resizing that might be necessary */
     return insertdict(mp, key, hash, value);
 }
 
 int
 PyDict_DelItem(PyObject *op, PyObject *key)
 {
-    PyDictObject *mp;
     Py_hash_t hash;
-    PyDictKeyEntry *ep;
-    PyObject *old_key, *old_value;
-    PyObject **value_addr;
-
-    if (!PyDict_Check(op)) {
-        PyErr_BadInternalCall();
-        return -1;
-    }
+
     assert(key);
     if (!PyUnicode_CheckExact(key) ||
         (hash = ((PyASCIIObject *) key)->hash) == -1) {
         hash = PyObject_Hash(key);
         if (hash == -1)
             return -1;
     }
-    mp = (PyDictObject *)op;
-    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
-    if (ep == NULL)
-        return -1;
-    if (*value_addr == NULL) {
-        _PyErr_SetKeyError(key);
-        return -1;
-    }
-    old_value = *value_addr;
-    *value_addr = NULL;
-    mp->ma_used--;
-    if (!_PyDict_HasSplitTable(mp)) {
-        ENSURE_ALLOWS_DELETIONS(mp);
-        old_key = ep->me_key;
-        Py_INCREF(dummy);
-        ep->me_key = dummy;
-        Py_DECREF(old_key);
-    }
-    Py_DECREF(old_value);
-    return 0;
+
+    return _PyDict_DelItem_KnownHash(op, key, hash);
 }
 
 int
 _PyDict_DelItem_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
 {
+    Py_ssize_t hashpos, ix;
     PyDictObject *mp;
     PyDictKeyEntry *ep;
     PyObject *old_key, *old_value;
     PyObject **value_addr;
 
     if (!PyDict_Check(op)) {
         PyErr_BadInternalCall();
         return -1;
     }
     assert(key);
     assert(hash != -1);
     mp = (PyDictObject *)op;
-    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
-    if (ep == NULL)
+    ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
+    if (ix == DKIX_ERROR)
         return -1;
-    if (*value_addr == NULL) {
+    if (ix == DKIX_EMPTY || *value_addr == NULL) {
         _PyErr_SetKeyError(key);
         return -1;
     }
+    assert(dk_get_index(mp->ma_keys, hashpos) == ix);
     old_value = *value_addr;
     *value_addr = NULL;
     mp->ma_used--;
-    if (!_PyDict_HasSplitTable(mp)) {
+    if (_PyDict_HasSplitTable(mp)) {
+        mp->ma_keys->dk_usable = 0;
+    }
+    else {
+        ep = &DK_ENTRIES(mp->ma_keys)[ix];
+        dk_set_index(mp->ma_keys, hashpos, DKIX_DUMMY);
         ENSURE_ALLOWS_DELETIONS(mp);
         old_key = ep->me_key;
-        Py_INCREF(dummy);
-        ep->me_key = dummy;
+        ep->me_key = NULL;
         Py_DECREF(old_key);
     }
     Py_DECREF(old_value);
     return 0;
 }
 
 void
 PyDict_Clear(PyObject *op)
@@ -1342,17 +1527,17 @@
         return;
     /* Empty the dict... */
     DK_INCREF(Py_EMPTY_KEYS);
     mp->ma_keys = Py_EMPTY_KEYS;
     mp->ma_values = empty_values;
     mp->ma_used = 0;
     /* ...then clear the keys and values */
     if (oldvalues != NULL) {
-        n = DK_SIZE(oldkeys);
+        n = oldkeys->dk_nentries;
         for (i = 0; i < n; i++)
             Py_CLEAR(oldvalues[i]);
         free_values(oldvalues);
         DK_DECREF(oldkeys);
     }
     else {
        assert(oldkeys->dk_refcnt == 1);
        DK_DECREF(oldkeys);
@@ -1360,40 +1545,43 @@
 }
 
 /* Returns -1 if no more items (or op is not a dict),
  * index of item otherwise. Stores value in pvalue
  */
 Py_LOCAL_INLINE(Py_ssize_t)
 dict_next(PyObject *op, Py_ssize_t i, PyObject **pvalue)
 {
-    Py_ssize_t mask, offset;
+    Py_ssize_t n;
     PyDictObject *mp;
-    PyObject **value_ptr;
-
+    PyObject **value_ptr = NULL;
 
     if (!PyDict_Check(op))
         return -1;
     mp = (PyDictObject *)op;
     if (i < 0)
         return -1;
+
+    n = mp->ma_keys->dk_nentries;
     if (mp->ma_values) {
-        value_ptr = &mp->ma_values[i];
-        offset = sizeof(PyObject *);
+        for (; i < n; i++) {
+            value_ptr = &mp->ma_values[i];
+            if (*value_ptr != NULL)
+                break;
+        }
     }
     else {
-        value_ptr = &mp->ma_keys->dk_entries[i].me_value;
-        offset = sizeof(PyDictKeyEntry);
+        PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys);
+        for (; i < n; i++) {
+            value_ptr = &ep0[i].me_value;
+            if (*value_ptr != NULL)
+                break;
+        }
     }
-    mask = DK_MASK(mp->ma_keys);
-    while (i <= mask && *value_ptr == NULL) {
-        value_ptr = (PyObject **)(((char *)value_ptr) + offset);
-        i++;
-    }
-    if (i > mask)
+    if (i >= n)
         return -1;
     if (pvalue)
         *pvalue = *value_ptr;
     return i;
 }
 
 /*
  * Iterate over a dict.  Use like so:
@@ -1408,51 +1596,54 @@
  * CAUTION:  In general, it isn't safe to use PyDict_Next in a loop that
  * mutates the dict.  One exception:  it is safe if the loop merely changes
  * the values associated with the keys (but doesn't insert new keys or
  * delete keys), via PyDict_SetItem().
  */
 int
 PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue)
 {
     PyDictObject *mp;
     Py_ssize_t i = dict_next(op, *ppos, pvalue);
     if (i < 0)
         return 0;
     mp = (PyDictObject *)op;
     *ppos = i+1;
     if (pkey)
-        *pkey = mp->ma_keys->dk_entries[i].me_key;
+        *pkey = DK_ENTRIES(mp->ma_keys)[i].me_key;
     return 1;
 }
 
 /* Internal version of PyDict_Next that returns a hash value in addition
  * to the key and value.
  */
 int
 _PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey,
              PyObject **pvalue, Py_hash_t *phash)
 {
     PyDictObject *mp;
+    PyDictKeyEntry *ep0;
     Py_ssize_t i = dict_next(op, *ppos, pvalue);
     if (i < 0)
         return 0;
     mp = (PyDictObject *)op;
+    ep0 = DK_ENTRIES(mp->ma_keys);
     *ppos = i+1;
-    *phash = mp->ma_keys->dk_entries[i].me_hash;
+    *phash = ep0[i].me_hash;
     if (pkey)
-        *pkey = mp->ma_keys->dk_entries[i].me_key;
+        *pkey = ep0[i].me_key;
     return 1;
 }
 
 /* Internal version of dict.pop(). */
 PyObject *
 _PyDict_Pop(PyDictObject *mp, PyObject *key, PyObject *deflt)
 {
     Py_hash_t hash;
+    Py_ssize_t ix, hashpos;
     PyObject *old_value, *old_key;
     PyDictKeyEntry *ep;
     PyObject **value_addr;
 
     if (mp->ma_used == 0) {
         if (deflt) {
             Py_INCREF(deflt);
             return deflt;
@@ -1461,35 +1652,36 @@
         return NULL;
     }
     if (!PyUnicode_CheckExact(key) ||
         (hash = ((PyASCIIObject *) key)->hash) == -1) {
         hash = PyObject_Hash(key);
         if (hash == -1)
             return NULL;
     }
-    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
-    if (ep == NULL)
+    ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
+    if (ix == DKIX_ERROR)
         return NULL;
-    old_value = *value_addr;
-    if (old_value == NULL) {
+    if (ix == DKIX_EMPTY || *value_addr == NULL) {
         if (deflt) {
             Py_INCREF(deflt);
             return deflt;
         }
         _PyErr_SetKeyError(key);
         return NULL;
     }
+    old_value = *value_addr;
     *value_addr = NULL;
     mp->ma_used--;
     if (!_PyDict_HasSplitTable(mp)) {
+        dk_set_index(mp->ma_keys, hashpos, DKIX_DUMMY);
+        ep = &DK_ENTRIES(mp->ma_keys)[ix];
         ENSURE_ALLOWS_DELETIONS(mp);
         old_key = ep->me_key;
-        Py_INCREF(dummy);
-        ep->me_key = dummy;
+        ep->me_key = NULL;
         Py_DECREF(old_key);
     }
     return old_value;
 }
 
 /* Internal version of dict.from_keys().  It is subclass-friendly. */
 PyObject *
 _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)
@@ -1506,17 +1698,17 @@
     if (PyDict_CheckExact(d) && ((PyDictObject *)d)->ma_used == 0) {
         if (PyDict_CheckExact(iterable)) {
             PyDictObject *mp = (PyDictObject *)d;
             PyObject *oldvalue;
             Py_ssize_t pos = 0;
             PyObject *key;
             Py_hash_t hash;
 
-            if (dictresize(mp, Py_SIZE(iterable))) {
+            if (dictresize(mp, ESTIMATE_SIZE(Py_SIZE(iterable)))) {
                 Py_DECREF(d);
                 return NULL;
             }
 
             while (_PyDict_Next(iterable, &pos, &key, &oldvalue, &hash)) {
                 if (insertdict(mp, key, hash, value)) {
                     Py_DECREF(d);
                     return NULL;
@@ -1525,17 +1717,17 @@
             return d;
         }
         if (PyAnySet_CheckExact(iterable)) {
             PyDictObject *mp = (PyDictObject *)d;
             Py_ssize_t pos = 0;
             PyObject *key;
             Py_hash_t hash;
 
-            if (dictresize(mp, PySet_GET_SIZE(iterable))) {
+            if (dictresize(mp, ESTIMATE_SIZE(PySet_GET_SIZE(iterable)))) {
                 Py_DECREF(d);
                 return NULL;
             }
 
             while (_PySet_NextEntry(iterable, &pos, &key, &hash)) {
                 if (insertdict(mp, key, hash, value)) {
                     Py_DECREF(d);
                     return NULL;
@@ -1585,17 +1777,17 @@
 {
     PyObject **values = mp->ma_values;
     PyDictKeysObject *keys = mp->ma_keys;
     Py_ssize_t i, n;
     PyObject_GC_UnTrack(mp);
     Py_TRASHCAN_SAFE_BEGIN(mp)
     if (values != NULL) {
         if (values != empty_values) {
-            for (i = 0, n = DK_SIZE(mp->ma_keys); i < n; i++) {
+            for (i = 0, n = mp->ma_keys->dk_nentries; i < n; i++) {
                 Py_XDECREF(values[i]);
             }
             free_values(values);
         }
         DK_DECREF(keys);
     }
     else if (keys != NULL) {
         assert(keys->dk_refcnt == 1);
@@ -1697,31 +1889,30 @@
 {
     return mp->ma_used;
 }
 
 static PyObject *
 dict_subscript(PyDictObject *mp, PyObject *key)
 {
     PyObject *v;
+    Py_ssize_t ix;
     Py_hash_t hash;
-    PyDictKeyEntry *ep;
     PyObject **value_addr;
 
     if (!PyUnicode_CheckExact(key) ||
         (hash = ((PyASCIIObject *) key)->hash) == -1) {
         hash = PyObject_Hash(key);
         if (hash == -1)
             return NULL;
     }
-    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
-    if (ep == NULL)
+    ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
+    if (ix == DKIX_ERROR)
         return NULL;
-    v = *value_addr;
-    if (v == NULL) {
+    if (ix == DKIX_EMPTY || *value_addr == NULL) {
         if (!PyDict_CheckExact(mp)) {
             /* Look up __missing__ method if we're a subclass. */
             PyObject *missing, *res;
             _Py_IDENTIFIER(__missing__);
             missing = _PyObject_LookupSpecial((PyObject *)mp, &PyId___missing__);
             if (missing != NULL) {
                 res = PyObject_CallFunctionObjArgs(missing,
                                                    key, NULL);
@@ -1729,18 +1920,18 @@
                 return res;
             }
             else if (PyErr_Occurred())
                 return NULL;
         }
         _PyErr_SetKeyError(key);
         return NULL;
     }
-    else
-        Py_INCREF(v);
+    v = *value_addr;
+    Py_INCREF(v);
     return v;
 }
 
 static int
 dict_ass_sub(PyDictObject *mp, PyObject *v, PyObject *w)
 {
     if (w == NULL)
         return PyDict_DelItem((PyObject *)mp, v);
@@ -1770,18 +1961,18 @@
         return NULL;
     if (n != mp->ma_used) {
         /* Durnit.  The allocations caused the dict to resize.
          * Just start over, this shouldn't normally happen.
          */
         Py_DECREF(v);
         goto again;
     }
-    ep = &mp->ma_keys->dk_entries[0];
-    size = DK_SIZE(mp->ma_keys);
+    ep = DK_ENTRIES(mp->ma_keys);
+    size = mp->ma_keys->dk_nentries;
     if (mp->ma_values) {
         value_ptr = mp->ma_values;
         offset = sizeof(PyObject *);
     }
     else {
         value_ptr = &ep[0].me_value;
         offset = sizeof(PyDictKeyEntry);
     }
@@ -1813,23 +2004,23 @@
         return NULL;
     if (n != mp->ma_used) {
         /* Durnit.  The allocations caused the dict to resize.
          * Just start over, this shouldn't normally happen.
          */
         Py_DECREF(v);
         goto again;
     }
-    size = DK_SIZE(mp->ma_keys);
+    size = mp->ma_keys->dk_nentries;
     if (mp->ma_values) {
         value_ptr = mp->ma_values;
         offset = sizeof(PyObject *);
     }
     else {
-        value_ptr = &mp->ma_keys->dk_entries[0].me_value;
+        value_ptr = &(DK_ENTRIES(mp->ma_keys)[0].me_value);
         offset = sizeof(PyDictKeyEntry);
     }
     for (i = 0, j = 0; i < size; i++) {
         PyObject *value = *value_ptr;
         value_ptr = (PyObject **)(((char *)value_ptr) + offset);
         if (value != NULL) {
             Py_INCREF(value);
             PyList_SET_ITEM(v, j, value);
@@ -1870,18 +2061,18 @@
     if (n != mp->ma_used) {
         /* Durnit.  The allocations caused the dict to resize.
          * Just start over, this shouldn't normally happen.
          */
         Py_DECREF(v);
         goto again;
     }
     /* Nothing we do below makes any function calls. */
-    ep = mp->ma_keys->dk_entries;
-    size = DK_SIZE(mp->ma_keys);
+    ep = DK_ENTRIES(mp->ma_keys);
+    size = mp->ma_keys->dk_nentries;
     if (mp->ma_values) {
         value_ptr = mp->ma_values;
         offset = sizeof(PyObject *);
     }
     else {
         value_ptr = &ep[0].me_value;
         offset = sizeof(PyDictKeyEntry);
     }
@@ -2038,17 +2229,17 @@
     return PyDict_Merge(a, b, 1);
 }
 
 int
 PyDict_Merge(PyObject *a, PyObject *b, int override)
 {
     PyDictObject *mp, *other;
     Py_ssize_t i, n;
-    PyDictKeyEntry *entry;
+    PyDictKeyEntry *entry, *ep0;
 
     /* We accept for the argument either a concrete dictionary object,
      * or an abstract "mapping" object.  For the former, we can do
      * things quite efficiently.  For the latter, we only require that
      * PyMapping_Keys() and PyObject_GetItem() be supported.
      */
     if (a == NULL || !PyDict_Check(a) || b == NULL) {
         PyErr_BadInternalCall();
@@ -2068,20 +2259,21 @@
             override = 1;
         /* Do one big resize at the start, rather than
          * incrementally resizing as we insert new items.  Expect
          * that there will be no (or few) overlapping keys.
          */
         if (mp->ma_keys->dk_usable * 3 < other->ma_used * 2)
             if (dictresize(mp, (mp->ma_used + other->ma_used)*2) != 0)
                return -1;
-        for (i = 0, n = DK_SIZE(other->ma_keys); i < n; i++) {
+        ep0 = DK_ENTRIES(other->ma_keys);
+        for (i = 0, n = other->ma_keys->dk_nentries; i < n; i++) {
             PyObject *key, *value;
             Py_hash_t hash;
-            entry = &other->ma_keys->dk_entries[i];
+            entry = &ep0[i];
             key = entry->me_key;
             hash = entry->me_hash;
             if (other->ma_values)
                 value = other->ma_values[i];
             else
                 value = entry->me_value;
 
             if (value != NULL) {
@@ -2090,17 +2282,17 @@
                 Py_INCREF(value);
                 if (override || PyDict_GetItem(a, key) == NULL)
                     err = insertdict(mp, key, hash, value);
                 Py_DECREF(value);
                 Py_DECREF(key);
                 if (err != 0)
                     return -1;
 
-                if (n != DK_SIZE(other->ma_keys)) {
+                if (n != other->ma_keys->dk_nentries) {
                     PyErr_SetString(PyExc_RuntimeError,
                                     "dict mutated during update");
                     return -1;
                 }
             }
         }
     }
     else {
@@ -2165,29 +2357,31 @@
 
     if (o == NULL || !PyDict_Check(o)) {
         PyErr_BadInternalCall();
         return NULL;
     }
     mp = (PyDictObject *)o;
     if (_PyDict_HasSplitTable(mp)) {
         PyDictObject *split_copy;
-        PyObject **newvalues = new_values(DK_SIZE(mp->ma_keys));
+        Py_ssize_t size = USABLE_FRACTION(DK_SIZE(mp->ma_keys));
+        PyObject **newvalues;
+        newvalues = new_values(size);
         if (newvalues == NULL)
             return PyErr_NoMemory();
         split_copy = PyObject_GC_New(PyDictObject, &PyDict_Type);
         if (split_copy == NULL) {
             free_values(newvalues);
             return NULL;
         }
         split_copy->ma_values = newvalues;
         split_copy->ma_keys = mp->ma_keys;
         split_copy->ma_used = mp->ma_used;
         DK_INCREF(mp->ma_keys);
-        for (i = 0, n = DK_SIZE(mp->ma_keys); i < n; i++) {
+        for (i = 0, n = size; i < n; i++) {
             PyObject *value = mp->ma_values[i];
             Py_XINCREF(value);
             split_copy->ma_values[i] = value;
         }
         if (_PyObject_GC_IS_TRACKED(mp))
             _PyObject_GC_TRACK(split_copy);
         return (PyObject *)split_copy;
     }
@@ -2248,35 +2442,35 @@
 dict_equal(PyDictObject *a, PyDictObject *b)
 {
     Py_ssize_t i;
 
     if (a->ma_used != b->ma_used)
         /* can't be equal if # of entries differ */
         return 0;
     /* Same # of entries -- check all of 'em.  Exit early on any diff. */
-    for (i = 0; i < DK_SIZE(a->ma_keys); i++) {
-        PyDictKeyEntry *ep = &a->ma_keys->dk_entries[i];
+    for (i = 0; i < a->ma_keys->dk_nentries; i++) {
+        PyDictKeyEntry *ep = &DK_ENTRIES(a->ma_keys)[i];
         PyObject *aval;
         if (a->ma_values)
             aval = a->ma_values[i];
         else
             aval = ep->me_value;
         if (aval != NULL) {
             int cmp;
             PyObject *bval;
             PyObject **vaddr;
             PyObject *key = ep->me_key;
             /* temporarily bump aval's refcount to ensure it stays
                alive until we're done with it */
             Py_INCREF(aval);
             /* ditto for key */
             Py_INCREF(key);
             /* reuse the known hash value */
-            if ((b->ma_keys->dk_lookup)(b, key, ep->me_hash, &vaddr) == NULL)
+            if ((b->ma_keys->dk_lookup)(b, key, ep->me_hash, &vaddr, NULL) < 0)
                 bval = NULL;
             else
                 bval = *vaddr;
             Py_DECREF(key);
             if (bval == NULL) {
                 Py_DECREF(aval);
                 if (PyErr_Occurred())
                     return -1;
@@ -2324,100 +2518,114 @@
 [clinic start generated code]*/
 
 static PyObject *
 dict___contains__(PyDictObject *self, PyObject *key)
 /*[clinic end generated code: output=a3d03db709ed6e6b input=b852b2a19b51ab24]*/
 {
     register PyDictObject *mp = self;
     Py_hash_t hash;
-    PyDictKeyEntry *ep;
+    Py_ssize_t ix;
     PyObject **value_addr;
 
     if (!PyUnicode_CheckExact(key) ||
         (hash = ((PyASCIIObject *) key)->hash) == -1) {
         hash = PyObject_Hash(key);
         if (hash == -1)
             return NULL;
     }
-    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
-    if (ep == NULL)
+    ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
+    if (ix == DKIX_ERROR)
         return NULL;
-    return PyBool_FromLong(*value_addr != NULL);
+    if (ix == DKIX_EMPTY || *value_addr == NULL)
+        Py_RETURN_FALSE;
+    Py_RETURN_TRUE;
 }
 
 static PyObject *
 dict_get(PyDictObject *mp, PyObject *args)
 {
     PyObject *key;
     PyObject *failobj = Py_None;
     PyObject *val = NULL;
     Py_hash_t hash;
-    PyDictKeyEntry *ep;
+    Py_ssize_t ix;
     PyObject **value_addr;
 
     if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &failobj))
         return NULL;
 
     if (!PyUnicode_CheckExact(key) ||
         (hash = ((PyASCIIObject *) key)->hash) == -1) {
         hash = PyObject_Hash(key);
         if (hash == -1)
             return NULL;
     }
-    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
-    if (ep == NULL)
+    ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
+    if (ix == DKIX_ERROR)
         return NULL;
-    val = *value_addr;
-    if (val == NULL)
+    if (ix == DKIX_EMPTY || *value_addr == NULL)
         val = failobj;
+    else
+        val = *value_addr;
     Py_INCREF(val);
     return val;
 }
 
 PyObject *
 PyDict_SetDefault(PyObject *d, PyObject *key, PyObject *defaultobj)
 {
     PyDictObject *mp = (PyDictObject *)d;
     PyObject *val = NULL;
     Py_hash_t hash;
+    Py_ssize_t hashpos, ix;
     PyDictKeyEntry *ep;
     PyObject **value_addr;
 
     if (!PyDict_Check(d)) {
         PyErr_BadInternalCall();
         return NULL;
     }
     if (!PyUnicode_CheckExact(key) ||
         (hash = ((PyASCIIObject *) key)->hash) == -1) {
         hash = PyObject_Hash(key);
         if (hash == -1)
             return NULL;
     }
-    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
-    if (ep == NULL)
+    ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos);
+    if (ix == DKIX_ERROR)
         return NULL;
-    val = *value_addr;
-    if (val == NULL) {
+    if (ix == DKIX_EMPTY || *value_addr == NULL) {
+        val = defaultobj;
         if (mp->ma_keys->dk_usable <= 0) {
             /* Need to resize. */
             if (insertion_resize(mp) < 0)
                 return NULL;
-            ep = find_empty_slot(mp, key, hash, &value_addr);
+            find_empty_slot(mp, key, hash, &value_addr, &hashpos);
         }
+        ix = mp->ma_keys->dk_nentries;
         Py_INCREF(defaultobj);
         Py_INCREF(key);
         MAINTAIN_TRACKING(mp, key, defaultobj);
+        dk_set_index(mp->ma_keys, hashpos, ix);
+        ep = &DK_ENTRIES(mp->ma_keys)[ix];
         ep->me_key = key;
         ep->me_hash = hash;
-        *value_addr = defaultobj;
-        val = defaultobj;
+        if (mp->ma_values) {
+            mp->ma_values[ix] = val;
+        }
+        else {
+            ep->me_value = val;
+        }
         mp->ma_keys->dk_usable--;
+        mp->ma_keys->dk_nentries++;
         mp->ma_used++;
     }
+    else
+        val = *value_addr;
     return val;
 }
 
 static PyObject *
 dict_setdefault(PyDictObject *mp, PyObject *args)
 {
     PyObject *key, *val;
     PyObject *defaultobj = Py_None;
@@ -2446,21 +2654,20 @@
         return NULL;
 
     return _PyDict_Pop(mp, key, deflt);
 }
 
 static PyObject *
 dict_popitem(PyDictObject *mp)
 {
-    Py_hash_t i = 0;
-    PyDictKeyEntry *ep;
+    Py_ssize_t i, j;
+    PyDictKeyEntry *ep0, *ep;
     PyObject *res;
 
-
     /* Allocate the result tuple before checking the size.  Believe it
      * or not, this allocation could trigger a garbage collection which
      * could empty the dict, so if we checked the size first and that
      * happened, the result would be an infinite loop (searching for an
      * entry that no longer exists).  Note that the usual popitem()
      * idiom is "while d: k, v = d.popitem()". so needing to throw the
      * tuple away if the dict *is* empty isn't a significant
      * inefficiency -- possible, but unlikely in practice.
@@ -2477,71 +2684,63 @@
     /* Convert split table to combined table */
     if (mp->ma_keys->dk_lookup == lookdict_split) {
         if (dictresize(mp, DK_SIZE(mp->ma_keys))) {
             Py_DECREF(res);
             return NULL;
         }
     }
     ENSURE_ALLOWS_DELETIONS(mp);
-    /* Set ep to "the first" dict entry with a value.  We abuse the hash
-     * field of slot 0 to hold a search finger:
-     * If slot 0 has a value, use slot 0.
-     * Else slot 0 is being used to hold a search finger,
-     * and we use its hash value as the first index to look.
-     */
-    ep = &mp->ma_keys->dk_entries[0];
-    if (ep->me_value == NULL) {
-        Py_ssize_t mask = DK_MASK(mp->ma_keys);
-        i = ep->me_hash;
-        /* The hash field may be a real hash value, or it may be a
-         * legit search finger, or it may be a once-legit search
-         * finger that's out of bounds now because it wrapped around
-         * or the table shrunk -- simply make sure it's in bounds now.
-         */
-        if (i > mask || i < 1)
-            i = 1;              /* skip slot 0 */
-        while ((ep = &mp->ma_keys->dk_entries[i])->me_value == NULL) {
-            i++;
-            if (i > mask)
-                i = 1;
-        }
+
+    /* Pop last item */
+    ep0 = DK_ENTRIES(mp->ma_keys);
+    i = mp->ma_keys->dk_nentries - 1;
+    while (i >= 0 && ep0[i].me_value == NULL) {
+        i--;
     }
+    assert(i >= 0);
+
+    ep = &ep0[i];
+    j = lookdict_index(mp->ma_keys, ep->me_hash, i);
+    assert(j >= 0);
+    assert(dk_get_index(mp->ma_keys, j) == i);
+    dk_set_index(mp->ma_keys, j, DKIX_DUMMY);
+
     PyTuple_SET_ITEM(res, 0, ep->me_key);
     PyTuple_SET_ITEM(res, 1, ep->me_value);
-    Py_INCREF(dummy);
-    ep->me_key = dummy;
+    ep->me_key = NULL;
     ep->me_value = NULL;
+    /* We can't dk_usable++ since there is DKIX_DUMMY in indices */
+    mp->ma_keys->dk_nentries = i;
     mp->ma_used--;
-    assert(mp->ma_keys->dk_entries[0].me_value == NULL);
-    mp->ma_keys->dk_entries[0].me_hash = i + 1;  /* next place to start */
     return res;
 }
 
 static int
 dict_traverse(PyObject *op, visitproc visit, void *arg)
 {
     Py_ssize_t i, n;
     PyDictObject *mp = (PyDictObject *)op;
+    PyDictKeyEntry *ep = DK_ENTRIES(mp->ma_keys);
     if (mp->ma_keys->dk_lookup == lookdict) {
-        for (i = 0; i < DK_SIZE(mp->ma_keys); i++) {
-            if (mp->ma_keys->dk_entries[i].me_value != NULL) {
-                Py_VISIT(mp->ma_keys->dk_entries[i].me_value);
-                Py_VISIT(mp->ma_keys->dk_entries[i].me_key);
+        for (i = 0; i < mp->ma_keys->dk_nentries; i++) {
+            if (ep[i].me_value != NULL) {
+                Py_VISIT(ep[i].me_value);
+                Py_VISIT(ep[i].me_key);
             }
         }
     } else {
         if (mp->ma_values != NULL) {
-            for (i = 0, n = DK_SIZE(mp->ma_keys); i < n; i++) {
+            for (i = 0, n = mp->ma_keys->dk_nentries; i < n; i++) {
                 Py_VISIT(mp->ma_values[i]);
             }
         }
         else {
-            for (i = 0, n = DK_SIZE(mp->ma_keys); i < n; i++) {
-                Py_VISIT(mp->ma_keys->dk_entries[i].me_value);
+            for (i = 0, n = mp->ma_keys->dk_nentries; i < n; i++) {
+                Py_VISIT(ep[i].me_value);
             }
         }
     }
     return 0;
 }
 
 static int
 dict_tp_clear(PyObject *op)
@@ -2550,33 +2749,38 @@
     return 0;
 }
 
 static PyObject *dictiter_new(PyDictObject *, PyTypeObject *);
 
 Py_ssize_t
 _PyDict_SizeOf(PyDictObject *mp)
 {
-    Py_ssize_t size, res;
+    Py_ssize_t size, usable, res;
 
     size = DK_SIZE(mp->ma_keys);
+    usable = USABLE_FRACTION(size);
+
     res = _PyObject_SIZE(Py_TYPE(mp));
     if (mp->ma_values)
-        res += size * sizeof(PyObject*);
+        res += usable * sizeof(PyObject*);
     /* If the dictionary is split, the keys portion is accounted-for
        in the type object. */
     if (mp->ma_keys->dk_refcnt == 1)
-        res += sizeof(PyDictKeysObject) + (size-1) * sizeof(PyDictKeyEntry);
+        res += sizeof(PyDictKeysObject) - 8 + DK_IXSIZE(mp->ma_keys) * size +
+            sizeof(PyDictKeyEntry) * usable;
     return res;
 }
 
 Py_ssize_t
 _PyDict_KeysSize(PyDictKeysObject *keys)
 {
-    return sizeof(PyDictKeysObject) + (DK_SIZE(keys)-1) * sizeof(PyDictKeyEntry);
+    return sizeof(PyDictKeysObject) - 8
+        + DK_IXSIZE(keys) * DK_SIZE(keys)
+        + USABLE_FRACTION(DK_SIZE(keys)) * sizeof(PyDictKeyEntry);
 }
 
 static PyObject *
 dict_sizeof(PyDictObject *mp)
 {
     return PyLong_FromSsize_t(_PyDict_SizeOf(mp));
 }
 
@@ -2653,40 +2857,44 @@
     {NULL,              NULL}   /* sentinel */
 };
 
 /* Return 1 if `key` is in dict `op`, 0 if not, and -1 on error. */
 int
 PyDict_Contains(PyObject *op, PyObject *key)
 {
     Py_hash_t hash;
+    Py_ssize_t ix;
     PyDictObject *mp = (PyDictObject *)op;
-    PyDictKeyEntry *ep;
     PyObject **value_addr;
 
     if (!PyUnicode_CheckExact(key) ||
         (hash = ((PyASCIIObject *) key)->hash) == -1) {
         hash = PyObject_Hash(key);
         if (hash == -1)
             return -1;
     }
-    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
-    return (ep == NULL) ? -1 : (*value_addr != NULL);
+    ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
+    if (ix == DKIX_ERROR)
+        return -1;
+    return (ix != DKIX_EMPTY && *value_addr != NULL);
 }
 
 /* Internal version of PyDict_Contains used when the hash value is already known */
 int
 _PyDict_Contains(PyObject *op, PyObject *key, Py_hash_t hash)
 {
     PyDictObject *mp = (PyDictObject *)op;
-    PyDictKeyEntry *ep;
     PyObject **value_addr;
-
-    ep = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr);
-    return (ep == NULL) ? -1 : (*value_addr != NULL);
+    Py_ssize_t ix;
+
+    ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, NULL);
+    if (ix == DKIX_ERROR)
+        return -1;
+    return (ix != DKIX_EMPTY && *value_addr != NULL);
 }
 
 /* Hack to implement "key in dict" */
 static PySequenceMethods dict_as_sequence = {
     0,                          /* sq_length */
     0,                          /* sq_concat */
     0,                          /* sq_repeat */
     0,                          /* sq_item */
@@ -2710,17 +2918,17 @@
         return NULL;
     d = (PyDictObject *)self;
 
     /* The object has been implicitly tracked by tp_alloc */
     if (type == &PyDict_Type)
         _PyObject_GC_UNTRACK(d);
 
     d->ma_used = 0;
-    d->ma_keys = new_keys_object(PyDict_MINSIZE_COMBINED);
+    d->ma_keys = new_keys_object(PyDict_MINSIZE);
     if (d->ma_keys == NULL) {
         Py_DECREF(self);
         return NULL;
     }
     return self;
 }
 
 static int
@@ -2938,17 +3146,17 @@
      {"__reduce__", (PyCFunction)dictiter_reduce, METH_NOARGS,
      reduce_doc},
     {NULL,              NULL}           /* sentinel */
 };
 
 static PyObject *dictiter_iternextkey(dictiterobject *di)
 {
     PyObject *key;
-    Py_ssize_t i, mask, offset;
+    Py_ssize_t i, n, offset;
     PyDictKeysObject *k;
     PyDictObject *d = di->di_dict;
     PyObject **value_ptr;
 
     if (d == NULL)
         return NULL;
     assert (PyDict_Check(d));
 
@@ -2963,29 +3171,29 @@
     if (i < 0)
         goto fail;
     k = d->ma_keys;
     if (d->ma_values) {
         value_ptr = &d->ma_values[i];
         offset = sizeof(PyObject *);
     }
     else {
-        value_ptr = &k->dk_entries[i].me_value;
+        value_ptr = &DK_ENTRIES(k)[i].me_value;
         offset = sizeof(PyDictKeyEntry);
     }
-    mask = DK_SIZE(k)-1;
-    while (i <= mask && *value_ptr == NULL) {
+    n = k->dk_nentries - 1;
+    while (i <= n && *value_ptr == NULL) {
         value_ptr = (PyObject **)(((char *)value_ptr) + offset);
         i++;
     }
     di->di_pos = i+1;
-    if (i > mask)
+    if (i > n)
         goto fail;
     di->len--;
-    key = k->dk_entries[i].me_key;
+    key = DK_ENTRIES(k)[i].me_key;
     Py_INCREF(key);
     return key;
 
 fail:
     di->di_dict = NULL;
     Py_DECREF(d);
     return NULL;
 }
@@ -3021,47 +3229,47 @@
     (iternextfunc)dictiter_iternextkey,         /* tp_iternext */
     dictiter_methods,                           /* tp_methods */
     0,
 };
 
 static PyObject *dictiter_iternextvalue(dictiterobject *di)
 {
     PyObject *value;
-    Py_ssize_t i, mask, offset;
+    Py_ssize_t i, n, offset;
     PyDictObject *d = di->di_dict;
     PyObject **value_ptr;
 
     if (d == NULL)
         return NULL;
     assert (PyDict_Check(d));
 
     if (di->di_used != d->ma_used) {
         PyErr_SetString(PyExc_RuntimeError,
                         "dictionary changed size during iteration");
         di->di_used = -1; /* Make this state sticky */
         return NULL;
     }
 
     i = di->di_pos;
-    mask = DK_SIZE(d->ma_keys)-1;
-    if (i < 0 || i > mask)
+    n = d->ma_keys->dk_nentries - 1;
+    if (i < 0 || i > n)
         goto fail;
     if (d->ma_values) {
         value_ptr = &d->ma_values[i];
         offset = sizeof(PyObject *);
     }
     else {
-        value_ptr = &d->ma_keys->dk_entries[i].me_value;
+        value_ptr = &DK_ENTRIES(d->ma_keys)[i].me_value;
         offset = sizeof(PyDictKeyEntry);
     }
-    while (i <= mask && *value_ptr == NULL) {
+    while (i <= n && *value_ptr == NULL) {
         value_ptr = (PyObject **)(((char *)value_ptr) + offset);
         i++;
-        if (i > mask)
+        if (i > n)
             goto fail;
     }
     di->di_pos = i+1;
     di->len--;
     value = *value_ptr;
     Py_INCREF(value);
     return value;
 
@@ -3102,17 +3310,17 @@
     (iternextfunc)dictiter_iternextvalue,       /* tp_iternext */
     dictiter_methods,                           /* tp_methods */
     0,
 };
 
 static PyObject *dictiter_iternextitem(dictiterobject *di)
 {
     PyObject *key, *value, *result = di->di_result;
-    Py_ssize_t i, mask, offset;
+    Py_ssize_t i, n, offset;
     PyDictObject *d = di->di_dict;
     PyObject **value_ptr;
 
     if (d == NULL)
         return NULL;
     assert (PyDict_Check(d));
 
     if (di->di_used != d->ma_used) {
@@ -3120,44 +3328,44 @@
                         "dictionary changed size during iteration");
         di->di_used = -1; /* Make this state sticky */
         return NULL;
     }
 
     i = di->di_pos;
     if (i < 0)
         goto fail;
-    mask = DK_SIZE(d->ma_keys)-1;
+    n = d->ma_keys->dk_nentries - 1;
     if (d->ma_values) {
         value_ptr = &d->ma_values[i];
         offset = sizeof(PyObject *);
     }
     else {
-        value_ptr = &d->ma_keys->dk_entries[i].me_value;
+        value_ptr = &DK_ENTRIES(d->ma_keys)[i].me_value;
         offset = sizeof(PyDictKeyEntry);
     }
-    while (i <= mask && *value_ptr == NULL) {
+    while (i <= n && *value_ptr == NULL) {
         value_ptr = (PyObject **)(((char *)value_ptr) + offset);
         i++;
     }
     di->di_pos = i+1;
-    if (i > mask)
+    if (i > n)
         goto fail;
 
     if (result->ob_refcnt == 1) {
         Py_INCREF(result);
         Py_DECREF(PyTuple_GET_ITEM(result, 0));
         Py_DECREF(PyTuple_GET_ITEM(result, 1));
     } else {
         result = PyTuple_New(2);
         if (result == NULL)
             return NULL;
     }
     di->len--;
-    key = d->ma_keys->dk_entries[i].me_key;
+    key = DK_ENTRIES(d->ma_keys)[i].me_key;
     value = *value_ptr;
     Py_INCREF(key);
     Py_INCREF(value);
     PyTuple_SET_ITEM(result, 0, key);  /* steals reference */
     PyTuple_SET_ITEM(result, 1, value);  /* steals reference */
     return result;
 
 fail:
@@ -3787,17 +3995,17 @@
     return _PyDictView_New(dict, &PyDictValues_Type);
 }
 
 /* Returns NULL if cannot allocate a new PyDictKeysObject,
    but does not set an error */
 PyDictKeysObject *
 _PyDict_NewKeysForClass(void)
 {
-    PyDictKeysObject *keys = new_keys_object(PyDict_MINSIZE_SPLIT);
+    PyDictKeysObject *keys = new_keys_object(PyDict_MINSIZE);
     if (keys == NULL)
         PyErr_Clear();
     else
         keys->dk_lookup = lookdict_split;
     return keys;
 }
 
 #define CACHED_KEYS(tp) (((PyHeapTypeObject*)tp)->ht_cached_keys)
@@ -3823,17 +4031,17 @@
         }
     }
     Py_XINCREF(dict);
     return dict;
 }
 
 int
 _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr,
-                     PyObject *key, PyObject *value)
+                      PyObject *key, PyObject *value)
 {
     PyObject *dict;
     int res;
     PyDictKeysObject *cached;
 
     assert(dictptr != NULL);
     if ((tp->tp_flags & Py_TPFLAGS_HEAPTYPE) && (cached = CACHED_KEYS(tp))) {
         assert(dictptr != NULL);
@@ -3852,17 +4060,18 @@
                 DK_DECREF(cached);
             }
         } else {
             res = PyDict_SetItem(dict, key, value);
             if (cached != ((PyDictObject *)dict)->ma_keys) {
                 /* Either update tp->ht_cached_keys or delete it */
                 if (cached->dk_refcnt == 1) {
                     CACHED_KEYS(tp) = make_keys_shared(dict);
-                } else {
+                }
+                else {
                     CACHED_KEYS(tp) = NULL;
                 }
                 DK_DECREF(cached);
                 if (CACHED_KEYS(tp) == NULL && PyErr_Occurred())
                     return -1;
             }
         }
     } else {
@@ -3882,55 +4091,8 @@
     return res;
 }
 
 void
 _PyDictKeys_DecRef(PyDictKeysObject *keys)
 {
     DK_DECREF(keys);
 }
-
-
-/* ARGSUSED */
-static PyObject *
-dummy_repr(PyObject *op)
-{
-    return PyUnicode_FromString("<dummy key>");
-}
-
-/* ARGUSED */
-static void
-dummy_dealloc(PyObject* ignore)
-{
-    /* This should never get called, but we also don't want to SEGV if
-     * we accidentally decref dummy-key out of existence.
-     */
-    Py_FatalError("deallocating <dummy key>");
-}
-
-static PyTypeObject PyDictDummy_Type = {
-    PyVarObject_HEAD_INIT(&PyType_Type, 0)
-    "<dummy key> type",
-    0,
-    0,
-    dummy_dealloc,      /*tp_dealloc*/ /*never called*/
-    0,                  /*tp_print*/
-    0,                  /*tp_getattr*/
-    0,                  /*tp_setattr*/
-    0,                  /*tp_reserved*/
-    dummy_repr,         /*tp_repr*/
-    0,                  /*tp_as_number*/
-    0,                  /*tp_as_sequence*/
-    0,                  /*tp_as_mapping*/
-    0,                  /*tp_hash */
-    0,                  /*tp_call */
-    0,                  /*tp_str */
-    0,                  /*tp_getattro */
-    0,                  /*tp_setattro */
-    0,                  /*tp_as_buffer */
-    Py_TPFLAGS_DEFAULT, /*tp_flags */
-};
-
-static PyObject _dummy_struct = {
-  _PyObject_EXTRA_INIT
-  2, &PyDictDummy_Type
-};
-
diff -r 54c8d785bd39 Objects/object.c
--- a/Objects/object.c	Sun Aug 14 16:10:31 2016 -0400
+++ b/Objects/object.c	Mon Aug 15 08:12:05 2016 +0900
@@ -17,22 +17,16 @@
 #ifdef Py_REF_DEBUG
 Py_ssize_t _Py_RefTotal;
 
 Py_ssize_t
 _Py_GetRefTotal(void)
 {
     PyObject *o;
     Py_ssize_t total = _Py_RefTotal;
-    /* ignore the references to the dummy object of the dicts and sets
-       because they are not reliable and not useful (now that the
-       hash table code is well-tested) */
-    o = _PyDict_Dummy();
-    if (o != NULL)
-        total -= o->ob_refcnt;
     o = _PySet_Dummy;
     if (o != NULL)
         total -= o->ob_refcnt;
     return total;
 }
 
 void
 _PyDebug_PrintTotalRefs(void) {
diff -r 54c8d785bd39 Objects/odictobject.c
--- a/Objects/odictobject.c	Sun Aug 14 16:10:31 2016 -0400
+++ b/Objects/odictobject.c	Mon Aug 15 08:12:05 2016 +0900
@@ -531,24 +531,27 @@
     }
 }
 
 /* Return the index into the hash table, regardless of a valid node. */
 static Py_ssize_t
 _odict_get_index_raw(PyODictObject *od, PyObject *key, Py_hash_t hash)
 {
     PyObject **value_addr = NULL;
-    PyDictKeyEntry *ep;
     PyDictKeysObject *keys = ((PyDictObject *)od)->ma_keys;
+    Py_ssize_t ix;
 
-    ep = (keys->dk_lookup)((PyDictObject *)od, key, hash, &value_addr);
-    if (ep == NULL)
+    ix = (keys->dk_lookup)((PyDictObject *)od, key, hash, &value_addr, NULL);
+    if (ix == DKIX_EMPTY) {
+        return keys->dk_nentries;  /* index of new entry */
+    }
+    if (ix < 0)
         return -1;
-    /* We use pointer arithmetic to get the entry's index into the table. */
+    /* The lookup already yields the entry's index; no pointer arithmetic. */
-    return ep - keys->dk_entries;
+    return ix;
 }
 
 /* Replace od->od_fast_nodes with a new table matching the size of dict's. */
 static int
 _odict_resize(PyODictObject *od) {
     Py_ssize_t size, i;
     _ODictNode **fast_nodes, *node;
 
@@ -560,17 +563,17 @@
         return -1;
     }
     for (i = 0; i < size; i++)
         fast_nodes[i] = NULL;
 
     /* Copy the current nodes into the table. */
     _odict_FOREACH(od, node) {
         i = _odict_get_index_raw(od, _odictnode_KEY(node),
-                                  _odictnode_HASH(node));
+                                 _odictnode_HASH(node));
         if (i < 0) {
             PyMem_FREE(fast_nodes);
             return -1;
         }
         fast_nodes[i] = node;
     }
 
     /* Replace the old fast nodes table. */
diff -r 54c8d785bd39 PC/pyconfig.h
--- a/PC/pyconfig.h	Sun Aug 14 16:10:31 2016 -0400
+++ b/PC/pyconfig.h	Mon Aug 15 08:12:05 2016 +0900
@@ -380,18 +380,23 @@
 #else
 /* VC6, VS 2002 and eVC4 don't support the C99 LL suffix for 64-bit integer literals */
 #define Py_LL(x) x##I64
 #endif  /* _MSC_VER > 1300  */
 #endif  /* _MSC_VER */
 
 #endif
 
-/* define signed and unsigned exact-width 32-bit and 64-bit types, used in the
-   implementation of Python integers. */
+/* Define signed and unsigned exact-width 16-, 32- and 64-bit types, used in
+   the implementation of Python integers and dicts. */
+#ifndef PY_UINT16_T
+#define HAVE_UINT16_T 1
+#define PY_UINT16_T unsigned short
+#endif
+
 #ifndef PY_UINT32_T
 #if SIZEOF_INT == 4
 #define HAVE_UINT32_T 1
 #define PY_UINT32_T unsigned int
 #elif SIZEOF_LONG == 4
 #define HAVE_UINT32_T 1
 #define PY_UINT32_T unsigned long
 #endif
@@ -399,16 +404,21 @@
 
 #ifndef PY_UINT64_T
 #if SIZEOF_LONG_LONG == 8
 #define HAVE_UINT64_T 1
 #define PY_UINT64_T unsigned PY_LONG_LONG
 #endif
 #endif
 
+#ifndef PY_INT16_T
+#define HAVE_INT16_T 1
+#define PY_INT16_T short
+#endif
+
 #ifndef PY_INT32_T
 #if SIZEOF_INT == 4
 #define HAVE_INT32_T 1
 #define PY_INT32_T int
 #elif SIZEOF_LONG == 4
 #define HAVE_INT32_T 1
 #define PY_INT32_T long
 #endif
diff -r 54c8d785bd39 configure
--- a/configure	Sun Aug 14 16:10:31 2016 -0400
+++ b/configure	Mon Aug 15 08:12:05 2016 +0900
@@ -8106,16 +8106,36 @@
 
 # There are two separate checks for each of the exact-width integer types we
 # need.  First we check whether the type is available using the usual
 # AC_CHECK_TYPE macro with the default includes (which includes <inttypes.h>
 # and <stdint.h> where available).  We then also use the special type checks of
 # the form AC_TYPE_UINT32_T, which in the case that uint32_t is not available
 # directly, #define's uint32_t to be a suitable type.
 
+ac_fn_c_check_type "$LINENO" "uint16_t" "ac_cv_type_uint16_t" "$ac_includes_default"
+if test "x$ac_cv_type_uint16_t" = xyes; then :
+
+$as_echo "#define HAVE_UINT16_T 1" >>confdefs.h
+
+fi
+
+ac_fn_c_find_uintX_t "$LINENO" "16" "ac_cv_c_uint16_t"
+case $ac_cv_c_uint16_t in #(
+  no|yes) ;; #(
+  *)
+
+
+cat >>confdefs.h <<_ACEOF
+#define uint16_t $ac_cv_c_uint16_t
+_ACEOF
+;;
+  esac
+
+
 ac_fn_c_check_type "$LINENO" "uint32_t" "ac_cv_type_uint32_t" "$ac_includes_default"
 if test "x$ac_cv_type_uint32_t" = xyes; then :
 
 $as_echo "#define HAVE_UINT32_T 1" >>confdefs.h
 
 fi
 
 ac_fn_c_find_uintX_t "$LINENO" "32" "ac_cv_c_uint32_t"
@@ -8150,16 +8170,35 @@
 
 cat >>confdefs.h <<_ACEOF
 #define uint64_t $ac_cv_c_uint64_t
 _ACEOF
 ;;
   esac
 
 
+ac_fn_c_check_type "$LINENO" "int16_t" "ac_cv_type_int16_t" "$ac_includes_default"
+if test "x$ac_cv_type_int16_t" = xyes; then :
+
+$as_echo "#define HAVE_INT16_T 1" >>confdefs.h
+
+fi
+
+ac_fn_c_find_intX_t "$LINENO" "16" "ac_cv_c_int16_t"
+case $ac_cv_c_int16_t in #(
+  no|yes) ;; #(
+  *)
+
+cat >>confdefs.h <<_ACEOF
+#define int16_t $ac_cv_c_int16_t
+_ACEOF
+;;
+esac
+
+
 ac_fn_c_check_type "$LINENO" "int32_t" "ac_cv_type_int32_t" "$ac_includes_default"
 if test "x$ac_cv_type_int32_t" = xyes; then :
 
 $as_echo "#define HAVE_INT32_T 1" >>confdefs.h
 
 fi
 
 ac_fn_c_find_intX_t "$LINENO" "32" "ac_cv_c_int32_t"
diff -r 54c8d785bd39 configure.ac
--- a/configure.ac	Sun Aug 14 16:10:31 2016 -0400
+++ b/configure.ac	Mon Aug 15 08:12:05 2016 +0900
@@ -2036,24 +2036,32 @@
 
 # There are two separate checks for each of the exact-width integer types we
 # need.  First we check whether the type is available using the usual
 # AC_CHECK_TYPE macro with the default includes (which includes <inttypes.h>
 # and <stdint.h> where available).  We then also use the special type checks of
 # the form AC_TYPE_UINT32_T, which in the case that uint32_t is not available
 # directly, #define's uint32_t to be a suitable type.
 
+AC_CHECK_TYPE(uint16_t,
+  AC_DEFINE(HAVE_UINT16_T, 1, [Define if your compiler provides uint16_t.]),,)
+AC_TYPE_UINT16_T
+
 AC_CHECK_TYPE(uint32_t,
   AC_DEFINE(HAVE_UINT32_T, 1, [Define if your compiler provides uint32_t.]),,)
 AC_TYPE_UINT32_T
 
 AC_CHECK_TYPE(uint64_t,
   AC_DEFINE(HAVE_UINT64_T, 1, [Define if your compiler provides uint64_t.]),,)
 AC_TYPE_UINT64_T
 
+AC_CHECK_TYPE(int16_t,
+  AC_DEFINE(HAVE_INT16_T, 1, [Define if your compiler provides int16_t.]),,)
+AC_TYPE_INT16_T
+
 AC_CHECK_TYPE(int32_t,
   AC_DEFINE(HAVE_INT32_T, 1, [Define if your compiler provides int32_t.]),,)
 AC_TYPE_INT32_T
 
 AC_CHECK_TYPE(int64_t,
   AC_DEFINE(HAVE_INT64_T, 1, [Define if your compiler provides int64_t.]),,)
 AC_TYPE_INT64_T
 
diff -r 54c8d785bd39 pyconfig.h.in
--- a/pyconfig.h.in	Sun Aug 14 16:10:31 2016 -0400
+++ b/pyconfig.h.in	Mon Aug 15 08:12:05 2016 +0900
@@ -479,16 +479,19 @@
 #undef HAVE_INET_ATON
 
 /* Define if you have the 'inet_pton' function. */
 #undef HAVE_INET_PTON
 
 /* Define to 1 if you have the `initgroups' function. */
 #undef HAVE_INITGROUPS
 
+/* Define if your compiler provides int16_t. */
+#undef HAVE_INT16_T
+
 /* Define if your compiler provides int32_t. */
 #undef HAVE_INT32_T
 
 /* Define if your compiler provides int64_t. */
 #undef HAVE_INT64_T
 
 /* Define to 1 if you have the <inttypes.h> header file. */
 #undef HAVE_INTTYPES_H
@@ -1135,16 +1138,19 @@
 
 /* Define to 1 if you don't have `tm_zone' but do have the external array
    `tzname'. */
 #undef HAVE_TZNAME
 
 /* Define this if you have tcl and TCL_UTF_MAX==6 */
 #undef HAVE_UCS4_TCL
 
+/* Define if your compiler provides uint16_t. */
+#undef HAVE_UINT16_T
+
 /* Define if your compiler provides uint32_t. */
 #undef HAVE_UINT32_T
 
 /* Define if your compiler provides uint64_t. */
 #undef HAVE_UINT64_T
 
 /* Define to 1 if the system has the type `uintptr_t'. */
 #undef HAVE_UINTPTR_T
@@ -1513,16 +1519,20 @@
 #undef gid_t
 
 /* Define to `__inline__' or `__inline' if that's what the C compiler
    calls it, or to nothing if 'inline' is not supported under any name.  */
 #ifndef __cplusplus
 #undef inline
 #endif
 
+/* Define to the type of a signed integer type of width exactly 16 bits if
+   such a type exists and the standard includes do not define it. */
+#undef int16_t
+
 /* Define to the type of a signed integer type of width exactly 32 bits if
    such a type exists and the standard includes do not define it. */
 #undef int32_t
 
 /* Define to the type of a signed integer type of width exactly 64 bits if
    such a type exists and the standard includes do not define it. */
 #undef int64_t
 
@@ -1542,16 +1552,20 @@
 #undef size_t
 
 /* Define to `int' if <sys/socket.h> does not define. */
 #undef socklen_t
 
 /* Define to `int' if <sys/types.h> doesn't define. */
 #undef uid_t
 
+/* Define to the type of an unsigned integer type of width exactly 16 bits if
+   such a type exists and the standard includes do not define it. */
+#undef uint16_t
+
 /* Define to the type of an unsigned integer type of width exactly 32 bits if
    such a type exists and the standard includes do not define it. */
 #undef uint32_t
 
 /* Define to the type of an unsigned integer type of width exactly 64 bits if
    such a type exists and the standard includes do not define it. */
 #undef uint64_t