diff -r 3e02d70cd07b Modules/_pickle.c
--- a/Modules/_pickle.c	Thu Apr 18 01:44:27 2013 +0200
+++ b/Modules/_pickle.c	Thu Apr 18 02:37:35 2013 -0700
@@ -380,6 +380,9 @@
 } UnpicklerObject;
 
 /* Forward declarations */
+typedef int (*save_func_t)(PicklerObject *, PyObject *);
+static int save_with_type_caching(PicklerObject *, PyObject *, int,
+                                 PyTypeObject **, save_func_t *);
 static int save(PicklerObject *, PyObject *, int);
 static int save_reduce(PicklerObject *, PyObject *, PyObject *);
 static PyTypeObject Pickler_Type;
@@ -489,6 +492,8 @@
     PyMemoEntry *entry;
     Py_hash_t hash = (Py_hash_t)key >> 3;
 
+    assert(key != NULL);
+
     i = hash & mask;
     entry = &table[i];
     if (entry->me_key == NULL || entry->me_key == key)
@@ -1282,6 +1287,8 @@
     if (self->fast)
         return 0;
 
+    assert(!PyMemoTable_Get(self->memo, obj));
+
     x = PyMemoTable_Size(self->memo);
     if (PyMemoTable_Set(self->memo, obj, x) < 0)
         goto error;
@@ -1707,6 +1714,15 @@
 static int
 save_bytes(PicklerObject *self, PyObject *obj)
 {
+    /* Check the memo to see if it has the object. If so, generate
+       a GET (or BINGET) opcode, instead of pickling the object
+       once again. */
+    if (PyMemoTable_Get(self->memo, obj)) {
+        if (memo_get(self, obj) < 0)
+            return -1;
+        return 0;
+    }
+
     if (self->proto < 3) {
         /* Older pickle protocols do not have an opcode for pickling bytes
            objects. Therefore, we need to fake the copy protocol (i.e.,
@@ -1932,6 +1948,15 @@
 static int
 save_unicode(PicklerObject *self, PyObject *obj)
 {
+    /* Check the memo to see if it has the object. If so, generate
+       a GET (or BINGET) opcode, instead of pickling the object
+       once again. */
+    if (PyMemoTable_Get(self->memo, obj)) {
+        if (memo_get(self, obj) < 0)
+            return -1;
+        return 0;
+    }
+
     if (self->bin) {
         if (write_unicode_binary(self, obj) < 0)
             return -1;
@@ -2003,6 +2028,15 @@
     const char pop_mark_op = POP_MARK;
     const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
 
+    /* Check the memo to see if it has the object. If so, generate
+       a GET (or BINGET) opcode, instead of pickling the object
+       once again. */
+    if (PyMemoTable_Get(self->memo, obj)) {
+        if (memo_get(self, obj) < 0)
+            return -1;
+        return 0;
+    }
+
     if ((len = PyTuple_Size(obj)) < 0)
         return -1;
 
@@ -2219,6 +2253,8 @@
 {
     PyObject *item = NULL;
     Py_ssize_t this_batch, total;
+    save_func_t save_func = NULL;
+    PyTypeObject *cached_type = NULL;
 
     const char append_op = APPEND;
     const char appends_op = APPENDS;
@@ -2245,8 +2281,22 @@
             return -1;
         while (total < PyList_GET_SIZE(obj)) {
             item = PyList_GET_ITEM(obj, total);
-            if (save(self, item, 0) < 0)
+
+            /* If the type of this item is the same as the previous item,
+               reuse the pickling function that was used for the previous
+               item. This saves us the dispatch cost in the save() function,
+               which can be significant in this hot loop. We disable this
+               optimization if the persistent IDs feature of pickle is being
+               used. */
+            if (Py_TYPE(item) == cached_type && !self->pers_func) {
+                if (save_func(self, item) < 0)
+                    return -1;
+            }
+            else if (save_with_type_caching(self, item, 0,
+                                            &cached_type,
+                                            &save_func) < 0) {
                 return -1;
+            }
             total++;
             if (++this_batch == BATCHSIZE)
                 break;
@@ -2269,6 +2319,15 @@
     if (self->fast && !fast_save_enter(self, obj))
         goto error;
 
+    /* Check the memo to see if it has the object. If so, generate
+       a GET (or BINGET) opcode, instead of pickling the object
+       once again. */
+    if (PyMemoTable_Get(self->memo, obj)) {
+        if (memo_get(self, obj) < 0)
+            return -1;
+        return 0;
+    }
+
     /* Create an empty list. */
     if (self->bin) {
         header[0] = EMPTY_LIST;
@@ -2470,6 +2529,8 @@
     PyObject *key = NULL, *value = NULL;
     int i;
     Py_ssize_t dict_size, ppos = 0;
+    save_func_t key_save_func = NULL, value_save_func = NULL;
+    PyTypeObject *key_cached_type = NULL, *value_cached_type = NULL;
 
     const char mark_op = MARK;
     const char setitem_op = SETITEM;
@@ -2497,14 +2558,36 @@
         i = 0;
         if (_Pickler_Write(self, &mark_op, 1) < 0)
             return -1;
+
         while (PyDict_Next(obj, &ppos, &key, &value)) {
-            if (save(self, key, 0) < 0)
+            /* If the type of this item is the same as the previous item,
+               reuse the pickling function that was used for the previous
+               item. This saves us the dispatch cost in the save() function,
+               which can be significant in this hot loop. We disable this
+               optimization if the persistent IDs feature of pickle is being
+               used. */
+            if (Py_TYPE(key) == key_cached_type && !self->pers_func) {
+                if (key_save_func(self, key) < 0)
+                    return -1;
+            }
+            else if (save_with_type_caching(self, key, 0,
+                                            &key_cached_type,
+                                            &key_save_func) < 0) {
                 return -1;
-            if (save(self, value, 0) < 0)
+            }
+            if (Py_TYPE(value) == value_cached_type && !self->pers_func) {
+                if (value_save_func(self, value) < 0)
+                    return -1;
+            }
+            else if (save_with_type_caching(self, value, 0,
+                                            &value_cached_type,
+                                            &value_save_func) < 0) {
                 return -1;
+            }
             if (++i == BATCHSIZE)
                 break;
         }
+
         if (_Pickler_Write(self, &setitems_op, 1) < 0)
             return -1;
         if (PyDict_Size(obj) != dict_size) {
@@ -2529,6 +2612,15 @@
     if (self->fast && !fast_save_enter(self, obj))
         goto error;
 
+    /* Check the memo to see if it has the object. If so, generate
+       a GET (or BINGET) opcode, instead of pickling the object
+       once again. */
+    if (PyMemoTable_Get(self->memo, obj)) {
+        if (memo_get(self, obj) < 0)
+            return -1;
+        return 0;
+    }
+
     /* Create an empty dict. */
     if (self->bin) {
         header[0] = EMPTY_DICT;
@@ -2602,6 +2694,15 @@
 
     const char global_op = GLOBAL;
 
+    /* Check the memo to see if it has the object. If so, generate
+       a GET (or BINGET) opcode, instead of pickling the object
+       once again. */
+    if (PyMemoTable_Get(self->memo, obj)) {
+        if (memo_get(self, obj) < 0)
+            return -1;
+        return 0;
+    }
+
     if (name_str == NULL) {
         name_str = PyUnicode_InternFromString("__name__");
         if (name_str == NULL)
@@ -2964,6 +3065,15 @@
     const char build_op = BUILD;
     const char newobj_op = NEWOBJ;
 
+    /* Check the memo to see if it has the object. If so, generate
+       a GET (or BINGET) opcode, instead of pickling the object
+       once again. */
+    if (PyMemoTable_Get(self->memo, obj)) {
+        if (memo_get(self, obj) < 0)
+            return -1;
+        return 0;
+    }
+
     size = PyTuple_Size(args);
     if (size < 2 || size > 5) {
         PyErr_SetString(PicklingError, "tuple returned by "
@@ -3137,7 +3247,8 @@
 }
 
 static int
-save(PicklerObject *self, PyObject *obj, int pers_save)
+save_with_type_caching(PicklerObject *self, PyObject *obj, int pers_save,
+                       PyTypeObject **cached_type, save_func_t *save_func)
 {
     PyTypeObject *type;
     PyObject *reduce_func = NULL;
@@ -3166,8 +3277,6 @@
        since benchmarks shown that this optimization was actually slowing
        things down. */
 
-    /* Atom types; these aren't memoized, so don't check the memo. */
-
     if (obj == Py_None) {
         status = save_none(self, obj);
         goto done;
@@ -3180,46 +3289,52 @@
         status = save_notimplemented(self, obj);
         goto done;
     }
-    else if (obj == Py_False || obj == Py_True) {
+    else if (type == &PyBool_Type) {
         status = save_bool(self, obj);
+        *cached_type = type;
+        *save_func = save_bool;
         goto done;
     }
     else if (type == &PyLong_Type) {
         status = save_long(self, obj);
+        *cached_type = type;
+        *save_func = save_long;
         goto done;
     }
     else if (type == &PyFloat_Type) {
         status = save_float(self, obj);
+        *cached_type = type;
+        *save_func = save_float;
         goto done;
     }
-
-    /* Check the memo to see if it has the object. If so, generate
-       a GET (or BINGET) opcode, instead of pickling the object
-       once again. */
-    if (PyMemoTable_Get(self->memo, obj)) {
-        if (memo_get(self, obj) < 0)
-            goto error;
-        goto done;
-    }
-
-    if (type == &PyBytes_Type) {
+    else if (type == &PyBytes_Type) {
         status = save_bytes(self, obj);
+        *cached_type = type;
+        *save_func = save_bytes;
         goto done;
     }
     else if (type == &PyUnicode_Type) {
         status = save_unicode(self, obj);
+        *cached_type = type;
+        *save_func = save_unicode;
         goto done;
     }
     else if (type == &PyDict_Type) {
         status = save_dict(self, obj);
+        *cached_type = type;
+        *save_func = save_dict;
         goto done;
     }
     else if (type == &PyList_Type) {
         status = save_list(self, obj);
+        *cached_type = type;
+        *save_func = save_list;
         goto done;
     }
     else if (type == &PyTuple_Type) {
         status = save_tuple(self, obj);
+        *cached_type = type;
+        *save_func = save_tuple;
         goto done;
     }
     else if (type == &PyType_Type) {
@@ -3241,6 +3356,16 @@
         goto done;
     }
 
+    /* save_global() and save_reduce() check the memo themselves, but doing
+       it here is cheap and avoids running the reduce protocol again for an
+       already memoized object. Exit via the error/done labels, as the
+       other branches do, so the code at those labels is not skipped. */
+    if (PyMemoTable_Get(self->memo, obj)) {
+        if (memo_get(self, obj) < 0)
+            goto error;
+        goto done;
+    }
+
     /* XXX: This part needs some unit tests. */
 
     /* Get a reduction callable, and call it.  This may come from
@@ -3347,6 +3472,16 @@
 }
 
 static int
+save(PicklerObject *self, PyObject *obj, int pers_save)
+{
+    /* Pickle a single object with no type cache carried between calls:
+       the cache slots are throwaway locals that are dropped on return. */
+    PyTypeObject *no_type = NULL;
+    save_func_t no_func = NULL;
+    return save_with_type_caching(self, obj, pers_save, &no_type, &no_func);
+}
+
+static int
 dump(PicklerObject *self, PyObject *obj)
 {
     const char stop_op = STOP;