diff -r 1267d64c14b3 Lib/sched.py
--- a/Lib/sched.py	Thu Nov 01 14:52:03 2012 +0200
+++ b/Lib/sched.py	Thu Nov 01 18:06:28 2012 +0200
@@ -124,27 +124,29 @@
         """
         # localize variable access to minimize overhead
         # and to improve thread safety
-        with self._lock:
-            q = self._queue
-            delayfunc = self.delayfunc
-            timefunc = self.timefunc
-            pop = heapq.heappop
-            while q:
-                time, priority, action, argument, kwargs = checked_event = q[0]
+        lock = self._lock
+        q = self._queue
+        delayfunc = self.delayfunc
+        timefunc = self.timefunc
+        pop = heapq.heappop
+        while True:
+            with lock:
+                if not q:
+                    break
+                time, priority, action, argument, kwargs = q[0]
                 now = timefunc()
-                if now < time:
-                    if not blocking:
-                        return time - now
-                    delayfunc(time - now)
+                if time > now:
+                    delay = True
                 else:
-                    event = pop(q)
-                    # Verify that the event was not removed or altered
-                    # by another thread after we last looked at q[0].
-                    if event is checked_event:
-                        action(*argument, **kwargs)
-                        delayfunc(0)   # Let other threads run
-                    else:
-                        heapq.heappush(q, event)
+                    delay = False
+                    pop(q)
+            if delay:
+                if not blocking:
+                    return time - now
+                delayfunc(time - now)
+            else:
+                action(*argument, **kwargs)
+                delayfunc(0)   # Let other threads run
 
     @property
     def queue(self):
diff -r 1267d64c14b3 Lib/test/test_sched.py
--- a/Lib/test/test_sched.py	Thu Nov 01 14:52:03 2012 +0200
+++ b/Lib/test/test_sched.py	Thu Nov 01 18:06:28 2012 +0200
@@ -2,9 +2,13 @@
 
 import sched
 import time
+import threading
 import unittest
 from test import support
-
+try:
+    import threading
+except ImportError:
+    threading = None
 
 class TestCase(unittest.TestCase):
 
@@ -26,6 +30,20 @@
         scheduler.run()
         self.assertEqual(l, [0.01, 0.02, 0.03, 0.04, 0.05])
 
+    @unittest.skipUnless(threading, 'Threading required for this test.')
+    def test_enter_concurrent(self):
+        l = []  # arguments of the fired events, in firing order
+        fun = lambda x: l.append(x)
+        scheduler = sched.scheduler(time.time, time.sleep)
+        scheduler.enter(0.03, 1, fun, (0.03,))
+        t = threading.Thread(target=scheduler.run)
+        t.start()  # scheduler.run is started in a second thread
+        for x in [0.05, 0.04, 0.02, 0.01]:
+            scheduler.enter(x, 1, fun, (x,))  # added while run() may be active
+        scheduler.run()  # the main thread drains the same queue as well
+        t.join()
+        self.assertEqual(l, [0.01, 0.02, 0.03, 0.04, 0.05])
+
     def test_priority(self):
         l = []
         fun = lambda x: l.append(x)
@@ -50,6 +68,24 @@
         scheduler.run()
         self.assertEqual(l, [0.02, 0.03, 0.04])
 
+    @unittest.skipUnless(threading, 'Threading required for this test.')
+    def test_cancel_concurrent(self):
+        l = []  # arguments of the fired events, in firing order
+        fun = lambda x: l.append(x)
+        scheduler = sched.scheduler(time.time, time.sleep)
+        now = time.time()  # common base: the five deadlines are 10 ms apart
+        event1 = scheduler.enterabs(now + 0.01, 1, fun, (0.01,))
+        event2 = scheduler.enterabs(now + 0.02, 1, fun, (0.02,))
+        event3 = scheduler.enterabs(now + 0.03, 1, fun, (0.03,))
+        event4 = scheduler.enterabs(now + 0.04, 1, fun, (0.04,))
+        event5 = scheduler.enterabs(now + 0.05, 1, fun, (0.05,))
+        t = threading.Thread(target=scheduler.run)
+        t.start()  # scheduler.run is started in a second thread
+        scheduler.cancel(event1)  # NOTE(review): races with the 10 ms deadline
+        scheduler.cancel(event5)
+        t.join()
+        self.assertEqual(l, [0.02, 0.03, 0.04])  # first and last were cancelled
+
     def test_empty(self):
         l = []
         fun = lambda x: l.append(x)
@@ -97,7 +133,6 @@
         scheduler.run(blocking=False)
         self.assertEqual(l, [])
 
-
 def test_main():
     support.run_unittest(TestCase)
 
diff -r 1267d64c14b3 Lib/test/test_zlib.py
--- a/Lib/test/test_zlib.py	Thu Nov 01 14:52:03 2012 +0200
+++ b/Lib/test/test_zlib.py	Thu Nov 01 18:06:28 2012 +0200
@@ -487,6 +487,31 @@
         dco.flush()
         self.assertFalse(dco.eof)
 
+    def test_decompress_unused_data(self):
+        # Feed a compressed stream followed by trailing bytes in chunks of
+        # several sizes.  After EOF the trailing bytes must be reported by
+        # both unconsumed_tail and unused_data.
+        payload = b'abcdefghijklmnopqrstuvwxyz'
+        tail = b'0123456789'
+        x = zlib.compress(payload) + tail
+        for step in 1, 2, 100:
+            dco = zlib.decompressobj()
+            data = b''
+            for i in range(0, len(x), step):
+                data += dco.decompress(dco.unconsumed_tail + x[i:i+step], 1000)
+            # Keep calling with max_length=1 until no more output is produced.
+            while True:
+                c = dco.decompress(dco.unconsumed_tail, 1)
+                if not c:
+                    break
+                data += c
+            data += dco.flush()
+
+            self.assertTrue(dco.eof)
+            self.assertEqual(data, payload)
+            self.assertEqual(dco.unconsumed_tail, tail)
+            self.assertEqual(dco.unused_data, tail)
+
     if hasattr(zlib.compressobj(), "copy"):
         def test_compresscopy(self):
             # Test copying a compression object
diff -r 1267d64c14b3 Makefile.pre.in
--- a/Makefile.pre.in	Thu Nov 01 14:52:03 2012 +0200
+++ b/Makefile.pre.in	Thu Nov 01 18:06:28 2012 +0200
@@ -220,7 +220,7 @@
 # Used of signalmodule.o is not available
 SIGNAL_OBJS=	@SIGNAL_OBJS@
 
-IO_H=		Modules/_io/_iomodule.h
+IO_H=		$(srcdir)/Modules/_io/_iomodule.h
 
 IO_OBJS=	\
 		Modules/_io/_iomodule.o \
@@ -688,28 +688,30 @@
 				$(srcdir)/Objects/unicodetype_db.h
 
 BYTESTR_DEPS = \
-		$(srcdir)/Include/bytes_methods.h \
 		$(srcdir)/Objects/stringlib/count.h \
 		$(srcdir)/Objects/stringlib/ctype.h \
-		$(srcdir)/Objects/stringlib/eq.h \
 		$(srcdir)/Objects/stringlib/fastsearch.h \
 		$(srcdir)/Objects/stringlib/find.h \
-		$(srcdir)/Objects/stringlib/find_max_char.h \
 		$(srcdir)/Objects/stringlib/join.h \
 		$(srcdir)/Objects/stringlib/partition.h \
 		$(srcdir)/Objects/stringlib/split.h \
 		$(srcdir)/Objects/stringlib/stringdefs.h \
-		$(srcdir)/Objects/stringlib/transmogrify.h \
-		$(srcdir)/Objects/stringlib/unicodedefs.h \
-		$(srcdir)/Objects/stringlib/localeutil.h \
-		$(srcdir)/Objects/stringlib/undef.h
+		$(srcdir)/Objects/stringlib/transmogrify.h
 
-UNICODE_DEPS = $(BYTESTR_DEPS) \
+UNICODE_DEPS = \
 		$(srcdir)/Objects/stringlib/asciilib.h \
 		$(srcdir)/Objects/stringlib/codecs.h \
+		$(srcdir)/Objects/stringlib/count.h \
+		$(srcdir)/Objects/stringlib/fastsearch.h \
+		$(srcdir)/Objects/stringlib/find.h \
+		$(srcdir)/Objects/stringlib/find_max_char.h \
+		$(srcdir)/Objects/stringlib/localeutil.h \
+		$(srcdir)/Objects/stringlib/partition.h \
+		$(srcdir)/Objects/stringlib/split.h \
 		$(srcdir)/Objects/stringlib/ucs1lib.h \
 		$(srcdir)/Objects/stringlib/ucs2lib.h \
 		$(srcdir)/Objects/stringlib/ucs4lib.h \
+		$(srcdir)/Objects/stringlib/undef.h \
 		$(srcdir)/Objects/stringlib/unicode_format.h \
 		$(srcdir)/Objects/stringlib/unicodedefs.h
 
@@ -727,15 +729,43 @@
 
 Python/ceval.o: $(OPCODETARGETS_H) $(srcdir)/Python/ceval_gil.h
 
-Python/formatter_unicode.o: $(srcdir)/Python/formatter_unicode.c \
-				$(BYTESTR_DEPS)
-
 Python/frozen.o: Python/importlib.h
 
 Objects/typeobject.o: Objects/typeslots.inc
 Objects/typeslots.inc: $(srcdir)/Include/typeslots.h $(srcdir)/Objects/typeslots.py
 	$(PYTHON) $(srcdir)/Objects/typeslots.py < $(srcdir)/Include/typeslots.h > Objects/typeslots.inc
 
+# cjkcodecs module
+CJKCODECS_DEPS = $(srcdir)/Modules/cjkcodecs/cjkcodecs.h \
+		$(srcdir)/Modules/cjkcodecs/multibytecodec.h
+# Modules/cjkcodecs/_codecs_cn.o Modules/cjkcodecs/_codecs_hk.o Modules/cjkcodecs/_codecs_iso2022.o Modules/cjkcodecs/_codecs_jp.o Modules/cjkcodecs/_codecs_kr.o Modules/cjkcodecs/_codecs_tw.o: $(srcdir)/Modules/cjkcodecs/cjkcodecs.h $(srcdir)/Modules/cjkcodecs/multibytecodec.h
+Modules/cjkcodecs/_codecs_cn.o: $(CJKCODECS_DEPS) \
+		$(srcdir)/Modules/cjkcodecs/mappings_cn.h
+Modules/cjkcodecs/_codecs_hk.o: $(CJKCODECS_DEPS) \
+		$(srcdir)/Modules/cjkcodecs/mappings_hk.h
+Modules/cjkcodecs/_codecs_iso2022.o: $(CJKCODECS_DEPS) \
+		$(srcdir)/Modules/cjkcodecs/alg_jisx0201.h \
+		$(srcdir)/Modules/cjkcodecs/emu_jisx0213_2000.h \
+		$(srcdir)/Modules/cjkcodecs/mappings_jisx0213_pair.h
+Modules/cjkcodecs/_codecs_jp.o: $(CJKCODECS_DEPS) \
+		$(srcdir)/Modules/cjkcodecs/mappings_jp.h \
+		$(srcdir)/Modules/cjkcodecs/mappings_jisx0213_pair.h \
+		$(srcdir)/Modules/cjkcodecs/alg_jisx0201.h \
+		$(srcdir)/Modules/cjkcodecs/emu_jisx0213_2000.h
+Modules/cjkcodecs/_codecs_kr.o: $(CJKCODECS_DEPS) \
+		$(srcdir)/Modules/cjkcodecs/mappings_kr.h
+Modules/cjkcodecs/_codecs_tw.o: $(CJKCODECS_DEPS) \
+		$(srcdir)/Modules/cjkcodecs/mappings_tw.h
+Modules/cjkcodecs/multibytecodec.o: $(CJKCODECS_DEPS) \
+		$(srcdir)/Modules/cjkcodecs/multibytecodec.h
+
+# multiprocessing module
+Modules/_multiprocessing/multiprocessing.o: $(srcdir)/Modules/_multiprocessing/multiprocessing.h
+Modules/_multiprocessing/semaphore.o: $(srcdir)/Modules/_multiprocessing/multiprocessing.h
+
+# multiprocessing module
+# multiprocessing.o semaphore.o: Modules/_multiprocessing/multiprocessing.h
+
 ############################################################################
 # Header files
 
diff -r 1267d64c14b3 Modules/zipimport.c
--- a/Modules/zipimport.c	Thu Nov 01 14:52:03 2012 +0200
+++ b/Modules/zipimport.c	Thu Nov 01 18:06:28 2012 +0200
@@ -876,6 +876,7 @@
         return NULL;
     }
 
+//     fprintf(stderr, "fseek end%ld\n", (long)(-22));
     if (fseek(fp, -22, SEEK_END) == -1) {
         fclose(fp);
         PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
@@ -904,18 +905,26 @@
         goto error;
 
     /* Start of Central Directory */
+    if (fseek(fp, header_offset, 0) == -1)
+        goto fseek_error;
     count = 0;
     for (;;) {
         PyObject *t;
         int err;
 
-        if (fseek(fp, header_offset, 0) == -1)  /* Start of file header */
-            goto fseek_error;
+/*        if (ftell(fp) != header_offset) {
+            fprintf(stderr, "aaa %ld %ld\n", (long)ftell(fp), (long)(header_offset));
+        }*/
+//         fprintf(stderr, "fseekheader_offset %ld\n", (long)(header_offset));
+//         if (fseek(fp, header_offset, 0) == -1)  /* Start of file header */
+//             goto fseek_error;
         l = PyMarshal_ReadLongFromFile(fp);
         if (l != 0x02014B50)
             break;              /* Bad: Central Dir File Header */
-        if (fseek(fp, header_offset + 8, 0) == -1)
-            goto fseek_error;
+//         fprintf(stderr, "fseek %ld\n", (long)(header_offset + 8));
+//         if (fseek(fp, header_offset + 8, 0) == -1)
+//             goto fseek_error;
+        /**/PyMarshal_ReadLongFromFile(fp);
         flags = (unsigned short)PyMarshal_ReadShortFromFile(fp);
         compress = PyMarshal_ReadShortFromFile(fp);
         time = PyMarshal_ReadShortFromFile(fp);
@@ -924,11 +933,14 @@
         data_size = PyMarshal_ReadLongFromFile(fp);
         file_size = PyMarshal_ReadLongFromFile(fp);
         name_size = PyMarshal_ReadShortFromFile(fp);
-        header_size = 46 + name_size +
+        header_size = /*46 + name_size +*/
            PyMarshal_ReadShortFromFile(fp) +
            PyMarshal_ReadShortFromFile(fp);
-        if (fseek(fp, header_offset + 42, 0) == -1)
-            goto fseek_error;
+        /**/PyMarshal_ReadLongFromFile(fp);
+        /**/PyMarshal_ReadLongFromFile(fp);
+//         fprintf(stderr, "fseek %ld\n", (long)(header_offset + 42));
+//         if (fseek(fp, header_offset + 42, 0) == -1)
+//             goto fseek_error;
         file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
         if (name_size > MAXPATHLEN)
             name_size = MAXPATHLEN;
@@ -941,7 +953,14 @@
             p++;
         }
         *p = 0;         /* Add terminating null byte */
-        header_offset += header_size;
+        header_offset += 46 + name_size + header_size;
+        while(header_size--) getc(fp);
+//         fprintf(stderr, "fseek +%ld\n", (long)(header_size));
+//         if (fseek(fp, header_size, SEEK_CUR) == -1)  /* Start of file header */
+//             goto fseek_error;
+//         if (ftell(fp) != header_offset) {
+//             fprintf(stderr, "bbb %ld %ld\n", (long)ftell(fp), (long)(header_offset));
+//         }
 
         bootstrap = 0;
         if (flags & 0x0800)
@@ -1064,6 +1083,7 @@
     }
 
     /* Check to make sure the local file header is correct */
+//     fprintf(stderr, "fseekfile_offset %ld\n", (long)(file_offset));
     if (fseek(fp, file_offset, 0) == -1) {
         fclose(fp);
         PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
@@ -1079,7 +1099,8 @@
         fclose(fp);
         return NULL;
     }
-    if (fseek(fp, file_offset + 26, 0) == -1) {
+//     fprintf(stderr, "fseek %ld\n", (long)(file_offset + 26));
+    if (fseek(fp, /*file_offset + 26*/22, SEEK_CUR) == -1) {
         fclose(fp);
         PyErr_Format(ZipImportError, "can't read Zip file: %R", archive);
         return NULL;
@@ -1100,6 +1121,7 @@
     }
     buf = PyBytes_AsString(raw_data);
 
+//     fprintf(stderr, "fseekfile_offset2 %ld\n", (long)(file_offset));
     err = fseek(fp, file_offset, 0);
     if (err == 0) {
         bytes_read = fread(buf, 1, data_size, fp);
diff -r 1267d64c14b3 Modules/zlibmodule.c
--- a/Modules/zlibmodule.c	Thu Nov 01 14:52:03 2012 +0200
+++ b/Modules/zlibmodule.c	Thu Nov 01 18:06:28 2012 +0200
@@ -579,6 +579,7 @@
     unsigned int inplen;
     Py_ssize_t old_length, length = DEFAULTALLOC;
     PyObject *RetVal = NULL;
+    PyObject *new_bytes;
     Py_buffer pinput;
     Byte *input;
     unsigned long start_total_out;
@@ -668,44 +669,65 @@
         Py_END_ALLOW_THREADS
     }
 
-    if(max_length) {
-        /* Not all of the compressed data could be accommodated in a buffer of
-           the specified size. Return the unconsumed tail in an attribute. */
+    if (err == Z_STREAM_END) {
+        /* The end of the compressed data has been reached, so set the
+           unused_data attribute to a string containing the remainder of the
+           data in the string.  Note that this is also a logical place to call
+           inflateEnd, but the old behaviour of only calling it on flush() is
+           preserved.
+        */
+        if (self->zst.avail_in) {
+//             size_t old_size = PyBytes_GET_SIZE(self->unused_data);
+//             if(self->zst.avail_in > PY_SSIZE_T_MAX - old_size) {
+//                 PyErr_NoMemory();
+//                 Py_DECREF(RetVal);
+//                 RetVal = NULL;
+//                 goto error;
+//             }
+//             new_bytes = PyBytes_FromStringAndSize(NULL,
+//                 old_size + self->zst.avail_in);
+            new_bytes = PyBytes_FromStringAndSize((char *)self->zst.next_in, self->zst.avail_in);
+            if (new_bytes == NULL) {
+                Py_DECREF(RetVal);
+                RetVal = NULL;
+                goto error;
+            }
+//             Py_MEMCPY(PyBytes_AS_STRING(new_bytes),
+//                 PyBytes_AS_STRING(self->unused_data), old_size);
+//             Py_MEMCPY(PyBytes_AS_STRING(new_bytes) + old_size,
+//                 self->zst.next_in, self->zst.avail_in);
+            Py_DECREF(self->unused_data);
+            self->unused_data = new_bytes;
+        }
+        Py_INCREF(self->unused_data);
         Py_DECREF(self->unconsumed_tail);
-        self->unconsumed_tail = PyBytes_FromStringAndSize((char *)self->zst.next_in,
-                                                           self->zst.avail_in);
+        self->unconsumed_tail = self->unused_data;
+        self->eof = 1;
     }
-    else if (PyBytes_GET_SIZE(self->unconsumed_tail) > 0) {
-        /* All of the compressed data was consumed. Clear unconsumed_tail. */
-        Py_DECREF(self->unconsumed_tail);
-        self->unconsumed_tail = PyBytes_FromStringAndSize("", 0);
-    }
-    if (self->unconsumed_tail == NULL) {
-        Py_DECREF(RetVal);
-        RetVal = NULL;
-        goto error;
-    }
-
-    /* The end of the compressed data has been reached, so set the
-       unused_data attribute to a string containing the remainder of the
-       data in the string.  Note that this is also a logical place to call
-       inflateEnd, but the old behaviour of only calling it on flush() is
-       preserved.
-    */
-    if (err == Z_STREAM_END) {
-        Py_XDECREF(self->unused_data);  /* Free original empty string */
-        self->unused_data = PyBytes_FromStringAndSize(
-            (char *)self->zst.next_in, self->zst.avail_in);
-        if (self->unused_data == NULL) {
+    else if (max_length || PyBytes_GET_SIZE(self->unconsumed_tail)) {
+        if (max_length) {
+            /* Not all of the compressed data could be accommodated in a buffer of
+                the specified size. Return the unconsumed tail in an attribute. */
+            new_bytes = PyBytes_FromStringAndSize(
+                (char *)self->zst.next_in, self->zst.avail_in);
+        }
+        else {
+            /* All of the compressed data was consumed. Clear unconsumed_tail. */
+            new_bytes = PyBytes_FromStringAndSize("", 0);
+        }
+        if (new_bytes == NULL) {
             Py_DECREF(RetVal);
+            RetVal = NULL;
             goto error;
         }
-        self->eof = 1;
-        /* We will only get Z_BUF_ERROR if the output buffer was full
-           but there wasn't more output when we tried again, so it is
-           not an error condition.
-        */
-    } else if (err != Z_OK && err != Z_BUF_ERROR) {
+        Py_DECREF(self->unconsumed_tail);
+        self->unconsumed_tail = new_bytes;
+    }
+    /* We will only get Z_BUF_ERROR if the output buffer was full
+       but there wasn't more output when we tried again, so it is
+       not an error condition.
+    */
+    if (err != Z_OK && err != Z_STREAM_END && err != Z_BUF_ERROR) {
         zlib_error(self->zst, err, "while decompressing data");
         Py_DECREF(RetVal);
         RetVal = NULL;
@@ -940,6 +962,7 @@
 {
     int err, length = DEFAULTALLOC;
     PyObject * retval = NULL;
+    PyObject * new_bytes;
     unsigned long start_total_out;
 
     if (!PyArg_ParseTuple(args, "|i:flush", &length))
@@ -979,6 +1002,50 @@
         Py_END_ALLOW_THREADS
     }
 
+    if (err == Z_STREAM_END) {
+        /* The end of the compressed data has been reached, so set the
+           unused_data attribute to a string containing the remainder of the
+           data in the string.  Note that this is also a logical place to call
+           inflateEnd, but the old behaviour of only calling it on flush() is
+           preserved.
+        */
+//         if (self->zst.avail_in) {
+//             size_t old_size = PyBytes_GET_SIZE(self->unused_data);
+//             if(self->zst.avail_in > PY_SSIZE_T_MAX - old_size) {
+//                 PyErr_NoMemory();
+//                 Py_DECREF(retval);
+//                 retval = NULL;
+//                 goto error;
+//             }
+//             new_bytes = PyBytes_FromStringAndSize(NULL,
+//                 old_size + self->zst.avail_in);
+//             if (new_bytes == NULL) {
+//                 Py_DECREF(retval);
+//                 retval = NULL;
+//                 goto error;
+//             }
+//             Py_MEMCPY(PyBytes_AS_STRING(new_bytes),
+//                 PyBytes_AS_STRING(self->unused_data), old_size);
+//             Py_MEMCPY(PyBytes_AS_STRING(new_bytes) + old_size,
+//                 self->zst.next_in, self->zst.avail_in);
+//             Py_DECREF(self->unused_data);
+//             self->unused_data = new_bytes;
+//         }
+        Py_INCREF(self->unused_data);
+        Py_DECREF(self->unconsumed_tail);
+        self->unconsumed_tail = self->unused_data;
+    } else if (PyBytes_GET_SIZE(self->unconsumed_tail)) {
+        /* All of the compressed data was consumed. Clear unconsumed_tail. */
+        new_bytes = PyBytes_FromStringAndSize("", 0);
+        if (new_bytes == NULL) {
+            Py_DECREF(retval);
+            retval = NULL;
+            goto error;
+        }
+        Py_DECREF(self->unconsumed_tail);
+        self->unconsumed_tail = new_bytes;
+    }
+
     /* If at end of stream, clean up any memory allocated by zlib. */
     if (err == Z_STREAM_END) {
         self->eof = 1;
@@ -991,6 +1058,7 @@
             goto error;
         }
     }
+
     if (_PyBytes_Resize(&retval, self->zst.total_out - start_total_out) < 0) {
         Py_DECREF(retval);
         retval = NULL;
diff -r 1267d64c14b3 Objects/longobject.c
--- a/Objects/longobject.c	Thu Nov 01 14:52:03 2012 +0200
+++ b/Objects/longobject.c	Thu Nov 01 18:06:28 2012 +0200
@@ -655,6 +655,26 @@
     return Py_SIZE(v) == 0 ? 0 : (Py_SIZE(v) < 0 ? -1 : 1);
 }
 
+/* bits_in_digit(d) returns the unique integer k such that 2**(k-1) <= d <
+   2**k if d is nonzero, else 0. */
+
+static const unsigned char BitLengthTable[32] = {  /* bit length of 0..31 */
+    0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
+};
+
+static int
+bits_in_digit(digit d)
+{
+    int d_bits = 0;
+    while (d >= 32) {  /* drop 6 bits per step until d < 32 */
+        d_bits += 6;
+        d >>= 6;
+    }
+    d_bits += (int)BitLengthTable[d];  /* d is a valid table index here */
+    return d_bits;
+}
+
 size_t
 _PyLong_NumBits(PyObject *vv)
 {
@@ -667,16 +687,12 @@
     ndigits = ABS(Py_SIZE(v));
     assert(ndigits == 0 || v->ob_digit[ndigits - 1] != 0);
     if (ndigits > 0) {
-        digit msd = v->ob_digit[ndigits - 1];
-        if ((size_t)(ndigits - 1) > PY_SIZE_MAX / (size_t)PyLong_SHIFT)
+        if ((size_t)ndigits > PY_SIZE_MAX / (size_t)PyLong_SHIFT + 1)
             goto Overflow;
-        result = (size_t)(ndigits - 1) * (size_t)PyLong_SHIFT;
-        do {
-            ++result;
-            if (result == 0)
-                goto Overflow;
-            msd >>= 1;
-        } while (msd);
+        result = (size_t)(ndigits - 1) * (size_t)PyLong_SHIFT +
+                 bits_in_digit(v->ob_digit[ndigits - 1]);
+        if (ndigits > 1 && result < PyLong_SHIFT)
+            goto Overflow;
     }
     return result;
 
@@ -715,6 +731,34 @@
     if (is_signed)
         is_signed = *pendbyte >= 0x80;
 
+    if (n <= sizeof(long)) {
+        unsigned long value = 0;
+        int i = n;
+        if (little_endian) {
+            do {
+                value = (value << 8) | bytes[--i];
+            } while (i > 0);
+        }
+        else {
+            const unsigned char* p = bytes;
+            do {
+                value = (value << 8) | *p++;
+            } while (--i > 0);
+        }
+        if (!is_signed) {
+            if (value <= LONG_MAX)
+                return PyLong_FromLong((long)value);
+            return PyLong_FromUnsignedLong(value);
+        } else {
+            long svalue;
+            unsigned long signbit = 1uL << (8 * n - 1);
+            if ((value & signbit) == 0)
+                svalue = (long)value;
+            else
+                svalue = -1 - (long)(signbit - 1 - (value - signbit));
+            return PyLong_FromLong(svalue);
+        }
+    }
     /* Compute numsignificantbytes.  This consists of finding the most
        significant byte.  Leading 0 bytes are insignificant if the number
        is positive, and leading 0xff bytes if negative. */
@@ -808,9 +852,10 @@
     unsigned int accumbits;     /* # bits in accum */
     int do_twos_comp;           /* store 2's-comp?  is_signed and v < 0 */
     digit carry;                /* for computing 2's-comp */
-    size_t j;                   /* # bytes filled */
     unsigned char* p;           /* pointer to next byte in bytes */
     int pincr;                  /* direction to move p */
+    int k;
+    digit mask;
 
     assert(v != NULL && PyLong_Check(v));
 
@@ -822,10 +867,14 @@
             return -1;
         }
         do_twos_comp = 1;
+        mask = PyLong_MASK;
+        accum = 1;
     }
     else {
         ndigits = Py_SIZE(v);
         do_twos_comp = 0;
+        mask = 0;
+        accum = 0;
     }
 
     if (little_endian) {
@@ -836,28 +885,79 @@
         p = bytes + n - 1;
         pincr = -1;
     }
+    if (ndigits == 0)
+        goto Fill;
+
+#if 1
+    if (n <= sizeof(long)) {
+        unsigned long value;
+        if (n == 0)
+            goto Overflow;
+        if (is_signed) {
+            long svalue = PyLong_AsLong((PyObject *)v);
+            if (svalue == -1 && PyErr_Occurred())
+                goto Overflow;
+            if (svalue < 0) {
+                value = ~(unsigned long)(-1 - svalue);
+                if (value < (~0uL << (8 * n - 1)))
+                    goto Overflow;
+            }
+            else {
+                value = (unsigned long)svalue;
+                if ((value >> (8 * n - 1)) != 0)
+                    goto Overflow;
+            }
+        }
+        else {
+            value = PyLong_AsUnsignedLong((PyObject *)v);
+            if (value == (unsigned long)-1 && PyErr_Occurred())
+                goto Overflow;
+            if (n < sizeof(long) && (value >> (8 * n)) != 0)
+                goto Overflow;
+        }
+        if (little_endian) {
+            p = bytes;
+            do {
+                *p++ = (unsigned char)(value & 0xff);
+                value >>= 8;
+            } while (--n);
+        }
+        else {
+            p = bytes + n;
+            do {
+                *--p = (unsigned char)(value & 0xff);
+                value >>= 8;
+            } while (--n);
+        }
+        return 0;
+    }
+#endif
 
     /* Copy over all the Python digits.
        It's crucial that every Python digit except for the MSD contribute
        exactly PyLong_SHIFT bits to the total, so first assert that the long is
        normalized. */
     assert(ndigits == 0 || v->ob_digit[ndigits - 1] != 0);
-    j = 0;
-    accum = 0;
     accumbits = 0;
-    carry = do_twos_comp ? 1 : 0;
-    for (i = 0; i < ndigits; ++i) {
+    k = (size_t)(ndigits - 1) * (size_t)PyLong_SHIFT / 8;
+    if (n < k)
+        goto Overflow;
+    n -= k;
+    i = 0;
+    while (1) {
         digit thisdigit = v->ob_digit[i];
-        if (do_twos_comp) {
-            thisdigit = (thisdigit ^ PyLong_MASK) + carry;
-            carry = thisdigit >> PyLong_SHIFT;
-            thisdigit &= PyLong_MASK;
-        }
+//         if (do_twos_comp) {
+//             thisdigit = thisdigit ^ PyLong_MASK;
+//         }
+        thisdigit ^= mask;
         /* Because we're going LSB to MSB, thisdigit is more
            significant than what's already in accum, so needs to be
            prepended to accum. */
-        accum |= (twodigits)thisdigit << accumbits;
-
+        accum += (twodigits)thisdigit << accumbits;
+
+        if (++i == ndigits)
+            break;
+#if 0
         /* The most-significant digit may be (probably is) at least
            partly empty. */
         if (i == ndigits - 1) {
@@ -865,33 +965,72 @@
              * although for signed conversion we need later to
              * make sure at least one sign bit gets stored. */
             digit s = do_twos_comp ? thisdigit ^ PyLong_MASK : thisdigit;
-            while (s != 0) {
-                s >>= 1;
-                accumbits++;
+            accumbits += bits_in_digit(s);
+            k = accumbits >> 3;
+            if (n < k)
+                goto Overflow;
+            n -= k;
+        }
+        else
+#endif
+        {
+            accumbits += PyLong_SHIFT;
+            k = accumbits >> 3;
+        }
+
+        /* Store as many bytes as possible. */
+        accumbits &= 7;
+        if (little_endian) {
+            while (k--) {
+                *p++ = (unsigned char)(accum & 0xff);
+                accum >>= 8;
             }
         }
-        else
-            accumbits += PyLong_SHIFT;
-
-        /* Store as many bytes as possible. */
-        while (accumbits >= 8) {
-            if (j >= n)
-                goto Overflow;
-            ++j;
-            *p = (unsigned char)(accum & 0xff);
-            p += pincr;
-            accumbits -= 8;
-            accum >>= 8;
+        else {
+            while (k--) {
+                *p-- = (unsigned char)(accum & 0xff);
+                accum >>= 8;
+            }
         }
+//         while (k--) {
+//             *p = (unsigned char)(accum & 0xff);
+//             p += pincr;
+//             accum >>= 8;
+//         }
+    }
+
+//     accum &= ~(~(twodigits)0 << (accumbits + PyLong_SHIFT));
+    /* The most-significant digit may be (probably is) at least
+        partly empty. */
+    /* Count # of sign bits -- they needn't be stored,
+        * although for signed conversion we need later to
+        * make sure at least one sign bit gets stored. */
+//     digit s = do_twos_comp ? thisdigit ^ PyLong_MASK : thisdigit;
+    accumbits += bits_in_digit((accum >> accumbits) ^ mask);
+/*    if (is_signed) {
+        accumbits++;
+        accum |= mask << accumbits;
+    }*/
+    k = accumbits >> 3;
+    if (n < k)
+        goto Overflow;
+    n -= k;
+
+    /* Store as many bytes as possible. */
+    accumbits &= 7;
+    while (k--) {
+        *p = (unsigned char)(accum & 0xff);
+        p += pincr;
+        accum >>= 8;
     }
 
     /* Store the straggler (if any). */
     assert(accumbits < 8);
     assert(carry == 0);  /* else do_twos_comp and *every* digit was 0 */
     if (accumbits > 0) {
-        if (j >= n)
+        if (n == 0)
             goto Overflow;
-        ++j;
+        --n;
         if (do_twos_comp) {
             /* Fill leading bits of the byte with sign bits
                (appropriately pretending that the long had an
@@ -901,7 +1040,7 @@
         *p = (unsigned char)(accum & 0xff);
         p += pincr;
     }
-    else if (j == n && n > 0 && is_signed) {
+    else if (n == 0 && is_signed) {
         /* The main loop filled the byte array exactly, so the code
            just above didn't get to ensure there's a sign bit, and the
            loop below wouldn't add one either.  Make sure a sign bit
@@ -915,11 +1054,14 @@
             goto Overflow;
     }
 
+  Fill:
     /* Fill remaining bytes with copies of the sign bit. */
+    if (n > 0)
     {
         unsigned char signbyte = do_twos_comp ? 0xffU : 0U;
-        for ( ; j < n; ++j, p += pincr)
-            *p = signbyte;
+        if (!little_endian)
+            p -= n - 1;
+        memset(p, signbyte, n);
     }
 
     return 0;
@@ -1393,26 +1535,6 @@
             Py_RETURN_NOTIMPLEMENTED;                   \
     } while(0)
 
-/* bits_in_digit(d) returns the unique integer k such that 2**(k-1) <= d <
-   2**k if d is nonzero, else 0. */
-
-static const unsigned char BitLengthTable[32] = {
-    0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
-    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
-};
-
-static int
-bits_in_digit(digit d)
-{
-    int d_bits = 0;
-    while (d >= 32) {
-        d_bits += 6;
-        d >>= 6;
-    }
-    d_bits += (int)BitLengthTable[d];
-    return d_bits;
-}
-
 /* x[0:m] and y[0:n] are digit vectors, LSD first, m >= n required.  x[0:n]
  * is modified in place, by adding y to it.  Carries are propagated as far as
  * x[m-1], and the remaining carry (0 or 1) is returned.
@@ -4546,8 +4668,8 @@
 long_bit_length(PyLongObject *v)
 {
     PyLongObject *result, *x, *y;
-    Py_ssize_t ndigits, msd_bits = 0;
-    digit msd;
+    Py_ssize_t ndigits;
+    int msd_bits;
 
     assert(v != NULL);
     assert(PyLong_Check(v));
@@ -4556,12 +4678,7 @@
     if (ndigits == 0)
         return PyLong_FromLong(0);
 
-    msd = v->ob_digit[ndigits-1];
-    while (msd >= 32) {
-        msd_bits += 6;
-        msd >>= 6;
-    }
-    msd_bits += (long)(BitLengthTable[msd]);
+    msd_bits = bits_in_digit(v->ob_digit[ndigits-1]);
 
     if (ndigits <= PY_SSIZE_T_MAX/PyLong_SHIFT)
         return PyLong_FromSsize_t((ndigits-1)*PyLong_SHIFT + msd_bits);
@@ -4615,50 +4732,54 @@
 #endif
 
 
+static int
+byteorder_converter(PyObject *arg, void *p)
+{
+    /* PyArg_Parse "O&" converter: store 1 ('little') or 0 ('big') in
+       *(int *)p and return 1; on error set an exception and return 0. */
+    static PyObject *s_little = NULL;
+    static PyObject *s_big = NULL;
+    if (!PyUnicode_Check(arg)) {
+        PyErr_Format(PyExc_TypeError, "byteorder must be str, not %.100s",
+                     Py_TYPE(arg)->tp_name);
+        return 0;
+    }
+    if (PyUnicode_READY(arg) == -1)
+        return 0;
+    /* Interned strings give an identity fast path; propagate failures. */
+    if (s_little == NULL &&
+        (s_little = PyUnicode_InternFromString("little")) == NULL)
+        return 0;
+    if (s_big == NULL &&
+        (s_big = PyUnicode_InternFromString("big")) == NULL)
+        return 0;
+    if (arg == s_little || !PyUnicode_CompareWithASCIIString(arg, "little"))
+        *(int *)p = 1;
+    else if (arg == s_big || !PyUnicode_CompareWithASCIIString(arg, "big"))
+        *(int *)p = 0;
+    else {
+        PyErr_SetString(PyExc_ValueError,
+            "byteorder must be either 'little' or 'big'");
+        return 0;
+    }
+    return 1;
+}
+
 static PyObject *
 long_to_bytes(PyLongObject *v, PyObject *args, PyObject *kwds)
 {
-    PyObject *byteorder_str;
-    PyObject *is_signed_obj = NULL;
     Py_ssize_t length;
     int little_endian;
-    int is_signed;
+    int is_signed = 0;
     PyObject *bytes;
     static char *kwlist[] = {"length", "byteorder", "signed", NULL};
 
-    if (!PyArg_ParseTupleAndKeywords(args, kwds, "nU|O:to_bytes", kwlist,
-                                     &length, &byteorder_str,
-                                     &is_signed_obj))
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "nO&|$p:to_bytes", kwlist,
+                                     &length,
+                                     &byteorder_converter, &little_endian,
+                                     &is_signed))
         return NULL;
 
-    if (args != NULL && Py_SIZE(args) > 2) {
-        PyErr_SetString(PyExc_TypeError,
-            "'signed' is a keyword-only argument");
-        return NULL;
-    }
-
-    if (!PyUnicode_CompareWithASCIIString(byteorder_str, "little"))
-        little_endian = 1;
-    else if (!PyUnicode_CompareWithASCIIString(byteorder_str, "big"))
-        little_endian = 0;
-    else {
-        PyErr_SetString(PyExc_ValueError,
-            "byteorder must be either 'little' or 'big'");
-        return NULL;
-    }
-
-    if (is_signed_obj != NULL) {
-        int cmp = PyObject_IsTrue(is_signed_obj);
-        if (cmp < 0)
-            return NULL;
-        is_signed = cmp ? 1 : 0;
-    }
-    else {
-        /* If the signed argument was omitted, use False as the
-           default. */
-        is_signed = 0;
-    }
-
     if (length < 0) {
         PyErr_SetString(PyExc_ValueError,
                         "length argument must be non-negative");
@@ -4700,48 +4821,19 @@
 static PyObject *
 long_from_bytes(PyTypeObject *type, PyObject *args, PyObject *kwds)
 {
-    PyObject *byteorder_str;
-    PyObject *is_signed_obj = NULL;
     int little_endian;
-    int is_signed;
+    int is_signed = 0;
     PyObject *obj;
     PyObject *bytes;
     PyObject *long_obj;
     static char *kwlist[] = {"bytes", "byteorder", "signed", NULL};
 
-    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OU|O:from_bytes", kwlist,
-                                     &obj, &byteorder_str,
-                                     &is_signed_obj))
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&|$p:from_bytes", kwlist,
+                                     &obj,
+                                     &byteorder_converter, &little_endian,
+                                     &is_signed))
         return NULL;
 
-    if (args != NULL && Py_SIZE(args) > 2) {
-        PyErr_SetString(PyExc_TypeError,
-            "'signed' is a keyword-only argument");
-        return NULL;
-    }
-
-    if (!PyUnicode_CompareWithASCIIString(byteorder_str, "little"))
-        little_endian = 1;
-    else if (!PyUnicode_CompareWithASCIIString(byteorder_str, "big"))
-        little_endian = 0;
-    else {
-        PyErr_SetString(PyExc_ValueError,
-            "byteorder must be either 'little' or 'big'");
-        return NULL;
-    }
-
-    if (is_signed_obj != NULL) {
-        int cmp = PyObject_IsTrue(is_signed_obj);
-        if (cmp < 0)
-            return NULL;
-        is_signed = cmp ? 1 : 0;
-    }
-    else {
-        /* If the signed argument was omitted, use False as the
-           default. */
-        is_signed = 0;
-    }
-
     bytes = PyObject_Bytes(obj);
     if (bytes == NULL)
         return NULL;