diff -r 293180d199f2 Objects/stringlib/asciilib.h
--- a/Objects/stringlib/asciilib.h	Thu Apr 19 18:21:04 2012 +0200
+++ b/Objects/stringlib/asciilib.h	Thu Apr 19 23:28:23 2012 +0300
@@ -7,6 +7,7 @@
 #define STRINGLIB(F)             asciilib_##F
 #define STRINGLIB_OBJECT         PyUnicodeObject
 #define STRINGLIB_SIZEOF_CHAR    1
+#define STRINGLIB_MAX_CHAR       0x7Fu
 #define STRINGLIB_CHAR           Py_UCS1
 #define STRINGLIB_TYPE_NAME      "unicode"
 #define STRINGLIB_PARSE_CODE     "U"
diff -r 293180d199f2 Objects/stringlib/codecs.h
--- a/Objects/stringlib/codecs.h	Thu Apr 19 18:21:04 2012 +0200
+++ b/Objects/stringlib/codecs.h	Thu Apr 19 23:28:23 2012 +0300
@@ -350,4 +350,51 @@
 #undef MAX_SHORT_UNICHARS
 }
 
+/* Decode as many UTF-32 code units as fit into the current string kind.
+
+   Reads 4-byte units from *inptr while *inptr < e and stores each code
+   point at dest[*outpos++].  `le` selects little-endian (non-zero) or
+   big-endian (zero) byte order.  The caller must pass `e` such that at
+   least 4 readable bytes follow every position below it, and `dest` must
+   have room for every character written.
+
+   Returns 0 when the input up to `e` was consumed, otherwise the first
+   code point greater than STRINGLIB_MAX_CHAR; that unit is not consumed.
+   *inptr and *outpos are updated in both cases. */
+Py_LOCAL_INLINE(Py_UCS4)
+STRINGLIB(utf32_try_decode)(STRINGLIB_CHAR *dest, Py_ssize_t *outpos,
+                            const unsigned char **inptr,
+                            const unsigned char *e,
+                            int le)
+{
+    const unsigned char *q = *inptr;
+    STRINGLIB_CHAR *p = dest + *outpos;
+    Py_UCS4 ch;
+
+    /* Cast the top byte before shifting: an unsigned char promotes to a
+       signed int, and shifting a value >= 0x80 left by 24 overflows it. */
+    if (le) {
+        while (q < e) {
+            ch = ((Py_UCS4)q[3] << 24) | (q[2] << 16) | (q[1] << 8) | q[0];
+            if (ch > STRINGLIB_MAX_CHAR)
+                goto Return;
+            *p++ = (STRINGLIB_CHAR)ch;
+            q += 4;
+        }
+    }
+    else {
+        while (q < e) {
+            ch = ((Py_UCS4)q[0] << 24) | (q[1] << 16) | (q[2] << 8) | q[3];
+            if (ch > STRINGLIB_MAX_CHAR)
+                goto Return;
+            *p++ = (STRINGLIB_CHAR)ch;
+            q += 4;
+        }
+    }
+    ch = 0;  /* everything up to e was decoded */
+Return:
+    *inptr = q;
+    *outpos = p - dest;
+    return ch;
+}
 #endif /* STRINGLIB_IS_UNICODE */
diff -r 293180d199f2 Objects/stringlib/ucs1lib.h
--- a/Objects/stringlib/ucs1lib.h	Thu Apr 19 18:21:04 2012 +0200
+++ b/Objects/stringlib/ucs1lib.h	Thu Apr 19 23:28:23 2012 +0300
@@ -7,6 +7,7 @@
 #define STRINGLIB(F)             ucs1lib_##F
 #define STRINGLIB_OBJECT         PyUnicodeObject
 #define STRINGLIB_SIZEOF_CHAR    1
+#define STRINGLIB_MAX_CHAR       0xFFu
 #define STRINGLIB_CHAR           Py_UCS1
 #define STRINGLIB_TYPE_NAME      "unicode"
 #define STRINGLIB_PARSE_CODE     "U"
diff -r 293180d199f2 Objects/stringlib/ucs2lib.h
--- a/Objects/stringlib/ucs2lib.h	Thu Apr 19 18:21:04 2012 +0200
+++ b/Objects/stringlib/ucs2lib.h	Thu Apr 19 23:28:23 2012 +0300
@@ -7,6 +7,7 @@
 #define STRINGLIB(F)             ucs2lib_##F
 #define STRINGLIB_OBJECT         PyUnicodeObject
 #define STRINGLIB_SIZEOF_CHAR    2
+#define STRINGLIB_MAX_CHAR       0xFFFFu
 #define STRINGLIB_CHAR           Py_UCS2
 #define STRINGLIB_TYPE_NAME      "unicode"
 #define STRINGLIB_PARSE_CODE     "U"
diff -r 293180d199f2 Objects/stringlib/ucs4lib.h
--- a/Objects/stringlib/ucs4lib.h	Thu Apr 19 18:21:04 2012 +0200
+++ b/Objects/stringlib/ucs4lib.h	Thu Apr 19 23:28:23 2012 +0300
@@ -7,6 +7,7 @@
 #define STRINGLIB(F)             ucs4lib_##F
 #define STRINGLIB_OBJECT         PyUnicodeObject
 #define STRINGLIB_SIZEOF_CHAR    4
+#define STRINGLIB_MAX_CHAR       0x10FFFFu
 #define STRINGLIB_CHAR           Py_UCS4
 #define STRINGLIB_TYPE_NAME      "unicode"
 #define STRINGLIB_PARSE_CODE     "U"
diff -r 293180d199f2 Objects/stringlib/undef.h
--- a/Objects/stringlib/undef.h	Thu Apr 19 18:21:04 2012 +0200
+++ b/Objects/stringlib/undef.h	Thu Apr 19 23:28:23 2012 +0300
@@ -1,6 +1,7 @@
 #undef  FASTSEARCH
 #undef  STRINGLIB
 #undef  STRINGLIB_SIZEOF_CHAR
+#undef  STRINGLIB_MAX_CHAR
 #undef  STRINGLIB_CHAR
 #undef  STRINGLIB_STR
 #undef  STRINGLIB_LEN
diff -r 293180d199f2 Objects/unicodeobject.c
--- a/Objects/unicodeobject.c	Thu Apr 19 18:21:04 2012 +0200
+++ b/Objects/unicodeobject.c	Thu Apr 19 23:28:23 2012 +0300
@@ -4555,6 +4555,10 @@
     return PyUnicode_DecodeUTF8Stateful(s, size, errors, NULL);
 }
 
+#include "stringlib/asciilib.h"
+#include "stringlib/codecs.h"
+#include "stringlib/undef.h"
+
 #include "stringlib/ucs1lib.h"
 #include "stringlib/codecs.h"
 #include "stringlib/undef.h"
@@ -5150,14 +5154,8 @@
     Py_ssize_t outpos;
     PyObject *unicode;
     const unsigned char *q, *e;
-    int bo = 0;       /* assume native ordering by default */
+    int le, bo = 0;       /* assume native ordering by default */
     const char *errmsg = "";
-    /* Offsets from q for retrieving bytes in the right order. */
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
-    int iorder[] = {0, 1, 2, 3};
-#else
-    int iorder[] = {3, 2, 1, 0};
-#endif
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
 
@@ -5173,85 +5171,86 @@
        stream as-is (giving a ZWNBSP character). */
     if (bo == 0) {
         if (size >= 4) {
-            const Py_UCS4 bom = (q[iorder[3]] << 24) | (q[iorder[2]] << 16) |
-                (q[iorder[1]] << 8) | q[iorder[0]];
+            Py_UCS4 bom = (q[3] << 24) | (q[2] << 16) | (q[1] << 8) | q[0];
+            if (bom == 0x0000FEFF) {
+                bo = -1;
+                q += 4;
+            }
+            else if (bom == 0xFFFE0000) {
+                bo = 1;
+                q += 4;
+            }
+        }
+    }
+
 #ifdef BYTEORDER_IS_LITTLE_ENDIAN
-            if (bom == 0x0000FEFF) {
+    le = bo <= 0;
+#else
+    le = bo < 0;
+#endif
+
+    /* Length estimate: bytes left after a skipped BOM / 4, rounded up */
+    unicode = PyUnicode_New((e - q + 3) / 4, 127);
+    if (!unicode)
+        return NULL;
+    outpos = 0;
+
+    while (1) {
+        Py_UCS4 ch = 0;
+        if (e - q > 3) {
+            const unsigned char *e2 = e - 3;
+            int kind = PyUnicode_KIND(unicode);
+            switch (kind) {
+            case PyUnicode_1BYTE_KIND:
+                if (PyUnicode_IS_ASCII(unicode))
+                    ch = asciilib_utf32_try_decode(
+                            PyUnicode_1BYTE_DATA(unicode),
+                            &outpos, &q, e2, le);
+                else
+                    ch = ucs1lib_utf32_try_decode(
+                            PyUnicode_1BYTE_DATA(unicode), &outpos,
+                            &q, e2, le);
+                break;
+            case PyUnicode_2BYTE_KIND:
+                ch = ucs2lib_utf32_try_decode(
+                        PyUnicode_2BYTE_DATA(unicode), &outpos,
+                        &q, e2, le);
+                break;
+            case PyUnicode_4BYTE_KIND:
+                ch = ucs4lib_utf32_try_decode(
+                        PyUnicode_4BYTE_DATA(unicode), &outpos,
+                        &q, e2, le);
+                break;
+            default:
+                assert(0);
+            }
+        }
+        if (ch) {
+            if (ch < 0x110000) {
+                if (unicode_putchar(&unicode, &outpos, ch) < 0)
+                    goto onError;
                 q += 4;
-                bo = -1;
-            }
-            else if (bom == 0xFFFE0000) {
-                q += 4;
-                bo = 1;
-            }
-#else
-            if (bom == 0x0000FEFF) {
-                q += 4;
-                bo = 1;
-            }
-            else if (bom == 0xFFFE0000) {
-                q += 4;
-                bo = -1;
-            }
-#endif
-        }
-    }
-
-    if (bo == -1) {
-        /* force LE */
-        iorder[0] = 0;
-        iorder[1] = 1;
-        iorder[2] = 2;
-        iorder[3] = 3;
-    }
-    else if (bo == 1) {
-        /* force BE */
-        iorder[0] = 3;
-        iorder[1] = 2;
-        iorder[2] = 1;
-        iorder[3] = 0;
-    }
-
-    /* This might be one to much, because of a BOM */
-    unicode = PyUnicode_New((size+3)/4, 127);
-    if (!unicode)
-        return NULL;
-    if (size == 0)
-        return unicode;
-    outpos = 0;
-
-    while (q < e) {
-        Py_UCS4 ch;
-        /* remaining bytes at the end? (size should be divisible by 4) */
-        if (e-q<4) {
-            if (consumed)
+                continue;
+            }
+            errmsg = "codepoint not in range(0x110000)";
+            startinpos = ((const char *)q)-starts;
+            endinpos = startinpos+4;
+        }
+        else {
+            /* remaining bytes at the end? (size should be divisible by 4) */
+            if (q == e || consumed)
                 break;
             errmsg = "truncated data";
             startinpos = ((const char *)q)-starts;
             endinpos = ((const char *)e)-starts;
-            goto utf32Error;
             /* The remaining input chars are ignored if the callback
                chooses to skip the input */
         }
-        ch = (q[iorder[3]] << 24) | (q[iorder[2]] << 16) |
-            (q[iorder[1]] << 8) | q[iorder[0]];
-
-        if (ch >= 0x110000)
-        {
-            errmsg = "codepoint not in range(0x110000)";
-            startinpos = ((const char *)q)-starts;
-            endinpos = startinpos+4;
-            goto utf32Error;
-        }
-        if (unicode_putchar(&unicode, &outpos, ch) < 0)
-            goto onError;
-        q += 4;
-        continue;
-      utf32Error:
         if (unicode_decode_call_errorhandler(
                 errors, &errorHandler,
                 "utf32", errmsg,
-                &starts, (const char **)&e, &startinpos, &endinpos, &exc, (const char **)&q,
+                &starts, (const char **)&e, &startinpos, &endinpos,
+                &exc, (const char **)&q,
                 &unicode, &outpos))
             goto onError;
     }