Index: Objects/unicodeobject.c
===================================================================
--- Objects/unicodeobject.c	(révision 68360)
+++ Objects/unicodeobject.c	(copie de travail)
@@ -2001,6 +2001,13 @@
     return PyUnicode_DecodeUTF8Stateful(s, size, errors, NULL);
 }
 
+#define LONG_PTR_MASK ((size_t) (SIZEOF_LONG - 1))  /* address bits that are zero iff long-aligned */
+#if (SIZEOF_LONG == 8)
+#define ASCII_CHAR_MASK 0x8080808080808080L  /* bit 7 of every byte in a long */
+#else
+#define ASCII_CHAR_MASK 0x80808080L  /* bit 7 of every byte in a long */
+#endif
+
 PyObject *PyUnicode_DecodeUTF8Stateful(const char *s,
 			                Py_ssize_t size,
 			                const char *errors,
@@ -2011,7 +2018,7 @@
     Py_ssize_t startinpos;
     Py_ssize_t endinpos;
     Py_ssize_t outpos;
-    const char *e;
+    const char *e, *aligned_end;
     PyUnicodeObject *unicode;
     Py_UNICODE *p;
     const char *errmsg = "";
@@ -2032,11 +2039,43 @@
     /* Unpack UTF-8 encoded data */
     p = unicode->str;
     e = s + size;
+    aligned_end = (const char *) ((size_t) e & ~LONG_PTR_MASK);
 
     while (s < e) {
         Py_UCS4 ch = (unsigned char)*s;
 
         if (ch < 0x80) {
+            /* Fast path for runs of ASCII characters. */
+            if (!((size_t) s & LONG_PTR_MASK)) {
+                /* Help register allocation */
+                register const char *_s = s;
+                register Py_UNICODE *_p = p;
+                while (_s < aligned_end) {
+                    long word = *(long *) _s;
+                    if (word & ASCII_CHAR_MASK)
+                        break;
+                    _p[0] = (unsigned char) _s[0];
+                    _p[1] = (unsigned char) _s[1];
+                    _p[2] = (unsigned char) _s[2];
+                    _p[3] = (unsigned char) _s[3];
+#if (SIZEOF_LONG == 8)
+                    _p[4] = (unsigned char) _s[4];
+                    _p[5] = (unsigned char) _s[5];
+                    _p[6] = (unsigned char) _s[6];
+                    _p[7] = (unsigned char) _s[7];
+#endif
+                    _s += SIZEOF_LONG;
+                    _p += SIZEOF_LONG;
+                }
+                s = _s;
+                p = _p;
+                if (s == e)
+                    break;
+                ch = (unsigned char)*s;
+            }
+        }
+
+        if (ch < 0x80) {
             *p++ = (Py_UNICODE)ch;
             s++;
             continue;
@@ -2169,6 +2208,7 @@
 	     &starts, &e, &startinpos, &endinpos, &exc, &s,
 	     &unicode, &outpos, &p))
 	goto onError;
+	aligned_end = (const char *) ((size_t) e & ~LONG_PTR_MASK);
     }
     if (consumed)
 	*consumed = s-starts;
@@ -2188,6 +2228,9 @@
     return NULL;
 }
 
+#undef ASCII_CHAR_MASK
+
+
 /* Allocation strategy:  if the string is short, convert into a stack buffer
    and allocate exactly as much space needed at the end.  Else allocate the
    maximum possible needed (4 result bytes per Unicode character), and return
@@ -2582,6 +2625,14 @@
     return PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder, NULL);
 }
 
+#if (SIZEOF_LONG == 8)
+#define FAST_CHAR_MASK         0x8000800080008000L  /* bit 15 of each 16-bit unit, native byte order */
+#define SWAPPED_FAST_CHAR_MASK 0x0080008000800080L  /* the same bit when each unit's bytes are swapped */
+#else
+#define FAST_CHAR_MASK         0x80008000L  /* bit 15 of each 16-bit unit, native byte order */
+#define SWAPPED_FAST_CHAR_MASK 0x00800080L  /* the same bit when each unit's bytes are swapped */
+#endif
+
 PyObject *
 PyUnicode_DecodeUTF16Stateful(const char *s,
 			      Py_ssize_t size,
@@ -2595,8 +2646,9 @@
     Py_ssize_t outpos;
     PyUnicodeObject *unicode;
     Py_UNICODE *p;
-    const unsigned char *q, *e;
+    const unsigned char *q, *e, *aligned_end;
     int bo = 0;       /* assume native ordering by default */
+    int native_ordering = 0;
     const char *errmsg = "";
     /* Offsets from q for retrieving byte pairs in the right order. */
 #ifdef BYTEORDER_IS_LITTLE_ENDIAN
@@ -2618,7 +2670,7 @@
     /* Unpack UTF-16 encoded data */
     p = unicode->str;
     q = (unsigned char *)s;
-    e = q + size;
+    e = q + size - 1;
 
     if (byteorder)
         bo = *byteorder;
@@ -2662,20 +2714,67 @@
         ihi = 0;
         ilo = 1;
     }
+#ifdef BYTEORDER_IS_LITTLE_ENDIAN
+    native_ordering = ilo < ihi;
+#else
+    native_ordering = ilo > ihi;
+#endif
 
+    aligned_end = (const unsigned char *) ((size_t) e & ~LONG_PTR_MASK);
     while (q < e) {
 	Py_UNICODE ch;
-	/* remaining bytes at the end? (size should be even) */
-	if (e-q<2) {
-	    if (consumed)
-		break;
-	    errmsg = "truncated data";
-	    startinpos = ((const char *)q)-starts;
-	    endinpos = ((const char *)e)-starts;
-	    goto utf16Error;
-	    /* The remaining input chars are ignored if the callback
-	       chooses to skip the input */
-	}
+        if (!((size_t) q & LONG_PTR_MASK)) {
+            /* Fast path for runs of code units below 0x8000 (so no surrogates) */
+            register const unsigned char *_q = q;
+            Py_UNICODE *_p = p;
+            if (native_ordering) {
+                while (_q < aligned_end) {
+                    long word = * (long *) _q;
+                    if (word & FAST_CHAR_MASK)
+                        break;
+                    _p[0] = ((unsigned short *) _q)[0];
+                    _p[1] = ((unsigned short *) _q)[1];
+#if (SIZEOF_LONG == 8)
+                    _p[2] = ((unsigned short *) _q)[2];
+                    _p[3] = ((unsigned short *) _q)[3];
+#endif
+                    _q += SIZEOF_LONG;
+                    _p += SIZEOF_LONG / 2;
+                }
+            }
+            else {
+                while (_q < aligned_end) {
+                    long word = * (long *) _q;
+                    if (word & SWAPPED_FAST_CHAR_MASK)
+                        break;
+                    /* Byte-swapped input.  Rebuild each code unit from
+                       its high and low bytes with a shift instead of
+                       storing single bytes into the Py_UNICODE slots:
+                       byte stores at offsets 0 and 1 only reach the low
+                       16 bits of a 4-byte Py_UNICODE on little-endian
+                       hosts, so a big-endian UCS-4 build would get the
+                       value in the upper half.  Assigning the whole
+                       unit is right for every Py_UNICODE_SIZE and host
+                       byte order and leaves the upper bits zero in
+                       UCS-4 builds.  ihi/ilo are the byte offsets also
+                       used by the unit-at-a-time decoding after this
+                       fast path; the mask test above guarantees that
+                       each high byte is below 0x80. */
+                    _p[0] = (Py_UNICODE) ((_q[ihi] << 8) | _q[ilo]);
+                    _p[1] = (Py_UNICODE) ((_q[2 + ihi] << 8) | _q[2 + ilo]);
+#if (SIZEOF_LONG == 8)
+                    _p[2] = (Py_UNICODE) ((_q[4 + ihi] << 8) | _q[4 + ilo]);
+                    _p[3] = (Py_UNICODE) ((_q[6 + ihi] << 8) | _q[6 + ilo]);
+#endif
+                    _q += SIZEOF_LONG;
+                    _p += SIZEOF_LONG / 2;
+                }
+            }
+            p = _p;
+            q = _q;
+            if (q >= e)
+                break;
+        }
 	ch = (q[ihi] << 8) | q[ilo];
 
 	q += 2;
@@ -2686,10 +2785,10 @@
 	}
 
 	/* UTF-16 code pair: */
-	if (q >= e) {
+	if (q > e) {
 	    errmsg = "unexpected end of data";
 	    startinpos = (((const char *)q)-2)-starts;
-	    endinpos = ((const char *)e)-starts;
+	    endinpos = ((const char *)e)+1-starts;
 	    goto utf16Error;
 	}
 	if (0xD800 <= ch && ch <= 0xDBFF) {
@@ -2726,6 +2825,23 @@
 	         &unicode, &outpos, &p))
 	    goto onError;
     }
+    /* remaining byte at the end? (size should be even) */
+    if (e == q) {
+        if (!consumed) {
+            errmsg = "truncated data";
+            startinpos = ((const char *)q)-starts;
+            endinpos = ((const char *)e)+1-starts;
+            outpos = p-PyUnicode_AS_UNICODE(unicode);
+            if (unicode_decode_call_errorhandler(
+                     errors, &errorHandler,
+                     "utf16", errmsg,
+                     &starts, (const char **)&e, &startinpos, &endinpos, &exc, (const char **)&q,
+                     &unicode, &outpos, &p))
+                goto onError;
+            /* The remaining input chars are ignored if the callback
+               chooses to skip the input */
+        }
+    }
 
     if (byteorder)
         *byteorder = bo;
@@ -2748,6 +2864,9 @@
     return NULL;
 }
 
+#undef FAST_CHAR_MASK
+#undef SWAPPED_FAST_CHAR_MASK
+
 PyObject *
 PyUnicode_EncodeUTF16(const Py_UNICODE *s,
 		      Py_ssize_t size,
@@ -3571,6 +3690,7 @@
 {
     PyUnicodeObject *v;
     Py_UNICODE *p;
+    const char *e, *unrolled_end;
 
     /* Latin-1 is equivalent to the first 256 ordinals in Unicode. */
     if (size == 1) {
@@ -3584,8 +3704,18 @@
     if (size == 0)
 	return (PyObject *)v;
     p = PyUnicode_AS_UNICODE(v);
-    while (size-- > 0)
-	*p++ = (unsigned char)*s++;
+    e = s + size;  /* one past the last input byte */
+    unrolled_end = s + (size & ~(Py_ssize_t) 3);  /* end of the whole 4-byte groups; never points before s */
+    while (s < unrolled_end) {  /* copy four bytes per iteration */
+        p[0] = (unsigned char) s[0];
+        p[1] = (unsigned char) s[1];
+        p[2] = (unsigned char) s[2];
+        p[3] = (unsigned char) s[3];
+        s += 4;
+        p += 4;
+    }
+    while (s < e)  /* remaining 0-3 bytes */
+        *p++ = (unsigned char) *s++;
     return (PyObject *)v;
 
  onError: