diff -r 4a02212064ce Doc/library/codecs.rst
--- a/Doc/library/codecs.rst	Mon Sep 02 03:23:21 2013 -0700
+++ b/Doc/library/codecs.rst	Mon Sep 02 19:50:59 2013 +0300
@@ -346,14 +346,16 @@
 |                         | in :pep:`383`.                                |
 +-------------------------+-----------------------------------------------+
 
-In addition, the following error handlers are specific to a single codec:
+In addition, the following error handlers are specific to Unicode encoding
+schemes:
 
-+-------------------+---------+-------------------------------------------+
-| Value             | Codec   | Meaning                                   |
-+===================+=========+===========================================+
-|``'surrogatepass'``| utf-8   | Allow encoding and decoding of surrogate  |
-|                   |         | codes in UTF-8.                           |
-+-------------------+---------+-------------------------------------------+
++-------------------+------------------------+-------------------------------------------+
+| Value             | Codec                  | Meaning                                   |
++===================+========================+===========================================+
+|``'surrogatepass'``| utf-8, utf-16, utf-32, | Allow encoding and decoding of surrogate  |
+|                   | utf-16-be, utf-16-le,  | codes in all the Unicode encoding schemes.|
+|                   | utf-32-be, utf-32-le   |                                           |
++-------------------+------------------------+-------------------------------------------+
 
 .. versionadded:: 3.1
    The ``'surrogateescape'`` and ``'surrogatepass'`` error handlers.
@@ -1146,6 +1148,14 @@
 | utf_8_sig       |                                | all languages                  |
 +-----------------+--------------------------------+--------------------------------+
 
+.. versionchanged:: 3.4
+    ``utf_16``, ``utf_16_be``, ``utf_16_le``, ``utf_32``, ``utf_32_be`` and
+    ``utf_32_le`` encoders no longer allow surrogate code points
+    (U+D800-U+DFFF) to be encoded anymore. ``utf_32``, ``utf_32_be`` and
+    ``utf_32_le`` encoders no longer decode byte sequences that correspond to
+    surrogate code points.
+
+
 Python Specific Encodings
 -------------------------
 
diff -r 4a02212064ce Lib/test/test_codecs.py
--- a/Lib/test/test_codecs.py	Mon Sep 02 03:23:21 2013 -0700
+++ b/Lib/test/test_codecs.py	Mon Sep 02 19:50:59 2013 +0300
@@ -299,8 +299,46 @@
         self.assertEqual(reader.readline(), s5)
         self.assertEqual(reader.readline(), "")
 
+    ill_formed_sequence_replace = "\ufffd"
+
+    def test_lone_surrogates(self):
+        self.assertRaises(UnicodeEncodeError, "\ud800".encode, self.encoding)
+        self.assertEqual("[\uDC80]".encode(self.encoding, "backslashreplace"),
+                         "[\\udc80]".encode(self.encoding))
+        self.assertEqual("[\uDC80]".encode(self.encoding, "xmlcharrefreplace"),
+                         "[&#56448;]".encode(self.encoding))
+        self.assertEqual("[\uDC80]".encode(self.encoding, "ignore"),
+                         "[]".encode(self.encoding))
+        self.assertEqual("[\uDC80]".encode(self.encoding, "replace"),
+                         "[?]".encode(self.encoding))
+
+        bom = "".encode(self.encoding)
+        for before, after in [("\U00010fff", "A"), ("[", "]"),
+                              ("A", "\U00010fff")]:
+            before_sequence = before.encode(self.encoding)[len(bom):]
+            after_sequence = after.encode(self.encoding)[len(bom):]
+            test_string = before + "\uDC80" + after
+            test_sequence = (bom + before_sequence +
+                             self.ill_formed_sequence + after_sequence)
+            self.assertRaises(UnicodeDecodeError, test_sequence.decode,
+                              self.encoding)
+            self.assertEqual(test_string.encode(self.encoding,
+                                                "surrogatepass"),
+                             test_sequence)
+            self.assertEqual(test_sequence.decode(self.encoding,
+                                                  "surrogatepass"),
+                             test_string)
+            self.assertEqual(test_sequence.decode(self.encoding, "ignore"),
+                             before + after)
+            self.assertEqual(test_sequence.decode(self.encoding, "replace"),
+                             before + self.ill_formed_sequence_replace + after)
+
 class UTF32Test(ReadTest, unittest.TestCase):
     encoding = "utf-32"
+    if sys.byteorder == 'little':
+        ill_formed_sequence = b"\x80\xdc\x00\x00"
+    else:
+        ill_formed_sequence = b"\x00\x00\xdc\x80"
 
     spamle = (b'\xff\xfe\x00\x00'
               b's\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m\x00\x00\x00'
@@ -392,6 +430,7 @@
 
 class UTF32LETest(ReadTest, unittest.TestCase):
     encoding = "utf-32-le"
+    ill_formed_sequence = b"\x80\xdc\x00\x00"
 
     def test_partial(self):
         self.check_partial(
@@ -436,6 +475,7 @@
 
 class UTF32BETest(ReadTest, unittest.TestCase):
     encoding = "utf-32-be"
+    ill_formed_sequence = b"\x00\x00\xdc\x80"
 
     def test_partial(self):
         self.check_partial(
@@ -481,6 +521,10 @@
 
 class UTF16Test(ReadTest, unittest.TestCase):
     encoding = "utf-16"
+    if sys.byteorder == 'little':
+        ill_formed_sequence = b"\x80\xdc"
+    else:
+        ill_formed_sequence = b"\xdc\x80"
 
     spamle = b'\xff\xfes\x00p\x00a\x00m\x00s\x00p\x00a\x00m\x00'
     spambe = b'\xfe\xff\x00s\x00p\x00a\x00m\x00s\x00p\x00a\x00m'
@@ -561,6 +605,7 @@
 
 class UTF16LETest(ReadTest, unittest.TestCase):
     encoding = "utf-16-le"
+    ill_formed_sequence = b"\x80\xdc"
 
     def test_partial(self):
         self.check_partial(
@@ -604,6 +649,7 @@
 
 class UTF16BETest(ReadTest, unittest.TestCase):
     encoding = "utf-16-be"
+    ill_formed_sequence = b"\xdc\x80"
 
     def test_partial(self):
         self.check_partial(
@@ -647,6 +693,8 @@
 
 class UTF8Test(ReadTest, unittest.TestCase):
     encoding = "utf-8"
+    ill_formed_sequence = b"\xed\xb2\x80"
+    ill_formed_sequence_replace = "\ufffd" * 3
 
     def test_partial(self):
         self.check_partial(
@@ -676,18 +724,11 @@
                                          u, u.encode(self.encoding))
 
     def test_lone_surrogates(self):
-        self.assertRaises(UnicodeEncodeError, "\ud800".encode, "utf-8")
-        self.assertRaises(UnicodeDecodeError, b"\xed\xa0\x80".decode, "utf-8")
-        self.assertEqual("[\uDC80]".encode("utf-8", "backslashreplace"),
-                         b'[\\udc80]')
-        self.assertEqual("[\uDC80]".encode("utf-8", "xmlcharrefreplace"),
-                         b'[&#56448;]')
-        self.assertEqual("[\uDC80]".encode("utf-8", "surrogateescape"),
+        super().test_lone_surrogates()
+        # not sure if this is making sense for
+        # UTF-16 and UTF-32
+        self.assertEqual("[\uDC80]".encode('utf-8', "surrogateescape"),
                          b'[\x80]')
-        self.assertEqual("[\uDC80]".encode("utf-8", "ignore"),
-                         b'[]')
-        self.assertEqual("[\uDC80]".encode("utf-8", "replace"),
-                         b'[?]')
 
     def test_surrogatepass_handler(self):
         self.assertEqual("abc\ud800def".encode("utf-8", "surrogatepass"),
@@ -820,6 +861,8 @@
             ]
         )
 
+    test_lone_surrogates = None
+
 class UTF16ExTest(unittest.TestCase):
 
     def test_errors(self):
@@ -846,6 +889,8 @@
 
 class UTF8SigTest(ReadTest, unittest.TestCase):
     encoding = "utf-8-sig"
+    ill_formed_sequence = b"\xed\xb2\x80"
+    ill_formed_sequence_replace = "\ufffd" * 3
 
     def test_partial(self):
         self.check_partial(
diff -r 4a02212064ce Objects/stringlib/codecs.h
--- a/Objects/stringlib/codecs.h	Mon Sep 02 03:23:21 2013 -0700
+++ b/Objects/stringlib/codecs.h	Mon Sep 02 19:50:59 2013 +0300
@@ -596,66 +596,106 @@
 #undef SWAB
 
 
-Py_LOCAL_INLINE(void)
-STRINGLIB(utf16_encode)(unsigned short *out,
-                        const STRINGLIB_CHAR *in,
+#if STRINGLIB_MAX_CHAR >= 0x80
+Py_LOCAL_INLINE(Py_ssize_t)
+STRINGLIB(utf16_encode)(const STRINGLIB_CHAR *in,
                         Py_ssize_t len,
+                        unsigned short **outptr,
                         int native_ordering)
 {
+    unsigned short *out = *outptr;
     const STRINGLIB_CHAR *end = in + len;
 #if STRINGLIB_SIZEOF_CHAR == 1
 # define SWAB2(CH)  ((CH) << 8)
 #else
 # define SWAB2(CH)  (((CH) << 8) | ((CH) >> 8))
 #endif
+    if (native_ordering) {
 #if STRINGLIB_MAX_CHAR < 0x10000
-    if (native_ordering) {
-# if STRINGLIB_SIZEOF_CHAR == 2
-        Py_MEMCPY(out, in, 2 * len);
-# else
-        _PyUnicode_CONVERT_BYTES(STRINGLIB_CHAR, unsigned short, in, end, out);
-# endif
-    } else {
         const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
         while (in < unrolled_end) {
+# if STRINGLIB_MAX_CHAR >= 0xd800
+            if (((in[0] ^ 0xd800) &
+                 (in[1] ^ 0xd800) &
+                 (in[2] ^ 0xd800) &
+                 (in[3] ^ 0xd800) & 0xf800) == 0)
+                break;
+# endif
+            out[0] = in[0];
+            out[1] = in[1];
+            out[2] = in[2];
+            out[3] = in[3];
+            in += 4; out += 4;
+        }
+#endif
+        while (in < end) {
+            Py_UCS4 ch;
+            ch = *in++;
+#if STRINGLIB_MAX_CHAR >= 0xd800
+            if (ch < 0xd800)
+                *out++ = ch;
+            else if (ch < 0xe000)
+                goto fail;
+# if STRINGLIB_MAX_CHAR >= 0x10000
+            else if (ch >= 0x10000) {
+                out[0] = Py_UNICODE_HIGH_SURROGATE(ch);
+                out[1] = Py_UNICODE_LOW_SURROGATE(ch);
+                out += 2;
+            }
+# endif
+            else
+#endif
+                *out++ = ch;
+        }
+    } else {
+#if STRINGLIB_MAX_CHAR < 0x10000
+        const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
+        while (in < unrolled_end) {
+# if STRINGLIB_MAX_CHAR >= 0xd800
+            if (((in[0] ^ 0xd800) &
+                 (in[1] ^ 0xd800) &
+                 (in[2] ^ 0xd800) &
+                 (in[3] ^ 0xd800) & 0xf800) == 0)
+                break;
+# endif
             out[0] = SWAB2(in[0]);
             out[1] = SWAB2(in[1]);
             out[2] = SWAB2(in[2]);
             out[3] = SWAB2(in[3]);
             in += 4; out += 4;
         }
-        while (in < end) {
-            *out++ = SWAB2(*in);
-            ++in;
-        }
-    }
-#else
-    if (native_ordering) {
+#endif
         while (in < end) {
             Py_UCS4 ch = *in++;
-            if (ch < 0x10000)
-                *out++ = ch;
-            else {
-                out[0] = Py_UNICODE_HIGH_SURROGATE(ch);
-                out[1] = Py_UNICODE_LOW_SURROGATE(ch);
-                out += 2;
-            }
-        }
-    } else {
-        while (in < end) {
-            Py_UCS4 ch = *in++;
-            if (ch < 0x10000)
+#if STRINGLIB_MAX_CHAR >= 0xd800
+            if (ch < 0xd800)
                 *out++ = SWAB2((Py_UCS2)ch);
-            else {
+            else if (ch < 0xe000)
+                goto fail;
+# if STRINGLIB_MAX_CHAR >= 0x10000
+            else if (ch >= 0x10000) {
                 Py_UCS2 ch1 = Py_UNICODE_HIGH_SURROGATE(ch);
                 Py_UCS2 ch2 = Py_UNICODE_LOW_SURROGATE(ch);
                 out[0] = SWAB2(ch1);
                 out[1] = SWAB2(ch2);
                 out += 2;
             }
+# endif
+            else
+#endif
+                *out++ = SWAB2((Py_UCS2)ch);
         }
     }
+    *outptr = out;
+    return len;
+#if STRINGLIB_MAX_CHAR >= 0xd800
+  fail:
 #endif
+    *outptr = out;
+    return len - (end - in + 1);
+}
+#endif
+
 #undef SWAB2
-}
+
 #endif /* STRINGLIB_IS_UNICODE */
diff -r 4a02212064ce Objects/unicodeobject.c
--- a/Objects/unicodeobject.c	Mon Sep 02 03:23:21 2013 -0700
+++ b/Objects/unicodeobject.c	Mon Sep 02 19:50:59 2013 +0300
@@ -4943,6 +4943,7 @@
     _PyUnicodeWriter writer;
     const unsigned char *q, *e;
     int le, bo = 0;       /* assume native ordering by default */
+    const char *encoding;
     const char *errmsg = "";
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
@@ -4982,6 +4983,7 @@
 #else
     le = bo <= 0;
 #endif
+    encoding = le ? "utf-32-le" : "utf-32-be";
 
     _PyUnicodeWriter_Init(&writer);
     writer.min_length = (e - q + 3) / 4;
@@ -5002,6 +5004,8 @@
                     ch = (q[3] << 24) | (q[2] << 16) | (q[1] << 8) | q[0];
                     if (ch > maxch)
                         break;
+                    if (kind != PyUnicode_1BYTE_KIND && Py_UNICODE_IS_SURROGATE(ch))
+                        break;
                     PyUnicode_WRITE(kind, data, pos++, ch);
                     q += 4;
                 } while (q <= last);
@@ -5011,6 +5015,8 @@
                     ch = (q[0] << 24) | (q[1] << 16) | (q[2] << 8) | q[3];
                     if (ch > maxch)
                         break;
+                    if (kind != PyUnicode_1BYTE_KIND && Py_UNICODE_IS_SURROGATE(ch))
+                        break;
                     PyUnicode_WRITE(kind, data, pos++, ch);
                     q += 4;
                 } while (q <= last);
@@ -5018,7 +5024,12 @@
             writer.pos = pos;
         }
 
-        if (ch <= maxch) {
+        if (Py_UNICODE_IS_SURROGATE(ch)) {
+            errmsg = "codepoint in surrogate code point range(0xd800, 0xe000)";
+            startinpos = ((const char *)q) - starts;
+            endinpos = startinpos + 4;
+        }
+        else if (ch <= maxch) {
             if (q == e || consumed)
                 break;
             /* remaining bytes at the end? (size should be divisible by 4) */
@@ -5042,7 +5053,7 @@
            chooses to skip the input */
         if (unicode_decode_call_errorhandler_writer(
                 errors, &errorHandler,
-                "utf32", errmsg,
+                encoding, errmsg,
                 &starts, (const char **)&e, &startinpos, &endinpos, &exc, (const char **)&q,
                 &writer))
             goto onError;
@@ -5079,6 +5090,14 @@
 #else
     int iorder[] = {3, 2, 1, 0};
 #endif
+    const char *encoding;
+    PyObject *errorHandler = NULL;
+    PyObject *exc = NULL;
+    PyObject *rep = NULL;
+
+#define SWAB4(CH, tmp)  (tmp = (CH), \
+            tmp = ((tmp & 0x00FF00FFu) << 8) + ((tmp >> 8) & 0x00FF00FFu), \
+            ((tmp & 0x0000FFFFu) << 16) + ((tmp >> 16) & 0x0000FFFFu))
 
 #define STORECHAR(CH)                           \
     do {                                        \
@@ -5118,6 +5137,7 @@
         iorder[1] = 1;
         iorder[2] = 2;
         iorder[3] = 3;
+        encoding = "utf-32-le";
     }
     else if (byteorder == 1) {
         /* force BE */
@@ -5125,13 +5145,102 @@
         iorder[1] = 2;
         iorder[2] = 1;
         iorder[3] = 0;
-    }
-
-    for (i = 0; i < len; i++)
-        STORECHAR(PyUnicode_READ(kind, data, i));
-
+        encoding = "utf-32-be";
+    }
+    else
+        encoding = "utf-32";
+
+    if (kind == PyUnicode_1BYTE_KIND) {
+        for (i = 0; i < len; i++)
+            STORECHAR(PyUnicode_READ(kind, data, i));
+        goto done;
+    }
+
+    for (i = 0; i < len;) {
+        Py_ssize_t repsize, moreunits;
+        Py_UCS4 ch = PyUnicode_READ(kind, data, i++);
+        assert(ch <= MAX_UNICODE);
+        if (!Py_UNICODE_IS_SURROGATE(ch)) {
+            STORECHAR(ch);
+            continue;
+        }
+
+        rep = unicode_encode_call_errorhandler(
+                errors, &errorHandler,
+                encoding, "surrogates not allowed",
+                str, &exc, i-1, i, &i);
+
+        if (!rep)
+            goto error;
+
+        if (PyBytes_Check(rep)) {
+            repsize = PyBytes_GET_SIZE(rep);
+            if (repsize & 3) {
+                raise_encode_exception(&exc, encoding,
+                                       str, i - 1, i,
+                                       "surrogates not allowed");
+                goto error;
+            }
+            moreunits = repsize / 4;
+        }
+        else {
+            assert(PyUnicode_Check(rep));
+            if (PyUnicode_READY(rep) < 0)
+                goto error;
+            moreunits = repsize = PyUnicode_GET_LENGTH(rep);
+            if (!PyUnicode_IS_ASCII(rep)) {
+                raise_encode_exception(&exc, encoding,
+                                       str, i - 1, i,
+                                       "surrogates not allowed");
+                goto error;
+            }
+        }
+
+        /* four bytes are reserved for each surrogate */
+        if (moreunits > 1) {
+            Py_ssize_t outpos = p - (unsigned char*) PyBytes_AS_STRING(v);
+            if (PyBytes_GET_SIZE(v) > PY_SSIZE_T_MAX - 4 * moreunits + 4) {
+                /* integer overflow */
+                PyErr_NoMemory();
+                goto error;
+            }
+            if (_PyBytes_Resize(&v, PyBytes_GET_SIZE(v) + 4 * (moreunits - 1)) < 0)
+                goto error;
+            p = (unsigned char*) PyBytes_AS_STRING(v) + outpos;
+        }
+
+        if (PyBytes_Check(rep)) {
+            Py_MEMCPY(p, PyBytes_AS_STRING(rep), repsize);
+            p += repsize;
+        } else /* rep is unicode */ {
+            const Py_UCS1 *repdata;
+            assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);
+            repdata = PyUnicode_1BYTE_DATA(rep);
+            while (repsize--) {
+                Py_UCS4 ch = *repdata++;
+                STORECHAR(ch);
+            }
+        }
+
+        Py_CLEAR(rep);
+    }
+
+    /* Cut back to size actually needed. This is necessary for, for example,
+       encoding of a string containing isolated surrogates and the 'ignore'
+       handler is used. */
+    nsize = p - (unsigned char*) PyBytes_AS_STRING(v);
+    if (nsize != PyBytes_GET_SIZE(v))
+      _PyBytes_Resize(&v, nsize);
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
   done:
     return v;
+  error:
+    Py_XDECREF(rep);
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
+    Py_XDECREF(v);
+    return NULL;
 #undef STORECHAR
 }
 
@@ -5184,6 +5293,7 @@
     const char *errmsg = "";
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
+    const char *encoding;
 
     q = (unsigned char *)s;
     e = q + size;
@@ -5217,8 +5327,10 @@
 
 #if PY_LITTLE_ENDIAN
     native_ordering = bo <= 0;
+    encoding = bo <= 0 ? "utf-16-le" : "utf-16-be";
 #else
     native_ordering = bo >= 0;
+    encoding = bo >= 0 ? "utf-16-be" : "utf-16-le";
 #endif
 
     /* Note: size will always be longer than the resulting Unicode
@@ -5292,7 +5404,7 @@
         if (unicode_decode_call_errorhandler_writer(
                 errors,
                 &errorHandler,
-                "utf16", errmsg,
+                encoding, errmsg,
                 &starts,
                 (const char **)&e,
                 &startinpos,
@@ -5328,13 +5440,17 @@
     Py_ssize_t len;
     PyObject *v;
     unsigned short *out;
-    Py_ssize_t bytesize;
     Py_ssize_t pairs;
 #if PY_BIG_ENDIAN
     int native_ordering = byteorder >= 0;
 #else
     int native_ordering = byteorder <= 0;
 #endif
+    const char *encoding;
+    Py_ssize_t nsize, pos;
+    PyObject *errorHandler = NULL;
+    PyObject *exc = NULL;
+    PyObject *rep = NULL;
 
     if (!PyUnicode_Check(str)) {
         PyErr_BadArgument();
@@ -5356,8 +5472,8 @@
     }
     if (len > PY_SSIZE_T_MAX / 2 - pairs - (byteorder == 0))
         return PyErr_NoMemory();
-    bytesize = (len + pairs + (byteorder == 0)) * 2;
-    v = PyBytes_FromStringAndSize(NULL, bytesize);
+    nsize = len + pairs + (byteorder == 0);
+    v = PyBytes_FromStringAndSize(NULL, nsize * 2);
     if (v == NULL)
         return NULL;
 
@@ -5369,25 +5485,106 @@
     if (len == 0)
         goto done;
 
-    switch (kind) {
-    case PyUnicode_1BYTE_KIND: {
-        ucs1lib_utf16_encode(out, (const Py_UCS1 *)data, len, native_ordering);
-        break;
-    }
-    case PyUnicode_2BYTE_KIND: {
-        ucs2lib_utf16_encode(out, (const Py_UCS2 *)data, len, native_ordering);
-        break;
-    }
-    case PyUnicode_4BYTE_KIND: {
-        ucs4lib_utf16_encode(out, (const Py_UCS4 *)data, len, native_ordering);
-        break;
-    }
-    default:
-        assert(0);
-    }
-
+    if (kind == PyUnicode_1BYTE_KIND) {
+        ucs1lib_utf16_encode((const Py_UCS1 *)data, len, &out, native_ordering);
+        goto done;
+    }
+
+    if (byteorder < 0)
+        encoding = "utf-16-le";
+    else if (byteorder > 0)
+        encoding = "utf-16-be";
+    else
+        encoding = "utf-16";
+
+    pos = 0;
+    while (pos < len) {
+        Py_ssize_t repsize, moreunits;
+
+        if (kind == PyUnicode_2BYTE_KIND) {
+            pos += ucs2lib_utf16_encode((const Py_UCS2 *)data + pos, len - pos,
+                                        &out, native_ordering);
+        }
+        else {
+            assert(kind == PyUnicode_4BYTE_KIND);
+            pos += ucs4lib_utf16_encode((const Py_UCS4 *)data + pos, len - pos,
+                                        &out, native_ordering);
+        }
+        if (pos == len)
+            break;
+
+        rep = unicode_encode_call_errorhandler(
+                errors, &errorHandler,
+                encoding, "surrogates not allowed",
+                str, &exc, pos, pos + 1, &pos);
+        if (!rep)
+            goto error;
+
+        if (PyBytes_Check(rep)) {
+            repsize = PyBytes_GET_SIZE(rep);
+            if (repsize & 1) {
+                raise_encode_exception(&exc, encoding,
+                                       str, pos - 1, pos,
+                                       "surrogates not allowed");
+                goto error;
+            }
+            moreunits = repsize / 2;
+        }
+        else {
+            assert(PyUnicode_Check(rep));
+            if (PyUnicode_READY(rep) < 0)
+                goto error;
+            moreunits = repsize = PyUnicode_GET_LENGTH(rep);
+            if (!PyUnicode_IS_ASCII(rep)) {
+                raise_encode_exception(&exc, encoding,
+                                       str, pos - 1, pos,
+                                       "surrogates not allowed");
+                goto error;
+            }
+        }
+
+        /* two bytes are reserved for each surrogate */
+        if (moreunits > 1) {
+            Py_ssize_t outpos = out - (unsigned short*) PyBytes_AS_STRING(v);
+            if (PyBytes_GET_SIZE(v) > PY_SSIZE_T_MAX - 2 * moreunits + 2) {
+                /* integer overflow */
+                PyErr_NoMemory();
+                goto error;
+            }
+            if (_PyBytes_Resize(&v, PyBytes_GET_SIZE(v) + 2 * (moreunits - 1)) < 0)
+                goto error;
+            out = (unsigned short*) PyBytes_AS_STRING(v) + outpos;
+        }
+
+        if (PyBytes_Check(rep)) {
+            Py_MEMCPY(out, PyBytes_AS_STRING(rep), repsize);
+            out += moreunits;
+        } else /* rep is unicode */ {
+            assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);
+            ucs1lib_utf16_encode(PyUnicode_1BYTE_DATA(rep), repsize,
+                                 &out, native_ordering);
+        }
+
+        Py_CLEAR(rep);
+    }
+
+    /* Cut back to size actually needed. This is necessary for, for example,
+    encoding of a string containing isolated surrogates and the 'ignore' handler
+    is used. */
+    nsize = (unsigned char*) out - (unsigned char*) PyBytes_AS_STRING(v);
+    if (nsize != PyBytes_GET_SIZE(v))
+      _PyBytes_Resize(&v, nsize);
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
   done:
     return v;
+  error:
+    Py_XDECREF(rep);
+    Py_XDECREF(errorHandler);
+    Py_XDECREF(exc);
+    Py_XDECREF(v);
+    return NULL;
+#undef STORECHAR
 }
 
 PyObject *
diff -r 4a02212064ce Python/codecs.c
--- a/Python/codecs.c	Mon Sep 02 03:23:21 2013 -0700
+++ b/Python/codecs.c	Mon Sep 02 19:50:59 2013 +0300
@@ -740,24 +740,67 @@
 {
     PyObject *restuple;
     PyObject *object;
+    PyObject *encode;
+    char *encoding;
+    int bytelength = 3;
+    int le = 1;
     Py_ssize_t i;
     Py_ssize_t start;
     Py_ssize_t end;
     PyObject *res;
     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
-        char *outp;
+        unsigned char *outp;
         if (PyUnicodeEncodeError_GetStart(exc, &start))
             return NULL;
         if (PyUnicodeEncodeError_GetEnd(exc, &end))
             return NULL;
         if (!(object = PyUnicodeEncodeError_GetObject(exc)))
             return NULL;
-        res = PyBytes_FromStringAndSize(NULL, 3*(end-start));
+        if (!(encode = PyUnicodeEncodeError_GetEncoding(exc))) {
+            Py_DECREF(object);
+            return NULL;
+        }
+        if (!(encoding = PyUnicode_AsUTF8(encode))) {
+            Py_DECREF(object);
+            Py_DECREF(encode);
+            return NULL;
+        }
+
+        if (strcmp(encoding, "utf-8") == 0) {
+            /*no need to check others*/
+        }
+        else if (strcmp(encoding, "utf-16") == 0) {
+            bytelength = 2;
+#ifdef WORDS_BIGENDIAN
+            le = 0;
+#endif
+        }
+        else if (strcmp(encoding, "utf-16-le") == 0)
+            bytelength = 2;
+        else if (strcmp(encoding, "utf-16-be") == 0) {
+            bytelength = 2;
+            le = 0;
+        }
+        else if (strcmp(encoding, "utf-32") == 0) {
+            bytelength = 4;
+#ifdef WORDS_BIGENDIAN
+            le = 0;
+#endif
+        }
+        else if (strcmp(encoding, "utf-32-le") == 0)
+            bytelength = 4;
+        else if (strcmp(encoding, "utf-32-be") == 0) {
+            bytelength = 4;
+            le = 0;
+        }
+        Py_DECREF(encode);
+
+        res = PyBytes_FromStringAndSize(NULL, bytelength*(end-start));
         if (!res) {
             Py_DECREF(object);
             return NULL;
         }
-        outp = PyBytes_AsString(res);
+        outp = (unsigned char*)PyBytes_AsString(res);
         for (i = start; i < end; i++) {
             /* object is guaranteed to be "ready" */
             Py_UCS4 ch = PyUnicode_READ_CHAR(object, i);
@@ -768,9 +811,37 @@
                 Py_DECREF(object);
                 return NULL;
             }
-            *outp++ = (char)(0xe0 | (ch >> 12));
-            *outp++ = (char)(0x80 | ((ch >> 6) & 0x3f));
-            *outp++ = (char)(0x80 | (ch & 0x3f));
+            switch (bytelength) {
+            case 3:
+                *outp++ = (unsigned char)(0xe0 | (ch >> 12));
+                *outp++ = (unsigned char)(0x80 | ((ch >> 6) & 0x3f));
+                *outp++ = (unsigned char)(0x80 | (ch & 0x3f));
+                break;
+            case 2:
+                if (le) {
+                    *outp++ = (unsigned char) ch;
+                    *outp++ = (unsigned char)(ch >> 8);
+                }
+                else {
+                    *outp++ = (unsigned char)(ch >> 8);
+                    *outp++ = (unsigned char) ch;
+                }
+                break;
+            case 4:
+                if (le) {
+                    *outp++ = (unsigned char) ch;
+                    *outp++ = (unsigned char)(ch >> 8);
+                    *outp++ = (unsigned char)(ch >> 16);
+                    *outp++ = (unsigned char)(ch >> 24);
+                }
+                else {
+                    *outp++ = (unsigned char)(ch >> 24);
+                    *outp++ = (unsigned char)(ch >> 16);
+                    *outp++ = (unsigned char)(ch >> 8);
+                    *outp++ = (unsigned char) ch;
+                }
+                break;
+            }
         }
         restuple = Py_BuildValue("(On)", res, end);
         Py_DECREF(res);
@@ -782,34 +853,79 @@
         Py_UCS4 ch = 0;
         if (PyUnicodeDecodeError_GetStart(exc, &start))
             return NULL;
+        if (PyUnicodeDecodeError_GetEnd(exc, &end))
+            return NULL;
         if (!(object = PyUnicodeDecodeError_GetObject(exc)))
             return NULL;
         if (!(p = (unsigned char*)PyBytes_AsString(object))) {
             Py_DECREF(object);
             return NULL;
         }
+        if (!(encode = PyUnicodeDecodeError_GetEncoding(exc))) {
+            Py_DECREF(object);
+            return NULL;
+        }
+        if (!(encoding = PyUnicode_AsUTF8(encode))) {
+            Py_DECREF(object);
+            Py_DECREF(encode);
+            return NULL;
+        }
+
+        if (strcmp(encoding, "utf-8") == 0) {
+            /*no need to check others*/
+        }
+        else if (strcmp(encoding, "utf-16-le") == 0)
+            bytelength = 2;
+        else if (strcmp(encoding, "utf-16-be") == 0) {
+            bytelength = 2;
+            le = 0;
+        }
+        else if (strcmp(encoding, "utf-32-le") == 0)
+            bytelength = 4;
+        else if (strcmp(encoding, "utf-32-be") == 0) {
+            bytelength = 4;
+            le = 0;
+        }
+        Py_DECREF(encode);
+
         /* Try decoding a single surrogate character. If
            there are more, let the codec call us again. */
         p += start;
-        if (PyBytes_GET_SIZE(object) - start >= 3 &&
-            (p[0] & 0xf0) == 0xe0 &&
-            (p[1] & 0xc0) == 0x80 &&
-            (p[2] & 0xc0) == 0x80) {
-            /* it's a three-byte code */
-            ch = ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) + (p[2] & 0x3f);
-            if (!Py_UNICODE_IS_SURROGATE(ch))
-                /* it's not a surrogate - fail */
-                ch = 0;
+        if (PyBytes_GET_SIZE(object) - start >= bytelength) {
+            switch (bytelength) {
+            case 3:
+                if ((p[0] & 0xf0) == 0xe0 &&
+                    (p[1] & 0xc0) == 0x80 &&
+                    (p[2] & 0xc0) == 0x80) {
+                    /* it's a three-byte code */
+                    ch = ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) + (p[2] & 0x3f);
+                }
+                break;
+            case 2:
+                if (le)
+                    ch = p[1] << 8 | p[0];
+                else
+                    ch = p[0] << 8 | p[1];
+                break;
+            case 4:
+                if (le)
+                    ch = (p[3] << 24) | (p[2] << 16) | (p[1] << 8) | p[0];
+                else
+                    ch = (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
+                break;
+            }
         }
+
         Py_DECREF(object);
-        if (ch == 0) {
+        if (!Py_UNICODE_IS_SURROGATE(ch)) {
+            /* it's not a surrogate - fail */
             PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
             return NULL;
         }
         res = PyUnicode_FromOrdinal(ch);
         if (res == NULL)
             return NULL;
-        return Py_BuildValue("(Nn)", res, start+3);
+        return Py_BuildValue("(Nn)", res, start + bytelength);
     }
     else {
         wrong_exception_type(exc);