diff -r f23d0a4278aa Objects/stringlib/codecs.h
--- a/Objects/stringlib/codecs.h	Fri May 15 12:55:20 2015 -0400
+++ b/Objects/stringlib/codecs.h	Sat May 16 15:51:28 2015 +0300
@@ -18,12 +18,15 @@
 Py_LOCAL_INLINE(Py_UCS4)
 STRINGLIB(utf8_decode)(const char **inptr, const char *end,
                        STRINGLIB_CHAR *dest,
-                       Py_ssize_t *outpos)
+                       Py_ssize_t *outpos,
+                       const char *errors)
 {
     Py_UCS4 ch;
     const char *s = *inptr;
     const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
     STRINGLIB_CHAR *p = dest + *outpos;
+    int len; /* the length of the invalid byte sequence */
+    int errorType = _Py_CODEC_ERROR_UNKNOWN;
 
     while (s < end) {
         ch = (unsigned char)*s;
@@ -153,7 +156,10 @@ STRINGLIB(utf8_decode)(const char **inpt
                    not valid UTF-8 so they are rejected.
                    See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf
                    (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt */
-                goto InvalidContinuation1;
+                if (errorType == _Py_CODEC_ERROR_UNKNOWN)
+                    errorType = detect_standard_errorhandler(errors);
+                if (errorType != _Py_CODEC_ERROR_SURROGATEPASS)
+                    goto InvalidContinuation1;
             }
             if (!IS_CONTINUATION_BYTE(ch3)) {
                 /* invalid continuation byte */
@@ -228,24 +234,49 @@ STRINGLIB(utf8_decode)(const char **inpt
             continue;
         }
         goto InvalidStart;
+
+    InvalidStart:
+        len = 0;
+        goto Invalid;
+    InvalidContinuation1:
+        len = 1;
+        goto Invalid;
+    InvalidContinuation2:
+        len = 2;
+        goto Invalid;
+    InvalidContinuation3:
+        len = 3;
+        goto Invalid;
+    Invalid:
+        if (errorType == _Py_CODEC_ERROR_UNKNOWN)
+            errorType = detect_standard_errorhandler(errors);
+        if (errorType == _Py_CODEC_ERROR_SURROGATEESCAPE) {
+#if STRINGLIB_MAX_CHAR < 0xDCFF
+            assert ((ch >= 0x80) && (ch <= 0xFF));
+            ch += 0xDC00;
+            s++;
+            /* Out-of-range */
+            goto Return;
+#else
+            while (1) {
+                assert ((ch >= 0x80) && (ch <= 0xFF));
+                *p++ = ch + 0xDC00;
+                s++;
+                if (--len <= 0)
+                    break;
+                ch = (unsigned char)*s;
+            }
+            continue;
+#endif
+        }
+        ch = len;
+        goto Return;
     }
-    ch = 0;
+    ch = (Py_UCS4)-1;
 Return:
     *inptr = s;
     *outpos = p - dest;
     return ch;
-InvalidStart:
-    ch = 1;
-    goto Return;
-InvalidContinuation1:
-    ch = 2;
-    goto Return;
-InvalidContinuation2:
-    ch = 3;
-    goto Return;
-InvalidContinuation3:
-    ch = 4;
-    goto Return;
 }
 
 #undef ASCII_CHAR_MASK
@@ -271,6 +302,7 @@ STRINGLIB(utf8_encoder)(PyObject *unicod
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
     PyObject *rep = NULL;
+    int errorType = _Py_CODEC_ERROR_UNKNOWN;
 #endif
 #if STRINGLIB_SIZEOF_CHAR == 1
     const Py_ssize_t max_char_size = 2;
@@ -328,6 +360,22 @@ STRINGLIB(utf8_encoder)(PyObject *unicod
         else if (Py_UNICODE_IS_SURROGATE(ch)) {
             Py_ssize_t newpos;
             Py_ssize_t repsize, k, startpos;
+            if (errorType == _Py_CODEC_ERROR_UNKNOWN)
+                errorType = detect_standard_errorhandler(errors);
+            if (errorType == _Py_CODEC_ERROR_SURROGATEPASS)
+                goto surrogatepass;
+            if (errorType == _Py_CODEC_ERROR_SURROGATEESCAPE) {
+                while (ch >= 0xdc80 && ch <= 0xdcff) {
+                    *p++ = (char)(ch & 0xff);
+                    if (i >= size)
+                        break;
+                    ch = data[i++];
+                }
+                if (i >= size)
+                    break;
+                continue;
+            }
+
             startpos = i-1;
             rep = unicode_encode_call_errorhandler(
                   errors, &errorHandler, "utf-8", "surrogates not allowed",
@@ -398,6 +446,7 @@ STRINGLIB(utf8_encoder)(PyObject *unicod
         if (ch < 0x10000)
 #endif
         {
+ surrogatepass:
             *p++ = (char)(0xe0 | (ch >> 12));
             *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
             *p++ = (char)(0x80 | (ch & 0x3f));
@@ -478,7 +527,8 @@ STRINGLIB(utf8_encoder)(PyObject *unicod
 Py_LOCAL_INLINE(Py_UCS4)
 STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
                         STRINGLIB_CHAR *dest, Py_ssize_t *outpos,
-                        int native_ordering)
+                        int native_ordering,
+                        const char *errors)
 {
     Py_UCS4 ch;
     const unsigned char *aligned_end =
@@ -491,6 +541,8 @@ STRINGLIB(utf16_decode)(const unsigned c
 #else
     int ihi = !native_ordering, ilo = !!native_ordering;
 #endif
+    int errorType = _Py_CODEC_ERROR_UNKNOWN;
+    int err; /* error code */
     --e;
 
     while (q < e) {
@@ -573,22 +625,40 @@ STRINGLIB(utf16_decode)(const unsigned c
         goto Return;
 #else
         *p++ = (STRINGLIB_CHAR)ch;
+        continue;
 #endif
+
+UnexpectedEnd:
+        err = 1;
+        goto Invalid;
+IllegalEncoding:
+        err = 2;
+        goto Invalid;
+IllegalSurrogate:
+        err = 3;
+        goto Invalid;
+Invalid:
+        if (errorType == _Py_CODEC_ERROR_UNKNOWN)
+            errorType = detect_standard_errorhandler(errors);
+        if (errorType == _Py_CODEC_ERROR_SURROGATEPASS) {
+            if (err == 3) /* illegal surrogate */
+                q -= 2;
+#if STRINGLIB_SIZEOF_CHAR < 2
+            if (ch > STRINGLIB_MAX_CHAR)
+                /* Out-of-range */
+                goto Return;
+#endif
+            *p++ = (STRINGLIB_CHAR)ch;
+            continue;
+        }
+        ch = err;
+        goto Return;
     }
     ch = 0;
 Return:
     *inptr = q;
     *outpos = p - dest;
     return ch;
-UnexpectedEnd:
-    ch = 1;
-    goto Return;
-IllegalEncoding:
-    ch = 2;
-    goto Return;
-IllegalSurrogate:
-    ch = 3;
-    goto Return;
 }
 #undef UCS2_REPEAT_MASK
 #undef FAST_CHAR_MASK
diff -r f23d0a4278aa Objects/unicodeobject.c
--- a/Objects/unicodeobject.c	Fri May 15 12:55:20 2015 -0400
+++ b/Objects/unicodeobject.c	Sat May 16 15:51:28 2015 +0300
@@ -3928,6 +3928,23 @@ PyUnicode_GetDefaultEncoding(void)
     return "utf-8";
 }
 
+#define _Py_CODEC_ERROR_UNKNOWN 0  /* not classified yet; never returned by detect_standard_errorhandler() */
+#define _Py_CODEC_ERROR_SURROGATEPASS 1  /* errors is "surrogatepass" */
+#define _Py_CODEC_ERROR_SURROGATEESCAPE 2  /* errors is "surrogateescape" */
+#define _Py_CODEC_ERROR_OTHER -1  /* errors is NULL (strict) or any other handler name */
+/* Classify the error handler name `errors` (may be NULL) as a _Py_CODEC_ERROR_* code; exact strcmp() match only. */
+static int
+detect_standard_errorhandler(const char *errors)
+{
+    if (errors == NULL)
+        return _Py_CODEC_ERROR_OTHER;  /* strict */
+    if (strcmp(errors, "surrogatepass") == 0)
+        return _Py_CODEC_ERROR_SURROGATEPASS;
+    if (strcmp(errors, "surrogateescape") == 0)
+        return _Py_CODEC_ERROR_SURROGATEESCAPE;
+    return _Py_CODEC_ERROR_OTHER;  /* every other handler name */
+}
+
 /* create or adjust a UnicodeDecodeError */
 static void
 make_decode_exception(PyObject **exceptionObject,
@@ -4684,6 +4701,7 @@ PyUnicode_DecodeUTF8Stateful(const char 
     const char *errmsg = "";
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
+    int errorType = _Py_CODEC_ERROR_UNKNOWN;
 
     if (size == 0) {
         if (consumed)
@@ -4710,35 +4728,35 @@ PyUnicode_DecodeUTF8Stateful(const char 
         int kind = writer.kind;
         if (kind == PyUnicode_1BYTE_KIND) {
             if (PyUnicode_IS_ASCII(writer.buffer))
-                ch = asciilib_utf8_decode(&s, end, writer.data, &writer.pos);
+                ch = asciilib_utf8_decode(&s, end, writer.data, &writer.pos, errors);
             else
-                ch = ucs1lib_utf8_decode(&s, end, writer.data, &writer.pos);
+                ch = ucs1lib_utf8_decode(&s, end, writer.data, &writer.pos, errors);
         } else if (kind == PyUnicode_2BYTE_KIND) {
-            ch = ucs2lib_utf8_decode(&s, end, writer.data, &writer.pos);
+            ch = ucs2lib_utf8_decode(&s, end, writer.data, &writer.pos, errors);
         } else {
             assert(kind == PyUnicode_4BYTE_KIND);
-            ch = ucs4lib_utf8_decode(&s, end, writer.data, &writer.pos);
+            ch = ucs4lib_utf8_decode(&s, end, writer.data, &writer.pos, errors);
         }
 
         switch (ch) {
-        case 0:
+        case (Py_UCS4)-1:
             if (s == end || consumed)
                 goto End;
             errmsg = "unexpected end of data";
             startinpos = s - starts;
             endinpos = end - starts;
             break;
-        case 1:
+        case 0:
             errmsg = "invalid start byte";
             startinpos = s - starts;
             endinpos = startinpos + 1;
             break;
+        case 1:
         case 2:
         case 3:
-        case 4:
             errmsg = "invalid continuation byte";
             startinpos = s - starts;
-            endinpos = startinpos + ch - 1;
+            endinpos = startinpos + ch;
             break;
         default:
             if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0)
@@ -4918,6 +4936,7 @@ PyUnicode_DecodeUTF32Stateful(const char
     const char *errmsg = "";
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
+    int errorType = _Py_CODEC_ERROR_UNKNOWN;
 
     q = (unsigned char *)s;
     e = q + size;
@@ -4998,6 +5017,14 @@ PyUnicode_DecodeUTF32Stateful(const char
         }
 
         if (Py_UNICODE_IS_SURROGATE(ch)) {
+            if (errorType == _Py_CODEC_ERROR_UNKNOWN)
+                errorType = detect_standard_errorhandler(errors);
+            if (errorType == _Py_CODEC_ERROR_SURROGATEPASS) {
+                if (_PyUnicodeWriter_WriteCharInline(&writer, ch) < 0)
+                    goto onError;
+                q += 4;
+                continue;
+            }
             errmsg = "code point in surrogate code point range(0xd800, 0xe000)";
             startinpos = ((const char *)q) - starts;
             endinpos = startinpos + 4;
@@ -5066,6 +5093,8 @@ PyObject *
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
     PyObject *rep = NULL;
+    int errorType = _Py_CODEC_ERROR_UNKNOWN;
+    int surrogatepass = 0;
 
     if (!PyUnicode_Check(str)) {
         PyErr_BadArgument();
@@ -5298,20 +5327,20 @@ PyUnicode_DecodeUTF16Stateful(const char
                 if (PyUnicode_IS_ASCII(writer.buffer))
                     ch = asciilib_utf16_decode(&q, e,
                             (Py_UCS1*)writer.data, &writer.pos,
-                            native_ordering);
+                            native_ordering, errors);
                 else
                     ch = ucs1lib_utf16_decode(&q, e,
                             (Py_UCS1*)writer.data, &writer.pos,
-                            native_ordering);
+                            native_ordering, errors);
             } else if (kind == PyUnicode_2BYTE_KIND) {
                 ch = ucs2lib_utf16_decode(&q, e,
                         (Py_UCS2*)writer.data, &writer.pos,
-                        native_ordering);
+                        native_ordering, errors);
             } else {
                 assert(kind == PyUnicode_4BYTE_KIND);
                 ch = ucs4lib_utf16_decode(&q, e,
                         (Py_UCS4*)writer.data, &writer.pos,
-                        native_ordering);
+                        native_ordering, errors);
             }
         }
 
@@ -6407,6 +6436,7 @@ unicode_encode_ucs1(PyObject *unicode,
     /* the following variable is used for caching string comparisons
      * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
     int known_errorHandler = -1;
+    int surrogateescape = detect_standard_errorhandler(errors) == _Py_CODEC_ERROR_SURROGATEESCAPE;
 
     if (PyUnicode_READY(unicode) == -1)
         return NULL;
@@ -6437,9 +6467,24 @@ unicode_encode_ucs1(PyObject *unicode,
             PyObject *repunicode;
             Py_ssize_t repsize, newpos, respos, i;
             /* startpos for collecting unencodable chars */
-            Py_ssize_t collstart = pos;
-            Py_ssize_t collend = pos;
+            Py_ssize_t collstart;
+            Py_ssize_t collend;
+
+            if (surrogateescape) {
+                while (c >= 0xdc80 && c <= 0xdcff) {
+                    *str++ = (char)c;
+                    ++pos;
+                    if (pos >= size)
+                        break;
+                    c = PyUnicode_READ(kind, data, pos);
+                }
+                if (pos >= size)
+                    break;
+                /* fallback to general error handling */
+            }
+
             /* find all unecodable characters */
+            collstart = collend = pos;
             while ((collend < size) && (PyUnicode_READ(kind, data, collend) >= limit))
                 ++collend;
             /* cache callback name lookup (if not done yet, i.e. it's the first error) */
@@ -6658,6 +6703,7 @@ PyUnicode_DecodeASCII(const char *s,
     const char *e;
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
+    int errorType = _Py_CODEC_ERROR_UNKNOWN;
 
     if (size == 0)
         _Py_RETURN_UNICODE_EMPTY();
@@ -6688,6 +6734,23 @@ PyUnicode_DecodeASCII(const char *s,
             ++s;
         }
         else {
+            if (errorType == _Py_CODEC_ERROR_UNKNOWN) {
+                errorType = detect_standard_errorhandler(errors);
+                if (errorType == _Py_CODEC_ERROR_SURROGATEESCAPE &&
+                    kind < PyUnicode_2BYTE_KIND) {
+                    if (_PyUnicodeWriter_Prepare(&writer, size - writer.pos, 0xffff) < 0)
+                        return NULL;
+                    kind = writer.kind;
+                    data = writer.data;
+                }
+            }
+            if (errorType == _Py_CODEC_ERROR_SURROGATEESCAPE) {
+                PyUnicode_WRITE(kind, data, writer.pos, c + 0xdc00);
+                writer.pos++;
+                ++s;
+                continue;
+            }
+
             startinpos = s-starts;
             endinpos = startinpos + 1;
             if (unicode_decode_call_errorhandler_writer(