diff -r b6c372147db4 Lib/test/test_codecs.py
--- a/Lib/test/test_codecs.py	Thu May 15 20:50:30 2014 -0400
+++ b/Lib/test/test_codecs.py	Fri May 16 15:01:14 2014 +0300
@@ -2834,19 +2834,19 @@
             ('\u0141', 'strict', None),
             ('\u0141', 'ignore', b''),
             ('\u0141', 'replace', b'L'),
-            ('\udc98', 'surrogateescape', b'\x98'),
-            ('\udc98', 'surrogatepass', None),
+            ('\udc9d', 'surrogateescape', b'\x9d'),
+            ('\udc9d', 'surrogatepass', None),
         ))
         self.check_decode(1252, (
             (b'abc', 'strict', 'abc'),
             (b'\xe9\x80', 'strict', '\xe9\u20ac'),
             (b'\xff', 'strict', '\xff'),
             # invalid bytes
-            (b'[\x98]', 'strict', None),
-            (b'[\x98]', 'ignore', '[]'),
-            (b'[\x98]', 'replace', '[\ufffd]'),
-            (b'[\x98]', 'surrogateescape', '[\udc98]'),
-            (b'[\x98]', 'surrogatepass', None),
+            (b'[\x9d]', 'strict', None),
+            (b'[\x9d]', 'ignore', '[]'),
+            (b'[\x9d]', 'replace', '[\ufffd]'),
+            (b'[\x9d]', 'surrogateescape', '[\udc9d]'),
+            (b'[\x9d]', 'surrogatepass', None),
         ))
 
     def test_cp_utf7(self):
diff -r b6c372147db4 Python/codecs.c
--- a/Python/codecs.c	Thu May 15 20:50:30 2014 -0400
+++ b/Python/codecs.c	Fri May 16 15:01:14 2014 +0300
@@ -915,7 +915,7 @@
         Py_TOLOWER(encoding[1]) == 't' &&
         Py_TOLOWER(encoding[2]) == 'f') {
         encoding += 3;
-        if (*encoding == '-' || *encoding == '_' )
+        if (*encoding == '-' || *encoding == '_')
             encoding++;
         if (encoding[0] == '8' && encoding[1] == '\0') {
             *bytelength = 3;
@@ -931,7 +931,7 @@
                 return ENC_UTF16LE;
 #endif
             }
-            if (*encoding == '-' || *encoding == '_' )
+            if (*encoding == '-' || *encoding == '_')
                 encoding++;
             if (Py_TOLOWER(encoding[1]) == 'e' && encoding[2] == '\0') {
                 if (Py_TOLOWER(encoding[0]) == 'b')
@@ -950,7 +950,7 @@
                 return ENC_UTF32LE;
 #endif
             }
-            if (*encoding == '-' || *encoding == '_' )
+            if (*encoding == '-' || *encoding == '_')
                 encoding++;
             if (Py_TOLOWER(encoding[1]) == 'e' && encoding[2] == '\0') {
                 if (Py_TOLOWER(encoding[0]) == 'b')
@@ -960,6 +960,12 @@
             }
         }
     }
+    else if (Py_TOLOWER(encoding[0]) == 'c' &&
+             Py_TOLOWER(encoding[1]) == 'p' &&
+             strcmp(encoding + 2, "65001") == 0) {
+        *bytelength = 3;
+        return ENC_UTF8;
+    }
     return ENC_UNKNOWN;
 }