diff -r b6c372147db4 Lib/test/test_codecs.py --- a/Lib/test/test_codecs.py Thu May 15 20:50:30 2014 -0400 +++ b/Lib/test/test_codecs.py Fri May 16 15:01:14 2014 +0300 @@ -2834,19 +2834,19 @@ ('\u0141', 'strict', None), ('\u0141', 'ignore', b''), ('\u0141', 'replace', b'L'), - ('\udc98', 'surrogateescape', b'\x98'), - ('\udc98', 'surrogatepass', None), + ('\udc9d', 'surrogateescape', b'\x9d'), + ('\udc9d', 'surrogatepass', None), )) self.check_decode(1252, ( (b'abc', 'strict', 'abc'), (b'\xe9\x80', 'strict', '\xe9\u20ac'), (b'\xff', 'strict', '\xff'), # invalid bytes - (b'[\x98]', 'strict', None), - (b'[\x98]', 'ignore', '[]'), - (b'[\x98]', 'replace', '[\ufffd]'), - (b'[\x98]', 'surrogateescape', '[\udc98]'), - (b'[\x98]', 'surrogatepass', None), + (b'[\x9d]', 'strict', None), + (b'[\x9d]', 'ignore', '[]'), + (b'[\x9d]', 'replace', '[\ufffd]'), + (b'[\x9d]', 'surrogateescape', '[\udc9d]'), + (b'[\x9d]', 'surrogatepass', None), )) def test_cp_utf7(self): diff -r b6c372147db4 Python/codecs.c --- a/Python/codecs.c Thu May 15 20:50:30 2014 -0400 +++ b/Python/codecs.c Fri May 16 15:01:14 2014 +0300 @@ -915,7 +915,7 @@ Py_TOLOWER(encoding[1]) == 't' && Py_TOLOWER(encoding[2]) == 'f') { encoding += 3; - if (*encoding == '-' || *encoding == '_' ) + if (*encoding == '-' || *encoding == '_') encoding++; if (encoding[0] == '8' && encoding[1] == '\0') { *bytelength = 3; @@ -931,7 +931,7 @@ return ENC_UTF16LE; #endif } - if (*encoding == '-' || *encoding == '_' ) + if (*encoding == '-' || *encoding == '_') encoding++; if (Py_TOLOWER(encoding[1]) == 'e' && encoding[2] == '\0') { if (Py_TOLOWER(encoding[0]) == 'b') @@ -950,7 +950,7 @@ return ENC_UTF32LE; #endif } - if (*encoding == '-' || *encoding == '_' ) + if (*encoding == '-' || *encoding == '_') encoding++; if (Py_TOLOWER(encoding[1]) == 'e' && encoding[2] == '\0') { if (Py_TOLOWER(encoding[0]) == 'b') @@ -960,6 +960,12 @@ } } } + else if (Py_TOLOWER(encoding[0]) == 'c' && + Py_TOLOWER(encoding[1]) == 'p' && + strcmp(encoding + 2, "65001") == 0) { + *bytelength = 3; + return ENC_UTF8; + } return ENC_UNKNOWN; }