diff -r 5e98a50e0f55 Lib/test/test_codecs.py --- a/Lib/test/test_codecs.py Thu May 15 14:37:42 2014 +0300 +++ b/Lib/test/test_codecs.py Thu May 15 17:57:10 2014 +0300 @@ -2834,19 +2834,19 @@ ('\u0141', 'strict', None), ('\u0141', 'ignore', b''), ('\u0141', 'replace', b'L'), - ('\udc98', 'surrogateescape', b'\x98'), - ('\udc98', 'surrogatepass', None), + ('\udc9d', 'surrogateescape', b'\x9d'), + ('\udc9d', 'surrogatepass', None), )) self.check_decode(1252, ( (b'abc', 'strict', 'abc'), (b'\xe9\x80', 'strict', '\xe9\u20ac'), (b'\xff', 'strict', '\xff'), # invalid bytes - (b'[\x98]', 'strict', None), - (b'[\x98]', 'ignore', '[]'), - (b'[\x98]', 'replace', '[\ufffd]'), - (b'[\x98]', 'surrogateescape', '[\udc98]'), - (b'[\x98]', 'surrogatepass', None), + (b'[\x9d]', 'strict', None), + (b'[\x9d]', 'ignore', '[]'), + (b'[\x9d]', 'replace', '[\ufffd]'), + (b'[\x9d]', 'surrogateescape', '[\udc9d]'), + (b'[\x9d]', 'surrogatepass', None), )) def test_cp_utf7(self): diff -r 5e98a50e0f55 Python/codecs.c --- a/Python/codecs.c Thu May 15 14:37:42 2014 +0300 +++ b/Python/codecs.c Thu May 15 17:57:10 2014 +0300 @@ -915,7 +915,7 @@ Py_TOLOWER(encoding[1]) == 't' && Py_TOLOWER(encoding[2]) == 'f') { encoding += 3; - if (*encoding == '-' || *encoding == '_' ) + if (*encoding == '-' || *encoding == '_') encoding++; if (encoding[0] == '8' && encoding[1] == '\0') { *bytelength = 3; @@ -931,7 +931,7 @@ return ENC_UTF16LE; #endif } - if (*encoding == '-' || *encoding == '_' ) + if (*encoding == '-' || *encoding == '_') encoding++; if (Py_TOLOWER(encoding[1]) == 'e' && encoding[2] == '\0') { if (Py_TOLOWER(encoding[0]) == 'b') @@ -950,7 +950,7 @@ return ENC_UTF32LE; #endif } - if (*encoding == '-' || *encoding == '_' ) + if (*encoding == '-' || *encoding == '_') encoding++; if (Py_TOLOWER(encoding[1]) == 'e' && encoding[2] == '\0') { if (Py_TOLOWER(encoding[0]) == 'b') @@ -960,6 +960,23 @@ } } } + else if (Py_TOLOWER(encoding[0]) == 'c' && + Py_TOLOWER(encoding[1]) == 'p') { + encoding += 2; + if (*encoding == '-' || *encoding == '_') + encoding++; + if (Py_TOLOWER(encoding[0]) == 'u' && + Py_TOLOWER(encoding[1]) == 't' && + Py_TOLOWER(encoding[2]) == 'f') { + encoding += 3; + if (*encoding == '-' || *encoding == '_') + encoding++; + if (encoding[0] == '8' && encoding[1] == '\0') { + *bytelength = 3; + return ENC_UTF8; + } + } + } return ENC_UNKNOWN; }