diff -r 8e67d5dc069d Lib/test/test_codecs.py
--- a/Lib/test/test_codecs.py Fri Oct 26 17:05:55 2012 -0700
+++ b/Lib/test/test_codecs.py Sat Oct 27 12:58:41 2012 +0300
@@ -666,8 +666,10 @@
         self.assertEqual(b"\xf0\x90\xbf\xbf\xed\xa0\x80".decode("utf-8", "surrogatepass"),
                          "\U00010fff\uD800")
         self.assertTrue(codecs.lookup_error("surrogatepass"))
-        with self.assertRaises(UnicodeDecodeError):
-            b"abc\xed\xa0".decode("utf-8", "surrogatepass")
+        self.assertRaises(UnicodeDecodeError, b"abc\xed\xa0".decode,
+                          "utf-8", "surrogatepass")
+        self.assertRaises(UnicodeDecodeError, b"abc\xed\xa0z".decode,
+                          "utf-8", "surrogatepass")
 
 @unittest.skipUnless(sys.platform == 'win32',
                      'cp65001 is a Windows-only codec')
diff -r 8e67d5dc069d Python/codecs.c
--- a/Python/codecs.c Fri Oct 26 17:05:55 2012 -0700
+++ b/Python/codecs.c Sat Oct 27 12:58:41 2012 +0300
@@ -791,10 +791,10 @@
         /* Try decoding a single surrogate character. If
            there are more, let the codec call us again. */
         p += start;
-        if (strlen(p) > 2 &&
-            ((p[0] & 0xf0) == 0xe0 ||
-             (p[1] & 0xc0) == 0x80 ||
-             (p[2] & 0xc0) == 0x80)) {
+        if (PyBytes_GET_SIZE(object) - start >= 3 &&
+            (p[0] & 0xf0) == 0xe0 &&
+            (p[1] & 0xc0) == 0x80 &&
+            (p[2] & 0xc0) == 0x80) {
             /* it's a three-byte code */
             ch = ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) + (p[2] & 0x3f);
             if (ch < 0xd800 || ch > 0xdfff)