diff -r 3599798bcbd2 Lib/test/test_codecs.py
--- a/Lib/test/test_codecs.py Thu Mar 10 13:54:21 2011 +0200
+++ b/Lib/test/test_codecs.py Thu Mar 10 13:16:02 2011 +0100
@@ -476,7 +476,7 @@
 
     def test_partial(self):
         self.check_partial(
-            "\x00\xff\u0100\uffff",
+            "\x00\xff\u0100\uffff\U00029e8a",
             [
                 "", # first byte of BOM read
                 "", # second byte of BOM read => byteorder known
@@ -488,6 +488,10 @@
                 "\x00\xff\u0100",
                 "\x00\xff\u0100",
                 "\x00\xff\u0100\uffff",
+                "\x00\xff\u0100\uffff", # First byte of first surrogate
+                "\x00\xff\u0100\uffff", # Second byte of first surrogate
+                "\x00\xff\u0100\uffff", # First byte of second surrogate
+                "\x00\xff\u0100\uffff\U00029e8a",
             ]
         )
 
diff -r 3599798bcbd2 Objects/unicodeobject.c
--- a/Objects/unicodeobject.c Thu Mar 10 13:54:21 2011 +0200
+++ b/Objects/unicodeobject.c Thu Mar 10 13:16:02 2011 +0100
@@ -3573,7 +3573,12 @@
         }
 
         /* UTF-16 code pair: */
-        if (q > e) {
+        if (q >= e) {
+            if (consumed)
+            {
+                q -= 2;
+                break;
+            }
             errmsg = "unexpected end of data";
             startinpos = (((const char *)q) - 2) - starts;
             endinpos = ((const char *)e) + 1 - starts;