diff -r 82ba1f46def5 Lib/test/test_codecs.py --- a/Lib/test/test_codecs.py Thu Sep 27 20:07:45 2012 +1000 +++ b/Lib/test/test_codecs.py Thu Sep 27 16:22:57 2012 +0300 @@ -494,7 +494,7 @@ def test_partial(self): self.check_partial( - "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00029e8a", [ "", # first byte of BOM read "", # second byte of BOM read => byteorder known @@ -506,6 +506,10 @@ "\x00\xff\u0100", "\x00\xff\u0100", "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", # First byte of first surrogate + "\x00\xff\u0100\uffff", # Second byte of first surrogate + "\x00\xff\u0100\uffff", # First byte of second surrogate + "\x00\xff\u0100\uffff\U00029e8a", ] ) diff -r 82ba1f46def5 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Thu Sep 27 20:07:45 2012 +1000 +++ b/Objects/unicodeobject.c Thu Sep 27 16:22:57 2012 +0300 @@ -5289,6 +5289,10 @@ /* The remaining input chars are ignored if the callback chooses to skip the input */ case 1: + if (consumed) { + q -= 2; + goto End; + } errmsg = "unexpected end of data"; startinpos = ((const char *)q) - 2 - starts; endinpos = ((const char *)e) - starts;