diff -r 0d41a1b4c4fe Lib/test/test_codecs.py --- a/Lib/test/test_codecs.py Mon Oct 08 07:46:11 2012 +0200 +++ b/Lib/test/test_codecs.py Mon Oct 08 18:26:34 2012 +0300 @@ -330,7 +330,7 @@ def test_partial(self): self.check_partial( - "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", [ "", # first byte of BOM read "", # second byte of BOM read @@ -352,6 +352,10 @@ "\x00\xff\u0100", "\x00\xff\u0100", "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", ] ) @@ -386,7 +390,7 @@ def test_partial(self): self.check_partial( - "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", [ "", "", @@ -404,6 +408,10 @@ "\x00\xff\u0100", "\x00\xff\u0100", "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", ] ) @@ -426,7 +434,7 @@ def test_partial(self): self.check_partial( - "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", [ "", "", @@ -444,6 +452,10 @@ "\x00\xff\u0100", "\x00\xff\u0100", "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", ] ) @@ -494,7 +506,7 @@ def test_partial(self): self.check_partial( - "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", [ "", # first byte of BOM read "", # second byte of BOM read => byteorder known @@ -506,6 +518,10 @@ "\x00\xff\u0100", "\x00\xff\u0100", "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", ] ) @@ -543,7 +559,7 @@ def test_partial(self): self.check_partial( - "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", [ "", "\x00", @@ -553,6 +569,10 @@ "\x00\xff\u0100", "\x00\xff\u0100", "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", ] ) @@ -582,7 +602,7 @@ def test_partial(self): self.check_partial( - "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", [ "", "\x00", @@ -592,6 +612,10 @@ "\x00\xff\u0100", "\x00\xff\u0100", "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff", + "\x00\xff\u0100\uffff\U00010000", ] ) @@ -621,7 +645,7 @@ def test_partial(self): self.check_partial( - "\x00\xff\u07ff\u0800\uffff", + "\x00\xff\u07ff\u0800\uffff\U00010000", [ "\x00", "\x00", @@ -634,6 +658,10 @@ "\x00\xff\u07ff\u0800", "\x00\xff\u07ff\u0800", "\x00\xff\u07ff\u0800\uffff", + "\x00\xff\u07ff\u0800\uffff", + "\x00\xff\u07ff\u0800\uffff", + "\x00\xff\u07ff\u0800\uffff", + "\x00\xff\u07ff\u0800\uffff\U00010000", ] ) @@ -812,7 +840,7 @@ def test_partial(self): self.check_partial( - "\ufeff\x00\xff\u07ff\u0800\uffff", + "\ufeff\x00\xff\u07ff\u0800\uffff\U00010000", [ "", "", @@ -831,6 +859,10 @@ "\ufeff\x00\xff\u07ff\u0800", "\ufeff\x00\xff\u07ff\u0800", "\ufeff\x00\xff\u07ff\u0800\uffff", + "\ufeff\x00\xff\u07ff\u0800\uffff", + "\ufeff\x00\xff\u07ff\u0800\uffff", + "\ufeff\x00\xff\u07ff\u0800\uffff", + "\ufeff\x00\xff\u07ff\u0800\uffff\U00010000", ] ) diff -r 0d41a1b4c4fe Objects/unicodeobject.c --- a/Objects/unicodeobject.c Mon Oct 08 07:46:11 2012 +0200 +++ b/Objects/unicodeobject.c Mon Oct 08 18:26:34 2012 +0300 @@ -5143,8 +5143,11 @@ /* The remaining input chars are ignored if the callback chooses to skip the input */ case 1: + q -= 2; + if (consumed) + goto End; errmsg = "unexpected end of data"; - startinpos = ((const char *)q) - 2 - starts; + startinpos = ((const char *)q) - starts; endinpos = ((const char *)e) - starts; break; case 2: