diff -r 3b484f53f91b Lib/test/test_codecs.py --- a/Lib/test/test_codecs.py Sun Oct 07 20:37:54 2012 -0700 +++ b/Lib/test/test_codecs.py Mon Oct 08 18:46:38 2012 +0300 @@ -281,7 +281,7 @@ def test_partial(self): self.check_partial( - u"\x00\xff\u0100\uffff", + u"\x00\xff\u0100\uffff\U00010000", [ u"", # first byte of BOM read u"", # second byte of BOM read @@ -303,6 +303,10 @@ u"\x00\xff\u0100", u"\x00\xff\u0100", u"\x00\xff\u0100\uffff", + u"\x00\xff\u0100\uffff", + u"\x00\xff\u0100\uffff", + u"\x00\xff\u0100\uffff", + u"\x00\xff\u0100\uffff\U00010000", ] ) @@ -331,7 +335,7 @@ def test_partial(self): self.check_partial( - u"\x00\xff\u0100\uffff", + u"\x00\xff\u0100\uffff\U00010000", [ u"", u"", @@ -349,6 +353,10 @@ u"\x00\xff\u0100", u"\x00\xff\u0100", u"\x00\xff\u0100\uffff", + u"\x00\xff\u0100\uffff", + u"\x00\xff\u0100\uffff", + u"\x00\xff\u0100\uffff", + u"\x00\xff\u0100\uffff\U00010000", ] ) @@ -371,7 +379,7 @@ def test_partial(self): self.check_partial( - u"\x00\xff\u0100\uffff", + u"\x00\xff\u0100\uffff\U00010000", [ u"", u"", @@ -389,6 +397,10 @@ u"\x00\xff\u0100", u"\x00\xff\u0100", u"\x00\xff\u0100\uffff", + u"\x00\xff\u0100\uffff", + u"\x00\xff\u0100\uffff", + u"\x00\xff\u0100\uffff", + u"\x00\xff\u0100\uffff\U00010000", ] ) @@ -439,7 +451,7 @@ def test_partial(self): self.check_partial( - u"\x00\xff\u0100\uffff", + u"\x00\xff\u0100\uffff\U00010000", [ u"", # first byte of BOM read u"", # second byte of BOM read => byteorder known @@ -451,6 +463,10 @@ u"\x00\xff\u0100", u"\x00\xff\u0100", u"\x00\xff\u0100\uffff", + u"\x00\xff\u0100\uffff", + u"\x00\xff\u0100\uffff", + u"\x00\xff\u0100\uffff", + u"\x00\xff\u0100\uffff\U00010000", ] ) @@ -481,7 +497,7 @@ def test_partial(self): self.check_partial( - u"\x00\xff\u0100\uffff", + u"\x00\xff\u0100\uffff\U00010000", [ u"", u"\x00", @@ -491,6 +507,10 @@ u"\x00\xff\u0100", u"\x00\xff\u0100", u"\x00\xff\u0100\uffff", + u"\x00\xff\u0100\uffff", + u"\x00\xff\u0100\uffff", + u"\x00\xff\u0100\uffff", + u"\x00\xff\u0100\uffff\U00010000", ] ) @@ -514,7 +534,7 @@ def test_partial(self): self.check_partial( - u"\x00\xff\u0100\uffff", + u"\x00\xff\u0100\uffff\U00010000", [ u"", u"\x00", @@ -524,6 +544,10 @@ u"\x00\xff\u0100", u"\x00\xff\u0100", u"\x00\xff\u0100\uffff", + u"\x00\xff\u0100\uffff", + u"\x00\xff\u0100\uffff", + u"\x00\xff\u0100\uffff", + u"\x00\xff\u0100\uffff\U00010000", ] ) @@ -547,7 +571,7 @@ def test_partial(self): self.check_partial( - u"\x00\xff\u07ff\u0800\uffff", + u"\x00\xff\u07ff\u0800\uffff\U00010000", [ u"\x00", u"\x00", @@ -560,6 +584,10 @@ u"\x00\xff\u07ff\u0800", u"\x00\xff\u07ff\u0800", u"\x00\xff\u07ff\u0800\uffff", + u"\x00\xff\u07ff\u0800\uffff", + u"\x00\xff\u07ff\u0800\uffff", + u"\x00\xff\u07ff\u0800\uffff", + u"\x00\xff\u07ff\u0800\uffff\U00010000", ] ) @@ -619,7 +647,7 @@ def test_partial(self): self.check_partial( - u"\ufeff\x00\xff\u07ff\u0800\uffff", + u"\ufeff\x00\xff\u07ff\u0800\uffff\U00010000", [ u"", u"", @@ -638,6 +666,10 @@ u"\ufeff\x00\xff\u07ff\u0800", u"\ufeff\x00\xff\u07ff\u0800", u"\ufeff\x00\xff\u07ff\u0800\uffff", + u"\ufeff\x00\xff\u07ff\u0800\uffff", + u"\ufeff\x00\xff\u07ff\u0800\uffff", + u"\ufeff\x00\xff\u07ff\u0800\uffff", + u"\ufeff\x00\xff\u07ff\u0800\uffff\U00010000", ] ) diff -r 3b484f53f91b Objects/unicodeobject.c --- a/Objects/unicodeobject.c Sun Oct 07 20:37:54 2012 -0700 +++ b/Objects/unicodeobject.c Mon Oct 08 18:46:38 2012 +0300 @@ -2565,8 +2565,11 @@ /* UTF-16 code pair: */ if (e - q < 2) { + q -= 2; + if (consumed) + break; errmsg = "unexpected end of data"; - startinpos = (((const char *)q)-2)-starts; + startinpos = ((const char *)q)-starts; endinpos = ((const char *)e)-starts; goto utf16Error; }