diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -198,11 +198,11 @@ def test_binary_subscr_on_unicode(self): - # valid code get optimized + # unicode strings don't get optimized asm = dis_single('"foo"[0]') - self.assertIn("('f')", asm) - self.assertNotIn('BINARY_SUBSCR', asm) + self.assertNotIn("('f')", asm) + self.assertIn('BINARY_SUBSCR', asm) asm = dis_single('"\u0061\uffff"[1]') - self.assertIn("('\\uffff')", asm) - self.assertNotIn('BINARY_SUBSCR', asm) + self.assertNotIn("('\\uffff')", asm) + self.assertIn('BINARY_SUBSCR', asm) # invalid code doesn't get optimized # out of range @@ -211,6 +211,8 @@ # non-BMP char (see #5057) asm = dis_single('"\U00012345"[0]') self.assertIn('BINARY_SUBSCR', asm) + asm = dis_single('"\U00012345abcdef"[3]') + self.assertIn('BINARY_SUBSCR', asm) def test_folding_of_unaryops_on_constants(self): diff --git a/Python/peephole.c b/Python/peephole.c --- a/Python/peephole.c +++ b/Python/peephole.c @@ -132,25 +132,14 @@ newconst = PyNumber_Subtract(v, w); break; case BINARY_SUBSCR: + /* #5057: if v is unicode, there might be differences between + wide and narrow builds in cases like '\U00012345'[0] or + '\U00012345abcdef'[3], so it's better to skip the optimization + in order to produce compatible pycs. + */ + if (PyUnicode_Check(v)) + return 0; newconst = PyObject_GetItem(v, w); - /* #5057: if v is unicode, there might be differences between - wide and narrow builds in cases like '\U00012345'[0]. - Wide builds will return a non-BMP char, whereas narrow builds - will return a surrogate. In both the cases skip the - optimization in order to produce compatible pycs.
- */ - if (newconst != NULL && - PyUnicode_Check(v) && PyUnicode_Check(newconst)) { - Py_UNICODE ch = PyUnicode_AS_UNICODE(newconst)[0]; -#ifdef Py_UNICODE_WIDE - if (ch > 0xFFFF) { -#else - if (ch >= 0xD800 && ch <= 0xDFFF) { -#endif - Py_DECREF(newconst); - return 0; - } - } break; case BINARY_LSHIFT: newconst = PyNumber_Lshift(v, w);