# Patch for issue #25709: string concatenation vs. the cached UTF-8
# representation of a str object (see the comments inside the new tests).
#
# NOTE(review): this patch arrived with its line breaks and indentation
# collapsed.  The layout below is reconstructed from the hunk line counts and
# ordinary Python/C nesting; no token inside a hunk was changed.  Verify the
# whitespace of context lines against base revision ebec1a98ab81 before
# applying.
#
# Lib/test/pickletester.py
#   Adds AbstractPickleTests.test_pep393_utf8_caching_bug.  For every pickle
#   protocol and for the code points 0x24, 0xa4, 0x20ac and 0x1f40d (ASCII,
#   Latin-1, BMP and astral respectively) it grows a string with `+=` up to
#   5 characters and checks that dumps()/loads() round-trips it -- presumably
#   after every append; confirm the nesting against the original patch.
#
# Lib/test/test_unicode.py
#   Adds UnicodeTest.test_pep393_utf8_caching_bug.  Same string construction;
#   int(t) is called only for its side effect (per the in-patch comment, it
#   creates the UTF-8 representation) and its ValueError is deliberately
#   ignored.  The test then asserts that t.encode().decode() equals t.
#   The last hunk only removes one blank line before the
#   `if __name__ == "__main__":` guard.
#   NOTE(review): that whitespace change looks unrelated to the fix -- confirm
#   it is intentional.
#
# Objects/unicodeobject.c (resize_compact; only hunk fragments are visible)
#   Records _PyUnicode_SHARE_UTF8(unicode) next to the existing share_wstr
#   snapshot.  After the existing WSTR cleanup:
#     - if the UTF-8 pointer was shared, it is re-pointed at
#       PyUnicode_DATA(unicode) and its length set to the new length;
#     - otherwise, if _PyUnicode_HAS_UTF8_MEMORY(unicode), the buffer is
#       released with PyObject_DEL and the pointer/length reset to NULL/0.
#   NOTE(review): as far as these hunks show, _PyUnicode_SHARE_UTF8 is
#   evaluated unconditionally -- confirm that is valid for every kind of
#   string object that can reach resize_compact.
diff -r ebec1a98ab81 Lib/test/pickletester.py
--- a/Lib/test/pickletester.py Mon Nov 23 16:44:30 2015 +0200
+++ b/Lib/test/pickletester.py Mon Nov 23 22:12:48 2015 +0200
@@ -1300,6 +1300,18 @@ class AbstractPickleTests(unittest.TestC
             t2 = self.loads(p)
             self.assert_is_copy(t, t2)
 
+    def test_pep393_utf8_caching_bug(self):
+        # Issue #25709: Problem with string concatenation and utf-8 cache
+        for proto in protocols:
+            with self.subTest(proto=proto):
+                for k in 0x24, 0xa4, 0x20ac, 0x1f40d:
+                    t = ''
+                    for i in range(5):
+                        t += chr(k)
+                        p = self.dumps(t, proto)
+                        t2 = self.loads(p)
+                        self.assert_is_copy(t, t2)
+
     def test_bytes(self):
         for proto in protocols:
             for s in b'', b'xyz', b'xyz'*100:
diff -r ebec1a98ab81 Lib/test/test_unicode.py
--- a/Lib/test/test_unicode.py Mon Nov 23 16:44:30 2015 +0200
+++ b/Lib/test/test_unicode.py Mon Nov 23 22:12:48 2015 +0200
@@ -2699,6 +2699,20 @@ class UnicodeTest(string_tests.CommonTes
         self.assertTrue(astral >= bmp2)
         self.assertFalse(astral >= astral2)
 
+    def test_pep393_utf8_caching_bug(self):
+        # Issue #25709: Problem with string concatenation and utf-8 cache
+        for k in 0x24, 0xa4, 0x20ac, 0x1f40d:
+            t = ''
+            for i in range(5):
+                t += chr(k)
+                # Create UTF8 representation.
+                # Other ways are: float, complex, compile...
+                try:
+                    int(t)
+                except ValueError:
+                    pass
+                self.assertEqual(t.encode().decode(), t)
+
 
 class StringModuleTest(unittest.TestCase):
     def test_formatter_parser(self):
@@ -2750,6 +2764,5 @@ class StringModuleTest(unittest.TestCase
             ]])
         self.assertRaises(TypeError, _string.formatter_field_name_split, 1)
 
-
 if __name__ == "__main__":
     unittest.main()
diff -r ebec1a98ab81 Objects/unicodeobject.c
--- a/Objects/unicodeobject.c Mon Nov 23 16:44:30 2015 +0200
+++ b/Objects/unicodeobject.c Mon Nov 23 22:12:48 2015 +0200
@@ -862,7 +862,7 @@ resize_compact(PyObject *unicode, Py_ssi
     Py_ssize_t char_size;
     Py_ssize_t struct_size;
     Py_ssize_t new_size;
-    int share_wstr;
+    int share_wstr, share_utf8;
     PyObject *new_unicode;
 #ifdef Py_DEBUG
     Py_ssize_t old_length = _PyUnicode_LENGTH(unicode);
@@ -878,6 +878,7 @@ resize_compact(PyObject *unicode, Py_ssi
     else
         struct_size = sizeof(PyCompactUnicodeObject);
     share_wstr = _PyUnicode_SHARE_WSTR(unicode);
+    share_utf8 = _PyUnicode_SHARE_UTF8(unicode);
 
     if (length > ((PY_SSIZE_T_MAX - struct_size) / char_size - 1)) {
         PyErr_NoMemory();
@@ -907,6 +908,15 @@ resize_compact(PyObject *unicode, Py_ssi
         PyObject_DEL(_PyUnicode_WSTR(unicode));
         _PyUnicode_WSTR(unicode) = NULL;
     }
+    if (share_utf8) {
+        _PyUnicode_UTF8(unicode) = PyUnicode_DATA(unicode);
+        _PyUnicode_UTF8_LENGTH(unicode) = length;
+    }
+    else if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) {
+        PyObject_DEL(_PyUnicode_UTF8(unicode));
+        _PyUnicode_UTF8(unicode) = NULL;
+        _PyUnicode_UTF8_LENGTH(unicode) = 0;
+    }
 #ifdef Py_DEBUG
     unicode_fill_invalid(unicode, old_length);
 #endif