diff -r ebec1a98ab81 Lib/test/test_unicode.py --- a/Lib/test/test_unicode.py Mon Nov 23 16:44:30 2015 +0200 +++ b/Lib/test/test_unicode.py Tue Nov 24 00:45:13 2015 +0200 @@ -2699,6 +2699,23 @@ class UnicodeTest(string_tests.CommonTes self.assertTrue(astral >= bmp2) self.assertFalse(astral >= astral2) + def test_pep393_utf8_caching_bug(self): + # Issue #25709: Problem with string concatenation and utf-8 cache + for k in 0x24, 0xa4, 0x20ac, 0x1f40d: + t = '' + for i in range(5): + t += chr(k) + # The test has to call PyUnicode_AsUTF8AndSize() to create + # the UTF-8 encoded string cached in the Unicode object. + # Other ways are: float, complex, compile... + try: + int(t) + except ValueError: + pass + # t.encode() calls _PyUnicode_AsUTF8String() to retrieve + # the cached UTF-8 encoded string. + self.assertEqual(t.encode().decode(), t) + class StringModuleTest(unittest.TestCase): def test_formatter_parser(self): @@ -2750,6 +2767,5 @@ class StringModuleTest(unittest.TestCase ]]) self.assertRaises(TypeError, _string.formatter_field_name_split, 1) - if __name__ == "__main__": unittest.main() diff -r ebec1a98ab81 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Mon Nov 23 16:44:30 2015 +0200 +++ b/Objects/unicodeobject.c Tue Nov 24 00:45:13 2015 +0200 @@ -907,6 +907,11 @@ resize_compact(PyObject *unicode, Py_ssi PyObject_DEL(_PyUnicode_WSTR(unicode)); _PyUnicode_WSTR(unicode) = NULL; } + if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) { + PyObject_DEL(_PyUnicode_UTF8(unicode)); + _PyUnicode_UTF8(unicode) = NULL; + _PyUnicode_UTF8_LENGTH(unicode) = 0; + } #ifdef Py_DEBUG unicode_fill_invalid(unicode, old_length); #endif