diff -r ebec1a98ab81 Lib/test/test_unicode.py
--- a/Lib/test/test_unicode.py	Mon Nov 23 16:44:30 2015 +0200
+++ b/Lib/test/test_unicode.py	Tue Nov 24 10:48:00 2015 +0200
@@ -2699,6 +2699,24 @@ class UnicodeTest(string_tests.CommonTes
         self.assertTrue(astral >= bmp2)
         self.assertFalse(astral >= astral2)
 
+    @support.cpython_only
+    def test_pep393_utf8_caching_bug(self):
+        # Issue #25709: Problem with string concatenation and utf-8 cache
+        from _testcapi import getargs_s_hash
+        for k in 0x24, 0xa4, 0x20ac, 0x1f40d:
+            s = ''
+            for i in range(5):
+                # CPython may resize 's' in place here (see
+                # resize_compact()) instead of building a new object.
+                s += chr(k)
+                # Parsing with the "s#" format code calls indirectly
+                # PyUnicode_AsUTF8AndSize() which creates the UTF-8
+                # encoded string cached in the Unicode object.
+                expected = chr(k).encode() * (i + 1)
+                self.assertEqual(getargs_s_hash(s), expected)
+                # Asking again must yield the same bytes.
+                self.assertEqual(getargs_s_hash(s), expected)
+
 
 class StringModuleTest(unittest.TestCase):
     def test_formatter_parser(self):
diff -r ebec1a98ab81 Objects/unicodeobject.c
--- a/Objects/unicodeobject.c	Mon Nov 23 16:44:30 2015 +0200
+++ b/Objects/unicodeobject.c	Tue Nov 24 10:48:00 2015 +0200
@@ -885,6 +885,11 @@ resize_compact(PyObject *unicode, Py_ssi
     }
     new_size = (struct_size + (length + 1) * char_size);
 
+    if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) {
+        PyObject_DEL(_PyUnicode_UTF8(unicode));
+        _PyUnicode_UTF8(unicode) = NULL;
+        _PyUnicode_UTF8_LENGTH(unicode) = 0;
+    }
     _Py_DEC_REFTOTAL;
     _Py_ForgetReference(unicode);
 