Issue #25709: keep the cached UTF-8 fields of a compact str object consistent
when resize_compact() changes its length, and add a pickle round-trip
regression test that grows a non-ASCII string one character at a time.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed.  Hunk line counts match the @@ headers, but
the leading whitespace of context lines was reconstructed -- verify against
the target revision before applying.

diff -r ebec1a98ab81 Lib/test/pickletester.py
--- a/Lib/test/pickletester.py	Mon Nov 23 16:44:30 2015 +0200
+++ b/Lib/test/pickletester.py	Mon Nov 23 21:05:06 2015 +0200
@@ -1300,6 +1300,18 @@ class AbstractPickleTests(unittest.TestC
             t2 = self.loads(p)
             self.assert_is_copy(t, t2)
 
+    def test_unicode_cached_utf8_bug(self):
+        # Issue #25709
+        for proto in protocols:
+            for k in 0xa4, 0x20ac, 0x1f40d:
+                with self.subTest(proto=proto, char=hex(k)):
+                    t = ''
+                    for i in range(5):
+                        t += chr(k)
+                        p = self.dumps(t, proto)
+                        t2 = self.loads(p)
+                        self.assert_is_copy(t, t2)
+
     def test_bytes(self):
         for proto in protocols:
             for s in b'', b'xyz', b'xyz'*100:
diff -r ebec1a98ab81 Objects/unicodeobject.c
--- a/Objects/unicodeobject.c	Mon Nov 23 16:44:30 2015 +0200
+++ b/Objects/unicodeobject.c	Mon Nov 23 21:05:06 2015 +0200
@@ -862,7 +862,7 @@ resize_compact(PyObject *unicode, Py_ssi
     Py_ssize_t char_size;
     Py_ssize_t struct_size;
     Py_ssize_t new_size;
-    int share_wstr;
+    int share_wstr, share_utf8;
     PyObject *new_unicode;
 #ifdef Py_DEBUG
     Py_ssize_t old_length = _PyUnicode_LENGTH(unicode);
@@ -878,6 +878,7 @@ resize_compact(PyObject *unicode, Py_ssi
     else
         struct_size = sizeof(PyCompactUnicodeObject);
     share_wstr = _PyUnicode_SHARE_WSTR(unicode);
+    share_utf8 = _PyUnicode_SHARE_UTF8(unicode);
 
     if (length > ((PY_SSIZE_T_MAX - struct_size) / char_size - 1)) {
         PyErr_NoMemory();
@@ -907,6 +908,15 @@ resize_compact(PyObject *unicode, Py_ssi
         PyObject_DEL(_PyUnicode_WSTR(unicode));
         _PyUnicode_WSTR(unicode) = NULL;
     }
+    if (share_utf8) {
+        _PyUnicode_UTF8(unicode) = PyUnicode_DATA(unicode);
+        _PyUnicode_UTF8_LENGTH(unicode) = length;
+    }
+    else if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) {
+        PyObject_DEL(_PyUnicode_UTF8(unicode));
+        _PyUnicode_UTF8(unicode) = NULL;
+        _PyUnicode_UTF8_LENGTH(unicode) = 0;
+    }
 #ifdef Py_DEBUG
     unicode_fill_invalid(unicode, old_length);
 #endif