# HG changeset patch # Parent e72aab0801650ae6ce20cee3e29b3f352a178efc Issue #1621: Avoid signed integer overflow in str.join() diff -r e72aab080165 Lib/test/test_unicode.py --- a/Lib/test/test_unicode.py Sat Jul 23 11:16:56 2016 -0400 +++ b/Lib/test/test_unicode.py Sun Jul 24 09:47:45 2016 +0000 @@ -464,6 +464,13 @@ self.checkraises(TypeError, ' ', 'join', [1, 2, 3]) self.checkraises(TypeError, ' ', 'join', ['1', '2', 3]) + @unittest.skipIf(sys.maxsize > 2**32, + 'needs too much memory on a 64-bit platform') + def test_join_overflow(self): + size = int(sys.maxsize**0.5) + 1 + seq = ('A' * size,) * size + self.assertRaises(OverflowError, ''.join, seq) + def test_replace(self): string_tests.CommonTest.test_replace(self) diff -r e72aab080165 Misc/NEWS --- a/Misc/NEWS Sat Jul 23 11:16:56 2016 -0400 +++ b/Misc/NEWS Sun Jul 24 09:47:45 2016 +0000 @@ -10,6 +10,8 @@ Core and Builtins ----------------- +- Issue #1621: Avoid signed integer overflow in the str.join() method. + - Issue #27507: Add integer overflow check in bytearray.extend(). Patch by Xiang Zhang. diff -r e72aab080165 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Sat Jul 23 11:16:56 2016 -0400 +++ b/Objects/unicodeobject.c Sun Jul 24 09:47:45 2016 +0000 @@ -9913,7 +9913,7 @@ use_memcpy = 1; #endif for (i = 0; i < seqlen; i++) { - const Py_ssize_t old_sz = sz; + size_t add_sz; /* Maximum value is double that of Py_ssize_t */ item = items[i]; if (!PyUnicode_Check(item)) { PyErr_Format(PyExc_TypeError, @@ -9924,16 +9924,17 @@ } if (PyUnicode_READY(item) == -1) goto onError; - sz += PyUnicode_GET_LENGTH(item); + add_sz = PyUnicode_GET_LENGTH(item); item_maxchar = PyUnicode_MAX_CHAR_VALUE(item); maxchar = Py_MAX(maxchar, item_maxchar); if (i != 0) - sz += seplen; - if (sz < old_sz || sz > PY_SSIZE_T_MAX) { + add_sz += seplen; + if (add_sz > (size_t)(PY_SSIZE_T_MAX - sz)) { PyErr_SetString(PyExc_OverflowError, "join() result is too long for a Python string"); goto onError; } + sz += add_sz; if (use_memcpy && last_obj != NULL) { if (PyUnicode_KIND(last_obj) != PyUnicode_KIND(item)) use_memcpy = 0;