diff -r a404bf4db6a6 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Tue Sep 30 00:33:24 2014 -0700 +++ b/Objects/unicodeobject.c Tue Sep 30 11:28:18 2014 +0300 @@ -698,9 +698,9 @@ unicode_fill_invalid(PyObject *unicode, static PyObject* resize_compact(PyObject *unicode, Py_ssize_t length) { - Py_ssize_t char_size; - Py_ssize_t struct_size; - Py_ssize_t new_size; + size_t char_size; + size_t struct_size; + size_t new_size; int share_wstr; PyObject *new_unicode; #ifdef Py_DEBUG @@ -710,15 +710,18 @@ resize_compact(PyObject *unicode, Py_ssi assert(unicode_modifiable(unicode)); assert(PyUnicode_IS_READY(unicode)); assert(PyUnicode_IS_COMPACT(unicode)); - - char_size = PyUnicode_KIND(unicode); + assert(length >= 0); + + char_size = (size_t)PyUnicode_KIND(unicode); + /* the string is ready, the kind cannot be PyUnicode_WCHAR_KIND (0) */ + assert(char_size != 0); if (PyUnicode_IS_ASCII(unicode)) struct_size = sizeof(PyASCIIObject); else struct_size = sizeof(PyCompactUnicodeObject); share_wstr = _PyUnicode_SHARE_WSTR(unicode); - if (length > ((PY_SSIZE_T_MAX - struct_size) / char_size - 1)) { + if ((size_t)length > (((size_t)PY_SSIZE_T_MAX - struct_size) / char_size - 1)) { PyErr_NoMemory(); return NULL; } @@ -759,12 +762,13 @@ static int resize_inplace(PyObject *unicode, Py_ssize_t length) { wchar_t *wstr; - Py_ssize_t new_size; + size_t new_size; assert(!PyUnicode_IS_COMPACT(unicode)); assert(Py_REFCNT(unicode) == 1); + assert(0 <= length); if (PyUnicode_IS_READY(unicode)) { - Py_ssize_t char_size; + size_t char_size; int share_wstr, share_utf8; void *data; #ifdef Py_DEBUG @@ -772,11 +776,13 @@ resize_inplace(PyObject *unicode, Py_ssi #endif data = _PyUnicode_DATA_ANY(unicode); - char_size = PyUnicode_KIND(unicode); + char_size = (size_t)PyUnicode_KIND(unicode); + /* the string is ready, the kind cannot be PyUnicode_WCHAR_KIND (0) */ + assert(char_size != 0); share_wstr = _PyUnicode_SHARE_WSTR(unicode); share_utf8 = _PyUnicode_SHARE_UTF8(unicode); - if (length > 
(PY_SSIZE_T_MAX / char_size - 1)) { + if ((size_t)length > ((size_t)PY_SSIZE_T_MAX / char_size - 1)) { PyErr_NoMemory(); return -1; } @@ -816,7 +822,7 @@ resize_inplace(PyObject *unicode, Py_ssi assert(_PyUnicode_WSTR(unicode) != NULL); /* check for integer overflow */ - if (length > PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t) - 1) { + if ((size_t)length > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) { PyErr_NoMemory(); return -1; } @@ -888,14 +894,14 @@ static PyUnicodeObject * } /* Ensure we won't overflow the size. */ - if (length > ((PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(Py_UNICODE)) - 1)) { - return (PyUnicodeObject *)PyErr_NoMemory(); - } if (length < 0) { PyErr_SetString(PyExc_SystemError, "Negative size passed to _PyUnicode_New"); return NULL; } + if ((size_t)length > (((size_t)PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) - 1)) { + return (PyUnicodeObject *)PyErr_NoMemory(); + } unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type); if (unicode == NULL) @@ -1037,8 +1043,8 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 m void *data; enum PyUnicode_Kind kind; int is_sharing, is_ascii; - Py_ssize_t char_size; - Py_ssize_t struct_size; + size_t char_size; + size_t struct_size; /* Optimization for empty strings */ if (size == 0 && unicode_empty != NULL) { @@ -1083,7 +1089,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 m "Negative size passed to PyUnicode_New"); return NULL; } - if (size > ((PY_SSIZE_T_MAX - struct_size) / char_size - 1)) + if ((size_t)size > (((size_t)PY_SSIZE_T_MAX - struct_size) / char_size - 1)) return PyErr_NoMemory(); /* Duplicated allocation code from _PyObject_New() instead of a call to @@ -1645,7 +1651,11 @@ unicode_resize(PyObject **p_unicode, Py_ assert(unicode != NULL); assert(PyUnicode_Check(unicode)); - assert(0 <= length); + + if (length < 0) { + PyErr_BadInternalCall(); + return -1; + } if (_PyUnicode_KIND(unicode) == PyUnicode_WCHAR_KIND) old_length = PyUnicode_WSTR_LENGTH(unicode); @@ -2229,17 +2239,18 @@ as_ucs4(PyObject *string, Py_UCS4 
*targe { int kind; void *data; - Py_ssize_t len, targetlen; + Py_ssize_t len; + size_t targetlen; if (PyUnicode_READY(string) == -1) return NULL; kind = PyUnicode_KIND(string); data = PyUnicode_DATA(string); len = PyUnicode_GET_LENGTH(string); - targetlen = len; + targetlen = (size_t)len; if (copy_null) targetlen++; if (!target) { - if (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(Py_UCS4) < targetlen) { + if ((size_t)targetlen > (size_t)PY_SSIZE_T_MAX / sizeof(Py_UCS4)) { PyErr_NoMemory(); return NULL; } @@ -2250,7 +2261,8 @@ as_ucs4(PyObject *string, Py_UCS4 *targe } } else { - if (targetsize < targetlen) { + assert(targetsize >= 0); + if ((size_t)targetsize < targetlen) { PyErr_Format(PyExc_SystemError, "string is longer than the buffer"); if (copy_null && 0 < targetsize) @@ -2852,7 +2864,7 @@ PyUnicode_AsWideCharString(PyObject *uni buflen = unicode_aswidechar(unicode, NULL, 0); if (buflen == -1) return NULL; - if (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t) < buflen) { + if ((size_t)buflen > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) { PyErr_NoMemory(); return NULL; } @@ -3551,7 +3563,7 @@ PyUnicode_DecodeLocaleAndSize(const char wstr = smallbuf; } else { - if (wlen > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) + if (wlen > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) return PyErr_NoMemory(); wstr = PyMem_Malloc((wlen+1) * sizeof(wchar_t)); @@ -4523,7 +4535,7 @@ PyObject * return PyBytes_FromStringAndSize(NULL, 0); /* It might be possible to tighten this worst case */ - if (len > PY_SSIZE_T_MAX / 8) + if ((size_t)len > (size_t)PY_SSIZE_T_MAX / 8) return PyErr_NoMemory(); v = PyBytes_FromStringAndSize(NULL, len * 8); if (v == NULL) @@ -5139,7 +5151,7 @@ PyObject * len = PyUnicode_GET_LENGTH(str); nsize = len + (byteorder == 0); - if (nsize > PY_SSIZE_T_MAX / 4) + if ((size_t)nsize > (size_t)PY_SSIZE_T_MAX / 4) return PyErr_NoMemory(); v = PyBytes_FromStringAndSize(NULL, nsize * 4); if (v == NULL) @@ -5220,8 +5232,8 @@ PyObject * /* four bytes are reserved for each surrogate 
*/ if (moreunits > 1) { Py_ssize_t outpos = p - (unsigned char*) PyBytes_AS_STRING(v); - Py_ssize_t morebytes = 4 * (moreunits - 1); - if (PyBytes_GET_SIZE(v) > PY_SSIZE_T_MAX - morebytes) { + size_t morebytes = 4 * ((size_t)moreunits - 1); + if ((size_t)PyBytes_GET_SIZE(v) > (size_t)PY_SSIZE_T_MAX - morebytes) { /* integer overflow */ PyErr_NoMemory(); goto error; @@ -5461,7 +5473,7 @@ PyObject * Py_ssize_t len; PyObject *v; unsigned short *out; - Py_ssize_t pairs; + size_t pairs; #if PY_BIG_ENDIAN int native_ordering = byteorder >= 0; #else @@ -5491,7 +5503,7 @@ PyObject * if (*in++ >= 0x10000) pairs++; } - if (len > PY_SSIZE_T_MAX / 2 - pairs - (byteorder == 0)) + if ((size_t)len > (size_t)PY_SSIZE_T_MAX / 2 - pairs - (byteorder == 0)) return PyErr_NoMemory(); nsize = len + pairs + (byteorder == 0); v = PyBytes_FromStringAndSize(NULL, nsize * 2); @@ -5567,8 +5579,8 @@ PyObject * /* two bytes are reserved for each surrogate */ if (moreunits > 1) { Py_ssize_t outpos = out - (unsigned short*) PyBytes_AS_STRING(v); - Py_ssize_t morebytes = 2 * (moreunits - 1); - if (PyBytes_GET_SIZE(v) > PY_SSIZE_T_MAX - morebytes) { + size_t morebytes = 2 * ((size_t)moreunits - 1); + if ((size_t)PyBytes_GET_SIZE(v) > (size_t)PY_SSIZE_T_MAX - morebytes) { /* integer overflow */ PyErr_NoMemory(); goto error; @@ -5929,7 +5941,7 @@ PyUnicode_AsUnicodeEscapeString(PyObject char *p; int kind; void *data; - Py_ssize_t expandsize = 0; + size_t expandsize = 0; /* Initial allocation is based on the longest-possible character escape. 
@@ -5957,7 +5969,7 @@ PyUnicode_AsUnicodeEscapeString(PyObject if (len == 0) return PyBytes_FromStringAndSize(NULL, 0); - if (len > (PY_SSIZE_T_MAX - 2 - 1) / expandsize) + if ((size_t)len > ((size_t)PY_SSIZE_T_MAX - 2 - 1) / expandsize) return PyErr_NoMemory(); repr = PyBytes_FromStringAndSize(NULL, @@ -6167,7 +6179,8 @@ PyUnicode_AsRawUnicodeEscapeString(PyObj PyObject *repr; char *p; char *q; - Py_ssize_t expandsize, pos; + size_t expandsize; + Py_ssize_t pos; int kind; void *data; Py_ssize_t len; @@ -6183,9 +6196,9 @@ PyUnicode_AsRawUnicodeEscapeString(PyObj len = PyUnicode_GET_LENGTH(unicode); /* 4 byte characters can take up 10 bytes, 2 byte characters can take up 6 bytes, and 1 byte characters 4. */ - expandsize = kind * 2 + 2; - - if (len > PY_SSIZE_T_MAX / expandsize) + expandsize = (size_t)kind * 2 + 2; + + if ((size_t)len > (size_t)PY_SSIZE_T_MAX / expandsize) return PyErr_NoMemory(); repr = PyBytes_FromStringAndSize(NULL, expandsize * len); @@ -6265,11 +6278,16 @@ PyObject * 1)) return NULL; + if (size < 0) { + PyErr_BadInternalCall(); + return NULL; + } + if (size == 0) _Py_RETURN_UNICODE_EMPTY(); _PyUnicodeWriter_Init(&writer); - if (size / Py_UNICODE_SIZE > PY_SSIZE_T_MAX - 1) { + if ((size_t)size / Py_UNICODE_SIZE > (size_t)PY_SSIZE_T_MAX - 1) { PyErr_NoMemory(); goto onError; } @@ -6963,7 +6981,7 @@ decode_code_page_errors(UINT code_page, if (*v == NULL) { /* Create unicode object */ - if (size > PY_SSIZE_T_MAX / (Py_ssize_t)Py_ARRAY_LENGTH(buffer)) { + if ((size_t)size > (size_t)PY_SSIZE_T_MAX / Py_ARRAY_LENGTH(buffer)) { PyErr_NoMemory(); goto error; } @@ -6975,8 +6993,8 @@ decode_code_page_errors(UINT code_page, } else { /* Extend unicode object */ - Py_ssize_t n = PyUnicode_GET_SIZE(*v); - if (size > (PY_SSIZE_T_MAX - n) / (Py_ssize_t)Py_ARRAY_LENGTH(buffer)) { + size_t n = (size_t)PyUnicode_GET_SIZE(*v); + if ((size_t)size > ((size_t)PY_SSIZE_T_MAX - n) / Py_ARRAY_LENGTH(buffer)) { PyErr_NoMemory(); goto error; } @@ -7068,6 +7086,11 @@ 
decode_code_page_stateful(int code_page, return NULL; } + if (size < 0) { + PyErr_BadInternalCall(); + return NULL; + } + if (consumed) *consumed = 0; @@ -7232,8 +7255,8 @@ encode_code_page_strict(UINT code_page, } else { /* Extend string object */ - const Py_ssize_t n = PyBytes_Size(*outbytes); - if (outsize > PY_SSIZE_T_MAX - n) { + const size_t n = (size_t)PyBytes_Size(*outbytes); + if ((size_t)outsize > (size_t)PY_SSIZE_T_MAX - n) { PyErr_NoMemory(); Py_DECREF(substring); return -1; @@ -7319,7 +7342,7 @@ encode_code_page_errors(UINT code_page, else pusedDefaultChar = NULL; - if (Py_ARRAY_LENGTH(buffer) > PY_SSIZE_T_MAX / insize) { + if ((size_t)insize > (size_t)PY_SSIZE_T_MAX / Py_ARRAY_LENGTH(buffer)) { PyErr_NoMemory(); goto error; } @@ -7334,8 +7357,8 @@ encode_code_page_errors(UINT code_page, } else { /* Extend string object */ - Py_ssize_t n = PyBytes_Size(*outbytes); - if (n > PY_SSIZE_T_MAX - outsize) { + size_t n = PyBytes_Size(*outbytes); + if (n > (size_t)PY_SSIZE_T_MAX - (size_t)outsize) { PyErr_NoMemory(); goto error; } @@ -13322,8 +13345,9 @@ int PyObject *newbuffer; assert(length > 0); - - if (length > PY_SSIZE_T_MAX - writer->pos) { + assert(writer->pos >= 0); + + if ((size_t)length > (size_t)PY_SSIZE_T_MAX - (size_t)writer->pos) { PyErr_NoMemory(); return -1; } @@ -14815,7 +14839,8 @@ static PyObject * unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { PyObject *unicode, *self; - Py_ssize_t length, char_size; + Py_ssize_t length; + size_t char_size; int share_wstr, share_utf8; unsigned int kind; void *data; @@ -14876,7 +14901,7 @@ unicode_subtype_new(PyTypeObject *type, } /* Ensure we won't overflow the length. */ - if (length > (PY_SSIZE_T_MAX / char_size - 1)) { + if ((size_t)length > ((size_t)PY_SSIZE_T_MAX / char_size - 1)) { PyErr_NoMemory(); goto onError; } @@ -15430,7 +15455,7 @@ PyUnicode_AsUnicodeCopy(PyObject *unicod if (u == NULL) return NULL; /* Ensure we won't overflow the size. 
*/ - if (len > ((PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(Py_UNICODE)) - 1)) { + if ((size_t)len > ((size_t)PY_SSIZE_T_MAX / sizeof(Py_UNICODE) - 1)) { PyErr_NoMemory(); return NULL; }