diff -r 5b9ffea7e7c3 Include/unicodeobject.h
--- a/Include/unicodeobject.h	Mon Oct 05 13:49:26 2015 +0200
+++ b/Include/unicodeobject.h	Mon Oct 05 14:05:17 2015 +0200
@@ -908,7 +908,7 @@ typedef struct {
     /* minimum character (default: 127, ASCII) */
     Py_UCS4 min_char;
 
-    /* If non-zero, overallocate the buffer by 25% (default: 0). */
+    /* If non-zero, overallocate the buffer (default: 0). */
     unsigned char overallocate;
 
     /* If readonly is 1, buffer is a shared string (cannot be modified)
diff -r 5b9ffea7e7c3 Objects/stringlib/codecs.h
--- a/Objects/stringlib/codecs.h	Mon Oct 05 13:49:26 2015 +0200
+++ b/Objects/stringlib/codecs.h	Mon Oct 05 14:05:17 2015 +0200
@@ -263,10 +263,8 @@ STRINGLIB(utf8_encoder)(PyObject *unicod
 #define MAX_SHORT_UNICHARS 300  /* largest size we'll do on the stack */
 
     Py_ssize_t i;                /* index into s of next input byte */
-    PyObject *result;            /* result string object */
+    _PyBytesWriter writer;
     char *p;                     /* next free byte in output buffer */
-    Py_ssize_t nallocated;      /* number of result bytes allocated */
-    Py_ssize_t nneeded;            /* number of result bytes needed */
 #if STRINGLIB_SIZEOF_CHAR > 1
     PyObject *error_handler_obj = NULL;
     PyObject *exc = NULL;
@@ -285,28 +283,17 @@ STRINGLIB(utf8_encoder)(PyObject *unicod
 #endif
 
     assert(size >= 0);
+    _PyBytesWriter_Init(&writer);
+    _PyBytesWriter_SetStackBuffer(&writer, stackbuf, sizeof(stackbuf));
 
-    if (size <= MAX_SHORT_UNICHARS) {
-        /* Write into the stack buffer; nallocated can't overflow.
-         * At the end, we'll allocate exactly as much heap space as it
-         * turns out we need.
-         */
-        nallocated = Py_SAFE_DOWNCAST(sizeof(stackbuf), size_t, int);
-        result = NULL;   /* will allocate after we're done */
-        p = stackbuf;
+    if (size > PY_SSIZE_T_MAX / max_char_size) {
+        /* integer overflow */
+        return PyErr_NoMemory();
     }
-    else {
-        if (size > PY_SSIZE_T_MAX / max_char_size) {
-            /* integer overflow */
-            return PyErr_NoMemory();
-        }
-        /* Overallocate on the heap, and give the excess back at the end. */
-        nallocated = size * max_char_size;
-        result = PyBytes_FromStringAndSize(NULL, nallocated);
-        if (result == NULL)
-            return NULL;
-        p = PyBytes_AS_STRING(result);
-    }
+
+    p = _PyBytesWriter_Alloc(&writer, size * max_char_size);
+    if (p == NULL)
+        return NULL;
 
     for (i = 0; i < size;) {
         Py_UCS4 ch = data[i++];
@@ -387,29 +374,10 @@ STRINGLIB(utf8_encoder)(PyObject *unicod
                 repsize = PyUnicode_GET_LENGTH(rep);
 
                 if (repsize > max_char_size) {
-                    Py_ssize_t offset;
-
-                    if (result == NULL)
-                        offset = p - stackbuf;
-                    else
-                        offset = p - PyBytes_AS_STRING(result);
-
-                    if (nallocated > PY_SSIZE_T_MAX - repsize + max_char_size) {
-                        /* integer overflow */
-                        PyErr_NoMemory();
+                    p = _PyBytesWriter_Prepare(&writer, p,
+                                               repsize - max_char_size);
+                    if (p == NULL)
                         goto error;
-                    }
-                    nallocated += repsize - max_char_size;
-                    if (result != NULL) {
-                        if (_PyBytes_Resize(&result, nallocated) < 0)
-                            goto error;
-                    } else {
-                        result = PyBytes_FromStringAndSize(NULL, nallocated);
-                        if (result == NULL)
-                            goto error;
-                        Py_MEMCPY(PyBytes_AS_STRING(result), stackbuf, offset);
-                    }
-                    p = PyBytes_AS_STRING(result) + offset;
                 }
 
                 if (PyBytes_Check(rep)) {
@@ -461,31 +429,18 @@ STRINGLIB(utf8_encoder)(PyObject *unicod
 #endif /* STRINGLIB_SIZEOF_CHAR > 1 */
     }
 
-    if (result == NULL) {
-        /* This was stack allocated. */
-        nneeded = p - stackbuf;
-        assert(nneeded <= nallocated);
-        result = PyBytes_FromStringAndSize(stackbuf, nneeded);
-    }
-    else {
-        /* Cut back to size actually needed. */
-        nneeded = p - PyBytes_AS_STRING(result);
-        assert(nneeded <= nallocated);
-        _PyBytes_Resize(&result, nneeded);
-    }
-
 #if STRINGLIB_SIZEOF_CHAR > 1
     Py_XDECREF(error_handler_obj);
     Py_XDECREF(exc);
 #endif
-    return result;
+    return _PyBytesWriter_Finish(&writer, p);
 
 #if STRINGLIB_SIZEOF_CHAR > 1
  error:
     Py_XDECREF(rep);
     Py_XDECREF(error_handler_obj);
     Py_XDECREF(exc);
-    Py_XDECREF(result);
+    _PyBytesWriter_Dealloc(&writer);
     return NULL;
 #endif
 
diff -r 5b9ffea7e7c3 Objects/unicodeobject.c
--- a/Objects/unicodeobject.c	Mon Oct 05 13:49:26 2015 +0200
+++ b/Objects/unicodeobject.c	Mon Oct 05 14:05:17 2015 +0200
@@ -163,6 +163,14 @@ extern "C" {
             *_to++ = (to_type) *_iter++;                \
     } while (0)
 
+#ifdef MS_WINDOWS
+   /* On Windows, overallocate by 50% is the best factor */
+#  define OVERALLOCATE_FACTOR 2
+#else
+   /* On Linux, overallocate by 25% is the best factor */
+#  define OVERALLOCATE_FACTOR 4
+#endif
+
 /* This dictionary holds all interned unicode strings.  Note that references
    to strings in this dictionary are *not* counted in the string's ob_refcnt.
    When the interned string reaches a refcnt of 0 the string deallocation
@@ -338,6 +346,230 @@ PyUnicode_GetMax(void)
 #endif
 }
 
+typedef struct {
+    /* bytes object */
+    PyObject *buffer;
+
+    /* Number of allocated bytes (excluding the trailing null byte) */
+    Py_ssize_t allocated;
+
+    /* Current size of the buffer (can be smaller than the allocated size) */
+    Py_ssize_t size;
+
+    /* If non-zero, overallocate the buffer (default: 0). */
+    int overallocate;
+
+    /* Stack buffer */
+    int use_stack_buffer;
+    char *stack_buffer;
+    Py_ssize_t stack_buffer_size;
+} _PyBytesWriter;
+
+/* Initialize a writer: all fields are set to zero. */
+static void
+_PyBytesWriter_Init(_PyBytesWriter *writer)
+{
+    memset(writer, 0, sizeof(*writer));
+}
+
+/* Register a caller-provided buffer of size bytes. _PyBytesWriter_Alloc()
+   uses it instead of a bytes object when the requested size is smaller. */
+static void
+_PyBytesWriter_SetStackBuffer(_PyBytesWriter *writer,
+                              char *buffer,
+                              Py_ssize_t size)
+{
+    writer->stack_buffer = buffer;
+    writer->stack_buffer_size = size;
+}
+
+/* Release the bytes object owned by the writer, if any. */
+static void
+_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
+{
+    Py_CLEAR(writer->buffer);
+}
+
+/* Return a pointer to the first byte of the buffer currently in use. */
+static char*
+_PyBytesWriter_AsString(_PyBytesWriter *writer)
+{
+    if (!writer->use_stack_buffer) {
+        assert(writer->buffer != NULL);
+        return PyBytes_AS_STRING(writer->buffer);
+    }
+    else {
+        assert(writer->stack_buffer != NULL);
+        return writer->stack_buffer;
+    }
+}
+
+/* Return the offset of str from the beginning of the buffer. */
+Py_LOCAL_INLINE(Py_ssize_t)
+_PyBytesWriter_GetPos(_PyBytesWriter *writer, char *str)
+{
+    char *start = _PyBytesWriter_AsString(writer);
+    assert(str != NULL);
+    assert(str >= start);
+    return str - start;
+}
+
+/* Check the writer invariants with assertions (only if Py_DEBUG is defined).
+   str is the current pointer inside the buffer. */
+Py_LOCAL_INLINE(void)
+_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
+{
+#ifdef Py_DEBUG
+    char *start, *end;
+
+    if (!writer->use_stack_buffer) {
+        assert(writer->buffer != NULL);
+        assert(PyBytes_CheckExact(writer->buffer));
+        assert(Py_REFCNT(writer->buffer) == 1);
+    }
+    else {
+        assert(writer->buffer == NULL);
+        assert(writer->stack_buffer != NULL);
+    }
+
+    start = _PyBytesWriter_AsString(writer);
+    assert(0 < writer->size && writer->size <= writer->allocated);
+    /* the last byte must always be null */
+    assert(start[writer->allocated] == 0);
+
+    end = start + writer->allocated;
+    assert(str != NULL);
+    assert(start <= str && str <= end);
+#endif
+}
+
+/* Allocate the buffer to write size bytes.
+   Return the pointer to the beginning of buffer data.
+   Raise an exception and return NULL on error. */
+static char*
+_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
+{
+    char *str;
+
+    assert(writer->buffer == NULL);
+    /* using _PyBytesWriter_Alloc() with 0 bytes is inefficient */
+    assert(size != 0);
+
+    writer->size = size;
+    if (size >= writer->stack_buffer_size) {
+        writer->buffer = PyBytes_FromStringAndSize(NULL, size);
+        if (writer->buffer == NULL)
+            return NULL;
+        writer->allocated = size;
+    }
+    else {
+        writer->use_stack_buffer = 1;
+        memset(writer->stack_buffer, 0xDD, writer->stack_buffer_size - 1);
+        writer->stack_buffer[writer->stack_buffer_size - 1] = 0;
+        /* the last byte is reserved, it must be '\0' */
+        writer->allocated = writer->stack_buffer_size - 1;
+    }
+
+    str = _PyBytesWriter_AsString(writer);
+    _PyBytesWriter_CheckConsistency(writer, str);
+    return str;
+}
+
+/* Add *size* bytes to the buffer.
+   str is the current pointer inside the buffer.
+   Return the updated current pointer inside the buffer.
+   Raise an exception and return NULL on error. */
+static char*
+_PyBytesWriter_Prepare(_PyBytesWriter *writer, char *str, Py_ssize_t size)
+{
+    Py_ssize_t allocated, pos;
+
+    _PyBytesWriter_CheckConsistency(writer, str);
+    assert(size >= 0);
+
+    if (size == 0) {
+        /* nothing to do */
+        return str;
+    }
+
+    if (writer->size > PY_SSIZE_T_MAX - size) {
+        PyErr_NoMemory();
+        _PyBytesWriter_Dealloc(writer);
+        return NULL;
+    }
+    writer->size += size;
+
+    allocated = writer->allocated;
+    if (writer->size <= allocated)
+        return str;
+
+    allocated = writer->size;
+    if (writer->overallocate
+        && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
+        /* overallocate to limit the number of realloc() */
+        allocated += allocated / OVERALLOCATE_FACTOR;
+    }
+
+    pos = _PyBytesWriter_GetPos(writer, str);
+    if (!writer->use_stack_buffer) {
+        /* Note: Don't use a bytearray object because the conversion from
+           bytearray to bytes requires to copy all bytes. */
+        if (_PyBytes_Resize(&writer->buffer, allocated)) {
+            assert(writer->buffer == NULL);
+            return NULL;
+        }
+    }
+    else {
+        /* convert from stack buffer to bytes object buffer */
+        assert(writer->buffer == NULL);
+
+        writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
+        if (writer->buffer == NULL)
+            return NULL;
+
+        Py_MEMCPY(PyBytes_AS_STRING(writer->buffer),
+                  writer->stack_buffer,
+                  pos);
+        /* the data now lives in the bytes object: stop using the stack
+           buffer, otherwise later writes would overflow it */
+        writer->use_stack_buffer = 0;
+    }
+    writer->allocated = allocated;
+
+    str = _PyBytesWriter_AsString(writer) + pos;
+    _PyBytesWriter_CheckConsistency(writer, str);
+    return str;
+}
+
+/* Get the buffer content and reset the writer.
+   Return a bytes object.
+   Raise an exception and return NULL on error. */
+static PyObject *
+_PyBytesWriter_Finish(_PyBytesWriter *writer, char *str)
+{
+    Py_ssize_t pos;
+    PyObject *result;
+
+    _PyBytesWriter_CheckConsistency(writer, str);
+
+    pos = _PyBytesWriter_GetPos(writer, str);
+    if (!writer->use_stack_buffer) {
+        if (pos != writer->allocated) {
+            if (_PyBytes_Resize(&writer->buffer, pos)) {
+                assert(writer->buffer == NULL);
+                return NULL;
+            }
+        }
+
+        result = writer->buffer;
+        writer->buffer = NULL;
+    }
+    else {
+        result = PyBytes_FromStringAndSize(writer->stack_buffer, pos);
+    }
+
+    return result;
+}
+
 #ifdef Py_DEBUG
 int
 _PyUnicode_CheckConsistency(PyObject *op, int check_content)
@@ -6461,16 +6693,15 @@ unicode_encode_ucs1(PyObject *unicode,
     int kind;
     void *data;
     /* output object */
-    PyObject *res;
+    _PyBytesWriter writer;
     /* pointer into the output */
     char *str;
-    /* current output position */
-    Py_ssize_t ressize;
     const char *encoding = (limit == 256) ? "latin-1" : "ascii";
     const char *reason = (limit == 256) ? "ordinal not in range(256)" : "ordinal not in range(128)";
     PyObject *error_handler_obj = NULL;
     PyObject *exc = NULL;
     _Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
+    char stackbuf[256];
 
     if (PyUnicode_READY(unicode) == -1)
         return NULL;
@@ -6481,11 +6712,12 @@ unicode_encode_ucs1(PyObject *unicode,
        replacements, if we need more, we'll resize */
     if (size == 0)
         return PyBytes_FromStringAndSize(NULL, 0);
-    res = PyBytes_FromStringAndSize(NULL, size);
-    if (res == NULL)
-        return NULL;
-    str = PyBytes_AS_STRING(res);
-    ressize = size;
+
+    _PyBytesWriter_Init(&writer);
+    _PyBytesWriter_SetStackBuffer(&writer, stackbuf, sizeof(stackbuf));
+    str = _PyBytesWriter_Alloc(&writer, size);
+    if (str == NULL)
+        return NULL;
 
     while (pos < size) {
         Py_UCS4 ch = PyUnicode_READ(kind, data, pos);
@@ -6499,7 +6731,7 @@ unicode_encode_ucs1(PyObject *unicode,
         else {
             Py_ssize_t requiredsize;
             PyObject *repunicode;
-            Py_ssize_t repsize, newpos, respos, i;
+            Py_ssize_t repsize, newpos, i;
             /* startpos for collecting unencodable chars */
             Py_ssize_t collstart = pos;
             Py_ssize_t collend = pos;
@@ -6508,6 +6740,9 @@ unicode_encode_ucs1(PyObject *unicode,
             while ((collend < size) && (PyUnicode_READ(kind, data, collend) >= limit))
                 ++collend;
 
+            /* Only overallocate the buffer if it's not the last write */
+            writer.overallocate = (collend < size);
+
             /* cache callback name lookup (if not done yet, i.e. it's the first error) */
             if (error_handler == _Py_ERROR_UNKNOWN)
                 error_handler = get_error_handler(errors);
@@ -6526,8 +6761,7 @@ unicode_encode_ucs1(PyObject *unicode,
                 break;
 
             case _Py_ERROR_XMLCHARREFREPLACE:
-                respos = str - PyBytes_AS_STRING(res);
-                requiredsize = respos;
+                requiredsize = 0;
                 /* determine replacement size */
                 for (i = collstart; i < collend; ++i) {
                     Py_ssize_t incr;
@@ -6553,17 +6787,11 @@ unicode_encode_ucs1(PyObject *unicode,
                         goto overflow;
                     requiredsize += incr;
                 }
-                if (requiredsize > PY_SSIZE_T_MAX - (size - collend))
-                    goto overflow;
-                requiredsize += size - collend;
-                if (requiredsize > ressize) {
-                    if (ressize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*ressize)
-                        requiredsize = 2*ressize;
-                    if (_PyBytes_Resize(&res, requiredsize))
-                        goto onError;
-                    str = PyBytes_AS_STRING(res) + respos;
-                    ressize = requiredsize;
-                }
+
+                str = _PyBytesWriter_Prepare(&writer, str, requiredsize-1);
+                if (str == NULL)
+                    goto onError;
+
                 /* generate replacement */
                 for (i = collstart; i < collend; ++i) {
                     str += sprintf(str, "&#%d;", PyUnicode_READ(kind, data, i));
@@ -6598,20 +6826,14 @@ unicode_encode_ucs1(PyObject *unicode,
                 if (PyBytes_Check(repunicode)) {
                     /* Directly copy bytes result to output. */
                     repsize = PyBytes_Size(repunicode);
                     if (repsize > 1) {
                         /* Make room for all additional bytes. */
-                        respos = str - PyBytes_AS_STRING(res);
-                        if (ressize > PY_SSIZE_T_MAX - repsize - 1) {
-                            Py_DECREF(repunicode);
-                            goto overflow;
-                        }
-                        if (_PyBytes_Resize(&res, ressize+repsize-1)) {
+                        str = _PyBytesWriter_Prepare(&writer, str, repsize-1);
+                        if (str == NULL) {
                             Py_DECREF(repunicode);
                             goto onError;
                         }
-                        str = PyBytes_AS_STRING(res) + respos;
-                        ressize += repsize-1;
                     }
                     memcpy(str, PyBytes_AsString(repunicode), repsize);
                     str += repsize;
                     pos = newpos;
@@ -6622,25 +6844,14 @@ unicode_encode_ucs1(PyObject *unicode,
                 /* need more space? (at least enough for what we
                    have+the replacement+the rest of the string, so
                    we won't have to check space for encodable characters) */
-                respos = str - PyBytes_AS_STRING(res);
                 repsize = PyUnicode_GET_LENGTH(repunicode);
-                requiredsize = respos;
-                if (requiredsize > PY_SSIZE_T_MAX - repsize)
-                    goto overflow;
-                requiredsize += repsize;
-                if (requiredsize > PY_SSIZE_T_MAX - (size - collend))
-                    goto overflow;
-                requiredsize += size - collend;
-                if (requiredsize > ressize) {
-                    if (ressize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*ressize)
-                        requiredsize = 2*ressize;
-                    if (_PyBytes_Resize(&res, requiredsize)) {
-                        Py_DECREF(repunicode);
-                        goto onError;
-                    }
-                    str = PyBytes_AS_STRING(res) + respos;
-                    ressize = requiredsize;
-                }
+                if (repsize > 1) {
+                    str = _PyBytesWriter_Prepare(&writer, str, repsize-1);
+                    if (str == NULL) {
+                        Py_DECREF(repunicode);
+                        goto onError;
+                    }
+                }
 
                 /* check if there is anything unencodable in the replacement
                    and copy it to the output */
@@ -6657,26 +6868,23 @@ unicode_encode_ucs1(PyObject *unicode,
                 pos = newpos;
                 Py_DECREF(repunicode);
             }
-        }
-    }
-    /* Resize if we allocated to much */
-    size = str - PyBytes_AS_STRING(res);
-    if (size < ressize) { /* If this falls res will be NULL */
-        assert(size >= 0);
-        if (_PyBytes_Resize(&res, size) < 0)
-            goto onError;
+
+            /* If overallocation was disabled, ensure that it was the last
+               write. Otherwise, we missed an optimization */
+            assert(writer.overallocate || pos == size);
+        }
     }
 
     Py_XDECREF(error_handler_obj);
     Py_XDECREF(exc);
-    return res;
+    return _PyBytesWriter_Finish(&writer, str);
 
   overflow:
     PyErr_SetString(PyExc_OverflowError,
                     "encoded result is too long for a Python string");
 
   onError:
-    Py_XDECREF(res);
+    _PyBytesWriter_Dealloc(&writer);
     Py_XDECREF(error_handler_obj);
     Py_XDECREF(exc);
     return NULL;
@@ -13366,13 +13574,6 @@ int
 _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
                                  Py_ssize_t length, Py_UCS4 maxchar)
 {
-#ifdef MS_WINDOWS
-   /* On Windows, overallocate by 50% is the best factor */
-#  define OVERALLOCATE_FACTOR 2
-#else
-   /* On Linux, overallocate by 25% is the best factor */
-#  define OVERALLOCATE_FACTOR 4
-#endif
     Py_ssize_t newlen;
     PyObject *newbuffer;
 