changeset: 98730:ff7e2ccfcd08 tag: tip user: Victor Stinner date: Wed Oct 14 02:33:00 2015 +0200 files: Include/bytesobject.h Objects/bytearrayobject.c Objects/bytesobject.c description: Optimize bytearray % args Don't create temporary bytes objects: modify _PyBytes_Format() to work directly on bytearray objects. * _PyBytesWriter: add use_bytearray attribute to use a bytearray buffer * Rename _PyBytes_Format() to _PyBytes_FormatEx() just in case something outside CPython uses it * _PyBytes_FormatEx() now uses (char*, Py_ssize_t) for the input string, so bytearray_format() doesn't need to create a temporary input bytes object * Add use_bytearray parameter to _PyBytes_FormatEx() which is passed to _PyBytesWriter, to create a bytearray buffer instead of a bytes buffer diff -r 388483b53cde -r ff7e2ccfcd08 Include/bytesobject.h --- a/Include/bytesobject.h Wed Oct 14 00:21:35 2015 +0200 +++ b/Include/bytesobject.h Wed Oct 14 02:33:00 2015 +0200 @@ -62,7 +62,11 @@ PyAPI_FUNC(void) PyBytes_Concat(PyObject PyAPI_FUNC(void) PyBytes_ConcatAndDel(PyObject **, PyObject *); #ifndef Py_LIMITED_API PyAPI_FUNC(int) _PyBytes_Resize(PyObject **, Py_ssize_t); -PyAPI_FUNC(PyObject *) _PyBytes_Format(PyObject *, PyObject *); +PyAPI_FUNC(PyObject*) _PyBytes_FormatEx( + const char *format, + Py_ssize_t format_len, + PyObject *args, + int use_bytearray); #endif PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t, const char *, Py_ssize_t, @@ -138,7 +142,11 @@ typedef struct { incremented by _PyBytesWriter_Prepare() */ Py_ssize_t min_size; - /* If non-zero, overallocate the buffer (default: 0). */ + /* If non-zero, use a bytearray instead of a bytes object for buffer. */ + int use_bytearray; + + /* If non-zero, overallocate the buffer (default: 0). + This flag must be zero if use_bytearray is non-zero. 
*/ int overallocate; /* Stack buffer */ diff -r 388483b53cde -r ff7e2ccfcd08 Objects/bytearrayobject.c --- a/Objects/bytearrayobject.c Wed Oct 14 00:21:35 2015 +0200 +++ b/Objects/bytearrayobject.c Wed Oct 14 02:33:00 2015 +0200 @@ -282,26 +282,14 @@ PyByteArray_Concat(PyObject *a, PyObject static PyObject * bytearray_format(PyByteArrayObject *self, PyObject *args) { - PyObject *bytes_in, *bytes_out, *res; - char *bytestring; - - if (self == NULL || !PyByteArray_Check(self) || args == NULL) { + if (self == NULL || !PyByteArray_Check(self)) { PyErr_BadInternalCall(); return NULL; } - bytestring = PyByteArray_AS_STRING(self); - bytes_in = PyBytes_FromString(bytestring); - if (bytes_in == NULL) - return NULL; - bytes_out = _PyBytes_Format(bytes_in, args); - Py_DECREF(bytes_in); - if (bytes_out == NULL) - return NULL; - res = PyByteArray_FromObject(bytes_out); - Py_DECREF(bytes_out); - if (res == NULL) - return NULL; - return res; + + return _PyBytes_FormatEx(PyByteArray_AS_STRING(self), + PyByteArray_GET_SIZE(self), + args, 1); } /* Functions stuffed into the type object */ diff -r 388483b53cde -r ff7e2ccfcd08 Objects/bytesobject.c --- a/Objects/bytesobject.c Wed Oct 14 00:21:35 2015 +0200 +++ b/Objects/bytesobject.c Wed Oct 14 02:33:00 2015 +0200 @@ -568,28 +568,32 @@ format_obj(PyObject *v, const char **pbu /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) 
*/ PyObject * -_PyBytes_Format(PyObject *format, PyObject *args) +_PyBytes_FormatEx(const char *format, Py_ssize_t format_len, + PyObject *args, int use_bytearray) { - char *fmt, *res; + const char *fmt; + char *res; Py_ssize_t arglen, argidx; Py_ssize_t fmtcnt; int args_owned = 0; PyObject *dict = NULL; _PyBytesWriter writer; - if (format == NULL || !PyBytes_Check(format) || args == NULL) { + if (args == NULL) { PyErr_BadInternalCall(); return NULL; } - fmt = PyBytes_AS_STRING(format); - fmtcnt = PyBytes_GET_SIZE(format); + fmt = format; + fmtcnt = format_len; _PyBytesWriter_Init(&writer); + writer.use_bytearray = use_bytearray; res = _PyBytesWriter_Alloc(&writer, fmtcnt); if (res == NULL) return NULL; - writer.overallocate = 1; + if (!use_bytearray) + writer.overallocate = 1; if (PyTuple_Check(args)) { arglen = PyTuple_GET_SIZE(args); @@ -614,8 +618,7 @@ PyObject * if (pos != NULL) len = pos - fmt; else { - len = PyBytes_GET_SIZE(format); - len -= (fmt - PyBytes_AS_STRING(format)); + len = format_len - (fmt - format); } assert(len != 0); @@ -644,7 +647,7 @@ PyObject * fmt++; if (*fmt == '(') { - char *keystart; + const char *keystart; Py_ssize_t keylen; PyObject *key; int pcount = 1; @@ -924,8 +927,7 @@ PyObject * "unsupported format character '%c' (0x%x) " "at index %zd", c, c, - (Py_ssize_t)(fmt - 1 - - PyBytes_AsString(format))); + (Py_ssize_t)(fmt - 1 - format)); goto error; } @@ -1028,7 +1030,7 @@ PyObject * /* If overallocation was disabled, ensure that it was the last write. 
Otherwise, we missed an optimization */ - assert(writer.overallocate || fmtcnt < 0); + assert(writer.overallocate || fmtcnt < 0 || use_bytearray); } /* until end */ if (argidx < arglen && !dict) { @@ -3233,11 +3235,18 @@ bytes_methods[] = { }; static PyObject * -bytes_mod(PyObject *v, PyObject *w) +bytes_mod(PyObject *self, PyObject *args) { - if (!PyBytes_Check(v)) + if (!PyBytes_Check(self)) Py_RETURN_NOTIMPLEMENTED; - return _PyBytes_Format(v, w); + + if (self == NULL || !PyBytes_Check(self)) { + PyErr_BadInternalCall(); + return NULL; + } + + return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), + args, 0); } static PyNumberMethods bytes_as_number = { @@ -3856,6 +3865,7 @@ void writer->allocated = 0; writer->min_size = 0; writer->overallocate = 0; + writer->use_bytearray = 0; writer->use_small_buffer = 0; #ifdef Py_DEBUG memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer)); @@ -3871,14 +3881,18 @@ void Py_LOCAL_INLINE(char*) _PyBytesWriter_AsString(_PyBytesWriter *writer) { - if (!writer->use_small_buffer) { + if (writer->use_small_buffer) { + assert(writer->buffer == NULL); + return writer->small_buffer; + } + else if (writer->use_bytearray) { + assert(writer->buffer != NULL); + return PyByteArray_AS_STRING(writer->buffer); + } + else { assert(writer->buffer != NULL); return PyBytes_AS_STRING(writer->buffer); } - else { - assert(writer->buffer == NULL); - return writer->small_buffer; - } } Py_LOCAL_INLINE(Py_ssize_t) @@ -3897,18 +3911,28 @@ Py_LOCAL_INLINE(void) #ifdef Py_DEBUG char *start, *end; - if (!writer->use_small_buffer) { + if (writer->use_small_buffer) { + assert(writer->buffer == NULL); + } + else { assert(writer->buffer != NULL); - assert(PyBytes_CheckExact(writer->buffer)); + if (writer->use_bytearray) + assert(PyByteArray_CheckExact(writer->buffer)); + else + assert(PyBytes_CheckExact(writer->buffer)); assert(Py_REFCNT(writer->buffer) == 1); } - else { - assert(writer->buffer == NULL); + + if 
(writer->use_bytearray) { + /* bytearray has its own overallocation algorithm, + writer overallocation must be disabled */ + assert(!writer->overallocate); } - start = _PyBytesWriter_AsString(writer); + assert(0 <= writer->allocated); assert(0 <= writer->min_size && writer->min_size <= writer->allocated); /* the last byte must always be null */ + start = _PyBytesWriter_AsString(writer); assert(start[writer->allocated] == 0); end = start + writer->allocated; @@ -3932,8 +3956,7 @@ void* if (writer->min_size > PY_SSIZE_T_MAX - size) { PyErr_NoMemory(); - _PyBytesWriter_Dealloc(writer); - return NULL; + goto error; } writer->min_size += size; @@ -3950,23 +3973,33 @@ void* pos = _PyBytesWriter_GetPos(writer, str); if (!writer->use_small_buffer) { - /* Note: Don't use a bytearray object because the conversion from - byterray to bytes requires to copy all bytes. */ - if (_PyBytes_Resize(&writer->buffer, allocated)) { - assert(writer->buffer == NULL); - return NULL; + if (writer->use_bytearray) { + if (PyByteArray_Resize(writer->buffer, allocated)) + goto error; + } + else { + if (_PyBytes_Resize(&writer->buffer, allocated)) + goto error; } } else { /* convert from stack buffer to bytes object buffer */ assert(writer->buffer == NULL); - writer->buffer = PyBytes_FromStringAndSize(NULL, allocated); + if (writer->use_bytearray) + writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated); + else + writer->buffer = PyBytes_FromStringAndSize(NULL, allocated); if (writer->buffer == NULL) - return NULL; + goto error; if (pos != 0) { - Py_MEMCPY(PyBytes_AS_STRING(writer->buffer), + char *dest; + if (writer->use_bytearray) + dest = PyByteArray_AS_STRING(writer->buffer); + else + dest = PyBytes_AS_STRING(writer->buffer); + Py_MEMCPY(dest, writer->small_buffer, pos); } @@ -3981,6 +4014,10 @@ void* str = _PyBytesWriter_AsString(writer) + pos; _PyBytesWriter_CheckConsistency(writer, str); return str; + +error: + _PyBytesWriter_Dealloc(writer); + return NULL; } /* Allocate the 
buffer to write size bytes. @@ -4013,7 +4050,7 @@ PyObject * _PyBytesWriter_CheckConsistency(writer, str); pos = _PyBytesWriter_GetPos(writer, str); - if (pos == 0) { + if (pos == 0 && !writer->use_bytearray) { Py_CLEAR(writer->buffer); /* Get the empty byte string singleton */ result = PyBytes_FromStringAndSize(NULL, 0); @@ -4026,9 +4063,17 @@ PyObject * writer->buffer = NULL; if (pos != writer->allocated) { - if (_PyBytes_Resize(&result, pos)) { - assert(result == NULL); - return NULL; + if (writer->use_bytearray) { + if (PyByteArray_Resize(result, pos)) { + Py_DECREF(result); + return NULL; + } + } + else { + if (_PyBytes_Resize(&result, pos)) { + assert(result == NULL); + return NULL; + } } } }