diff -r 8563cdf83bb9 Modules/_io/bytesio.c --- a/Modules/_io/bytesio.c Fri Jul 20 12:34:18 2012 +0200 +++ b/Modules/_io/bytesio.c Fri Jul 20 22:45:19 2012 +0300 @@ -4,10 +4,9 @@ typedef struct { PyObject_HEAD - char *buf; + PyObject *buf; Py_ssize_t pos; Py_ssize_t string_size; - size_t buf_size; PyObject *dict; PyObject *weakreflist; Py_ssize_t exports; @@ -18,6 +17,12 @@ bytesio *source; } bytesiobuf; +/* The bytesio object can be in three states: + * Py_REFCNT(buf) == 1, exports == 0. + * Py_REFCNT(buf) > 1. exports == 0, string_size == PyBytes_GET_SIZE(buf), + first modification or export causes the internal buffer copying. + * exports > 0. Py_REFCNT(buf) == 1, any modifications are forbidden. +*/ #define CHECK_CLOSED(self) \ if ((self)->buf == NULL) { \ @@ -41,14 +46,16 @@ get_line(bytesio *self, char **output) { char *n; + char *str_start; const char *str_end; Py_ssize_t len; assert(self->buf != NULL); /* Move to the end of the line, up to the end of the string, s. */ - str_end = self->buf + self->string_size; - for (n = self->buf + self->pos; + str_start = PyBytes_AS_STRING(self->buf) + self->pos; + str_end = PyBytes_AS_STRING(self->buf) + self->string_size; + for (n = str_start; n < str_end && *n != '\n'; n++); @@ -57,8 +64,8 @@ n++; /* Get the length from the current position to the end of the line. */ - len = n - (self->buf + self->pos); - *output = self->buf + self->pos; + len = n - str_start; + *output = str_start; assert(len >= 0); assert(self->pos < PY_SSIZE_T_MAX - len); @@ -67,6 +74,26 @@ return len; } +/* Internal routine for detaching the shared buffer of BytesIO objects. + The caller should ensure that the 'size' argument is non-negative and + not lesser than self->string_size. Returns 0 on success, -1 otherwise. 
*/ +static int +unshare_buffer(bytesio *self, size_t size) +{ + PyObject * new_buf; + assert(Py_REFCNT(self->buf) > 1); + assert(self->exports == 0); + assert(size >= self->string_size); + new_buf = PyBytes_FromStringAndSize(NULL, size); + if (new_buf == NULL) + return -1; + memcpy(PyBytes_AS_STRING(new_buf), PyBytes_AS_STRING(self->buf), + self->string_size); + Py_DECREF(self->buf); + self->buf = new_buf; + return 0; +} + /* Internal routine for changing the size of the buffer of BytesIO objects. The caller should ensure that the 'size' argument is non-negative. Returns 0 on success, -1 otherwise. */ @@ -75,8 +102,7 @@ { /* Here, unsigned types are used to avoid dealing with signed integer overflow, which is undefined in C. */ - size_t alloc = self->buf_size; - char *new_buf = NULL; + size_t alloc = PyBytes_GET_SIZE(self->buf); assert(self->buf != NULL); @@ -104,13 +130,15 @@ if (alloc > ((size_t)-1) / sizeof(char)) goto overflow; - new_buf = (char *)PyMem_Realloc(self->buf, alloc * sizeof(char)); - if (new_buf == NULL) { - PyErr_NoMemory(); - return -1; + + if (Py_REFCNT(self->buf) > 1) { + if (unshare_buffer(self, alloc) < 0) + return -1; } - self->buf_size = alloc; - self->buf = new_buf; + else { + if (_PyBytes_Resize(&self->buf, alloc) < 0) + return -1; + } return 0; @@ -129,10 +157,14 @@ assert(self->pos >= 0); assert(len >= 0); - if ((size_t)self->pos + len > self->buf_size) { + if ((size_t)self->pos + len > PyBytes_GET_SIZE(self->buf)) { if (resize_buffer(self, (size_t)self->pos + len) < 0) return -1; } + else if (Py_REFCNT(self->buf) > 1) { /* Shared buffer: take a private copy as large as the current buffer. This branch only guarantees pos + len <= PyBytes_GET_SIZE(buf), so sizing the copy by string_size could leave it too small for the memcpy below. */ + if (unshare_buffer(self, (size_t)PyBytes_GET_SIZE(self->buf)) < 0) + return -1; + } if (self->pos > self->string_size) { /* In case of overseek, pad with null bytes the buffer region between @@ -143,13 +175,13 @@ | | <--to pad-->|<---to write---> | 0 buf position */ - memset(self->buf + self->string_size, '\0', + memset(PyBytes_AS_STRING(self->buf) + self->string_size, '\0', (self->pos - self->string_size) * sizeof(char)); } /* Copy
the data to the internal buffer, overwriting some of the existing data if self->pos < self->string_size. */ - memcpy(self->buf + self->pos, bytes, len); + memcpy(PyBytes_AS_STRING(self->buf) + self->pos, bytes, len); self->pos += len; /* Set the new length of the internal string if it has changed. */ @@ -221,7 +253,22 @@ bytesio_getvalue(bytesio *self) { CHECK_CLOSED(self); - return PyBytes_FromStringAndSize(self->buf, self->string_size); + if (self->string_size <= 1 || self->exports > 0) + return PyBytes_FromStringAndSize(PyBytes_AS_STRING(self->buf), + self->string_size); + + if (self->string_size != PyBytes_GET_SIZE(self->buf)) { + if (Py_REFCNT(self->buf) > 1) { + if (unshare_buffer(self, self->string_size) < 0) + return NULL; + } + else { + if (_PyBytes_Resize(&self->buf, self->string_size) < 0) + return NULL; + } + } + Py_INCREF(self->buf); + return self->buf; } PyDoc_STRVAR(isatty_doc, @@ -289,7 +336,7 @@ } assert(self->buf != NULL); - output = self->buf + self->pos; + output = PyBytes_AS_STRING(self->buf) + self->pos; self->pos += size; return PyBytes_FromStringAndSize(output, size); @@ -445,7 +492,7 @@ len = 0; } - memcpy(raw_buffer, self->buf + self->pos, len); + memcpy(raw_buffer, PyBytes_AS_STRING(self->buf) + self->pos, len); assert(self->pos + len < PY_SSIZE_T_MAX); assert(len >= 0); self->pos += len; @@ -645,10 +692,7 @@ static PyObject * bytesio_close(bytesio *self) { - if (self->buf != NULL) { - PyMem_Free(self->buf); - self->buf = NULL; - } + Py_CLEAR(self->buf); Py_RETURN_NONE; } @@ -776,10 +820,7 @@ "deallocated BytesIO object has exported buffers"); PyErr_Print(); } - if (self->buf != NULL) { - PyMem_Free(self->buf); - self->buf = NULL; - } + Py_CLEAR(self->buf); Py_CLEAR(self->dict); if (self->weakreflist != NULL) PyObject_ClearWeakRefs((PyObject *) self); @@ -799,7 +840,7 @@ /* tp_alloc initializes all the fields to zero. So we don't have to initialize them here. 
*/ - self->buf = (char *)PyMem_Malloc(0); + self->buf = PyBytes_FromStringAndSize(NULL, 0); if (self->buf == NULL) { Py_DECREF(self); return PyErr_NoMemory(); @@ -823,12 +864,20 @@ self->pos = 0; if (initvalue && initvalue != Py_None) { - PyObject *res; - res = bytesio_write(self, initvalue); - if (res == NULL) - return -1; - Py_DECREF(res); - self->pos = 0; + if (PyBytes_CheckExact(initvalue) && self->exports == 0) { + Py_INCREF(initvalue); + Py_XDECREF(self->buf); + self->buf = initvalue; + self->string_size = PyBytes_GET_SIZE(initvalue); + } + else { + PyObject *res; + res = bytesio_write(self, initvalue); + if (res == NULL) + return -1; + Py_DECREF(res); + self->pos = 0; + } } return 0; @@ -939,11 +988,16 @@ { int ret; bytesio *b = (bytesio *) obj->source; + if (Py_REFCNT(b->buf) > 1) { + if (unshare_buffer(b, b->string_size) < 0) + return -1; + } if (view == NULL) { b->exports++; return 0; } - ret = PyBuffer_FillInfo(view, (PyObject*)obj, b->buf, b->string_size, + ret = PyBuffer_FillInfo(view, (PyObject*)obj, + PyBytes_AS_STRING(b->buf), b->string_size, 0, flags); if (ret >= 0) { b->exports++;