diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c
--- a/Modules/_io/bytesio.c
+++ b/Modules/_io/bytesio.c
@@ -11,6 +11,13 @@ typedef struct {
     PyObject *dict;
     PyObject *weakreflist;
     Py_ssize_t exports;
+    /* Buffer view acquired from the object passed to __init__ or
+     * __setstate__; it is held only while `shared' is nonzero. */
+    Py_buffer initvalue;
+    /** If nonzero, `buf' is a read-only reference to a shared buffer owned by
+     * the object referenced by `initvalue'. It must be copied prior to
+     * mutation, and released during finalization */
+    int shared;
 } bytesio;
 
 typedef struct {
@@ -33,6 +40,106 @@ typedef struct {
         return NULL; \
     }
 
+/* Ensure we have a buffer suitable for writing, in the case that an initvalue
+ * object was provided, and we're currently borrowing its buffer.
+ * `preferred_size' indicates the total reserved buffer size allocated as part
+ * of unsharing, to avoid a redundant reallocation caused by any subsequent
+ * mutation. `truncate' indicates whether truncation should occur if
+ * `preferred_size' is smaller than the current string size. Does nothing if
+ * the buffer is not shared. Returns 0 on success, or sets MemoryError and
+ * returns -1 on failure, in which case the object is left unchanged. */
+static int
+unshare(bytesio *self, size_t preferred_size, int truncate)
+{
+    if (self->shared) {
+        Py_ssize_t copy_size;
+        char *new_buf;
+
+        if (!truncate && preferred_size < (size_t)self->string_size) {
+            preferred_size = (size_t)self->string_size;
+        }
+
+        new_buf = (char *)PyMem_Malloc(preferred_size);
+        if (new_buf == NULL) {
+            PyErr_NoMemory();
+            return -1;
+        }
+
+        copy_size = self->string_size;
+        if ((size_t)copy_size > preferred_size) {
+            copy_size = (Py_ssize_t)preferred_size;
+        }
+
+        memcpy(new_buf, self->buf, (size_t)copy_size);
+        PyBuffer_Release(&self->initvalue);
+        self->shared = 0;
+        self->buf = new_buf;
+        self->buf_size = preferred_size;
+        self->string_size = copy_size;
+    }
+    return 0;
+}
+
+/* Reset the BytesIO by releasing or freeing any existing buffer, and returning
+ * it to the initial closed state. `buf_size' is zeroed along with `buf' so
+ * that no stale capacity is trusted once reinit() installs a new buffer. */
+static void
+reset(bytesio *self)
+{
+    if (self->shared) {
+        PyBuffer_Release(&self->initvalue);
+        self->shared = 0;
+    } else if (self->buf != NULL) {
+        PyMem_Free(self->buf);
+    }
+    self->buf = NULL;
+    self->buf_size = 0;
+    self->string_size = 0;
+    self->pos = 0;
+}
+
+/* Internal version of BytesIO.__init__; resets the object to its initial
+ * (closed) state before repopulating it, optionally by sharing a buffer
+ * exported by `initvalue'. Returns 0 on success, or sets an exception and
+ * returns -1 on failure, leaving the object closed. */
+static int
+reinit(bytesio *self, PyObject *initvalue)
+{
+    reset(self);
+
+    if (initvalue && initvalue != Py_None) {
+        if (PyObject_GetBuffer(initvalue, &self->initvalue,
+                               PyBUF_CONTIG_RO) < 0) {
+            return -1;
+        }
+        self->buf = self->initvalue.buf;
+        self->buf_size = (size_t)self->initvalue.len;
+        self->string_size = self->initvalue.len;
+        self->shared = 1;
+
+        if (!self->initvalue.readonly) {
+            /* We asked for a read-only buffer, but the object provided a
+             * writable one, so we must unshare immediately to avoid potential
+             * corruption of BytesIO state later due to mutations to the source
+             * object. */
+            if (unshare(self, 0, 0) < 0) {
+                reset(self);
+                return -1;
+            }
+        }
+    }
+
+    /* If no initvalue provided, prepare a private buffer now. */
+    if (self->buf == NULL) {
+        self->buf = (char *)PyMem_Malloc(0);
+        if (self->buf == NULL) {
+            PyErr_NoMemory();
+            return -1;
+        }
+    }
+
+    return 0;
+}
 
 /* Internal routine to get a line from the buffer of a BytesIO object.
    Returns the length between the current position to the
@@ -125,11 +232,20 @@ resize_buffer(bytesio *self, size_t size
 static Py_ssize_t
 write_bytes(bytesio *self, const char *bytes, Py_ssize_t len)
 {
+    size_t desired;
+
     assert(self->buf != NULL);
     assert(self->pos >= 0);
     assert(len >= 0);
 
-    if ((size_t)self->pos + len > self->buf_size) {
+    /* Stop borrowing the initvalue buffer before writing; passing the final
+     * size lets unshare() reserve the room this write needs up front. */
+    desired = (size_t)self->pos + len;
+    if (unshare(self, desired, 0) < 0) {
+        return -1;
+    }
+
+    if (desired > self->buf_size) {
         if (resize_buffer(self, (size_t)self->pos + len) < 0)
             return -1;
     }
@@ -212,6 +328,12 @@ bytesio_getbuffer(bytesio *self)
 
     CHECK_CLOSED(self);
 
+    /* NOTE(review): presumably needed because the exported view allows
+     * writes, which must never reach a borrowed buffer -- confirm. */
+    if (unshare(self, 0, 0) < 0) {
+        return NULL;
+    }
+
     buf = (bytesiobuf *) type->tp_alloc(type, 0);
     if (buf == NULL)
         return NULL;
@@ -502,6 +624,12 @@ bytesio_truncate(bytesio *self, PyObject
         return NULL;
     }
 
+    /* truncate=1: when `size' is below string_size, unshare() copies only
+     * the first `size' bytes rather than the whole borrowed buffer. */
+    if (unshare(self, size, 1) < 0) {
+        return NULL;
+    }
+
     if (size < self->string_size) {
         self->string_size = size;
         if (resize_buffer(self, size) < 0)
@@ -655,10 +783,7 @@ PyDoc_STRVAR(close_doc,
 static PyObject *
 bytesio_close(bytesio *self)
 {
-    if (self->buf != NULL) {
-        PyMem_Free(self->buf);
-        self->buf = NULL;
-    }
+    reset(self);
     Py_RETURN_NONE;
 }
 
@@ -706,11 +831,11 @@ bytesio_getstate(bytesio *self)
 static PyObject *
 bytesio_setstate(bytesio *self, PyObject *state)
 {
-    PyObject *result;
     PyObject *position_obj;
     PyObject *dict;
     Py_ssize_t pos;
 
+    CHECK_EXPORTS(self);
     assert(state != NULL);
 
     /* We allow the state tuple to be longer than 3, because we may need
@@ -722,18 +847,13 @@ bytesio_setstate(bytesio *self, PyObject
                      Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
         return NULL;
     }
-    CHECK_EXPORTS(self);
-    /* Reset the object to its default state. This is only needed to handle
-       the case of repeated calls to __setstate__. */
-    self->string_size = 0;
-    self->pos = 0;
 
-    /* Set the value of the internal buffer. If state[0] does not support the
-       buffer protocol, bytesio_write will raise the appropriate TypeError. */
-    result = bytesio_write(self, PyTuple_GET_ITEM(state, 0));
-    if (result == NULL)
+    /* Reset the object to its default state and set the value of the internal
+     * buffer. If state[0] does not support the buffer protocol, reinit() will
+     * raise the appropriate TypeError. */
+    if (reinit(self, PyTuple_GET_ITEM(state, 0)) < 0) {
         return NULL;
-    Py_DECREF(result);
+    }
 
     /* Set carefully the position value. Alternatively, we could use the seek
        method instead of modifying self->pos directly to better protect the
@@ -788,10 +908,9 @@ bytesio_dealloc(bytesio *self)
                         "deallocated BytesIO object has exported buffers");
         PyErr_Print();
     }
-    if (self->buf != NULL) {
-        PyMem_Free(self->buf);
-        self->buf = NULL;
-    }
+
+    reset(self);
+
     Py_CLEAR(self->dict);
     if (self->weakreflist != NULL)
         PyObject_ClearWeakRefs((PyObject *) self);
@@ -809,13 +928,8 @@ bytesio_new(PyTypeObject *type, PyObject
         return NULL;
 
     /* tp_alloc initializes all the fields to zero. So we don't have to
-       initialize them here. */
-
-    self->buf = (char *)PyMem_Malloc(0);
-    if (self->buf == NULL) {
-        Py_DECREF(self);
-        return PyErr_NoMemory();
-    }
+       initialize them here.  Since buf==NULL is how BytesIO knows that it is
+       closed, a valid closed BytesIO instance is returned by this function. */
 
     return (PyObject *)self;
 }
@@ -830,20 +944,15 @@ bytesio_init(bytesio *self, PyObject *ar
                                      &initvalue))
         return -1;
 
-    /* In case, __init__ is called multiple times. */
-    self->string_size = 0;
-    self->pos = 0;
-
-    if (initvalue && initvalue != Py_None) {
-        PyObject *res;
-        res = bytesio_write(self, initvalue);
-        if (res == NULL)
-            return -1;
-        Py_DECREF(res);
-        self->pos = 0;
-    }
-
-    return 0;
+    /* reinit() frees or releases the current buffer, so refuse to run while
+     * buffers handed out earlier are still exported. */
+    if (self->exports > 0) {
+        PyErr_SetString(PyExc_BufferError,
+                        "Existing exports of data: object cannot be re-sized");
+        return -1;
+    }
+
+    return reinit(self, initvalue);
 }
 
 static PyObject *