diff --git a/Lib/test/test_memoryio.py b/Lib/test/test_memoryio.py --- a/Lib/test/test_memoryio.py +++ b/Lib/test/test_memoryio.py @@ -9,6 +9,7 @@ from test import support import io import _pyio as pyio import pickle +import sys class MemorySeekTestMixin: @@ -677,6 +678,9 @@ class PyStringIOPickleTest(TextIOTestMix pass +skip_getrefcount = unittest.skipUnless(hasattr(sys, 'getrefcount'), + 'test needs sys.getrefcount()') + class CBytesIOTest(PyBytesIOTest): ioclass = io.BytesIO UnsupportedOperation = io.UnsupportedOperation @@ -711,12 +715,60 @@ class CBytesIOTest(PyBytesIOTest): @support.cpython_only def test_sizeof(self): - basesize = support.calcobjsize('P2nN2Pn') + basesize = support.calcobjsize('P2nN2Pn2P2n2i5Pi') check = self.check_sizeof self.assertEqual(object.__sizeof__(io.BytesIO()), basesize) check(io.BytesIO(), basesize ) - check(io.BytesIO(b'a'), basesize + 1 + 1 ) - check(io.BytesIO(b'a' * 1000), basesize + 1000 + 1 ) + check(io.BytesIO(b'a'), basesize + 1 ) + check(io.BytesIO(b'a' * 1000), basesize + 1000) + + # Various tests of copy-on-write behaviour for BytesIO. + + def _test_cow_mutation(self, mutation): + # Common code for all BytesIO copy-on-write mutation tests. + imm = b' ' * 1024 + old_rc = sys.getrefcount(imm) + memio = self.ioclass(imm) + assert sys.getrefcount(imm) == (old_rc + 1) + mutation(memio) + assert sys.getrefcount(imm) == old_rc + + @skip_getrefcount + def test_cow_truncate(self): + # Ensure truncate causes a copy. + def mutation(memio): + memio.truncate(1) + self._test_cow_mutation(mutation) + + @skip_getrefcount + def test_cow_write(self): + # Ensure write that would not cause a resize still results in a copy. + def mutation(memio): + memio.seek(0) + memio.write(b'foo') + self._test_cow_mutation(mutation) + + @skip_getrefcount + def test_cow_setstate(self): + # __setstate__ should cause buffer to be released. + memio = self.ioclass(b'foooooo') + state = memio.__getstate__() + def mutation(memio): + memio.__setstate__(state) + self._test_cow_mutation(mutation) + + @skip_getrefcount + def test_cow_mutable(self): + # BytesIO should ignore any buffer-supplying object that offers a + # writable buffer, such as bytearray(). + ba = bytearray(1024) + old_rc = sys.getrefcount(ba) + memio = self.ioclass(ba) + assert sys.getrefcount(ba) == old_rc + + # Mutation does not affect buffer. + ba[0] = 123 + assert memio.getvalue() == (b'\x00' * len(ba)) class CStringIOTest(PyStringIOTest): ioclass = io.StringIO diff --git a/Modules/_io/bytesio.c b/Modules/_io/bytesio.c --- a/Modules/_io/bytesio.c +++ b/Modules/_io/bytesio.c @@ -11,6 +11,11 @@ typedef struct { PyObject *dict; PyObject *weakreflist; Py_ssize_t exports; + Py_buffer initvalue; + /** If nonzero, `buf' is a read-only reference to a shared buffer owned by + * the object referenced by `initvalue'. It must be copied prior to + * mutation, and released during finalization */ + int shared; } bytesio; typedef struct { @@ -33,6 +38,47 @@ typedef struct { return NULL; \ } +/* Ensure we have a buffer suitable for writing, in the case that an initvalue + * object was provided, and we're currently borrowing its buffer. + * `preferred_size' indicates the total reserved buffer size allocated as part + * of unsharing, to avoid a redundant reallocation caused by any subsequent + * mutation. `truncate' indicates whether truncation should occur if + * preferred_sizeshared) { + Py_ssize_t copy_size; + char *new_buf; + + if((! truncate) && preferred_size < self->string_size) { + preferred_size = self->string_size; + } + + new_buf = (char *)PyMem_Malloc(preferred_size); + if (new_buf == NULL) { + PyErr_NoMemory(); + return -1; + } + + copy_size = self->string_size; + if (copy_size > preferred_size) { + copy_size = preferred_size; + } + + memcpy(new_buf, self->buf, copy_size); + PyBuffer_Release(&self->initvalue); + self->shared = 0; + self->buf = new_buf; + self->buf_size = preferred_size; + self->string_size = (Py_ssize_t) copy_size; + } + return 0; +} /* Internal routine to get a line from the buffer of a BytesIO object. Returns the length between the current position to the @@ -125,11 +171,18 @@ resize_buffer(bytesio *self, size_t size static Py_ssize_t write_bytes(bytesio *self, const char *bytes, Py_ssize_t len) { + size_t desired; + assert(self->buf != NULL); assert(self->pos >= 0); assert(len >= 0); - if ((size_t)self->pos + len > self->buf_size) { + desired = (size_t)self->pos + len; + if (unshare(self, desired, 0) < 0) { + return -1; + } + + if (desired > self->buf_size) { if (resize_buffer(self, (size_t)self->pos + len) < 0) return -1; } @@ -160,6 +213,84 @@ write_bytes(bytesio *self, const char *b return len; } +/* Reset the BytesIO by releasing or freeing any existing buffer, and returning + * it to the initial closed state. */ +static void +reset(bytesio *self) +{ + if (self->shared) { + PyBuffer_Release(&self->initvalue); + self->shared = 0; + } else if (self->buf != NULL) { + PyMem_Free(self->buf); + } + self->buf = NULL; + self->string_size = 0; + self->pos = 0; +} + +static int +reinit_private(bytesio *self, Py_ssize_t initial_size) +{ + reset(self); + self->buf = (char *)PyMem_Malloc(initial_size); + if (self->buf == NULL) { + PyErr_NoMemory(); + return -1; + } + self->buf_size = initial_size; + return 0; +} + +/* Internal version of BytesIO.__init__; resets the object to its initial + * (closed) state before repopulating it, optionally by sharing a buffer + * exported by `initvalue'. Returns 0 on success, or sets an exception and + * returns -1 on failure. */ +static int +reinit(bytesio *self, PyObject *initvalue) +{ + Py_buffer buf; + + /* If no initvalue provided, prepare a private buffer now. */ + if(initvalue == NULL || initvalue == Py_None) { + return reinit_private(self, 0); + } + + if (PyObject_GetBuffer(initvalue, &buf, PyBUF_WRITABLE) >= 0) { + /* We want a read-only buffer, but the object is willing to supply a + * writable one, so we must copy immediately to avoid potential + * corruption of BytesIO state later due to mutations to the source + * object, which appears to be mutable. */ + int rc = reinit_private(self, buf.len); + if (rc != -1 && (write_bytes(self, buf.buf, buf.len) < 0)) { + reset(self); + rc = -1; + } + self->pos = 0; + PyBuffer_Release(&buf); + return rc; + } + + /* Objects that refuse to provide a writable buffer trigger an + * exception. */ + PyErr_Clear(); + + /* Must delay overwriting self->initvalue until we're sure we can update + * object state. Otherwise we potentially mutate before throwing an + * exception. */ + if (PyObject_GetBuffer(initvalue, &buf, PyBUF_CONTIG_RO) < 0) { + return -1; + } + + reset(self); + self->buf = buf.buf; + self->buf_size = (size_t)buf.len; + self->string_size = buf.len; + self->initvalue = buf; + self->shared = 1; + return 0; +} + static PyObject * bytesio_get_closed(bytesio *self) { @@ -212,6 +343,10 @@ bytesio_getbuffer(bytesio *self) CHECK_CLOSED(self); + if (unshare(self, 0, 0) < 0) { + return NULL; + } + buf = (bytesiobuf *) type->tp_alloc(type, 0); if (buf == NULL) return NULL; @@ -502,6 +637,10 @@ bytesio_truncate(bytesio *self, PyObject return NULL; } + if (unshare(self, size, 1) < 0) { + return NULL; + } + if (size < self->string_size) { self->string_size = size; if (resize_buffer(self, size) < 0) @@ -655,10 +794,7 @@ PyDoc_STRVAR(close_doc, static PyObject * bytesio_close(bytesio *self) { - if (self->buf != NULL) { - PyMem_Free(self->buf); - self->buf = NULL; - } + reset(self); Py_RETURN_NONE; } @@ -706,11 +842,12 @@ bytesio_getstate(bytesio *self) static PyObject * bytesio_setstate(bytesio *self, PyObject *state) { - PyObject *result; PyObject *position_obj; PyObject *dict; Py_ssize_t pos; + CHECK_CLOSED(self); + CHECK_EXPORTS(self); assert(state != NULL); /* We allow the state tuple to be longer than 3, because we may need @@ -722,18 +859,13 @@ bytesio_setstate(bytesio *self, PyObject Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name); return NULL; } - CHECK_EXPORTS(self); - /* Reset the object to its default state. This is only needed to handle - the case of repeated calls to __setstate__. */ - self->string_size = 0; - self->pos = 0; - /* Set the value of the internal buffer. If state[0] does not support the - buffer protocol, bytesio_write will raise the appropriate TypeError. */ - result = bytesio_write(self, PyTuple_GET_ITEM(state, 0)); - if (result == NULL) + /* Reset the object to its default state and set the value of the internal + * buffer. If state[0] does not support the buffer protocol, reinit() will + * raise the appropriate TypeError. */ + if (reinit(self, PyTuple_GET_ITEM(state, 0)) < 0) { return NULL; - Py_DECREF(result); + } /* Set carefully the position value. Alternatively, we could use the seek method instead of modifying self->pos directly to better protect the @@ -788,10 +920,9 @@ bytesio_dealloc(bytesio *self) "deallocated BytesIO object has exported buffers"); PyErr_Print(); } - if (self->buf != NULL) { - PyMem_Free(self->buf); - self->buf = NULL; - } + + reset(self); + Py_CLEAR(self->dict); if (self->weakreflist != NULL) PyObject_ClearWeakRefs((PyObject *) self); @@ -811,10 +942,9 @@ bytesio_new(PyTypeObject *type, PyObject /* tp_alloc initializes all the fields to zero. So we don't have to initialize them here. */ - self->buf = (char *)PyMem_Malloc(0); - if (self->buf == NULL) { + if (reinit_private(self, 0) < 0) { Py_DECREF(self); - return PyErr_NoMemory(); + return NULL; } return (PyObject *)self; @@ -830,20 +960,7 @@ bytesio_init(bytesio *self, PyObject *ar &initvalue)) return -1; - /* In case, __init__ is called multiple times. */ - self->string_size = 0; - self->pos = 0; - - if (initvalue && initvalue != Py_None) { - PyObject *res; - res = bytesio_write(self, initvalue); - if (res == NULL) - return -1; - Py_DECREF(res); - self->pos = 0; - } - - return 0; + return reinit(self, initvalue); } static PyObject *