diff -r 89873f3b18fd Include/sysmodule.h --- a/Include/sysmodule.h Wed Nov 06 20:25:50 2013 -0800 +++ b/Include/sysmodule.h Thu Nov 07 19:28:33 2013 +0200 @@ -31,6 +31,10 @@ PyAPI_FUNC(void) PySys_AddXOption(const wchar_t *); PyAPI_FUNC(PyObject *) PySys_GetXOptions(void); +#ifndef Py_LIMITED_API +PyAPI_FUNC(size_t) _PySys_GetSizeOf(PyObject *); +#endif + #ifdef __cplusplus } #endif diff -r 89873f3b18fd Lib/test/test_memoryio.py --- a/Lib/test/test_memoryio.py Wed Nov 06 20:25:50 2013 -0800 +++ b/Lib/test/test_memoryio.py Thu Nov 07 19:28:33 2013 +0200 @@ -8,6 +8,7 @@ import io import _pyio as pyio +import sys import pickle class MemorySeekTestMixin: @@ -658,12 +659,11 @@ @support.cpython_only def test_sizeof(self): - basesize = support.calcobjsize('P2nN2Pn') + basesize = support.calcobjsize('P2n2Pn') check = self.check_sizeof self.assertEqual(object.__sizeof__(io.BytesIO()), basesize) - check(io.BytesIO(), basesize ) - check(io.BytesIO(b'a'), basesize + 1 + 1 ) - check(io.BytesIO(b'a' * 1000), basesize + 1000 + 1 ) + check(io.BytesIO(), basesize) + check(io.BytesIO(b'a' * 1000), basesize + sys.getsizeof(b'a' * 1000)) class CStringIOTest(PyStringIOTest): ioclass = io.StringIO diff -r 89873f3b18fd Modules/_io/bytesio.c --- a/Modules/_io/bytesio.c Wed Nov 06 20:25:50 2013 -0800 +++ b/Modules/_io/bytesio.c Thu Nov 07 19:28:33 2013 +0200 @@ -4,10 +4,9 @@ typedef struct { PyObject_HEAD - char *buf; + PyObject *buf; Py_ssize_t pos; Py_ssize_t string_size; - size_t buf_size; PyObject *dict; PyObject *weakreflist; Py_ssize_t exports; @@ -18,6 +17,12 @@ bytesio *source; } bytesiobuf; +/* The bytesio object can be in three states: + * Py_REFCNT(buf) == 1, exports == 0. + * Py_REFCNT(buf) > 1, exports == 0, string_size == PyBytes_GET_SIZE(buf); + the first modification or export triggers a copy of the internal buffer. + * exports > 0, Py_REFCNT(buf) == 1; any modifications are forbidden. 
+*/ #define CHECK_CLOSED(self) \ if ((self)->buf == NULL) { \ @@ -33,39 +38,58 @@ return NULL; \ } +#define SHARED_BUF(self) (Py_REFCNT((self)->buf) > 1) + /* Internal routine to get a line from the buffer of a BytesIO object. Returns the length between the current position to the next newline character. */ static Py_ssize_t -get_line(bytesio *self, char **output) +scan_eol(bytesio *self, Py_ssize_t len) { - char *n; - const char *str_end; - Py_ssize_t len; + const char *start, *n; + Py_ssize_t maxlen; assert(self->buf != NULL); /* Move to the end of the line, up to the end of the string, s. */ - str_end = self->buf + self->string_size; - for (n = self->buf + self->pos; - n < str_end && *n != '\n'; - n++); + start = PyBytes_AS_STRING(self->buf) + self->pos; + maxlen = self->pos < self->string_size ? self->string_size - self->pos : 0; + if (len < 0 || len > maxlen) + len = maxlen; - /* Skip the newline character */ - if (n < str_end) - n++; - - /* Get the length from the current position to the end of the line. */ - len = n - (self->buf + self->pos); - *output = self->buf + self->pos; - + if (len) { + n = memchr(start, '\n', len); + if (n) + /* Get the length from the current position to the end of + the line. */ + len = n - start + 1; + } assert(len >= 0); assert(self->pos < PY_SSIZE_T_MAX - len); - self->pos += len; return len; } +/* Internal routine for detaching the shared buffer of BytesIO objects. + The caller should ensure that the 'size' argument is non-negative and + not less than self->string_size. Returns 0 on success, -1 otherwise. 
*/ +static int +unshare_buffer(bytesio *self, size_t size) +{ + PyObject *new_buf, *old_buf; + assert(SHARED_BUF(self)); + assert(self->exports == 0); + assert(size >= self->string_size); + new_buf = PyBytes_FromStringAndSize(NULL, size); + if (new_buf == NULL) + return -1; + memcpy(PyBytes_AS_STRING(new_buf), PyBytes_AS_STRING(self->buf), + self->string_size); + old_buf = self->buf; + self->buf = new_buf; + Py_DECREF(old_buf); + return 0; +} /* Internal routine for changing the size of the buffer of BytesIO objects. The caller should ensure that the 'size' argument is non-negative. Returns @@ -75,8 +99,7 @@ { /* Here, unsigned types are used to avoid dealing with signed integer overflow, which is undefined in C. */ - size_t alloc = self->buf_size; - char *new_buf = NULL; + size_t alloc = PyBytes_GET_SIZE(self->buf); assert(self->buf != NULL); @@ -87,30 +110,32 @@ if (size < alloc / 2) { /* Major downsize; resize down to exact size. */ - alloc = size + 1; + alloc = size; } else if (size < alloc) { /* Within allocated size; quick exit */ return 0; } - else if (size <= alloc * 1.125) { + else if (size <= alloc + (alloc / 2)) { /* Moderate upsize; overallocate similar to list_resize() */ - alloc = size + (size >> 3) + (size < 9 ? 3 : 6); + alloc = size + (size / 2) + (size < 9 ? 
3 : 6); } else { /* Major upsize; resize up to exact size */ - alloc = size + 1; + alloc = size; } if (alloc > ((size_t)-1) / sizeof(char)) goto overflow; - new_buf = (char *)PyMem_Realloc(self->buf, alloc * sizeof(char)); - if (new_buf == NULL) { - PyErr_NoMemory(); - return -1; + + if (SHARED_BUF(self)) { + if (unshare_buffer(self, alloc) < 0) + return -1; } - self->buf_size = alloc; - self->buf = new_buf; + else { + if (_PyBytes_Resize(&self->buf, alloc) < 0) + return -1; + } return 0; @@ -129,10 +154,14 @@ assert(self->pos >= 0); assert(len >= 0); - if ((size_t)self->pos + len > self->buf_size) { + if ((size_t)self->pos + len > PyBytes_GET_SIZE(self->buf)) { if (resize_buffer(self, (size_t)self->pos + len) < 0) return -1; } + else if (SHARED_BUF(self)) { + if (unshare_buffer(self, self->string_size) < 0) + return -1; + } if (self->pos > self->string_size) { /* In case of overseek, pad with null bytes the buffer region between @@ -143,13 +172,13 @@ | | <--to pad-->|<---to write---> | 0 buf position */ - memset(self->buf + self->string_size, '\0', + memset(PyBytes_AS_STRING(self->buf) + self->string_size, '\0', (self->pos - self->string_size) * sizeof(char)); } /* Copy the data to the internal buffer, overwriting some of the existing data if self->pos < self->string_size. */ - memcpy(self->buf + self->pos, bytes, len); + memcpy(PyBytes_AS_STRING(self->buf) + self->pos, bytes, len); self->pos += len; /* Set the new length of the internal string if it has changed. 
*/ @@ -231,7 +260,22 @@ bytesio_getvalue(bytesio *self) { CHECK_CLOSED(self); - return PyBytes_FromStringAndSize(self->buf, self->string_size); + if (self->string_size <= 1 || self->exports > 0) + return PyBytes_FromStringAndSize(PyBytes_AS_STRING(self->buf), + self->string_size); + + if (self->string_size != PyBytes_GET_SIZE(self->buf)) { + if (SHARED_BUF(self)) { + if (unshare_buffer(self, self->string_size) < 0) + return NULL; + } + else { + if (_PyBytes_Resize(&self->buf, self->string_size) < 0) + return NULL; + } + } + Py_INCREF(self->buf); + return self->buf; } PyDoc_STRVAR(isatty_doc, @@ -299,7 +343,15 @@ } assert(self->buf != NULL); - output = self->buf + self->pos; + if (size > 1 && + self->pos == 0 && size == PyBytes_GET_SIZE(self->buf) && + self->exports == 0) { + self->pos += size; + Py_INCREF(self->buf); + return self->buf; + } + + output = PyBytes_AS_STRING(self->buf) + self->pos; self->pos += size; return PyBytes_FromStringAndSize(output, size); @@ -359,14 +411,18 @@ return NULL; } - n = get_line(self, &output); + n = scan_eol(self, size); - if (size >= 0 && size < n) { - size = n - size; - n -= size; - self->pos -= size; + if (n > 1 && + self->pos == 0 && n == PyBytes_GET_SIZE(self->buf) && + self->exports == 0) { + self->pos += n; + Py_INCREF(self->buf); + return self->buf; } + output = PyBytes_AS_STRING(self->buf) + self->pos; + self->pos += n; return PyBytes_FromStringAndSize(output, n); } @@ -410,7 +466,9 @@ if (!result) return NULL; - while ((n = get_line(self, &output)) != 0) { + output = PyBytes_AS_STRING(self->buf) + self->pos; + while ((n = scan_eol(self, -1)) != 0) { + self->pos += n; line = PyBytes_FromStringAndSize(output, n); if (!line) goto on_error; @@ -422,6 +480,7 @@ size += n; if (maxsize > 0 && size >= maxsize) break; + output += n; } return result; @@ -455,7 +514,7 @@ len = 0; } - memcpy(raw_buffer, self->buf + self->pos, len); + memcpy(raw_buffer, PyBytes_AS_STRING(self->buf) + self->pos, len); assert(self->pos + len < 
PY_SSIZE_T_MAX); assert(len >= 0); self->pos += len; @@ -514,16 +573,26 @@ static PyObject * bytesio_iternext(bytesio *self) { - char *next; + const char *next; Py_ssize_t n; CHECK_CLOSED(self); - n = get_line(self, &next); + n = scan_eol(self, -1); - if (!next || n == 0) + if (n == 0) return NULL; + if (n > 1 && + self->pos == 0 && n == PyBytes_GET_SIZE(self->buf) && + self->exports == 0) { + self->pos += n; + Py_INCREF(self->buf); + return self->buf; + } + + next = PyBytes_AS_STRING(self->buf) + self->pos; + self->pos += n; return PyBytes_FromStringAndSize(next, n); } @@ -655,10 +724,7 @@ static PyObject * bytesio_close(bytesio *self) { - if (self->buf != NULL) { - PyMem_Free(self->buf); - self->buf = NULL; - } + Py_CLEAR(self->buf); Py_RETURN_NONE; } @@ -788,10 +854,7 @@ "deallocated BytesIO object has exported buffers"); PyErr_Print(); } - if (self->buf != NULL) { - PyMem_Free(self->buf); - self->buf = NULL; - } + Py_CLEAR(self->buf); Py_CLEAR(self->dict); if (self->weakreflist != NULL) PyObject_ClearWeakRefs((PyObject *) self); @@ -811,7 +874,7 @@ /* tp_alloc initializes all the fields to zero. So we don't have to initialize them here. 
*/ - self->buf = (char *)PyMem_Malloc(0); + self->buf = PyBytes_FromStringAndSize(NULL, 0); if (self->buf == NULL) { Py_DECREF(self); return PyErr_NoMemory(); @@ -834,13 +897,26 @@ self->string_size = 0; self->pos = 0; + if (self->exports > 0) { + PyErr_SetString(PyExc_BufferError, + "Existing exports of data: object cannot be re-sized"); + return -1; + } if (initvalue && initvalue != Py_None) { - PyObject *res; - res = bytesio_write(self, initvalue); - if (res == NULL) - return -1; - Py_DECREF(res); - self->pos = 0; + if (PyBytes_CheckExact(initvalue)) { + Py_INCREF(initvalue); + Py_XDECREF(self->buf); + self->buf = initvalue; + self->string_size = PyBytes_GET_SIZE(initvalue); + } + else { + PyObject *res; + res = bytesio_write(self, initvalue); + if (res == NULL) + return -1; + Py_DECREF(res); + self->pos = 0; + } } return 0; @@ -852,8 +928,8 @@ Py_ssize_t res; res = sizeof(bytesio); - if (self->buf) - res += self->buf_size; + if (self->buf && !SHARED_BUF(self)) + res += _PySys_GetSizeOf(self->buf); return PyLong_FromSsize_t(res); } @@ -963,11 +1039,16 @@ { int ret; bytesio *b = (bytesio *) obj->source; + if (SHARED_BUF(b)) { + if (unshare_buffer(b, b->string_size) < 0) + return -1; + } if (view == NULL) { b->exports++; return 0; } - ret = PyBuffer_FillInfo(view, (PyObject*)obj, b->buf, b->string_size, + ret = PyBuffer_FillInfo(view, (PyObject*)obj, + PyBytes_AS_STRING(b->buf), b->string_size, 0, flags); if (ret >= 0) { b->exports++; diff -r 89873f3b18fd Python/sysmodule.c --- a/Python/sysmodule.c Wed Nov 06 20:25:50 2013 -0800 +++ b/Python/sysmodule.c Thu Nov 07 19:28:33 2013 +0200 @@ -846,30 +846,17 @@ } #endif /* USE_MALLOPT */ -static PyObject * -sys_getsizeof(PyObject *self, PyObject *args, PyObject *kwds) +size_t +_PySys_GetSizeOf(PyObject *o) { PyObject *res = NULL; - static PyObject *gc_head_size = NULL; - static char *kwlist[] = {"object", "default", 0}; - PyObject *o, *dflt = NULL; PyObject *method; + size_t size; _Py_IDENTIFIER(__sizeof__); - if 
(!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:getsizeof", - kwlist, &o, &dflt)) - return NULL; - - /* Initialize static variable for GC head size */ - if (gc_head_size == NULL) { - gc_head_size = PyLong_FromSsize_t(sizeof(PyGC_Head)); - if (gc_head_size == NULL) - return NULL; - } - /* Make sure the type is initialized. float gets initialized late */ if (PyType_Ready(Py_TYPE(o)) < 0) - return NULL; + return (size_t)-1; method = _PyObject_LookupSpecial(o, &PyId___sizeof__); if (method == NULL) { @@ -883,24 +870,45 @@ Py_DECREF(method); } - /* Has a default value been given */ - if ((res == NULL) && (dflt != NULL) && - PyErr_ExceptionMatches(PyExc_TypeError)) - { - PyErr_Clear(); - Py_INCREF(dflt); - return dflt; - } - else if (res == NULL) - return res; + if (res == NULL) + return (size_t)-1; + + size = PyLong_AsSize_t(res); + Py_DECREF(res); + if (size == (size_t)-1 && PyErr_Occurred()) + return (size_t)-1; /* add gc_head size */ - if (PyObject_IS_GC(o)) { - PyObject *tmp = res; - res = PyNumber_Add(tmp, gc_head_size); - Py_DECREF(tmp); + if (PyObject_IS_GC(o)) + size += sizeof(PyGC_Head); + return size; +} + +static PyObject * +sys_getsizeof(PyObject *self, PyObject *args, PyObject *kwds) +{ + static char *kwlist[] = {"object", "default", 0}; + size_t size; + PyObject *o, *dflt = NULL; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:getsizeof", + kwlist, &o, &dflt)) + return NULL; + + size = _PySys_GetSizeOf(o); + + if (size == (size_t)-1 && PyErr_Occurred()) { + /* Has a default value been given */ + if (dflt != NULL && PyErr_ExceptionMatches(PyExc_TypeError)) { + PyErr_Clear(); + Py_INCREF(dflt); + return dflt; + } + else + return NULL; } - return res; + + return PyLong_FromSize_t(size); } PyDoc_STRVAR(getsizeof_doc,