diff -r 3983ca1a9897 Include/cStringIO.h --- a/Include/cStringIO.h Mon Jan 28 11:00:58 2013 +0200 +++ b/Include/cStringIO.h Mon Jan 28 13:09:30 2013 +0200 @@ -32,22 +32,22 @@ /* Read a string from an input object. If the last argument is -1, the remainder will be read. */ - int(*cread)(PyObject *, char **, Py_ssize_t); + Py_ssize_t(*cread)(PyObject *, char **, Py_ssize_t); /* Read a line from an input object. Returns the length of the read - line as an int and a pointer inside the object buffer as char** (so + line as a Py_ssize_t and a pointer inside the object buffer as char** (so the caller doesn't have to provide its own buffer as destination). */ - int(*creadline)(PyObject *, char **); + Py_ssize_t(*creadline)(PyObject *, char **); /* Write a string to an output object*/ - int(*cwrite)(PyObject *, const char *, Py_ssize_t); + Py_ssize_t(*cwrite)(PyObject *, const char *, Py_ssize_t); /* Get the output object as a Python string (returns new reference). */ PyObject *(*cgetvalue)(PyObject *); /* Create a new output object */ - PyObject *(*NewOutput)(int); + PyObject *(*NewOutput)(Py_ssize_t); /* Create an input object from a Python string (copies the Python string reference). diff -r 3983ca1a9897 Lib/test/test_StringIO.py --- a/Lib/test/test_StringIO.py Mon Jan 28 11:00:58 2013 +0200 +++ b/Lib/test/test_StringIO.py Mon Jan 28 13:09:30 2013 +0200 @@ -5,6 +5,7 @@ import cStringIO import types import array +import sys from test import test_support @@ -105,6 +106,24 @@ self._fp.close() self.assertRaises(ValueError, self._fp.getvalue) + # In worst case cStringIO requires 2 + 1 + 1/2 + 1/2**2 + ... = 4 + # bytes per input character. + @test_support.bigmemtest(test_support._2G, memuse=4) + def test_writes_large(self, size): + s = 'x' * 2**26 # 64 MiB + f = self.MODULE.StringIO() + n = size + while n > len(s): + f.write(s) + n -= len(s) + s = None + if n: + s = 'x' * n + f.write(s) + s = None + self.assertEqual(len(f.getvalue()), size) + + class TestStringIO(TestGenericStringIO): MODULE = StringIO diff -r 3983ca1a9897 Modules/cStringIO.c --- a/Modules/cStringIO.c Mon Jan 28 11:00:58 2013 +0200 +++ b/Modules/cStringIO.c Mon Jan 28 13:09:30 2013 +0200 @@ -1,4 +1,5 @@ +#define PY_SSIZE_T_CLEAN #include "Python.h" #include "import.h" #include "cStringIO.h" @@ -158,7 +159,7 @@ PyDoc_STRVAR(IO_read__doc__, "read([s]) -- Read s characters, or the rest of the string"); -static int +static Py_ssize_t IO_cread(PyObject *self, char **output, Py_ssize_t n) { Py_ssize_t l; @@ -190,7 +191,7 @@ PyDoc_STRVAR(IO_readline__doc__, "readline() -- Read one line"); -static int +static Py_ssize_t IO_creadline(PyObject *self, char **output) { char *n, *s; Py_ssize_t l; @@ -211,16 +212,16 @@ assert(IOOOBJECT(self)->string_size >= 0); ((IOobject*)self)->pos += l; - return (int)l; + return l; } static PyObject * IO_readline(IOobject *self, PyObject *args) { - int n, m=-1; + Py_ssize_t n, m=-1; char *output; if (args) - if (!PyArg_ParseTuple(args, "|i:readline", &m)) return NULL; + if (!PyArg_ParseTuple(args, "|n:readline", &m)) return NULL; if( (n=IO_creadline((PyObject*)self,&output)) < 0) return NULL; if (m >= 0 && m < n) { @@ -236,12 +237,12 @@ static PyObject * IO_readlines(IOobject *self, PyObject *args) { - int n; + Py_ssize_t n; char *output; PyObject *result, *line; - int hint = 0, length = 0; + Py_ssize_t hint = 0, length = 0; - if (!PyArg_ParseTuple(args, "|i:readlines", &hint)) return NULL; + if (!PyArg_ParseTuple(args, "|n:readlines", &hint)) return NULL; result = PyList_New(0); if (!result) @@ -376,32 +377,37 @@ "\n\nNote (hack:) writing None resets the buffer"); -static int -O_cwrite(PyObject *self, const char *c, Py_ssize_t l) { - Py_ssize_t newl; +static Py_ssize_t +O_cwrite(PyObject *self, const char *c, Py_ssize_t len) { + Py_ssize_t newpos; Oobject *oself; char *newbuf; if (!IO__opencheck(IOOOBJECT(self))) return -1; oself = (Oobject *)self; - newl = oself->pos+l; - if (newl >= oself->buf_size) { - oself->buf_size *= 2; - if (oself->buf_size <= newl) { - assert(newl + 1 < INT_MAX); - oself->buf_size = (int)(newl+1); + assert(len >= 0); + if (oself->pos >= PY_SSIZE_T_MAX - len) { + PyErr_SetString(PyExc_OverflowError, + "new position too large"); + return -1; + } + newpos = oself->pos + len; + if (newpos >= oself->buf_size) { + size_t newsize = oself->buf_size; + newsize *= 2; + if (newsize <= (size_t)newpos || newsize > PY_SSIZE_T_MAX) { + assert(newpos < PY_SSIZE_T_MAX - 1); + newsize = newpos + 1; } - newbuf = (char*)realloc(oself->buf, oself->buf_size); + newbuf = (char*)realloc(oself->buf, newsize); if (!newbuf) { PyErr_SetString(PyExc_MemoryError,"out of memory"); - free(oself->buf); - oself->buf = 0; - oself->buf_size = oself->pos = 0; return -1; - } + } + oself->buf_size = (Py_ssize_t)newsize; oself->buf = newbuf; - } + } if (oself->string_size < oself->pos) { /* In case of overseek, pad with null bytes the buffer region between @@ -416,22 +422,21 @@ (oself->pos - oself->string_size) * sizeof(char)); } - memcpy(oself->buf+oself->pos,c,l); + memcpy(oself->buf + oself->pos, c, len); - assert(oself->pos + l < INT_MAX); - oself->pos += (int)l; + oself->pos = newpos; if (oself->string_size < oself->pos) { oself->string_size = oself->pos; } - return (int)l; + return len; } static PyObject * O_write(Oobject *self, PyObject *args) { char *c; - int l; + Py_ssize_t l; if (!PyArg_ParseTuple(args, "t#:write", &c, &l)) return NULL; @@ -561,7 +566,7 @@ }; static PyObject * -newOobject(int size) { +newOobject(Py_ssize_t size) { Oobject *self; self = PyObject_New(Oobject, &Otype);