diff --git a/Doc/library/io.rst b/Doc/library/io.rst --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -465,10 +465,11 @@ .. method:: read1(size=-1) - Read and return up to *size* bytes, with at most one call to the underlying - raw stream's :meth:`~RawIOBase.read` method. This can be useful if you - are implementing your own buffering on top of a :class:`BufferedIOBase` - object. + Read and return up to *size* bytes, with at most one call to the + underlying raw stream's :meth:`~RawIOBase.read` (or + :meth:`~RawIOBase.readinto`) method. This can be useful if you + are implementing your own buffering on top of a + :class:`BufferedIOBase` object. .. method:: readinto(b) @@ -481,6 +482,18 @@ A :exc:`BlockingIOError` is raised if the underlying raw stream is in non blocking-mode, and has no data available at the moment. + .. method:: readinto1(b) + + Read up to ``len(b)`` bytes into bytearray *b*, ,using at most + one call to the underlying raw stream's :meth:`~RawIOBase.read` + (or :meth:`~RawIOBase.readinto`) method. Return the number of + bytes read. + + A :exc:`BlockingIOError` is raised if the underlying raw stream is in + non blocking-mode, and has no data available at the moment. + + .. versionadded:: 3.5 + .. method:: write(b) Write the given :class:`bytes` or :class:`bytearray` object, *b* and @@ -596,6 +609,11 @@ In :class:`BytesIO`, this is the same as :meth:`read`. + .. method:: readinto1() + + In :class:`BytesIO`, this is the same as :meth:`readinto`. + + .. versionadded:: 3.5 .. class:: BufferedReader(raw, buffer_size=DEFAULT_BUFFER_SIZE) diff --git a/Lib/_pyio.py b/Lib/_pyio.py --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -6,6 +6,7 @@ import abc import codecs import errno +import array # Import _thread instead of threading to reduce startup cost try: from _thread import allocate_lock as Lock @@ -662,16 +663,33 @@ Raises BlockingIOError if the underlying raw stream has no data at the moment. """ - # XXX This ought to work with anything that supports the buffer API - data = self.read(len(b)) + + return self._readinto(b, read1=False) + + def readinto1(self, b): + """Read up to len(b) bytes into *b*, using at most one system call + + Returns an int representing the number of bytes read (0 for EOF). + + Raises BlockingIOError if the underlying raw stream has no + data at the moment. + """ + + return self._readinto(b, read1=True) + + def _readinto(self, b, read1): + if not isinstance(b, memoryview): + b = memoryview(b) + b = b.cast('B') + + if read1: + data = self.read1(len(b)) + else: + data = self.read(len(b)) n = len(data) - try: - b[:n] = data - except TypeError as err: - import array - if not isinstance(b, array.array): - raise err - b[:n] = array.array('b', data) + + b[:n] = data + return n def write(self, b): @@ -1065,6 +1083,58 @@ return self._read_unlocked( min(size, len(self._read_buf) - self._read_pos)) + # Implementing readinto() and readinto1() is not strictly necessary (we + # could rely on the base class that provides an implementation in terms of + # read() and read1()). We do it anyway to keep the _pyio implementation + # similar to the io implementation (which implements the methods for + # performance reasons). + def _readinto(self, buf, read1): + """Read data into *buf* with at most one system call.""" + + if len(buf) == 0: + return 0 + + # Need to create a memoryview object of type 'b', otherwise + # we may not be able to assign bytes to it, and slicing it + # would create a new object. + if not isinstance(buf, memoryview): + buf = memoryview(buf) + buf = buf.cast('B') + + written = 0 + with self._read_lock: + while written < len(buf): + + # First try to read from internal buffer + avail = min(len(self._read_buf) - self._read_pos, len(buf)) + if avail: + buf[written:written+avail] = \ + self._read_buf[self._read_pos:self._read_pos+avail] + self._read_pos += avail + written += avail + if written == len(buf): + break + + # If remaining space in callers buffer is larger than + # internal buffer, read directly into callers buffer + if len(buf) - written > self.buffer_size: + n = self.raw.readinto(buf[written:]) + if not n: + break # eof + written += n + + # Otherwise refill internal buffer - unless we're + # in read1 mode and already got some data + elif not (read1 and written): + if not self._peek_unlocked(1): + break # eof + + # In readinto1 mode, return as soon as we have some data + if read1 and written: + break + + return written + def tell(self): return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos @@ -1214,6 +1284,9 @@ def read1(self, size): return self.reader.read1(size) + def readinto1(self, b): + return self.reader.readinto1(b) + def readable(self): return self.reader.readable() @@ -1296,6 +1369,10 @@ self.flush() return BufferedReader.read1(self, size) + def readinto1(self, b): + self.flush() + return BufferedReader.readinto1(self, b) + def write(self, b): if self._read_buf: # Undo readahead diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -943,6 +943,71 @@ self.assertEqual(bufio.readinto(b), 1) self.assertEqual(b, b"cb") + def test_readinto1(self): + buffer_size = 10 + rawio = self.MockRawIO((b"abc", b"de", b"fgh", b"jkl")) + bufio = self.tp(rawio, buffer_size=buffer_size) + b = bytearray(2) + self.assertEqual(bufio.peek(3), b'abc') + self.assertEqual(rawio._reads, 1) + self.assertEqual(bufio.readinto1(b), 2) + self.assertEqual(b, b"ab") + self.assertEqual(rawio._reads, 1) + self.assertEqual(bufio.readinto1(b), 1) + self.assertEqual(b[:1], b"c") + self.assertEqual(rawio._reads, 1) + self.assertEqual(bufio.readinto1(b), 2) + self.assertEqual(b, b"de") + self.assertEqual(rawio._reads, 2) + b = bytearray(2*buffer_size) + self.assertEqual(bufio.peek(3), b'fgh') + self.assertEqual(rawio._reads, 3) + self.assertEqual(bufio.readinto1(b), 6) + self.assertEqual(b[:6], b"fghjkl") + self.assertEqual(rawio._reads, 4) + + def test_readinto_array(self): + buffer_size = 60 + data = b"a" * 26 + rawio = self.MockRawIO((data,)) + bufio = self.tp(rawio, buffer_size=buffer_size) + + # Create an array with element size > 1 byte + b = array.array('i', b'x' * 32) + assert len(b) != 16 + + # Read into it. We should get as many *bytes* as we can fit into b + # (which is more than the number of elements) + n = bufio.readinto(b) + self.assertGreater(n, len(b)) + + # Check that old contents of b are preserved + bm = memoryview(b).cast('B') + self.assertLess(n, len(bm)) + self.assertEqual(bm[:n], data[:n]) + self.assertEqual(bm[n:], b'x' * (len(bm[n:]))) + + def test_readinto1_array(self): + buffer_size = 60 + data = b"a" * 26 + rawio = self.MockRawIO((data,)) + bufio = self.tp(rawio, buffer_size=buffer_size) + + # Create an array with element size > 1 byte + b = array.array('i', b'x' * 32) + assert len(b) != 16 + + # Read into it. We should get as many *bytes* as we can fit into b + # (which is more than the number of elements) + n = bufio.readinto1(b) + self.assertGreater(n, len(b)) + + # Check that old contents of b are preserved + bm = memoryview(b).cast('B') + self.assertLess(n, len(bm)) + self.assertEqual(bm[:n], data[:n]) + self.assertEqual(bm[n:], b'x' * (len(bm[n:]))) + def test_readlines(self): def bufio(): rawio = self.MockRawIO((b"abc\n", b"d\n", b"ef")) @@ -3050,6 +3115,8 @@ self.assertRaises(ValueError, f.readall) if hasattr(f, "readinto"): self.assertRaises(ValueError, f.readinto, bytearray(1024)) + if hasattr(f, "readinto1"): + self.assertRaises(ValueError, f.readinto1, bytearray(1024)) self.assertRaises(ValueError, f.readline) self.assertRaises(ValueError, f.readlines) self.assertRaises(ValueError, f.seek, 0) diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c --- a/Modules/_io/bufferedio.c +++ b/Modules/_io/bufferedio.c @@ -24,6 +24,7 @@ _Py_IDENTIFIER(read1); _Py_IDENTIFIER(readable); _Py_IDENTIFIER(readinto); +_Py_IDENTIFIER(readinto1); _Py_IDENTIFIER(writable); _Py_IDENTIFIER(write); @@ -47,17 +48,21 @@ ); static PyObject * -bufferediobase_readinto(PyObject *self, PyObject *args) +_bufferediobase_readinto_generic(PyObject *self, PyObject *args, char readinto1) { Py_buffer buf; Py_ssize_t len; PyObject *data; - if (!PyArg_ParseTuple(args, "w*:readinto", &buf)) { + if (!PyArg_ParseTuple(args, + readinto1 ? "w*:readinto1" : "w*:readinto", + &buf)) { return NULL; } - data = _PyObject_CallMethodId(self, &PyId_read, "n", buf.len); + data = _PyObject_CallMethodId(self, + readinto1 ? &PyId_read1 : &PyId_read, + "n", buf.len); if (data == NULL) goto error; @@ -89,6 +94,18 @@ } static PyObject * +bufferediobase_readinto(PyObject *self, PyObject *args) +{ + return _bufferediobase_readinto_generic(self, args, 0); +} + +static PyObject * +bufferediobase_readinto1(PyObject *self, PyObject *args) +{ + return _bufferediobase_readinto_generic(self, args, 1); +} + +static PyObject * bufferediobase_unsupported(const char *message) { _PyIO_State *state = IO_STATE(); @@ -167,6 +184,7 @@ {"read", bufferediobase_read, METH_VARARGS, bufferediobase_read_doc}, {"read1", bufferediobase_read1, METH_VARARGS, bufferediobase_read1_doc}, {"readinto", bufferediobase_readinto, METH_VARARGS, NULL}, + {"readinto1", bufferediobase_readinto1, METH_VARARGS, NULL}, {"write", bufferediobase_write, METH_VARARGS, bufferediobase_write_doc}, {NULL, NULL} }; @@ -989,7 +1007,7 @@ } static PyObject * -buffered_readinto(buffered *self, PyObject *args) +_buffered_readinto_generic(buffered *self, PyObject *args, char readinto1) { Py_buffer buf; Py_ssize_t n, written = 0, remaining; @@ -997,7 +1015,9 @@ CHECK_INITIALIZED(self) - if (!PyArg_ParseTuple(args, "w*:readinto", &buf)) + if (!PyArg_ParseTuple(args, + readinto1 ? "w*:readinto1" : "w*:readinto", + &buf)) return NULL; n = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t); @@ -1035,7 +1055,10 @@ n = _bufferedreader_raw_read(self, (char *) buf.buf + written, remaining); } - else { + + /* In readinto1 mode, we do not want to fill the internal + buffer if we already have some data to return */ + else if (!(readinto1 && written)) { n = _bufferedreader_fill_buffer(self); if (n > 0) { if (n > remaining) @@ -1046,6 +1069,9 @@ continue; /* short circuit */ } } + else + n = 0; + if (n == 0 || (n == -2 && written > 0)) break; if (n < 0) { @@ -1055,6 +1081,12 @@ } goto end; } + + /* At most one read in readinto1 mode */ + if (readinto1) { + written += n; + break; + } } res = PyLong_FromSsize_t(written); @@ -1066,6 +1098,19 @@ } static PyObject * +buffered_readinto(buffered *self, PyObject *args) +{ + return _buffered_readinto_generic(self, args, 0); +} + +static PyObject * +buffered_readinto1(buffered *self, PyObject *args) +{ + return _buffered_readinto_generic(self, args, 1); +} + + +static PyObject * _buffered_readline(buffered *self, Py_ssize_t limit) { PyObject *res = NULL; @@ -1750,6 +1795,7 @@ {"peek", (PyCFunction)buffered_peek, METH_VARARGS}, {"read1", (PyCFunction)buffered_read1, METH_VARARGS}, {"readinto", (PyCFunction)buffered_readinto, METH_VARARGS}, + {"readinto1", (PyCFunction)buffered_readinto1, METH_VARARGS}, {"readline", (PyCFunction)buffered_readline, METH_VARARGS}, {"seek", (PyCFunction)buffered_seek, METH_VARARGS}, {"tell", (PyCFunction)buffered_tell, METH_NOARGS}, @@ -2349,6 +2395,12 @@ } static PyObject * +bufferedrwpair_readinto1(rwpair *self, PyObject *args) +{ + return _forward_call(self->reader, &PyId_readinto1, args); +} + +static PyObject * bufferedrwpair_write(rwpair *self, PyObject *args) { return _forward_call(self->writer, &PyId_write, args); @@ -2413,6 +2465,7 @@ {"peek", (PyCFunction)bufferedrwpair_peek, METH_VARARGS}, {"read1", (PyCFunction)bufferedrwpair_read1, METH_VARARGS}, {"readinto", (PyCFunction)bufferedrwpair_readinto, METH_VARARGS}, + {"readinto1", (PyCFunction)bufferedrwpair_readinto1, METH_VARARGS}, {"write", (PyCFunction)bufferedrwpair_write, METH_VARARGS}, {"flush", (PyCFunction)bufferedrwpair_flush, METH_NOARGS}, @@ -2561,6 +2614,7 @@ {"read", (PyCFunction)buffered_read, METH_VARARGS}, {"read1", (PyCFunction)buffered_read1, METH_VARARGS}, {"readinto", (PyCFunction)buffered_readinto, METH_VARARGS}, + {"readinto1", (PyCFunction)buffered_readinto1, METH_VARARGS}, {"readline", (PyCFunction)buffered_readline, METH_VARARGS}, {"peek", (PyCFunction)buffered_peek, METH_VARARGS}, {"write", (PyCFunction)bufferedwriter_write, METH_VARARGS},