diff --git a/Doc/library/io.rst b/Doc/library/io.rst --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -459,10 +459,11 @@ .. method:: read1(size=-1) - Read and return up to *size* bytes, with at most one call to the underlying - raw stream's :meth:`~RawIOBase.read` method. This can be useful if you - are implementing your own buffering on top of a :class:`BufferedIOBase` - object. + Read and return up to *size* bytes, with at most one call to the + underlying raw stream's :meth:`~RawIOBase.read` (or + :meth:`~RawIOBase.readinto`) method. This can be useful if you + are implementing your own buffering on top of a + :class:`BufferedIOBase` object. .. method:: readinto(b) @@ -475,6 +476,18 @@ A :exc:`BlockingIOError` is raised if the underlying raw stream is in non blocking-mode, and has no data available at the moment. + .. method:: readinto1(b) + + Read up to ``len(b)`` bytes into bytearray *b*, ,using at most + one call to the underlying raw stream's :meth:`~RawIOBase.read` + (or :meth:`~RawIOBase.readinto`) method. Return the number of + bytes read. + + A :exc:`BlockingIOError` is raised if the underlying raw stream is in + non blocking-mode, and has no data available at the moment. + + .. versionadded:: 3.5 + .. method:: write(b) Write the given :class:`bytes` or :class:`bytearray` object, *b* and @@ -590,6 +603,11 @@ In :class:`BytesIO`, this is the same as :meth:`read`. + .. method:: readinto1() + + In :class:`BytesIO`, this is the same as :meth:`readinto`. + + .. versionadded:: 3.5 .. class:: BufferedReader(raw, buffer_size=DEFAULT_BUFFER_SIZE) diff --git a/Lib/_pyio.py b/Lib/_pyio.py --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -6,6 +6,7 @@ import abc import codecs import errno +import array # Import _thread instead of threading to reduce startup cost try: from _thread import allocate_lock as Lock @@ -655,8 +656,26 @@ Raises BlockingIOError if the underlying raw stream has no data at the moment. """ + + return self._readinto(b, read1=False) + + def readinto1(self, b): + """Read up to len(b) bytes into *b*, using at most one system call + + Returns an int representing the number of bytes read (0 for EOF). + + Raises BlockingIOError if the underlying raw stream has no + data at the moment. + """ + + return self._readinto(b, read1=True) + + def _readinto(self, b, read1): # XXX This ought to work with anything that supports the buffer API - data = self.read(len(b)) + if read1: + data = self.read1(len(b)) + else: + data = self.read(len(b)) n = len(data) try: b[:n] = data @@ -1058,6 +1077,82 @@ return self._read_unlocked( min(size, len(self._read_buf) - self._read_pos)) + # Implementing readinto() and readinto1() is not strictly necessary (we + # could rely on the base class that provides an implementation in terms of + # read() and read1()). We do ai anyway to keep the _pyio implementation + # similar to the io implementation (which implements the methods for + # performance reasons). + def readinto(self, buf): + """Read data into *buf*.""" + + # This function ought to work for any *buf* implementing the memoryview + # protocol, but the Python implementation is limited to bytearray + # objects. However, since BufferedIOBase contains a hardcoded + # special-case for array objects with format 'b', we special-case this + # situation here as well in order to not break backwards-compatibility. + if isinstance(buf, array.array): + buf2 = bytearray(buf.tobytes()) + len_ = self._readinto(buf2, read1=False) + buf[:len_] = array.array('b', buf2) + return len_ + + if not isinstance(buf, bytearray): + raise TypeError('_pyio.readinto() is limited to bytearray objects') + + return self._readinto(buf, read1=False) + + def readinto1(self, buf): + """Read data into *buf* with at most one system call.""" + + if not isinstance(buf, bytearray): + raise TypeError('_pyio.readinto1() is limited to bytearray objects') + + return self._readinto(buf, read1=True) + + def _readinto(self, buf, read1): + """Read data into *buf* with at most one system call.""" + + if len(buf) == 0: + return 0 + + written = 0 + with self._read_lock: + while written < len(buf): + + # First try to read from internal buffer + avail = min(len(self._read_buf) - self._read_pos, len(buf)) + if avail: + buf[written:written+avail] = \ + self._read_buf[self._read_pos:self._read_pos+avail] + self._read_pos += avail + written += avail + if written == len(buf): + break + + # If remaining space in callers buffer is larger than + # internal buffer, read directly into callers buffer + if len(buf) - written > self.buffer_size: + # If we don't use a memoryview, slicing buf will create + # a new object + if not isinstance(buf, memoryview): + buf = memoryview(buf) + n = self.raw.readinto(buf[written:]) + if not n: + break # eof + written += n + + # Otherwise refill internal buffer - unless we're + # in read1 mode and already got some data + elif not (read1 and written): + if not self._peek_unlocked(1): + break # eof + + # In readinto1 mode, return as soon as we have some data + if read1 and written: + break + + return written + def tell(self): return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos @@ -1207,6 +1302,9 @@ def read1(self, size): return self.reader.read1(size) + def readinto1(self, b): + return self.reader.readinto1(b) + def readable(self): return self.reader.readable() @@ -1289,6 +1387,10 @@ self.flush() return BufferedReader.read1(self, size) + def readinto1(self, b): + self.flush() + return BufferedReader.readinto1(self, b) + def write(self, b): if self._read_buf: # Undo readahead diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -911,6 +911,29 @@ self.assertEqual(bufio.readinto(b), 1) self.assertEqual(b, b"cb") + def test_readinto1(self): + buffer_size = 10 + rawio = self.MockRawIO((b"abc", b"de", b"fgh", b"jkl")) + bufio = self.tp(rawio, buffer_size=buffer_size) + b = bytearray(2) + self.assertEqual(bufio.peek(3), b'abc') + self.assertEqual(rawio._reads, 1) + self.assertEqual(bufio.readinto1(b), 2) + self.assertEqual(b, b"ab") + self.assertEqual(rawio._reads, 1) + self.assertEqual(bufio.readinto1(b), 1) + self.assertEqual(b[:1], b"c") + self.assertEqual(rawio._reads, 1) + self.assertEqual(bufio.readinto1(b), 2) + self.assertEqual(b, b"de") + self.assertEqual(rawio._reads, 2) + b = bytearray(2*buffer_size) + self.assertEqual(bufio.peek(3), b'fgh') + self.assertEqual(rawio._reads, 3) + self.assertEqual(bufio.readinto1(b), 6) + self.assertEqual(b[:6], b"fghjkl") + self.assertEqual(rawio._reads, 4) + def test_readlines(self): def bufio(): rawio = self.MockRawIO((b"abc\n", b"d\n", b"ef")) @@ -2985,6 +3008,8 @@ self.assertRaises(ValueError, f.readall) if hasattr(f, "readinto"): self.assertRaises(ValueError, f.readinto, bytearray(1024)) + if hasattr(f, "readinto1"): + self.assertRaises(ValueError, f.readinto1, bytearray(1024)) self.assertRaises(ValueError, f.readline) self.assertRaises(ValueError, f.readlines) self.assertRaises(ValueError, f.seek, 0) diff --git a/Modules/_io/bufferedio.c b/Modules/_io/bufferedio.c --- a/Modules/_io/bufferedio.c +++ b/Modules/_io/bufferedio.c @@ -24,6 +24,7 @@ _Py_IDENTIFIER(read1); _Py_IDENTIFIER(readable); _Py_IDENTIFIER(readinto); +_Py_IDENTIFIER(readinto1); _Py_IDENTIFIER(writable); _Py_IDENTIFIER(write); @@ -47,17 +48,21 @@ ); static PyObject * -bufferediobase_readinto(PyObject *self, PyObject *args) +_bufferediobase_readinto_generic(PyObject *self, PyObject *args, char readinto1) { Py_buffer buf; Py_ssize_t len; PyObject *data; - if (!PyArg_ParseTuple(args, "w*:readinto", &buf)) { + if (!PyArg_ParseTuple(args, + readinto1 ? "w*:readinto1" : "w*:readinto", + &buf)) { return NULL; } - data = _PyObject_CallMethodId(self, &PyId_read, "n", buf.len); + data = _PyObject_CallMethodId(self, + readinto1 ? &PyId_read1 : &PyId_read, + "n", buf.len); if (data == NULL) goto error; @@ -89,6 +94,18 @@ } static PyObject * +bufferediobase_readinto(PyObject *self, PyObject *args) +{ + return _bufferediobase_readinto_generic(self, args, 0); +} + +static PyObject * +bufferediobase_readinto1(PyObject *self, PyObject *args) +{ + return _bufferediobase_readinto_generic(self, args, 1); +} + +static PyObject * bufferediobase_unsupported(const char *message) { _PyIO_State *state = IO_STATE(); @@ -167,6 +184,7 @@ {"read", bufferediobase_read, METH_VARARGS, bufferediobase_read_doc}, {"read1", bufferediobase_read1, METH_VARARGS, bufferediobase_read1_doc}, {"readinto", bufferediobase_readinto, METH_VARARGS, NULL}, + {"readinto1", bufferediobase_readinto1, METH_VARARGS, NULL}, {"write", bufferediobase_write, METH_VARARGS, bufferediobase_write_doc}, {NULL, NULL} }; @@ -988,7 +1006,7 @@ } static PyObject * -buffered_readinto(buffered *self, PyObject *args) +_buffered_readinto_generic(buffered *self, PyObject *args, char readinto1) { Py_buffer buf; Py_ssize_t n, written = 0, remaining; @@ -996,7 +1014,9 @@ CHECK_INITIALIZED(self) - if (!PyArg_ParseTuple(args, "w*:readinto", &buf)) + if (!PyArg_ParseTuple(args, + readinto1 ? "w*:readinto1" : "w*:readinto", + &buf)) return NULL; n = Py_SAFE_DOWNCAST(READAHEAD(self), Py_off_t, Py_ssize_t); @@ -1034,7 +1054,10 @@ n = _bufferedreader_raw_read(self, (char *) buf.buf + written, remaining); } - else { + + /* In readinto1 mode, we do not want to fill the internal + buffer if we already have some data to return */ + else if (!(readinto1 && written)) { n = _bufferedreader_fill_buffer(self); if (n > 0) { if (n > remaining) @@ -1045,6 +1068,9 @@ continue; /* short circuit */ } } + else + n = 0; + if (n == 0 || (n == -2 && written > 0)) break; if (n < 0) { @@ -1054,6 +1080,12 @@ } goto end; } + + /* At most one read in readinto1 mode */ + if (readinto1) { + written += n; + break; + } } res = PyLong_FromSsize_t(written); @@ -1065,6 +1097,19 @@ } static PyObject * +buffered_readinto(buffered *self, PyObject *args) +{ + return _buffered_readinto_generic(self, args, 0); +} + +static PyObject * +buffered_readinto1(buffered *self, PyObject *args) +{ + return _buffered_readinto_generic(self, args, 1); +} + + +static PyObject * _buffered_readline(buffered *self, Py_ssize_t limit) { PyObject *res = NULL; @@ -1749,6 +1794,7 @@ {"peek", (PyCFunction)buffered_peek, METH_VARARGS}, {"read1", (PyCFunction)buffered_read1, METH_VARARGS}, {"readinto", (PyCFunction)buffered_readinto, METH_VARARGS}, + {"readinto1", (PyCFunction)buffered_readinto1, METH_VARARGS}, {"readline", (PyCFunction)buffered_readline, METH_VARARGS}, {"seek", (PyCFunction)buffered_seek, METH_VARARGS}, {"tell", (PyCFunction)buffered_tell, METH_NOARGS}, @@ -2348,6 +2394,12 @@ } static PyObject * +bufferedrwpair_readinto1(rwpair *self, PyObject *args) +{ + return _forward_call(self->reader, &PyId_readinto1, args); +} + +static PyObject * bufferedrwpair_write(rwpair *self, PyObject *args) { return _forward_call(self->writer, &PyId_write, args); @@ -2412,6 +2464,7 @@ {"peek", (PyCFunction)bufferedrwpair_peek, METH_VARARGS}, {"read1", (PyCFunction)bufferedrwpair_read1, METH_VARARGS}, {"readinto", (PyCFunction)bufferedrwpair_readinto, METH_VARARGS}, + {"readinto1", (PyCFunction)bufferedrwpair_readinto1, METH_VARARGS}, {"write", (PyCFunction)bufferedrwpair_write, METH_VARARGS}, {"flush", (PyCFunction)bufferedrwpair_flush, METH_NOARGS}, @@ -2560,6 +2613,7 @@ {"read", (PyCFunction)buffered_read, METH_VARARGS}, {"read1", (PyCFunction)buffered_read1, METH_VARARGS}, {"readinto", (PyCFunction)buffered_readinto, METH_VARARGS}, + {"readinto1", (PyCFunction)buffered_readinto1, METH_VARARGS}, {"readline", (PyCFunction)buffered_readline, METH_VARARGS}, {"peek", (PyCFunction)buffered_peek, METH_VARARGS}, {"write", (PyCFunction)bufferedwriter_write, METH_VARARGS},