diff -r ec3ea80758a4 Doc/library/io.rst
--- a/Doc/library/io.rst	Wed Apr 16 01:22:52 2014 +0530
+++ b/Doc/library/io.rst	Tue Apr 15 13:21:52 2014 -0700
@@ -623,6 +623,13 @@
       If at least one byte is buffered, only buffered bytes are returned.
       Otherwise, one raw stream read call is made.
 
+   .. method:: readprevline([size])
+
+      Read backwards until newline or BOF and return a single ``bytes`` object.
+      If the stream is already at BOF, an empty ``bytes`` object is returned.
+
+      If *size* is specified, at most *size* bytes will be read.
+
 
 .. class:: BufferedWriter(raw, buffer_size=DEFAULT_BUFFER_SIZE)
 
diff -r ec3ea80758a4 Lib/_pyio.py
--- a/Lib/_pyio.py	Wed Apr 16 01:22:52 2014 +0530
+++ b/Lib/_pyio.py	Tue Apr 15 13:21:52 2014 -0700
@@ -500,6 +500,16 @@
                 break
         return bytes(res)
 
+    def readprevline(self, size=-1):
+        r"""Read the previous line of bytes from the stream.
+
+        If size is specified, at most size bytes will be read.
+        Size should be an int.
+
+        The line terminator is always b'\n' for binary files.
+        """
+        self._unsupported("readprevline")
+
     def __iter__(self):
         self._checkClosed()
         return self
@@ -1058,6 +1068,84 @@
             return self._read_unlocked(
                 min(size, len(self._read_buf) - self._read_pos))
 
+    def readprevline(self, size=-1):
+        """Read backwards to the previous newline or BOF; return bytes.
+
+        Returns an empty bytes object when already at BOF or when size
+        is 0.  size=None means no limit.  Raises ValueError if the file
+        is closed and TypeError if size is not an integer.
+        """
+        if self.closed:
+            raise ValueError("readprevline of closed file")
+        if size is None:
+            size = -1
+        elif not isinstance(size, int):
+            raise TypeError("size must be an integer")
+        # Hold the lock for the whole operation: it reads and replaces
+        # _read_buf/_read_pos.
+        with self._read_lock:
+            # Nothing to do for a zero limit or when the logical position
+            # is already 0 (BOF).
+            if size == 0 or (_BufferedIOMixin.tell(self)
+                             - len(self._read_buf) + self._read_pos) == 0:
+                return b""
+            n = (self._read_pos - size
+                 if size >= 0 and size < self._read_pos else 0)
+            if self._read_pos > 0:
+                idx = self._read_buf.rfind(b'\n', n, self._read_pos-1)
+                if idx < 0 and size <= self._read_pos:
+                    # Didn't find a match within our limit
+                    idx = n-1
+                # If the raw position is <= buffer_size we're at BOF
+                if (idx != -1 or
+                        _BufferedIOMixin.tell(self) <= self.buffer_size):
+                    res = self._read_buf[idx+1:self._read_pos]
+                    self._read_pos = idx+1
+                    return res
+
+            # Didn't find a match in the current buffer, rewind and keep
+            # looking
+            chunks = []
+            if self._read_pos > 0:
+                chunks.append(self._read_buf[:self._read_pos])
+                if size >= 0:
+                    size -= self._read_pos
+
+            while True:
+                try:
+                    # Seek the raw buffer back one chunk before the start
+                    # of the current chunk.
+                    seekby = min(_BufferedIOMixin.tell(self),
+                                 self.buffer_size + len(self._read_buf))
+                    _BufferedIOMixin.seek(self, -seekby, 1)
+                except OSError:
+                    # We've hit the BOF
+                    break
+
+                self._reset_read_buf()
+                # NOTE(review): a full buffer_size read is issued even when
+                # fewer bytes precede the current chunk, so chunks may
+                # overlap near BOF -- confirm.
+                self._read_buf = self.raw.read(self.buffer_size)
+
+                n = (self.buffer_size - size
+                     if size >= 0 and size < self.buffer_size else 0)
+                # If this is the first chunk, a newline in the last byte is
+                # not the one we stop at
+                end = self.buffer_size if chunks else self.buffer_size-1
+                idx = self._read_buf.rfind(b'\n', n, end)
+                # NOTE(review): with no newline found the whole chunk is
+                # taken, so size is not enforced across chunks -- confirm.
+                chunks.append(self._read_buf[idx+1:])
+                if (idx != -1 or
+                        _BufferedIOMixin.tell(self) <= self.buffer_size):
+                    self._read_pos = idx+1
+                    break
+
+            chunks.reverse()
+            res = b''.join(chunks)
+            return res
+
     def tell(self):
         return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos
 
diff -r ec3ea80758a4 Lib/test/test_io.py
--- a/Lib/test/test_io.py	Wed Apr 16 01:22:52 2014 +0530
+++ b/Lib/test/test_io.py	Tue Apr 15 13:21:52 2014 -0700
@@ -408,6 +408,21 @@
         with self.open(support.TESTFN, "r") as f:
             self.assertRaises(TypeError, f.readline, 5.3)
 
+    def test_readprevline(self):
+        with self.open(support.TESTFN, "wb") as f:
+            f.write(b"abc\ndef\nxyzzy\nfoo\x00bar\nanother line")
+        with self.open(support.TESTFN, "rb") as f:
+            f.seek(0, self.SEEK_END)
+            self.assertEqual(f.readprevline(None), b"another line")
+            self.assertEqual(f.readprevline(), b"foo\x00bar\n")
+            self.assertEqual(f.readprevline(4), b"zzy\n")
+            self.assertEqual(f.readprevline(2), b"xy")
+            self.assertEqual(f.readprevline(10), b"def\n")
+            self.assertEqual(f.readprevline(), b"abc\n")
+            # At BOF an empty bytes object is returned.
+            self.assertEqual(f.readprevline(), b"")
+            self.assertRaises(TypeError, f.readprevline, 5.3)
+
     def test_raw_bytes_io(self):
         f = self.BytesIO()
         self.write_ops(f)
diff -r ec3ea80758a4 Modules/_io/bufferedio.c
--- a/Modules/_io/bufferedio.c	Wed Apr 16 01:22:52 2014 +0530
+++ b/Modules/_io/bufferedio.c	Tue Apr 15 13:21:52 2014 -0700
@@ -1188,6 +1188,151 @@
 
 
 static PyObject *
+_buffered_readprevline(buffered *self, Py_ssize_t limit)
+{
+    /* Return the bytes between the previous newline (or BOF) and the
+     * current position and move the position back over them.  A negative
+     * limit means no cap on the number of bytes. */
+    PyObject *res = NULL;
+    PyObject *chunks = NULL;
+    Py_ssize_t n;
+    Py_off_t offset;
+    const char *s, *start;
+
+    CHECK_CLOSED(self, "readprevline of closed file")
+
+    /* Nothing to return for a zero limit or when the logical position is
+     * already the beginning of the file. */
+    offset = RAW_TELL(self);
+    if (offset < 0)
+        return NULL;
+    if (limit == 0 || offset - RAW_OFFSET(self) == 0)
+        return PyBytes_FromStringAndSize("", 0);
+
+    if (VALID_READ_BUFFER(self) && self->pos > 0) {
+        n = limit >= 0 && limit < self->pos ? self->pos - limit : 0;
+        start = s = self->buffer + self->pos;
+        while (--s > self->buffer + n) {
+            if (*(s-1) == '\n')
+                break;
+        }
+
+        n = start - s;
+        /* s[-1] is only inside the buffer when s is not its first byte. */
+        if (RAW_TELL(self) == self->read_end || n == limit ||
+            (s > self->buffer && *(s-1) == '\n')) {
+            res = PyBytes_FromStringAndSize(s, n);
+            if (res != NULL)
+                self->pos -= n;
+            goto end_unlocked;
+        }
+    }
+
+    /* Rewind the buffer looking for a newline */
+    if (!ENTER_BUFFERED(self))
+        goto end_unlocked;
+
+    chunks = PyList_New(0);
+    if (chunks == NULL)
+        goto end;
+    if (VALID_READ_BUFFER(self) && self->pos > 0) {
+        res = PyBytes_FromStringAndSize(self->buffer, self->pos);
+        if (res == NULL)
+            goto end;
+        if (PyList_Append(chunks, res) < 0) {
+            Py_CLEAR(res);
+            goto end;
+        }
+        Py_CLEAR(res);
+        /* Charge the bytes just taken against the limit before the
+         * position is reset; doing it afterwards would subtract 0. */
+        if (limit >= 0)
+            limit -= self->pos;
+        self->pos = 0;
+    }
+    if (self->writable) {
+        PyObject *r = buffered_flush_and_rewind_unlocked(self);
+        if (r == NULL)
+            goto end;
+        Py_DECREF(r);
+    }
+
+    for (;;) {
+        /* Seek from the end of the current chunk to the beginning of the
+         * previous chunk. If read_end is -1 there is no current chunk, so
+         * only read backwards by a single chunk.
+         * NOTE(review): this assumes the current chunk filled the whole
+         * buffer and that a full chunk precedes it; otherwise chunks can
+         * overlap or leave a gap -- confirm. */
+        n = self->read_end == -1 ? self->buffer_size : self->buffer_size*2;
+        if (RAW_TELL(self) < n)
+            n = RAW_TELL(self);
+        n = _buffered_raw_seek(self, -n, 1);
+        if (n == -1)
+            goto end;
+        _bufferedreader_reset_buf(self);
+        n = _bufferedreader_fill_buffer(self);
+        if (n == -1)
+            goto end;
+        /* No data could be read: return what was collected so far. */
+        if (n <= 0)
+            goto found;
+        self->pos = n;
+
+        n = limit >= 0 && limit < self->pos ? self->pos - limit : 0;
+        start = s = self->buffer + self->pos;
+        while (--s > self->buffer + n) {
+            if (*(s-1) == '\n')
+                break;
+        }
+
+        /* TODO: Verify edge case where the last byte of a chunk is \n */
+        if (s+1 != start) {
+            res = PyBytes_FromStringAndSize(s, (start - s));
+            if (res == NULL)
+                goto end;
+            self->pos -= start - s;
+            if (PyList_Append(chunks, res) < 0) {
+                Py_CLEAR(res);
+                goto end;
+            }
+            Py_CLEAR(res);
+            if (limit >= 0)
+                limit -= start - s;
+            if ((s > self->buffer && *(s-1) == '\n') || limit == 0)
+                goto found;
+        }
+        /* The buffer now starts at file offset 0: stop rewinding. */
+        if (RAW_TELL(self) == self->read_end)
+            goto found;
+    }
+
+found:
+    if (PyList_Reverse(chunks) < 0)
+        goto end;
+    res = _PyBytes_Join(_PyIO_empty_bytes, chunks);
+
+end:
+    LEAVE_BUFFERED(self)
+end_unlocked:
+    Py_XDECREF(chunks);
+    return res;
+}
+
+static PyObject *
+buffered_readprevline(buffered *self, PyObject *args)
+{
+    Py_ssize_t limit = -1;
+
+    CHECK_INITIALIZED(self)
+    if (!PyArg_ParseTuple(args, "|O&:readprevline",
+                          &_PyIO_ConvertSsize_t, &limit))
+        return NULL;
+    return _buffered_readprevline(self, limit);
+}
+
+
+static PyObject *
 buffered_tell(buffered *self, PyObject *args)
 {
     Py_off_t pos;
@@ -1750,6 +1895,7 @@
     {"read1", (PyCFunction)buffered_read1, METH_VARARGS},
     {"readinto", (PyCFunction)buffered_readinto, METH_VARARGS},
     {"readline", (PyCFunction)buffered_readline, METH_VARARGS},
+    {"readprevline", (PyCFunction)buffered_readprevline, METH_VARARGS},
     {"seek", (PyCFunction)buffered_seek, METH_VARARGS},
     {"tell", (PyCFunction)buffered_tell, METH_NOARGS},
     {"truncate", (PyCFunction)buffered_truncate, METH_VARARGS},