diff -r eb1b93fd68f5 Lib/_pyio.py
--- a/Lib/_pyio.py	Thu May 12 02:07:00 2011 +0200
+++ b/Lib/_pyio.py	Sat May 28 14:42:43 2011 -0400
@@ -666,6 +666,66 @@
         """
         self._unsupported("detach")
 
+    def prefetch(self, b, skip=0, minread=0):
+        """Skip `skip` bytes, then read at least `minread` bytes into `b`.
+
+        The file pointer is advanced by at most `skip + minread`, or less
+        if the end of file was reached.  The total number of bytes
+        written into `b` is returned; it can be more than `minread` if
+        additional bytes could be prefetched without advancing the file
+        pointer (but, of course, it cannot be more than len(b)).
+
+        Raises ValueError if `skip` or `minread` is negative, or if
+        `minread` is greater than len(b).
+        """
+        if skip < 0:
+            raise ValueError("skip must be positive or zero.")
+        if minread < 0:
+            raise ValueError("minread must be positive or zero.")
+        buflen = len(b)
+        if minread > buflen:
+            msg = "minread must not be greater than size of buffer"
+            raise ValueError(msg)
+
+        if skip > 0:
+            if self.seekable():
+                self.seek(skip, SEEK_CUR)
+            else:
+                # Unseekable stream: skip by reading and discarding.
+                # skip may be huge, so keep a reasonable bound on reads.
+                while skip > 0:
+                    data = self.read(min(DEFAULT_BUFFER_SIZE, skip))
+                    if not data:
+                        return 0
+                    skip -= len(data)
+
+        # peek() is optional, for backwards compatibility.
+        # XXX: should we consider adding peek() returning b""
+        # or raise Unsupported in BufferedIOBase?
+        peek = getattr(self, "peek", None)
+
+        written = 0
+        if minread > 0:
+            data = self.read(minread)
+            if not data:
+                return 0
+            written = len(data)
+            b[:written] = data
+            if written < minread:
+                # Short read (e.g. end of file): nothing left to prefetch.
+                return written
+        if peek is None or written == buflen:
+            return written
+
+        # Top up `b` with bytes that can be peeked without moving the file
+        # pointer.  When minread is 0, ask for at least one byte so that
+        # some data is returned if possible.
+        data = peek(0 if minread > 0 else 1)[:buflen - written]
+        n = len(data)
+        b[written:written + n] = data
+        return written + n
+
+
 io.BufferedIOBase.register(BufferedIOBase)
 
 
@@ -914,6 +974,9 @@
         self._reset_read_buf()
         self._read_lock = Lock()
 
+    def _bytes_available(self):
+        return len(self._read_buf) - self._read_pos
+
     def _reset_read_buf(self):
         self._read_buf = b""
         self._read_pos = 0
@@ -999,7 +1062,7 @@
 
     def _peek_unlocked(self, n=0):
         want = min(n, self.buffer_size)
-        have = len(self._read_buf) - self._read_pos
+        have = self._bytes_available()
         if have < want or have <= 0:
             to_read = self.buffer_size - have
             while True:
@@ -1015,6 +1078,89 @@
                 self._read_pos = 0
         return self._read_buf[self._read_pos:]
 
+    def prefetch(self, b, skip=0, minread=0):
+        """Skip `skip` bytes, then read at least `minread` bytes into `b`.
+
+        The read position is advanced by at most `skip + minread`, or less
+        if the end of file was reached.  The total number of bytes
+        written into `b` is returned; it can be more than `minread` if
+        additional bytes could be prefetched (but, of course, it cannot
+        be more than `len(b)`).  Prefetched bytes beyond `minread` stay
+        in the read buffer and are returned again by later reads.
+
+        Raises ValueError if `skip` or `minread` is negative, or if
+        `minread` is greater than `len(b)`.
+        """
+        if skip < 0:
+            raise ValueError("skip must be positive or zero.")
+        if minread < 0:
+            raise ValueError("minread must be positive or zero.")
+        if minread > len(b):
+            msg = "minread must not be greater than size of buffer"
+            raise ValueError(msg)
+        with self._read_lock:
+            return self._prefetch_unlocked(b, skip, minread)
+
+    def _raw_read_retrying(self, size):
+        """Call self.raw.read(size), retrying when interrupted (EINTR)."""
+        while True:
+            try:
+                return self.raw.read(size)
+            except IOError as e:
+                if e.errno != EINTR:
+                    raise
+
+    def _prefetch_unlocked(self, b, skip=0, minread=0):
+        """Implement prefetch(); the caller must hold self._read_lock."""
+        limit = len(b)
+        assert skip >= 0 and 0 <= minread <= limit
+
+        # Step 1: skip, consuming already-buffered bytes first.
+        have = self._bytes_available()
+        if skip <= have:
+            self._read_pos += skip
+        else:
+            skip -= have
+            self._reset_read_buf()
+            if self.raw.seekable():
+                self.raw.seek(skip, SEEK_CUR)
+            else:
+                # Skip by reading buffer_size chunks at a time, as skip
+                # may be large.
+                while skip > 0:
+                    data = self._raw_read_retrying(self.buffer_size)
+                    if not data:
+                        return 0
+                    # Keep whatever was read past the skip point (empty
+                    # while the chunk lies entirely within the skip).
+                    self._read_buf = data[skip:]
+                    skip -= len(data)
+
+        # Step 2: refill the buffer until it can satisfy minread.  When
+        # minread is 0 still try to get one byte (if `b` has room), so
+        # that some data is prefetched if possible.
+        want = max(minread, min(1, limit))
+        have = self._bytes_available()
+        if have < want:
+            chunks = [self._read_buf[self._read_pos:]]
+            while have < want:
+                data = self._raw_read_retrying(
+                    max(self.buffer_size, limit - have))
+                if not data:
+                    break
+                chunks.append(data)
+                have += len(data)
+            self._read_buf = b"".join(chunks)
+            self._read_pos = 0
+
+        # Step 3: copy as much as fits into `b`, but consume only
+        # minread bytes; the rest stays buffered for later reads.
+        pos = self._read_pos
+        written = min(have, limit)
+        b[:written] = self._read_buf[pos:pos + written]
+        self._read_pos = pos + min(minread, have)
+        return written
+
     def read1(self, n):
         """Reads up to n bytes, with at most one read() system call."""
         # Returns up to n bytes. If at least one byte is buffered, we
@@ -1185,6 +1331,9 @@
     def readinto(self, b):
         return self.reader.readinto(b)
 
+    def prefetch(self, b, skip=0, minread=0):
+        return self.reader.prefetch(b, skip, minread)
+
     def write(self, b):
         return self.writer.write(b)
 
@@ -1271,6 +1420,10 @@
         self.flush()
         return BufferedReader.readinto(self, b)
 
+    def prefetch(self, b, skip=0, minread=0):
+        self.flush()
+        return BufferedReader.prefetch(self, b, skip, minread)
+
     def peek(self, n=0):
         self.flush()
         return BufferedReader.peek(self, n)