Index: Lib/_pyio.py =================================================================== --- Lib/_pyio.py (révision 88332) +++ Lib/_pyio.py (copie de travail) @@ -1488,6 +1488,7 @@ self._decoded_chars_used = 0 # offset into _decoded_chars for read() self._snapshot = None # info for reconstructing decoder state self._seekable = self._telling = self.buffer.seekable() + self._b2cratio = 0.0 if self._seekable and self.writable(): position = self.buffer.tell() @@ -1655,7 +1656,12 @@ # Read a chunk, decode it, and put the result in self._decoded_chars. input_chunk = self.buffer.read1(self._CHUNK_SIZE) eof = not input_chunk - self._set_decoded_chars(self._decoder.decode(input_chunk, eof)) + decoded_chars = self._decoder.decode(input_chunk, eof) + self._set_decoded_chars(decoded_chars) + if decoded_chars: + self._b2cratio = len(input_chunk) / len(self._decoded_chars) + else: + self._b2cratio = 0.0 if self._telling: # At the snapshot point, len(dec_buffer) bytes before the read, @@ -1709,20 +1715,51 @@ # forward until it gives us enough decoded characters. saved_state = decoder.getstate() try: + # Fast search for an acceptable start point, close to our + # current pos + skip_bytes = int(self._b2cratio * chars_to_skip) + skip_back = 1 + assert skip_bytes <= len(next_input) + while skip_bytes > 0: + decoder.setstate((b'', dec_flags)) + # Decode up to temptative start point + n = len(decoder.decode(next_input[:skip_bytes])) + if n <= chars_to_skip: + b, d = decoder.getstate() + if not b: + # Before pos and no bytes buffered in decoder => OK + dec_flags = d + chars_to_skip -= n + break + # Skip back by buffered amount and reset heuristic + skip_bytes -= len(b) + skip_back = 1 + else: + # We're too far, skip back a bit + skip_bytes -= skip_back + skip_back = skip_back * 2 + else: + skip_bytes = 0 + decoder.setstate((b'', dec_flags)) + # Note our initial start point. - decoder.setstate((b'', dec_flags)) - start_pos = position - start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0 - need_eof = 0 + start_pos = position + skip_bytes + start_flags = dec_flags + if chars_to_skip == 0: + # We haven't moved from the start point. + return self._pack_cookie(start_pos, start_flags) # Feed the decoder one byte at a time. As we go, note the # nearest "safe start point" before the current location # (a point where the decoder has nothing buffered, so seek() # can safely start from there and advance to this location). - next_byte = bytearray(1) - for next_byte[0] in next_input: + bytes_fed = 0 + need_eof = 0 + # Chars decoded since `start_pos` + chars_decoded = 0 + for i in range(skip_bytes, len(next_input)): bytes_fed += 1 - chars_decoded += len(decoder.decode(next_byte)) + chars_decoded += len(decoder.decode(next_input[i:i+1])) dec_buffer, dec_flags = decoder.getstate() if not dec_buffer and chars_decoded <= chars_to_skip: # Decoder buffer is empty, so this is a safe start point.