Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(32961)

Delta Between Two Patch Sets: Lib/bz2.py

Issue 16034: bz2 module appears slower in Python 3.x versus Python 2.x
Left Patch Set: Created 6 years, 12 months ago
Right Patch Set: Created 6 years, 11 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « no previous file | no next file » | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
1 """Interface to the libbzip2 compression library. 1 """Interface to the libbzip2 compression library.
2 2
3 This module provides a file interface, classes for incremental 3 This module provides a file interface, classes for incremental
4 (de)compression, and functions for one-shot (de)compression. 4 (de)compression, and functions for one-shot (de)compression.
5 """ 5 """
6 6
7 __all__ = ["BZ2File", "BZ2Compressor", "BZ2Decompressor", 7 __all__ = ["BZ2File", "BZ2Compressor", "BZ2Decompressor",
8 "open", "compress", "decompress"] 8 "open", "compress", "decompress"]
9 9
10 __author__ = "Nadeem Vawda <nadeem.vawda@gmail.com>" 10 __author__ = "Nadeem Vawda <nadeem.vawda@gmail.com>"
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after
72 warnings.warn("Use of 'buffering' argument is deprecated", 72 warnings.warn("Use of 'buffering' argument is deprecated",
73 DeprecationWarning) 73 DeprecationWarning)
74 74
75 if not (1 <= compresslevel <= 9): 75 if not (1 <= compresslevel <= 9):
76 raise ValueError("compresslevel must be between 1 and 9") 76 raise ValueError("compresslevel must be between 1 and 9")
77 77
78 if mode in ("", "r", "rb"): 78 if mode in ("", "r", "rb"):
79 mode = "rb" 79 mode = "rb"
80 mode_code = _MODE_READ 80 mode_code = _MODE_READ
81 self._decompressor = BZ2Decompressor() 81 self._decompressor = BZ2Decompressor()
82 self._buffer = b'' 82 self._buffer = b""
83 self._offset = 0 83 self._buffer_offset = 0
84 elif mode in ("w", "wb"): 84 elif mode in ("w", "wb"):
85 mode = "wb" 85 mode = "wb"
86 mode_code = _MODE_WRITE 86 mode_code = _MODE_WRITE
87 self._compressor = BZ2Compressor(compresslevel) 87 self._compressor = BZ2Compressor(compresslevel)
88 elif mode in ("a", "ab"): 88 elif mode in ("a", "ab"):
89 mode = "ab" 89 mode = "ab"
90 mode_code = _MODE_WRITE 90 mode_code = _MODE_WRITE
91 self._compressor = BZ2Compressor(compresslevel) 91 self._compressor = BZ2Compressor(compresslevel)
92 else: 92 else:
93 raise ValueError("Invalid mode: {!r}".format(mode)) 93 raise ValueError("Invalid mode: {!r}".format(mode))
(...skipping 24 matching lines...) Expand all
118 self._fp.write(self._compressor.flush()) 118 self._fp.write(self._compressor.flush())
119 self._compressor = None 119 self._compressor = None
120 finally: 120 finally:
121 try: 121 try:
122 if self._closefp: 122 if self._closefp:
123 self._fp.close() 123 self._fp.close()
124 finally: 124 finally:
125 self._fp = None 125 self._fp = None
126 self._closefp = False 126 self._closefp = False
127 self._mode = _MODE_CLOSED 127 self._mode = _MODE_CLOSED
128 self._buffer = b'' 128 self._buffer = b""
129 self._offset = 0 129 self._buffer_offset = 0
130 130
131 @property 131 @property
132 def closed(self): 132 def closed(self):
133 """True if this file is closed.""" 133 """True if this file is closed."""
134 return self._mode == _MODE_CLOSED 134 return self._mode == _MODE_CLOSED
135 135
136 def fileno(self): 136 def fileno(self):
137 """Return the file descriptor for the underlying file.""" 137 """Return the file descriptor for the underlying file."""
138 self._check_not_closed() 138 self._check_not_closed()
139 return self._fp.fileno() 139 return self._fp.fileno()
(...skipping 12 matching lines...) Expand all
152 self._check_not_closed() 152 self._check_not_closed()
153 return self._mode == _MODE_WRITE 153 return self._mode == _MODE_WRITE
154 154
155 # Mode-checking helper functions. 155 # Mode-checking helper functions.
156 156
157 def _check_not_closed(self): 157 def _check_not_closed(self):
158 if self.closed: 158 if self.closed:
159 raise ValueError("I/O operation on closed file") 159 raise ValueError("I/O operation on closed file")
160 160
161 def _check_can_read(self): 161 def _check_can_read(self):
162 if not self.readable(): 162 if self._mode not in (_MODE_READ, _MODE_READ_EOF):
163 self._check_not_closed()
163 raise io.UnsupportedOperation("File not open for reading") 164 raise io.UnsupportedOperation("File not open for reading")
164 165
165 def _check_can_write(self): 166 def _check_can_write(self):
166 if not self.writable(): 167 if self._mode != _MODE_WRITE:
168 self._check_not_closed()
167 raise io.UnsupportedOperation("File not open for writing") 169 raise io.UnsupportedOperation("File not open for writing")
168 170
169 def _check_can_seek(self): 171 def _check_can_seek(self):
170 if not self.readable(): 172 if self._mode not in (_MODE_READ, _MODE_READ_EOF):
173 self._check_not_closed()
171 raise io.UnsupportedOperation("Seeking is only supported " 174 raise io.UnsupportedOperation("Seeking is only supported "
172 "on files open for reading") 175 "on files open for reading")
173 if not self._fp.seekable(): 176 if not self._fp.seekable():
174 raise io.UnsupportedOperation("The underlying file object " 177 raise io.UnsupportedOperation("The underlying file object "
175 "does not support seeking") 178 "does not support seeking")
176 179
177 # Non-buffered read and decompress next chunk of data. 180 # Fill the readahead buffer if it is empty. Returns False on EOF.
178 # Always returns at least one byte of data, unless at EOF. 181 def _fill_buffer(self):
179 def _read1(self): 182 if self._mode == _MODE_READ_EOF:
183 return False
180 # Depending on the input data, our call to the decompressor may not 184 # Depending on the input data, our call to the decompressor may not
181 # return any data. In this case, try again after reading another block. 185 # return any data. In this case, try again after reading another block.
182 if self._mode == _MODE_READ_EOF: 186 while self._buffer_offset == len(self._buffer):
183 return b'' 187 rawblock = (self._decompressor.unused_data or
184 while True: 188 self._fp.read(_BUFFER_SIZE))
185 if self._decompressor.unused_data:
186 rawblock = self._decompressor.unused_data
187 else:
188 rawblock = self._fp.read(_BUFFER_SIZE)
189 189
190 if not rawblock: 190 if not rawblock:
191 if self._decompressor.eof: 191 if self._decompressor.eof:
192 self._mode = _MODE_READ_EOF 192 self._mode = _MODE_READ_EOF
193 self._size = self._pos 193 self._size = self._pos
194 return b'' 194 return False
195 else: 195 else:
196 raise EOFError("Compressed file ended before the " 196 raise EOFError("Compressed file ended before the "
197 "end-of-stream marker was reached") 197 "end-of-stream marker was reached")
198 198
199 # Continue to next stream. 199 # Continue to next stream.
200 if self._decompressor.eof: 200 if self._decompressor.eof:
201 self._decompressor = BZ2Decompressor() 201 self._decompressor = BZ2Decompressor()
202 202
203 data = self._decompressor.decompress(rawblock) 203 self._buffer = self._decompressor.decompress(rawblock)
204 if data: 204 self._buffer_offset = 0
205 return data 205 return True
206 206
207 # Read data until EOF. 207 # Read data until EOF.
208 # If return_data is false, consume the data without returning it. 208 # If return_data is false, consume the data without returning it.
209 def _read_all(self, return_data=True): 209 def _read_all(self, return_data=True):
210 data = self._buffer[self._offset:] 210 # The loop assumes that _buffer_offset is 0. Ensure that this is true.
211 self._buffer = self._buffer[self._buffer_offset:]
212 self._buffer_offset = 0
213
211 blocks = [] 214 blocks = []
212 self._buffer = b'' 215 while self._fill_buffer():
213 self._offset = 0
214 while True:
215 if return_data: 216 if return_data:
216 blocks.append(data) 217 blocks.append(self._buffer)
217 self._pos += len(data) 218 self._pos += len(self._buffer)
218 data = self._read1() 219 self._buffer = b""
219 if not data:
220 break
221 if return_data: 220 if return_data:
222 return b"".join(blocks) 221 return b"".join(blocks)
223 222
224 # Read a block of up to n bytes. 223 # Read a block of up to n bytes.
225 # If return_data is false, consume the data without returning it. 224 # If return_data is false, consume the data without returning it.
226 def _read_block(self, n, return_data=True): 225 def _read_block(self, n, return_data=True):
227 if n <= 0: 226 # If we have enough data buffered, return immediately.
228 return b'' 227 end = self._buffer_offset + n
229 end = n + self._offset
230 data = self._buffer[self._offset:end]
231 if end <= len(self._buffer): 228 if end <= len(self._buffer):
232 self._offset = end 229 data = self._buffer[self._buffer_offset : end]
230 self._buffer_offset = end
233 self._pos += len(data) 231 self._pos += len(data)
234 return data 232 return data if return_data else None
233
234 # The loop assumes that _buffer_offset is 0. Ensure that this is true.
235 self._buffer = self._buffer[self._buffer_offset:]
236 self._buffer_offset = 0
235 237
236 blocks = [] 238 blocks = []
237 self._buffer = b'' 239 while n > 0 and self._fill_buffer():
238 self._offset = 0 240 if n < len(self._buffer):
239 while True: 241 data = self._buffer[:n]
242 self._buffer_offset = n
243 else:
244 data = self._buffer
245 self._buffer = b""
240 if return_data: 246 if return_data:
241 blocks.append(data) 247 blocks.append(data)
242 self._pos += len(data) 248 self._pos += len(data)
243 n -= len(data) 249 n -= len(data)
244 if not n:
245 break
246 data = self._read1()
247 if not data:
248 break
249 if n < len(data):
250 self._buffer = data
251 self._offset = n
252 data = data[:n]
253
254 if return_data: 250 if return_data:
255 return b"".join(blocks) 251 return b"".join(blocks)
256 252
257 def peek(self, n=1): 253 def peek(self, n=0):
258 """Return buffered data without advancing the file position. 254 """Return buffered data without advancing the file position.
259 255
260 Always returns at least one byte of data, unless at EOF. 256 Always returns at least one byte of data, unless at EOF.
261 The exact number of bytes returned is unspecified. 257 The exact number of bytes returned is unspecified.
262 """ 258 """
263 with self._lock: 259 with self._lock:
264 self._check_can_read() 260 self._check_can_read()
265 data = self._buffer[self._offset:] 261 if not self._fill_buffer():
266 if not data: 262 return b""
267 self._buffer = data = self._read1() 263 return self._buffer[self._buffer_offset:]
268 self._offset = 0
269 return data
270 264
271 def read(self, size=-1): 265 def read(self, size=-1):
272 """Read up to size uncompressed bytes from the file. 266 """Read up to size uncompressed bytes from the file.
273 267
274 If size is negative or omitted, read until EOF is reached. 268 If size is negative or omitted, read until EOF is reached.
275 Returns b'' if the file is already at EOF. 269 Returns b'' if the file is already at EOF.
276 """ 270 """
277 with self._lock: 271 with self._lock:
278 self._check_can_read() 272 self._check_can_read()
279 if size < 0: 273 if size == 0:
274 return b""
275 elif size < 0:
280 return self._read_all() 276 return self._read_all()
281 else: 277 else:
282 return self._read_block(size) 278 return self._read_block(size)
283 279
284 def read1(self, size=-1): 280 def read1(self, size=-1):
285 """Read up to size uncompressed bytes, while trying to avoid 281 """Read up to size uncompressed bytes, while trying to avoid
286 making multiple reads from the underlying stream. 282 making multiple reads from the underlying stream.
287 283
288 Returns b'' if the file is at EOF. 284 Returns b'' if the file is at EOF.
289 """ 285 """
290 # Usually, read1() calls _fp.read() at most once. However, sometimes 286 # Usually, read1() calls _fp.read() at most once. However, sometimes
291 # this does not give enough data for the decompressor to make progress. 287 # this does not give enough data for the decompressor to make progress.
292 # In this case we make multiple reads, to avoid returning b"". 288 # In this case we make multiple reads, to avoid returning b"".
293 with self._lock: 289 with self._lock:
294 self._check_can_read() 290 self._check_can_read()
295 if size == 0: 291 if (size == 0 or
292 # Only call _fill_buffer() if the buffer is actually empty.
293 # This gives a significant speedup if *size* is small.
 294                 (self._buffer_offset == len(self._buffer) and not self._fill_buffer())):
296 return b"" 295 return b""
297 if self._offset == len(self._buffer):
298 self._buffer = self._read1()
299 self._offset = 0
300 if size > 0: 296 if size > 0:
301 data = self._buffer[self._offset:self._offset + size] 297 data = self._buffer[self._buffer_offset :
302 self._offset += len(data) 298 self._buffer_offset + size]
299 self._buffer_offset += len(data)
303 else: 300 else:
304 data = self._buffer[self._offset:] 301 data = self._buffer[self._buffer_offset:]
305 self._buffer = b'' 302 self._buffer = b""
306 self._offset = 0 303 self._buffer_offset = 0
307 self._pos += len(data) 304 self._pos += len(data)
308 return data 305 return data
309 306
310 def readinto(self, b): 307 def readinto(self, b):
311 """Read up to len(b) bytes into b. 308 """Read up to len(b) bytes into b.
312 309
313 Returns the number of bytes read (0 for EOF). 310 Returns the number of bytes read (0 for EOF).
314 """ 311 """
315 with self._lock: 312 with self._lock:
316 return io.BufferedIOBase.readinto(self, b) 313 return io.BufferedIOBase.readinto(self, b)
317 314
318 def readline(self, size=-1): 315 def readline(self, size=-1):
319 """Read a line of uncompressed bytes from the file. 316 """Read a line of uncompressed bytes from the file.
320 317
321 The terminating newline (if present) is retained. If size is 318 The terminating newline (if present) is retained. If size is
322 non-negative, no more than size bytes will be read (in which 319 non-negative, no more than size bytes will be read (in which
323 case the line may be incomplete). Returns b'' if already at EOF. 320 case the line may be incomplete). Returns b'' if already at EOF.
324 """ 321 """
325 if not hasattr(size, "__index__"): 322 if not hasattr(size, "__index__"):
326 raise TypeError("Integer argument expected") 323 raise TypeError("Integer argument expected")
327 size = size.__index__() 324 size = size.__index__()
328 with self._lock: 325 with self._lock:
326 # Shortcut for the common case - the whole line is in the buffer.
329 if size < 0: 327 if size < 0:
330 # Shortcut common case - newline found in buffer. 328 end = self._buffer.find(b"\n", self._buffer_offset) + 1
331 i = self._buffer.find(b'\n', self._offset) + 1 329 if end > 0:
332 if i > 0: 330 line = self._buffer[self._buffer_offset : end]
333 line = self._buffer[self._offset: i] 331 self._buffer_offset = end
334 self._offset = i 332 self._pos += len(line)
335 return line 333 return line
336
337 return io.BufferedIOBase.readline(self, size) 334 return io.BufferedIOBase.readline(self, size)
338 335
339 def readlines(self, size=-1): 336 def readlines(self, size=-1):
340 """Read a list of lines of uncompressed bytes from the file. 337 """Read a list of lines of uncompressed bytes from the file.
341 338
342 size can be specified to control the number of lines read: no 339 size can be specified to control the number of lines read: no
343 further lines will be read once the total size of the lines read 340 further lines will be read once the total size of the lines read
344 so far equals or exceeds size. 341 so far equals or exceeds size.
345 """ 342 """
346 if not hasattr(size, "__index__"): 343 if not hasattr(size, "__index__"):
(...skipping 26 matching lines...) Expand all
373 """ 370 """
374 with self._lock: 371 with self._lock:
375 return io.BufferedIOBase.writelines(self, seq) 372 return io.BufferedIOBase.writelines(self, seq)
376 373
377 # Rewind the file to the beginning of the data stream. 374 # Rewind the file to the beginning of the data stream.
378 def _rewind(self): 375 def _rewind(self):
379 self._fp.seek(0, 0) 376 self._fp.seek(0, 0)
380 self._mode = _MODE_READ 377 self._mode = _MODE_READ
381 self._pos = 0 378 self._pos = 0
382 self._decompressor = BZ2Decompressor() 379 self._decompressor = BZ2Decompressor()
383 self._buffer = b'' 380 self._buffer = b""
384 self._offset = 0 381 self._buffer_offset = 0
385 382
386 def seek(self, offset, whence=0): 383 def seek(self, offset, whence=0):
387 """Change the file position. 384 """Change the file position.
388 385
389 The new position is specified by offset, relative to the 386 The new position is specified by offset, relative to the
390 position indicated by whence. Values for whence are: 387 position indicated by whence. Values for whence are:
391 388
392 0: start of stream (default); offset must not be negative 389 0: start of stream (default); offset must not be negative
393 1: current stream position 390 1: current stream position
394 2: end of stream; offset must not be positive 391 2: end of stream; offset must not be positive
(...skipping 19 matching lines...) Expand all
414 else: 411 else:
415 raise ValueError("Invalid value for whence: {}".format(whence)) 412 raise ValueError("Invalid value for whence: {}".format(whence))
416 413
417 # Make it so that offset is the number of bytes to skip forward. 414 # Make it so that offset is the number of bytes to skip forward.
418 if offset < self._pos: 415 if offset < self._pos:
419 self._rewind() 416 self._rewind()
420 else: 417 else:
421 offset -= self._pos 418 offset -= self._pos
422 419
423 # Read and discard data until we reach the desired position. 420 # Read and discard data until we reach the desired position.
424 if self._mode != _MODE_READ_EOF: 421 self._read_block(offset, return_data=False)
425 self._read_block(offset, return_data=False)
426 422
427 return self._pos 423 return self._pos
428 424
429 def tell(self): 425 def tell(self):
430 """Return the current file position.""" 426 """Return the current file position."""
431 with self._lock: 427 with self._lock:
432 self._check_not_closed() 428 self._check_not_closed()
433 return self._pos 429 return self._pos
434 430
435 431
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
496 while True: 492 while True:
497 decomp = BZ2Decompressor() 493 decomp = BZ2Decompressor()
498 results.append(decomp.decompress(data)) 494 results.append(decomp.decompress(data))
499 if not decomp.eof: 495 if not decomp.eof:
500 raise ValueError("Compressed data ended before the " 496 raise ValueError("Compressed data ended before the "
501 "end-of-stream marker was reached") 497 "end-of-stream marker was reached")
502 if not decomp.unused_data: 498 if not decomp.unused_data:
503 return b"".join(results) 499 return b"".join(results)
504 # There is unused data left over. Proceed to next stream. 500 # There is unused data left over. Proceed to next stream.
505 data = decomp.unused_data 501 data = decomp.unused_data
LEFTRIGHT
« no previous file | no next file » | Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Toggle Comments ('s')

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+