| LEFT | RIGHT |
| (no file at all) | |
| 1 """Interface to the liblzma compression library. | 1 """Interface to the liblzma compression library. |
| 2 | 2 |
| 3 This module provides a class for reading and writing compressed files, | 3 This module provides a class for reading and writing compressed files, |
| 4 classes for incremental (de)compression, and convenience functions for | 4 classes for incremental (de)compression, and convenience functions for |
| 5 one-shot (de)compression. | 5 one-shot (de)compression. |
| 6 | 6 |
| 7 These classes and functions support both the XZ and legacy LZMA | 7 These classes and functions support both the XZ and legacy LZMA |
| 8 container formats, as well as raw compressed data streams. | 8 container formats, as well as raw compressed data streams. |
| 9 """ | 9 """ |
| 10 | 10 |
| 11 __all__ = [ | 11 __all__ = [ |
| 12 "CHECK_NONE", "CHECK_CRC32", "CHECK_CRC64", "CHECK_SHA256", | 12 "CHECK_NONE", "CHECK_CRC32", "CHECK_CRC64", "CHECK_SHA256", |
| 13 "CHECK_ID_MAX", "CHECK_UNKNOWN", | 13 "CHECK_ID_MAX", "CHECK_UNKNOWN", |
| 14 "FILTER_LZMA1", "FILTER_LZMA2", "FILTER_DELTA", "FILTER_X86", "FILTER_IA64", | 14 "FILTER_LZMA1", "FILTER_LZMA2", "FILTER_DELTA", "FILTER_X86", "FILTER_IA64", |
| 15 "FILTER_ARM", "FILTER_ARMTHUMB", "FILTER_POWERPC", "FILTER_SPARC", | 15 "FILTER_ARM", "FILTER_ARMTHUMB", "FILTER_POWERPC", "FILTER_SPARC", |
| 16 "FORMAT_AUTO", "FORMAT_XZ", "FORMAT_ALONE", "FORMAT_RAW", | 16 "FORMAT_AUTO", "FORMAT_XZ", "FORMAT_ALONE", "FORMAT_RAW", |
| 17 "MF_HC3", "MF_HC4", "MF_BT2", "MF_BT3", "MF_BT4", | 17 "MF_HC3", "MF_HC4", "MF_BT2", "MF_BT3", "MF_BT4", |
| 18 "MODE_FAST", "MODE_NORMAL", "PRESET_DEFAULT", "PRESET_EXTREME", | 18 "MODE_FAST", "MODE_NORMAL", "PRESET_DEFAULT", "PRESET_EXTREME", |
| 19 | 19 |
| 20 "LZMACompressor", "LZMADecompressor", "LZMAFile", "LZMAError", | 20 "LZMACompressor", "LZMADecompressor", "LZMAFile", "LZMAError", |
| 21 "open", "compress", "decompress", "is_check_supported", | 21 "compress", "decompress", "is_check_supported", |
| 22 "encode_filter_properties", "decode_filter_properties", | 22 "encode_filter_properties", "decode_filter_properties", |
| 23 ] | 23 ] |
| 24 | 24 |
| 25 import builtins | |
| 26 import io | 25 import io |
| 27 from _lzma import * | 26 from _lzma import * |
| 28 | 27 |
| 29 | 28 |
| 30 _MODE_CLOSED = 0 | 29 _MODE_CLOSED = 0 |
| 31 _MODE_READ = 1 | 30 _MODE_READ = 1 |
| 32 _MODE_READ_EOF = 2 | 31 _MODE_READ_EOF = 2 |
| 33 _MODE_WRITE = 3 | 32 _MODE_WRITE = 3 |
| 34 | 33 |
| 35 _BUFFER_SIZE = 8192 | 34 _BUFFER_SIZE = 8192 |
| 36 | 35 |
| 37 | 36 |
| 38 class LZMAFile(io.BufferedIOBase): | 37 class LZMAFile(io.BufferedIOBase): |
| 39 | 38 |
| 40 """A file object providing transparent LZMA (de)compression. | 39 """A file object providing transparent LZMA (de)compression. |
| 41 | 40 |
| 42 An LZMAFile can act as a wrapper for an existing file object, or | 41 An LZMAFile can act as a wrapper for an existing file object, or |
| 43 refer directly to a named file on disk. | 42 refer directly to a named file on disk. |
| 44 | 43 |
| 45 Note that LZMAFile provides a *binary* file interface - data read | 44 Note that LZMAFile provides a *binary* file interface - data read |
| 46 is returned as bytes, and data to be written must be given as bytes. | 45 is returned as bytes, and data to be written must be given as bytes. |
| 47 """ | 46 """ |
| 48 | 47 |
| 49 def __init__(self, filename=None, mode="r", *, | 48 def __init__(self, filename=None, mode="r", *, |
| 50 format=None, check=-1, preset=None, filters=None): | 49 fileobj=None, format=None, check=-1, |
| 51 """Open an LZMA-compressed file in binary mode. | 50 preset=None, filters=None): |
| 52 | 51 """Open an LZMA-compressed file. |
| 53 filename can be either an actual file name (given as a str or | 52 |
| 54 bytes object), in which case the named file is opened, or it can | 53 If filename is given, open the named file. Otherwise, operate on |
| 55 be an existing file object to read from or write to. | 54 the file object given by fileobj. Exactly one of these two |
| 55 parameters should be provided. |
| 56 | 56 |
| 57 mode can be "r" for reading (default), "w" for (over)writing, or | 57 mode can be "r" for reading (default), "w" for (over)writing, or |
| 58 "a" for appending. These can equivalently be given as "rb", "wb", | 58 "a" for appending. |
| 59 and "ab" respectively. | |
| 60 | 59 |
| 61 format specifies the container format to use for the file. | 60 format specifies the container format to use for the file. |
| 62 If mode is "r", this defaults to FORMAT_AUTO. Otherwise, the | 61 If mode is "r", this defaults to FORMAT_AUTO. Otherwise, the |
| 63 default is FORMAT_XZ. | 62 default is FORMAT_XZ. |
| 64 | 63 |
| 65 check specifies the integrity check to use. This argument can | 64 check specifies the integrity check to use. This argument can |
| 66 only be used when opening a file for writing. For FORMAT_XZ, | 65 only be used when opening a file for writing. For FORMAT_XZ, |
| 67 the default is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not | 66 the default is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not |
| 68 support integrity checks - for these formats, check must be | 67 support integrity checks - for these formats, check must be |
| 69 omitted, or be CHECK_NONE. | 68 omitted, or be CHECK_NONE. |
| (...skipping 18 matching lines...) Expand all Loading... |
| 88 filters (if provided) should be a sequence of dicts. Each dict | 87 filters (if provided) should be a sequence of dicts. Each dict |
| 89 should have an entry for "id" indicating ID of the filter, plus | 88 should have an entry for "id" indicating ID of the filter, plus |
| 90 additional entries for options to the filter. | 89 additional entries for options to the filter. |
| 91 """ | 90 """ |
| 92 self._fp = None | 91 self._fp = None |
| 93 self._closefp = False | 92 self._closefp = False |
| 94 self._mode = _MODE_CLOSED | 93 self._mode = _MODE_CLOSED |
| 95 self._pos = 0 | 94 self._pos = 0 |
| 96 self._size = -1 | 95 self._size = -1 |
| 97 | 96 |
| 98 if mode in ("r", "rb"): | 97 if mode == "r": |
| 99 if check != -1: | 98 if check != -1: |
| 100 raise ValueError("Cannot specify an integrity check " | 99 raise ValueError("Cannot specify an integrity check " |
| 101 "when opening a file for reading") | 100 "when opening a file for reading") |
| 102 if preset is not None: | 101 if preset is not None: |
| 103 raise ValueError("Cannot specify a preset compression " | 102 raise ValueError("Cannot specify a preset compression " |
| 104 "level when opening a file for reading") | 103 "level when opening a file for reading") |
| 105 if format is None: | 104 if format is None: |
| 106 format = FORMAT_AUTO | 105 format = FORMAT_AUTO |
| 107 mode_code = _MODE_READ | 106 mode_code = _MODE_READ |
| 108 # Save the args to pass to the LZMADecompressor initializer. | 107 # Save the args to pass to the LZMADecompressor initializer. |
| 109 # If the file contains multiple compressed streams, each | 108 # If the file contains multiple compressed streams, each |
| 110 # stream will need a separate decompressor object. | 109 # stream will need a separate decompressor object. |
| 111 self._init_args = {"format":format, "filters":filters} | 110 self._init_args = {"format":format, "filters":filters} |
| 112 self._decompressor = LZMADecompressor(**self._init_args) | 111 self._decompressor = LZMADecompressor(**self._init_args) |
| 113 self._buffer = None | 112 self._buffer = None |
| 114 elif mode in ("w", "wb", "a", "ab"): | 113 elif mode in ("w", "a"): |
| 115 if format is None: | 114 if format is None: |
| 116 format = FORMAT_XZ | 115 format = FORMAT_XZ |
| 117 mode_code = _MODE_WRITE | 116 mode_code = _MODE_WRITE |
| 118 self._compressor = LZMACompressor(format=format, check=check, | 117 self._compressor = LZMACompressor(format=format, check=check, |
| 119 preset=preset, filters=filters) | 118 preset=preset, filters=filters) |
| 120 else: | 119 else: |
| 121 raise ValueError("Invalid mode: {!r}".format(mode)) | 120 raise ValueError("Invalid mode: {!r}".format(mode)) |
| 122 | 121 |
| 123 if isinstance(filename, (str, bytes)): | 122 if filename is not None and fileobj is None: |
| 124 if "b" not in mode: | 123 mode += "b" |
| 125 mode += "b" | 124 self._fp = open(filename, mode) |
| 126 self._fp = builtins.open(filename, mode) | |
| 127 self._closefp = True | 125 self._closefp = True |
| 128 self._mode = mode_code | 126 self._mode = mode_code |
| 129 elif hasattr(filename, "read") or hasattr(filename, "write"): | 127 elif fileobj is not None and filename is None: |
| 130 self._fp = filename | 128 self._fp = fileobj |
| 131 self._mode = mode_code | 129 self._mode = mode_code |
| 132 else: | 130 else: |
| 133 raise TypeError("filename must be a str or bytes object, or a file") | 131 raise ValueError("Must give exactly one of filename and fileobj") |
| 134 | 132 |
| 135 def close(self): | 133 def close(self): |
| 136 """Flush and close the file. | 134 """Flush and close the file. |
| 137 | 135 |
| 138 May be called more than once without error. Once the file is | 136 May be called more than once without error. Once the file is |
| 139 closed, any other operation on it will raise a ValueError. | 137 closed, any other operation on it will raise a ValueError. |
| 140 """ | 138 """ |
| 141 if self._mode == _MODE_CLOSED: | 139 if self._mode == _MODE_CLOSED: |
| 142 return | 140 return |
| 143 try: | 141 try: |
| (...skipping 220 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 364 self._read_block(offset, return_data=False) | 362 self._read_block(offset, return_data=False) |
| 365 | 363 |
| 366 return self._pos | 364 return self._pos |
| 367 | 365 |
| 368 def tell(self): | 366 def tell(self): |
| 369 """Return the current file position.""" | 367 """Return the current file position.""" |
| 370 self._check_not_closed() | 368 self._check_not_closed() |
| 371 return self._pos | 369 return self._pos |
| 372 | 370 |
| 373 | 371 |
| 374 def open(filename, mode="rb", *, | |
| 375 format=None, check=-1, preset=None, filters=None, | |
| 376 encoding=None, errors=None, newline=None): | |
| 377 """Open an LZMA-compressed file in binary or text mode. | |
| 378 | |
| 379 filename can be either an actual file name (given as a str or bytes object), | |
| 380 in which case the named file is opened, or it can be an existing file object | |
| 381 to read from or write to. | |
| 382 | |
| 383 The mode argument can be "r", "rb" (default), "w", "wb", "a", or "ab" for | |
| 384 binary mode, or "rt", "wt" or "at" for text mode. | |
| 385 | |
| 386 The format, check, preset and filters arguments specify the compression | |
| 387 settings, as for LZMACompressor, LZMADecompressor and LZMAFile. | |
| 388 | |
| 389 For binary mode, this function is equivalent to the LZMAFile constructor: | |
| 390 LZMAFile(filename, mode, ...). In this case, the encoding, errors and | |
| 391 newline arguments must not be provided. | |
| 392 | |
| 393 For text mode, a LZMAFile object is created, and wrapped in an | |
| 394 io.TextIOWrapper instance with the specified encoding, error handling | |
| 395 behavior, and line ending(s). | |
| 396 | |
| 397 """ | |
| 398 if "t" in mode: | |
| 399 if "b" in mode: | |
| 400 raise ValueError("Invalid mode: %r" % (mode,)) | |
| 401 else: | |
| 402 if encoding is not None: | |
| 403 raise ValueError("Argument 'encoding' not supported in binary mode") | |
| 404 if errors is not None: | |
| 405 raise ValueError("Argument 'errors' not supported in binary mode") | |
| 406 if newline is not None: | |
| 407 raise ValueError("Argument 'newline' not supported in binary mode") | |
| 408 | |
| 409 lz_mode = mode.replace("t", "") | |
| 410 binary_file = LZMAFile(filename, lz_mode, format=format, check=check, | |
| 411 preset=preset, filters=filters) | |
| 412 | |
| 413 if "t" in mode: | |
| 414 return io.TextIOWrapper(binary_file, encoding, errors, newline) | |
| 415 else: | |
| 416 return binary_file | |
| 417 | |
| 418 | |
| 419 def compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None): | 372 def compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None): |
| 420 """Compress a block of data. | 373 """Compress a block of data. |
| 421 | 374 |
| 422 Refer to LZMACompressor's docstring for a description of the | 375 Refer to LZMACompressor's docstring for a description of the |
| 423 optional arguments *format*, *check*, *preset* and *filters*. | 376 optional arguments *format*, *check*, *preset* and *filters*. |
| 424 | 377 |
| 425 For incremental compression, use an LZMACompressor object instead. | 378 For incremental compression, use an LZMACompressor object instead. |
| 426 """ | 379 """ |
| 427 comp = LZMACompressor(format, check, preset, filters) | 380 comp = LZMACompressor(format, check, preset, filters) |
| 428 return comp.compress(data) + comp.flush() | 381 return comp.compress(data) + comp.flush() |
| (...skipping 11 matching lines...) Expand all Loading... |
| 440 while True: | 393 while True: |
| 441 decomp = LZMADecompressor(format, memlimit, filters) | 394 decomp = LZMADecompressor(format, memlimit, filters) |
| 442 results.append(decomp.decompress(data)) | 395 results.append(decomp.decompress(data)) |
| 443 if not decomp.eof: | 396 if not decomp.eof: |
| 444 raise LZMAError("Compressed data ended before the " | 397 raise LZMAError("Compressed data ended before the " |
| 445 "end-of-stream marker was reached") | 398 "end-of-stream marker was reached") |
| 446 if not decomp.unused_data: | 399 if not decomp.unused_data: |
| 447 return b"".join(results) | 400 return b"".join(results) |
| 448 # There is unused data left over. Proceed to next stream. | 401 # There is unused data left over. Proceed to next stream. |
| 449 data = decomp.unused_data | 402 data = decomp.unused_data |
| LEFT | RIGHT |