diff -r 97b0cf9df420 Lib/zipfile.py --- a/Lib/zipfile.py Sat Mar 17 15:11:59 2012 -0400 +++ b/Lib/zipfile.py Mon Mar 19 01:04:55 2012 +0200 @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- """ Read and write ZIP files. @@ -22,7 +23,18 @@ zlib = None crc32 = binascii.crc32 -__all__ = ["BadZipFile", "BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", +try: + import bz2 # We may need its compression method +except ImportError: + bz2 = None + +try: + import lzma # We may need its compression method +except ImportError: + lzma = None + +__all__ = ["BadZipFile", "BadZipfile", "error", + "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "ZIP_LZMA", "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"] class BadZipFile(Exception): @@ -45,6 +57,8 @@ # constants for Zip file compression methods ZIP_STORED = 0 ZIP_DEFLATED = 8 +ZIP_BZIP2 = 12 +ZIP_LZMA = 14 # Other ZIP compression methods not supported # Below are some formats and associated data for reading/writing headers using @@ -461,6 +475,107 @@ self._UpdateKeys(c) return c + +class LZMACompressor(): + def __init__(self): + self._comp = None + + def _init(self): + specs = { + 'id': lzma.FILTER_LZMA1, + 'pb': 2, + 'lp': 0, + 'lc': 3, + 'dict_size': 8 << 20, + } + self._comp = lzma.LZMACompressor(lzma.FORMAT_RAW, filters=[specs]) + prop = (specs['pb'] * 5 + specs['lp']) * 9 + specs['lc'] + return struct.pack(' 0 and n > len_readbuffer + len(self._unconsumed): - nbytes = n - len_readbuffer - len(self._unconsumed) - nbytes = max(nbytes, self.MIN_READ_SIZE) - nbytes = min(nbytes, self._compress_left) + if n > len_readbuffer: + nbytes = n - len_readbuffer + if self._compress_type == ZIP_DEFLATED: + nbytes -= len(self._decompressor.unconsumed_tail) - data = self._fileobj.read(nbytes) - self._compress_left -= len(data) + # Read from file. + if self._compress_left > 0 and nbytes > 0: + nbytes = max(nbytes, self.MIN_READ_SIZE) + nbytes = min(nbytes, self._compress_left) - if data and self._decrypter is not None: - data = bytes(map(self._decrypter, data)) + data = self._fileobj.read(nbytes) + self._compress_left -= len(data) - if self._compress_type == ZIP_STORED: - self._update_crc(data, eof=(self._compress_left==0)) + if data and self._decrypter is not None: + data = bytes(map(self._decrypter, data)) + else: + data = b'' + + # Handle unconsumed data. + if self._compress_type == ZIP_DEFLATED: + data = self._decompressor.unconsumed_tail + data + if data: + if self._compress_type == ZIP_STORED: + eof = self._compress_left == 0 + elif self._compress_type == ZIP_DEFLATED: + nbytes = max(n - len_readbuffer, self.MIN_READ_SIZE) + data = self._decompressor.decompress(data, nbytes) + eof = (self._compress_left == 0 and + not self._decompressor.unconsumed_tail) + if eof: + data += self._decompressor.flush() + else: + data = self._decompressor.decompress(data) + eof = self._decompressor.eof or self._compress_left == 0 + + self._update_crc(data, eof=eof) self._readbuffer = self._readbuffer[self._offset:] + data self._offset = 0 - else: - # Prepare deflated bytes for decompression. - self._unconsumed += data - - # Handle unconsumed data. - if (len(self._unconsumed) > 0 and n > len_readbuffer and - self._compress_type == ZIP_DEFLATED): - data = self._decompressor.decompress( - self._unconsumed, - max(n - len_readbuffer, self.MIN_READ_SIZE) - ) - - self._unconsumed = self._decompressor.unconsumed_tail - eof = len(self._unconsumed) == 0 and self._compress_left == 0 - if eof: - data += self._decompressor.flush() - - self._update_crc(data, eof=eof) - self._readbuffer = self._readbuffer[self._offset:] + data - self._offset = 0 # Read from buffer. data = self._readbuffer[self._offset: self._offset + n] @@ -667,7 +782,8 @@ file: Either the path to the file, or a file-like object. If it is a path, the file will be opened and closed by ZipFile. mode: The mode can be either read "r", write "w" or append "a". - compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib). + compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib), + ZIP_BZIP2 (requires bz2) or ZIP_LZMA (requires lzma). allowZip64: if True ZipFile will create files with ZIP64 extensions when needed, otherwise it will raise an exception when this would be necessary. @@ -681,14 +797,7 @@ if mode not in ("r", "w", "a"): raise RuntimeError('ZipFile() requires mode "r", "w", or "a"') - if compression == ZIP_STORED: - pass - elif compression == ZIP_DEFLATED: - if not zlib: - raise RuntimeError( - "Compression requires the (missing) zlib module") - else: - raise RuntimeError("That compression method is not supported") + _check_compression(compression) self._allowZip64 = allowZip64 self._didModify = False @@ -1052,11 +1161,7 @@ if not self.fp: raise RuntimeError( "Attempt to write ZIP archive that was already closed") - if zinfo.compress_type == ZIP_DEFLATED and not zlib: - raise RuntimeError( - "Compression requires the (missing) zlib module") - if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED): - raise RuntimeError("That compression method is not supported") + _check_compression(zinfo.compress_type) if zinfo.file_size > ZIP64_LIMIT: if not self._allowZip64: raise LargeZipFile("Filesize would require ZIP64 extensions") @@ -1107,17 +1212,13 @@ self.fp.write(zinfo.FileHeader()) return + cmpr = _get_compressor(zinfo.compress_type) with open(filename, "rb") as fp: # Must overwrite CRC and sizes with correct data later zinfo.CRC = CRC = 0 zinfo.compress_size = compress_size = 0 zinfo.file_size = file_size = 0 self.fp.write(zinfo.FileHeader()) - if zinfo.compress_type == ZIP_DEFLATED: - cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, - zlib.DEFLATED, -15) - else: - cmpr = None while 1: buf = fp.read(1024 * 8) if not buf: @@ -1174,9 +1275,8 @@ self._writecheck(zinfo) self._didModify = True zinfo.CRC = crc32(data) & 0xffffffff # CRC-32 checksum - if zinfo.compress_type == ZIP_DEFLATED: - co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, - zlib.DEFLATED, -15) + co = _get_compressor(zinfo.compress_type) + if co: data = co.compress(data) + co.flush() zinfo.compress_size = len(data) # Compressed size else: