diff -r 327495e4aae9 -r 305c43772f2a Doc/library/zipfile.rst --- a/Doc/library/zipfile.rst Tue Mar 20 11:06:45 2012 +0200 +++ b/Doc/library/zipfile.rst Tue Mar 20 12:37:49 2012 +0200 @@ -87,7 +87,15 @@ .. data:: ZIP_DEFLATED The numeric constant for the usual ZIP compression method. This requires the - zlib module. No other compression methods are currently supported. + zlib module. + + +.. data:: ZIP_BZIP2 + + The numeric constant for the BZIP2 compression method. This requires the + bz2 module. + + .. versionadded:: 3.3 .. seealso:: @@ -118,9 +126,11 @@ adding a ZIP archive to another file (such as :file:`python.exe`). If *mode* is ``a`` and the file does not exist at all, it is created. *compression* is the ZIP compression method to use when writing the archive, - and should be :const:`ZIP_STORED` or :const:`ZIP_DEFLATED`; unrecognized - values will cause :exc:`RuntimeError` to be raised. If :const:`ZIP_DEFLATED` - is specified but the :mod:`zlib` module is not available, :exc:`RuntimeError` + and should be :const:`ZIP_STORED`, :const:`ZIP_DEFLATED` or + :const:`ZIP_BZIP2`; unrecognized + values will cause :exc:`RuntimeError` to be raised. If :const:`ZIP_DEFLATED` or + :const:`ZIP_BZIP2` is specified but the corresponding module + (:mod:`zlib` or :mod:`bz2`) is not available, :exc:`RuntimeError` is also raised. The default is :const:`ZIP_STORED`. If *allowZip64* is ``True`` zipfile will create ZIP files that use the ZIP64 extensions when the zipfile is larger than 2 GB. If it is false (the default) :mod:`zipfile` @@ -143,6 +153,9 @@ .. versionadded:: 3.2 Added the ability to use :class:`ZipFile` as a context manager. + .. versionchanged:: 3.3 + Added support for :mod:`bz2` compression. + .. 
method:: ZipFile.close() diff -r 327495e4aae9 -r 305c43772f2a Lib/zipfile.py --- a/Lib/zipfile.py Tue Mar 20 11:06:45 2012 +0200 +++ b/Lib/zipfile.py Tue Mar 20 12:37:49 2012 +0200 @@ -22,7 +22,13 @@ zlib = None crc32 = binascii.crc32 -__all__ = ["BadZipFile", "BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", +try: + import bz2 # We may need its compression method +except ImportError: + bz2 = None + +__all__ = ["BadZipFile", "BadZipfile", "error", + "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2", "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"] class BadZipFile(Exception): @@ -45,8 +51,13 @@ # constants for Zip file compression methods ZIP_STORED = 0 ZIP_DEFLATED = 8 +ZIP_BZIP2 = 12 # Other ZIP compression methods not supported +DEFAULT_VERSION = 20 +ZIP64_VERSION = 45 +BZIP2_VERSION = 46 + # Below are some formats and associated data for reading/writing headers using # the struct module. The names and structures of headers/records are those used # in the PKWARE description of the ZIP file format: @@ -313,8 +324,8 @@ else: # Assume everything else is unix-y self.create_system = 3 # System which created ZIP archive - self.create_version = 20 # Version which created ZIP archive - self.extract_version = 20 # Version needed to extract archive + self.create_version = DEFAULT_VERSION # Version which created ZIP archive + self.extract_version = DEFAULT_VERSION # Version needed to extract archive self.reserved = 0 # Must be zero self.flag_bits = 0 # ZIP flag bits self.volume = 0 # Volume number of file header @@ -341,6 +352,7 @@ extra = self.extra + min_version = 0 if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT: # File is larger than what fits into a 4 byte integer, # fall back to the ZIP64 extension @@ -349,9 +361,13 @@ 1, struct.calcsize(fmt)-4, file_size, compress_size) file_size = 0xffffffff compress_size = 0xffffffff - self.extract_version = max(45, self.extract_version) - self.create_version = max(45, self.extract_version) + min_version = 
ZIP64_VERSION + if self.compress_type == ZIP_BZIP2: + min_version = max(BZIP2_VERSION, min_version) + + self.extract_version = max(min_version, self.extract_version) + self.create_version = max(min_version, self.create_version) filename, flag_bits = self._encodeFilenameFlags() header = struct.pack(structFileHeader, stringFileHeader, self.extract_version, self.reserved, flag_bits, @@ -461,6 +477,41 @@ self._UpdateKeys(c) return c + +def _check_compression(compression): + if compression == ZIP_STORED: + pass + elif compression == ZIP_DEFLATED: + if not zlib: + raise RuntimeError( + "Compression requires the (missing) zlib module") + elif compression == ZIP_BZIP2: + if not bz2: + raise RuntimeError( + "Compression requires the (missing) bz2 module") + else: + raise RuntimeError("That compression method is not supported") + + +def _get_compressor(compress_type): + if compress_type == ZIP_DEFLATED: + return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, + zlib.DEFLATED, -15) + elif compress_type == ZIP_BZIP2: + return bz2.BZ2Compressor() + else: + return None + + +def _get_decompressor(compress_type): + if compress_type == ZIP_DEFLATED: + return zlib.decompressobj(-15) + elif compress_type == ZIP_BZIP2: + return bz2.BZ2Decompressor() + else: + return None + + class ZipExtFile(io.BufferedIOBase): """File-like object for reading an archive member. Is returned by ZipFile.open(). @@ -485,9 +536,7 @@ self._compress_size = zipinfo.compress_size self._compress_left = zipinfo.compress_size - if self._compress_type == ZIP_DEFLATED: - self._decompressor = zlib.decompressobj(-15) - self._unconsumed = b'' + self._decompressor = _get_decompressor(self._compress_type) self._readbuffer = b'' self._offset = 0 @@ -609,42 +658,44 @@ # Bytes available in read buffer. len_readbuffer = len(self._readbuffer) - self._offset - # Read from file. 
- if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed): - nbytes = n - len_readbuffer - len(self._unconsumed) - nbytes = max(nbytes, self.MIN_READ_SIZE) - nbytes = min(nbytes, self._compress_left) + if n > len_readbuffer: + nbytes = n - len_readbuffer + if self._compress_type == ZIP_DEFLATED: + nbytes -= len(self._decompressor.unconsumed_tail) - data = self._fileobj.read(nbytes) - self._compress_left -= len(data) + # Read from file. + if self._compress_left > 0 and nbytes > 0: + nbytes = max(nbytes, self.MIN_READ_SIZE) + nbytes = min(nbytes, self._compress_left) - if data and self._decrypter is not None: - data = bytes(map(self._decrypter, data)) + data = self._fileobj.read(nbytes) + self._compress_left -= len(data) - if self._compress_type == ZIP_STORED: - self._update_crc(data, eof=(self._compress_left==0)) + if data and self._decrypter is not None: + data = bytes(map(self._decrypter, data)) + else: + data = b'' + + # Handle unconsumed data. + if self._compress_type == ZIP_DEFLATED: + data = self._decompressor.unconsumed_tail + data + if data: + if self._compress_type == ZIP_STORED: + eof = self._compress_left == 0 + elif self._compress_type == ZIP_DEFLATED: + nbytes = max(n - len_readbuffer, self.MIN_READ_SIZE) + data = self._decompressor.decompress(data, nbytes) + eof = (self._compress_left == 0 and + not self._decompressor.unconsumed_tail) + if eof: + data += self._decompressor.flush() + else: + data = self._decompressor.decompress(data) + eof = self._decompressor.eof or self._compress_left == 0 + + self._update_crc(data, eof=eof) self._readbuffer = self._readbuffer[self._offset:] + data self._offset = 0 - else: - # Prepare deflated bytes for decompression. - self._unconsumed += data - - # Handle unconsumed data. 
- if (len(self._unconsumed) > 0 and n > len_readbuffer and - self._compress_type == ZIP_DEFLATED): - data = self._decompressor.decompress( - self._unconsumed, - max(n - len_readbuffer, self.MIN_READ_SIZE) - ) - - self._unconsumed = self._decompressor.unconsumed_tail - eof = len(self._unconsumed) == 0 and self._compress_left == 0 - if eof: - data += self._decompressor.flush() - - self._update_crc(data, eof=eof) - self._readbuffer = self._readbuffer[self._offset:] + data - self._offset = 0 # Read from buffer. data = self._readbuffer[self._offset: self._offset + n] @@ -667,7 +718,8 @@ file: Either the path to the file, or a file-like object. If it is a path, the file will be opened and closed by ZipFile. mode: The mode can be either read "r", write "w" or append "a". - compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib). + compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib) or + ZIP_BZIP2 (requires bz2). allowZip64: if True ZipFile will create files with ZIP64 extensions when needed, otherwise it will raise an exception when this would be necessary. 
@@ -681,14 +733,7 @@ if mode not in ("r", "w", "a"): raise RuntimeError('ZipFile() requires mode "r", "w", or "a"') - if compression == ZIP_STORED: - pass - elif compression == ZIP_DEFLATED: - if not zlib: - raise RuntimeError( - "Compression requires the (missing) zlib module") - else: - raise RuntimeError("That compression method is not supported") + _check_compression(compression) self._allowZip64 = allowZip64 self._didModify = False @@ -1052,11 +1097,7 @@ if not self.fp: raise RuntimeError( "Attempt to write ZIP archive that was already closed") - if zinfo.compress_type == ZIP_DEFLATED and not zlib: - raise RuntimeError( - "Compression requires the (missing) zlib module") - if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED): - raise RuntimeError("That compression method is not supported") + _check_compression(zinfo.compress_type) if zinfo.file_size > ZIP64_LIMIT: if not self._allowZip64: raise LargeZipFile("Filesize would require ZIP64 extensions") @@ -1107,17 +1148,13 @@ self.fp.write(zinfo.FileHeader()) return + cmpr = _get_compressor(zinfo.compress_type) with open(filename, "rb") as fp: # Must overwrite CRC and sizes with correct data later zinfo.CRC = CRC = 0 zinfo.compress_size = compress_size = 0 zinfo.file_size = file_size = 0 self.fp.write(zinfo.FileHeader()) - if zinfo.compress_type == ZIP_DEFLATED: - cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, - zlib.DEFLATED, -15) - else: - cmpr = None while 1: buf = fp.read(1024 * 8) if not buf: @@ -1174,9 +1211,8 @@ self._writecheck(zinfo) self._didModify = True zinfo.CRC = crc32(data) & 0xffffffff # CRC-32 checksum - if zinfo.compress_type == ZIP_DEFLATED: - co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, - zlib.DEFLATED, -15) + co = _get_compressor(zinfo.compress_type) + if co: data = co.compress(data) + co.flush() zinfo.compress_size = len(data) # Compressed size else: @@ -1228,18 +1264,20 @@ header_offset = zinfo.header_offset extra_data = zinfo.extra + min_version = 0 if extra: # Append a ZIP64 
field to the extra's extra_data = struct.pack( '