diff -r c81dd9fe0d87 Lib/test/test_zipfile.py
--- a/Lib/test/test_zipfile.py Mon Apr 13 11:30:56 2015 -0400
+++ b/Lib/test/test_zipfile.py Wed Apr 15 21:36:09 2015 -0400
@@ -361,6 +361,46 @@
                 self.assertIn('[closed]', repr(zipopen))
             self.assertIn('[closed]', repr(zipfp))
 
+    def zip_filter_file_from_existing_test(self, file_to_remove, all_files_in_zip, compression):
+        # Shared helper: copy the archive at TESTFN to TESTFN2 via filter(),
+        # dropping file_to_remove, then verify the copy lists the other names.
+        self.make_test_archive(TESTFN, compression)
+        with zipfile.ZipFile(TESTFN, mode="r") as zipfp:
+            zipfp.filter(TESTFN2, lambda x: str(x) != str(file_to_remove))
+        remaining = all_files_in_zip[:]
+        remaining.remove(file_to_remove)
+        remaining.sort()
+
+        # Check result does not have the removed file
+        with zipfile.ZipFile(TESTFN2) as test_zip:
+            # Check the namelist
+            self.assertListEqual(remaining, sorted(test_zip.namelist()))
+
+            # Check the infolist
+            self.assertListEqual(remaining, sorted(i.filename for i in test_zip.infolist()))
+
+            # Check that testzip doesn't have errors
+            self.assertIsNone(test_zip.testzip())
+
+        # Check file got smaller
+        self.assertLess(os.path.getsize(TESTFN2), os.path.getsize(TESTFN))
+
+    def test_filter_file_from_existing(self):
+        filename_list = [TESTFN, 'another.name', 'strfile']
+        for file_to_remove in filename_list:
+            self.zip_filter_file_from_existing_test(file_to_remove, filename_list, self.compression)
+
+    def test_filter_unfound_file_from_existing(self):
+        self.make_test_archive(TESTFN, self.compression)
+        with zipfile.ZipFile(TESTFN, mode="r") as zipfp:
+            zipfp.filter(TESTFN2, lambda x: True)
+
+        # Check file is exactly the same
+        self.assertEqual(os.path.getsize(TESTFN2), os.path.getsize(TESTFN))
+        # Close both files deterministically instead of leaking the handles.
+        with open(TESTFN2, 'rb') as copied, open(TESTFN, 'rb') as original:
+            self.assertEqual(copied.read(), original.read())
+
     def tearDown(self):
         unlink(TESTFN)
         unlink(TESTFN2)
diff -r c81dd9fe0d87 Lib/zipfile.py
--- a/Lib/zipfile.py Mon Apr 13 11:30:56 2015 -0400
+++ b/Lib/zipfile.py Wed Apr 15 21:36:09 2015 -0400
@@ -3,6 +3,7 @@
 
 XXX references to utf-8 need further investigation.
"""
+import copy
 import io
 import os
 import re
@@ -947,6 +948,22 @@
             data = bytes(map(self._decrypter, data))
         return data
 
+    def read_raw(self, n):
+        """Read up to n bytes of the file data without applying any filters.
+
+        The data is returned exactly as read from the underlying file
+        object.  The request is capped at self._compress_left, which is
+        then reduced by the amount requested.
+
+        Raises ValueError if n is negative.
+        """
+        if n < 0:
+            raise ValueError('n must be non-negative')
+        amount = min(self._compress_left, n)
+        self._compress_left -= amount
+        self._eol = not self._compress_left
+        return self._fileobj.read(amount)
+
     def close(self):
         try:
             if self._close_fileobj:
@@ -975,6 +992,9 @@
     fp = None # Set here since __del__ checks it
     _windows_illegal_name_trans_table = None
 
+    # Size of each raw read when copying a member: 16 blocks at a time
+    COPY_READ_SIZE = 16 * ZipExtFile.MIN_READ_SIZE
+
     def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True):
         """Open the ZIP file with mode read 'r', write 'w', exclusive create 'x',
         or append 'a'."""
@@ -1343,6 +1363,60 @@
         for zipinfo in members:
             self.extract(zipinfo, path, pwd)
 
+    def filter(self, pathname, filterfunc):
+        """Copy every member accepted by filterfunc into a new zipfile.
+
+        pathname: A path for the new zipfile; it is opened in 'w' mode.
+
+        filterfunc: called as filterfunc(member_filename); the member is
+        copied to the destination only when it returns a true value.
+        The members are copied without being recompressed.
+        """
+        with ZipFile(pathname, 'w', compression=self.compression,
+                     allowZip64=self._allowZip64) as new_zip:
+            for zinfo in self.filelist:
+                if filterfunc(zinfo.filename):
+                    self._copy_compressed(zinfo, new_zip)
+
+    def _copy_compressed(self, member, target_zip):
+        """copy ZipInfo member from the archive using the already
+        compressed bytes.
+ + member: a ZipInfo of this ZipFile + + target_zip: An existing ZipFile to receive the member + """ + target_zip._didModify = True + central_zinfo = copy.copy(member) + central_zinfo.header_offset = target_zip.fp.tell() # update start of header + target_zip.filelist.append(central_zinfo) + file_zinfo = copy.copy(central_zinfo) + + zip64 = file_zinfo.file_size > ZIP64_LIMIT or file_zinfo.compress_size > ZIP64_LIMIT + if zip64 and not self._allowZip64: + raise LargeZipFile("Filesize would require ZIP64 extensions") + if file_zinfo.file_size <= ZIP64_LIMIT: + # extra is optional outside the master contents, so like many + # implementations we omit it when we can. It is only needed + # for recovery of large files. + file_zinfo.extra = b'' + + target_zip.fp.write(file_zinfo.FileHeader(zip64)) + with self.open(member, 'r') as zipped_file: + while True: + data = zipped_file.read_raw(self.COPY_READ_SIZE) + if not data: + break + target_zip.fp.write(data) + + if file_zinfo.flag_bits & 0x08: + # Write CRC and file sizes after the file data + fmt = '