Index: Misc/NEWS =================================================================== --- Misc/NEWS (revision 74701) +++ Misc/NEWS (working copy) @@ -366,6 +366,8 @@ Library ------- +- Issue #6856: Add a filter keyword argument to TarFile.add(). + - Issue #4937: plat-mac/bundlebuilder revers to non-existing version.plist - Issue #6838: Use a list to accumulate the value instead of Index: Doc/library/tarfile.rst =================================================================== --- Doc/library/tarfile.rst (revision 74701) +++ Doc/library/tarfile.rst (working copy) @@ -389,7 +389,7 @@ and :meth:`close`, and also supports iteration over its lines. -.. method:: TarFile.add(name, arcname=None, recursive=True, exclude=None) +.. method:: TarFile.add(name, arcname=None, recursive=True, exclude=None, filter=None) Add the file *name* to the archive. *name* may be any type of file (directory, fifo, symbolic link, etc.). If given, *arcname* specifies an alternative name @@ -397,12 +397,22 @@ can be avoided by setting *recursive* to :const:`False`. If *exclude* is given it must be a function that takes one filename argument and returns a boolean value. Depending on this value the respective file is either excluded - (:const:`True`) or added (:const:`False`). + (:const:`True`) or added (:const:`False`). If *filter* is specified it must + be a function that takes a :class:`TarInfo` object argument and returns the + changed TarInfo object. If it instead returns :const:`None` the TarInfo + object will be excluded from the archive. .. versionchanged:: 2.6 Added the *exclude* parameter. + .. deprecated:: 2.7 + The *exclude* parameter is deprecated, please use the *filter* parameter + instead. + .. versionchanged:: 2.7 + Added the *filter* parameter. + + .. method:: TarFile.addfile(tarinfo, fileobj=None) Add the :class:`TarInfo` object *tarinfo* to the archive. If *fileobj* is given, @@ -653,7 +663,18 @@ print "something else." 
tar.close() +How to create an archive and reset the user information:: + import tarfile + def reset(tarinfo): + tarinfo.uid = tarinfo.gid = 0 + tarinfo.uname = tarinfo.gname = "root" + return tarinfo + tar = tarfile.open("sample.tar.gz", "w:gz") + tar.add("foo", filter=reset) + tar.close() + + .. _tar-formats: Supported tar formats Index: Lib/tarfile.py =================================================================== --- Lib/tarfile.py (revision 74701) +++ Lib/tarfile.py (working copy) @@ -1918,13 +1918,16 @@ print "link to", tarinfo.linkname, print - def add(self, name, arcname=None, recursive=True, exclude=None): + def add(self, name, arcname=None, recursive=True, exclude=None, filter=None): """Add the file `name' to the archive. `name' may be any type of file (directory, fifo, symbolic link, etc.). If given, `arcname' specifies an alternative name for the file in the archive. Directories are added recursively by default. This can be avoided by setting `recursive' to False. `exclude' is a function that should - return True for each filename to be excluded. + return True for each filename to be excluded. `filter' is a function + that expects a TarInfo object as argument and allows changing it + before it will be added, if `filter' returns None the TarInfo will + be excluded from the archive. """ self._check("aw") @@ -1932,9 +1935,13 @@ arcname = name # Exclude pathnames. - if exclude is not None and exclude(name): - self._dbg(2, "tarfile: Excluded %r" % name) - return + if exclude is not None: + import warnings + warnings.warn("use the filter argument instead", + DeprecationWarning, 2) + if exclude(name): + self._dbg(2, "tarfile: Excluded %r" % name) + return # Skip if somebody tries to archive the archive... if self.name is not None and os.path.abspath(name) == self.name: @@ -1950,6 +1957,13 @@ self._dbg(1, "tarfile: Unsupported type %r" % name) return + # Change or exclude the TarInfo object. 
+ if filter is not None: + tarinfo = filter(tarinfo) + if tarinfo is None: + self._dbg(2, "tarfile: Excluded %r" % name) + return + # Append the tar header and data to the archive. if tarinfo.isreg(): f = bltn_open(name, "rb") @@ -1960,7 +1974,7 @@ self.addfile(tarinfo) if recursive: for f in os.listdir(name): - self.add(os.path.join(name, f), os.path.join(arcname, f), recursive, exclude) + self.add(os.path.join(name, f), os.path.join(arcname, f), recursive, exclude, filter) else: self.addfile(tarinfo) Index: Lib/test/test_tarfile.py =================================================================== --- Lib/test/test_tarfile.py (revision 74701) +++ Lib/test/test_tarfile.py (working copy) @@ -660,6 +660,34 @@ finally: shutil.rmtree(tempdir) + def test_filter(self): + tempdir = os.path.join(TEMPDIR, "filter") + os.mkdir(tempdir) + try: + for name in ("foo", "bar", "baz"): + name = os.path.join(tempdir, name) + open(name, "wb").close() + + def filter(tarinfo): + if os.path.basename(tarinfo.name) == "bar": + return + tarinfo.uid = 123 + tarinfo.uname = "foo" + return tarinfo + + tar = tarfile.open(tmpname, self.mode, encoding="iso8859-1") + tar.add(tempdir, arcname="empty_dir", filter=filter) + tar.close() + + tar = tarfile.open(tmpname, "r") + for tarinfo in tar: + self.assertEqual(tarinfo.uid, 123) + self.assertEqual(tarinfo.uname, "foo") + self.assertEqual(len(tar.getmembers()), 3) + tar.close() + finally: + shutil.rmtree(tempdir) + # Guarantee that stored pathnames are not modified. Don't # remove ./ or ../ or double slashes. Still make absolute # pathnames relative.