tarfile: accept os.PathLike objects for member and file names.

TarFile.getmember(), gettarinfo(), extract() and extractfile() convert
path-like arguments with os.fspath(), so every entry point follows the same
path protocol.  The new tests look up archive members through
pathlib.PurePosixPath because the member names they use ("ustar/regtype")
contain forward slashes, which a platform-native pathlib.Path would rewrite
on Windows.

diff --git a/Lib/tarfile.py b/Lib/tarfile.py
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -1731,21 +1731,21 @@ class TarFile(object):
         finally:
             if not self._extfileobj:
                 self.fileobj.close()
 
     def getmember(self, name):
         """Return a TarInfo object for member `name'. If `name' can not be
            found in the archive, KeyError is raised. If a member occurs more
            than once in the archive, its last occurrence is assumed to be the
            most up-to-date version.
         """
-        tarinfo = self._getmember(name)
+        tarinfo = self._getmember(os.fspath(name))
         if tarinfo is None:
             raise KeyError("filename %r not found" % name)
         return tarinfo
 
     def getmembers(self):
         """Return the members of the archive as a list of TarInfo objects. The
            list has the same order as the members in the archive.
         """
         self._check()
         if not self._loaded:    # if we want to obtain a list of
@@ -1758,25 +1758,30 @@ class TarFile(object):
            the same order as the list returned by getmembers().
         """
         return [tarinfo.name for tarinfo in self.getmembers()]
 
     def gettarinfo(self, name=None, arcname=None, fileobj=None):
         """Create a TarInfo object from the result of os.stat or equivalent
            on an existing file. The file is either named by `name', or
            specified as a file object `fileobj' with a file descriptor. If
            given, `arcname' specifies an alternative name for the file in the
            archive, otherwise, the name is taken from the 'name' attribute of
-           'fileobj', or the 'name' argument. The name should be a text
-           string.
+           'fileobj', or the 'name' argument. 'name' and 'arcname' should be a
+           text string or os.PathLike.
         """
         self._check("awx")
 
+        # coerce name and arcname to str
+        if isinstance(name, os.PathLike):
+            name = os.fspath(name)
+        if isinstance(arcname, os.PathLike):
+            arcname = os.fspath(arcname)
         # When fileobj is given, replace name by
         # fileobj's real name.
         if fileobj is not None:
             name = fileobj.name
 
         # Building the name of the member in the archive.
         # Backward slashes are converted to forward slashes,
         # Absolute paths are turned to relative paths.
         if arcname is None:
             arcname = name
@@ -2021,20 +2026,22 @@ class TarFile(object):
         """Extract a member from the archive to the current working directory,
            using its full name. Its file information is extracted as accurately
            as possible. `member' may be a filename or a TarInfo object. You can
            specify a different directory using `path'. File attributes (owner,
            mtime, mode) are set unless `set_attrs' is False. If `numeric_owner`
            is True, only the numbers for user/group names are used and not
            the names.
         """
         self._check("r")
 
+        if isinstance(member, os.PathLike):
+            member = os.fspath(member)
         if isinstance(member, str):
             tarinfo = self.getmember(member)
         else:
             tarinfo = member
 
         # Prepare the link target for makelink().
         if tarinfo.islnk():
             tarinfo._link_target = os.path.join(path, tarinfo.linkname)
 
         try:
@@ -2056,20 +2063,22 @@ class TarFile(object):
                 self._dbg(1, "tarfile: %s" % e)
 
     def extractfile(self, member):
         """Extract a member from the archive as a file object. `member' may be
            a filename or a TarInfo object. If `member' is a regular file or a
            link, an io.BufferedReader object is returned. Otherwise, None is
            returned.
         """
         self._check("r")
 
+        if isinstance(member, os.PathLike):
+            member = os.fspath(member)
         if isinstance(member, str):
             tarinfo = self.getmember(member)
         else:
             tarinfo = member
 
         if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
             # Members with unknown types are treated as regular files.
             return self.fileobject(self, tarinfo)
 
         elif tarinfo.islnk() or tarinfo.issym():
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -1,15 +1,16 @@
 import sys
 import os
 import io
 from hashlib import md5
 from contextlib import contextmanager
+import pathlib
 
 import unittest
 import unittest.mock
 import tarfile
 
 from test import support
 from test.support import script_helper
 
 # Check for our compression modules.
 try:
@@ -89,38 +90,65 @@ class UstarReadTest(ReadTest, unittest.T
 
     def test_fileobj_regular_file(self):
         tarinfo = self.tar.getmember("ustar/regtype")
         with self.tar.extractfile(tarinfo) as fobj:
             data = fobj.read()
             self.assertEqual(len(data), tarinfo.size,
                     "regular file extraction failed")
             self.assertEqual(md5sum(data), md5_regtype,
                     "regular file extraction failed")
 
+    def test_fileobj_pathlike(self):
+        tarinfo = self.tar.getmember(pathlib.PurePosixPath("ustar/regtype"))
+        with self.tar.extractfile(tarinfo) as fobj:
+            data = fobj.read()
+            self.assertEqual(len(data), tarinfo.size,
+                    "regular file extraction failed")
+            self.assertEqual(md5sum(data), md5_regtype,
+                    "regular file extraction failed")
+
     def test_fileobj_readlines(self):
         self.tar.extract("ustar/regtype", TEMPDIR)
         tarinfo = self.tar.getmember("ustar/regtype")
         with open(os.path.join(TEMPDIR, "ustar/regtype"), "r") as fobj1:
             lines1 = fobj1.readlines()
 
         with self.tar.extractfile(tarinfo) as fobj:
             fobj2 = io.TextIOWrapper(fobj)
             lines2 = fobj2.readlines()
             self.assertEqual(lines1, lines2,
                     "fileobj.readlines() failed")
             self.assertEqual(len(lines2), 114,
                     "fileobj.readlines() failed")
             self.assertEqual(lines2[83],
                     "I will gladly admit that Python is not the fastest "
                     "running scripting language.\n",
                     "fileobj.readlines() failed")
 
+    def test_fileobj_readlines_pathlike(self):
+        self.tar.extract(pathlib.PurePosixPath("ustar/regtype"), TEMPDIR)
+        tarinfo = self.tar.getmember("ustar/regtype")
+        with open(os.path.join(TEMPDIR, "ustar/regtype"), "r") as fobj1:
+            lines1 = fobj1.readlines()
+
+        with self.tar.extractfile(tarinfo) as fobj:
+            fobj2 = io.TextIOWrapper(fobj)
+            lines2 = fobj2.readlines()
+            self.assertEqual(lines1, lines2,
+                    "fileobj.readlines() failed")
+            self.assertEqual(len(lines2), 114,
+                    "fileobj.readlines() failed")
+            self.assertEqual(lines2[83],
+                    "I will gladly admit that Python is not the fastest "
+                    "running scripting language.\n",
+                    "fileobj.readlines() failed")
+
     def test_fileobj_iter(self):
         self.tar.extract("ustar/regtype", TEMPDIR)
         tarinfo = self.tar.getmember("ustar/regtype")
         with open(os.path.join(TEMPDIR, "ustar/regtype"), "r") as fobj1:
             lines1 = fobj1.readlines()
         with self.tar.extractfile(tarinfo) as fobj2:
             lines2 = list(io.TextIOWrapper(fobj2))
             self.assertEqual(lines1, lines2,
                     "fileobj.__iter__() failed")
 
@@ -1066,20 +1094,36 @@ class WriteTest(WriteTestBase, unittest.
             tarinfo = tar.gettarinfo(path)
             self.assertEqual(tarinfo.size, 0)
 
             with open(path, "wb") as fobj:
                 fobj.write(b"aaa")
             tarinfo = tar.gettarinfo(path)
             self.assertEqual(tarinfo.size, 3)
         finally:
             tar.close()
 
+    def test_file_size_with_pathlike(self):
+        tar = tarfile.open(tmpname, self.mode)
+        try:
+            path = os.path.join(TEMPDIR, "file")
+            with open(path, "wb"):
+                pass
+            tarinfo = tar.gettarinfo(pathlib.Path(path))
+            self.assertEqual(tarinfo.size, 0)
+
+            with open(path, "wb") as fobj:
+                fobj.write(b"aaa")
+            tarinfo = tar.gettarinfo(pathlib.Path(path))
+            self.assertEqual(tarinfo.size, 3)
+        finally:
+            tar.close()
+
     def test_directory_size(self):
         path = os.path.join(TEMPDIR, "directory")
         os.mkdir(path)
         try:
             tar = tarfile.open(tmpname, self.mode)
             try:
                 tarinfo = tar.gettarinfo(path)
                 self.assertEqual(tarinfo.size, 0)
             finally:
                 tar.close()
@@ -1195,20 +1239,57 @@ class WriteTest(WriteTestBase, unittest.
             try:
                 for tarinfo in tar:
                     self.assertEqual(tarinfo.uid, 123)
                     self.assertEqual(tarinfo.uname, "foo")
                 self.assertEqual(len(tar.getmembers()), 3)
             finally:
                 tar.close()
         finally:
             support.rmtree(tempdir)
 
+    def test_filter_with_pathlike(self):
+        tempdir = os.path.join(TEMPDIR, "filter")
+        os.mkdir(tempdir)
+        try:
+            for name in ("foo", "bar", "baz"):
+                name = os.path.join(tempdir, name)
+                support.create_empty_file(name)
+
+            def filter(tarinfo):
+                if os.path.basename(tarinfo.name) == "bar":
+                    return
+                tarinfo.uid = 123
+                tarinfo.uname = "foo"
+                return tarinfo
+
+            tar = tarfile.open(tmpname, self.mode, encoding="iso8859-1")
+            try:
+                tar.add(tempdir, arcname=pathlib.Path("empty_dir"),
+                        filter=filter)
+            finally:
+                tar.close()
+
+            # Verify that filter is a keyword-only argument
+            with self.assertRaises(TypeError):
+                tar.add(tempdir, "empty_dir", True, None, filter)
+
+            tar = tarfile.open(tmpname, "r")
+            try:
+                for tarinfo in tar:
+                    self.assertEqual(tarinfo.uid, 123)
+                    self.assertEqual(tarinfo.uname, "foo")
+                self.assertEqual(len(tar.getmembers()), 3)
+            finally:
+                tar.close()
+        finally:
+            support.rmtree(tempdir)
+
     # Guarantee that stored pathnames are not modified. Don't
     # remove ./ or ../ or double slashes. Still make absolute
     # pathnames relative.
     # For details see bug #6054.
     def _test_pathname(self, path, cmp_path=None, dir=False):
         # Create a tarfile with an empty member named path
         # and compare the stored name with the original.
         foo = os.path.join(TEMPDIR, "foo")
         if not dir:
             support.create_empty_file(foo)