diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -390,19 +390,20 @@ be finalized; only the internally used f .. versionchanged:: 3.5 Added the *numeric_only* parameter. .. method:: TarFile.extract(member, path="", set_attrs=True, *, numeric_owner=False) Extract a member from the archive to the current working directory, using its full name. Its file information is extracted as accurately as possible. *member* - may be a filename or a :class:`TarInfo` object. You can specify a different - directory using *path*. File attributes (owner, mtime, mode) are set unless - *set_attrs* is false. + may be a filename, a :class:`TarInfo` object or a :term:`path-like object`. + You can specify a different directory using *path*. *path* may be a + :term:`path-like object`. + File attributes (owner, mtime, mode) are set unless *set_attrs* is false. If *numeric_owner* is :const:`True`, the uid and gid numbers from the tarfile are used to set the owner/group for the extracted files. Otherwise, the named values from the tarfile are used. .. note:: The :meth:`extract` method does not take care of several extraction issues. @@ -413,26 +414,33 @@ be finalized; only the internally used f See the warning for :meth:`extractall`. .. versionchanged:: 3.2 Added the *set_attrs* parameter. .. versionchanged:: 3.5 Added the *numeric_only* parameter. + .. versionchanged:: 3.6 + The *member* and *path* parameters accept a :term:`path-like object`. + .. method:: TarFile.extractfile(member) - Extract a member from the archive as a file object. *member* may be a filename - or a :class:`TarInfo` object. If *member* is a regular file or a link, an + Extract a member from the archive as a file object. *member* may be a filename, + a :class:`TarInfo` object or a :term:`path-like object`. If *member* is a + regular file or a link, an :class:`io.BufferedReader` object is returned. Otherwise, :const:`None` is returned. .. 
versionchanged:: 3.3 Return an :class:`io.BufferedReader` object. + .. versionchanged:: 3.6 + The *member* parameter accepts a :term:`path-like object`. + .. method:: TarFile.add(name, arcname=None, recursive=True, exclude=None, *, filter=None) Add the file *name* to the archive. *name* may be any type of file (directory, fifo, symbolic link, etc.). If given, *arcname* specifies an alternative name for the file in the archive. Directories are added recursively by default. This can be avoided by setting *recursive* to :const:`False`. If *exclude* is given, it must be a function that takes one @@ -463,26 +471,28 @@ be finalized; only the internally used f .. method:: TarFile.gettarinfo(name=None, arcname=None, fileobj=None) Create a :class:`TarInfo` object from the result of :func:`os.stat` or equivalent on an existing file. The file is either named by *name*, or specified as a :term:`file object` *fileobj* with a file descriptor. If given, *arcname* specifies an alternative name for the file in the archive, otherwise, the name is taken from *fileobj*’s :attr:`~io.FileIO.name` attribute, or the *name* argument. The name - should be a text string. + should be a text string or a :term:`path-like object`. You can modify some of the :class:`TarInfo`’s attributes before you add it using :meth:`addfile`. If the file object is not an ordinary file object positioned at the beginning of the file, attributes such as :attr:`~TarInfo.size` may need modifying. This is the case for objects such as :class:`~gzip.GzipFile`. The :attr:`~TarInfo.name` may also be modified, in which case *arcname* could be a dummy string. + .. versionchanged:: 3.6 + The *name* parameter accepts a :term:`path-like object`. .. method:: TarFile.close() Close the :class:`TarFile`. In write mode, two finishing zero blocks are appended to the archive. .. 
attribute:: TarFile.pax_headers diff --git a/Lib/tarfile.py b/Lib/tarfile.py --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -1733,17 +1733,17 @@ class TarFile(object): self.fileobj.close() def getmember(self, name): """Return a TarInfo object for member `name'. If `name' can not be found in the archive, KeyError is raised. If a member occurs more than once in the archive, its last occurrence is assumed to be the most up-to-date version. """ - tarinfo = self._getmember(name) + tarinfo = self._getmember(os.fspath(name)) if tarinfo is None: raise KeyError("filename %r not found" % name) return tarinfo def getmembers(self): """Return the members of the archive as a list of TarInfo objects. The list has the same order as the members in the archive. """ @@ -1760,21 +1760,25 @@ class TarFile(object): return [tarinfo.name for tarinfo in self.getmembers()] def gettarinfo(self, name=None, arcname=None, fileobj=None): """Create a TarInfo object from the result of os.stat or equivalent on an existing file. The file is either named by `name', or specified as a file object `fileobj' with a file descriptor. If given, `arcname' specifies an alternative name for the file in the archive, otherwise, the name is taken from the 'name' attribute of - 'fileobj', or the 'name' argument. The name should be a text - string. + 'fileobj', or the 'name' argument. 'name' and 'arcname' should be a + text string or os.PathLike. """ self._check("awx") + if isinstance(name, os.PathLike): + name = os.fspath(name) + if isinstance(arcname, os.PathLike): + arcname = os.fspath(arcname) # When fileobj is given, replace name by # fileobj's real name. if fileobj is not None: name = fileobj.name # Building the name of the member in the archive. # Backward slashes are converted to forward slashes, # Absolute paths are turned to relative paths. 
@@ -2015,25 +2019,25 @@ class TarFile(object): if self.errorlevel > 1: raise else: self._dbg(1, "tarfile: %s" % e) def extract(self, member, path="", set_attrs=True, *, numeric_owner=False): """Extract a member from the archive to the current working directory, using its full name. Its file information is extracted as accurately - as possible. `member' may be a filename or a TarInfo object. You can - specify a different directory using `path'. File attributes (owner, + as possible. `member' may be a filename, a TarInfo or PathLike object. + You can specify a different directory using `path'. File attributes (owner, mtime, mode) are set unless `set_attrs' is False. If `numeric_owner` is True, only the numbers for user/group names are used and not the names. """ self._check("r") - if isinstance(member, str): + if isinstance(member, (str, os.PathLike)): tarinfo = self.getmember(member) else: tarinfo = member # Prepare the link target for makelink(). if tarinfo.islnk(): tarinfo._link_target = os.path.join(path, tarinfo.linkname) @@ -2058,17 +2062,17 @@ class TarFile(object): def extractfile(self, member): """Extract a member from the archive as a file object. `member' may be a filename or a TarInfo object. If `member' is a regular file or a link, an io.BufferedReader object is returned. Otherwise, None is returned. """ self._check("r") - if isinstance(member, str): + if isinstance(member, (str, os.PathLike)): tarinfo = self.getmember(member) else: tarinfo = member if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES: # Members with unknown types are treated as regular files. 
return self.fileobject(self, tarinfo) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -1,13 +1,14 @@ import sys import os import io from hashlib import md5 from contextlib import contextmanager +import pathlib import unittest import unittest.mock import tarfile from test import support from test.support import script_helper @@ -91,16 +92,34 @@ class UstarReadTest(ReadTest, unittest.T tarinfo = self.tar.getmember("ustar/regtype") with self.tar.extractfile(tarinfo) as fobj: data = fobj.read() self.assertEqual(len(data), tarinfo.size, "regular file extraction failed") self.assertEqual(md5sum(data), md5_regtype, "regular file extraction failed") + def test_extractfile_pathlike(self): + path = pathlib.Path("ustar/regtype") + tarinfo = self.tar.getmember(path) + with self.tar.extractfile(path) as fobj: + data = fobj.read() + self.assertEqual(len(data), tarinfo.size) + self.assertEqual(md5sum(data), md5_regtype) + + def test_extract_pathlike(self): + path = pathlib.Path("ustar/regtype") + tmpdir = pathlib.Path(TEMPDIR) + extracted_file = tmpdir / path + tarinfo = self.tar.getmember(path) + self.tar.extract(path, tmpdir) + with open(extracted_file) as fobj: + data = fobj.read() + self.assertEqual(len(data), tarinfo.size) + def test_fileobj_readlines(self): self.tar.extract("ustar/regtype", TEMPDIR) tarinfo = self.tar.getmember("ustar/regtype") with open(os.path.join(TEMPDIR, "ustar/regtype"), "r") as fobj1: lines1 = fobj1.readlines() with self.tar.extractfile(tarinfo) as fobj: fobj2 = io.TextIOWrapper(fobj) @@ -1068,16 +1087,25 @@ class WriteTest(WriteTestBase, unittest. 
with open(path, "wb") as fobj: fobj.write(b"aaa") tarinfo = tar.gettarinfo(path) self.assertEqual(tarinfo.size, 3) finally: tar.close() + def test_file_size_with_pathlike(self): + with tarfile.open(tmpname, self.mode) as tar: + path = pathlib.Path(TEMPDIR) / "file" + with open(path, "wb") as fobj: + fobj.write(b"aaa") + tarinfo = tar.gettarinfo(path) + self.assertEqual(tarinfo.size, 3) + self.assertIsInstance(tarinfo.name, str) + def test_directory_size(self): path = os.path.join(TEMPDIR, "directory") os.mkdir(path) try: tar = tarfile.open(tmpname, self.mode) try: tarinfo = tar.gettarinfo(path) self.assertEqual(tarinfo.size, 0)