diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -367,7 +367,7 @@ available. -.. method:: TarFile.extractall(path=".", members=None) +.. method:: TarFile.extractall(path=".", members=None, numeric_owner=False) Extract all members from the archive to the current working directory or directory *path*. If optional *members* is given, it must be a subset of the @@ -377,6 +377,9 @@ reset each time a file is created in it. And, if a directory's permissions do not allow writing, extracting files to it will fail. + If *numeric_owner* is :const:`True`, only the (uid, gid) numbers are used to + set the owner/group for the extracted files. + .. warning:: Never extract archives from untrusted sources without prior inspection. @@ -384,14 +387,18 @@ that have absolute filenames starting with ``"/"`` or filenames with two dots ``".."``. + .. versionchanged:: 3.5 + Added the *numeric_only* parameter. -.. method:: TarFile.extract(member, path="", set_attrs=True) + +.. method:: TarFile.extract(member, path="", set_attrs=True, numeric_owner=False) Extract a member from the archive to the current working directory, using its full name. Its file information is extracted as accurately as possible. *member* may be a filename or a :class:`TarInfo` object. You can specify a different directory using *path*. File attributes (owner, mtime, mode) are set unless - *set_attrs* is false. + *set_attrs* is false. If *numeric_owner* is :const:`True`, only the (uid, gid) + numbers are used to set the owner/group for the extracted files. .. note:: @@ -405,6 +412,9 @@ .. versionchanged:: 3.2 Added the *set_attrs* parameter. + .. versionchanged:: 3.5 + Added the *numeric_only* parameter. + .. method:: TarFile.extractfile(member) Extract a member from the archive as a file object. *member* may be a filename @@ -827,4 +837,3 @@ because all the metadata is stored using *UTF-8*. *encoding* is only used in the rare cases when binary pax headers are decoded or when strings with surrogate characters are stored. - diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst --- a/Doc/whatsnew/3.5.rst +++ b/Doc/whatsnew/3.5.rst @@ -492,6 +492,13 @@ * The :func:`tarfile.open` function now supports ``'x'`` (exclusive creation) mode. (Contributed by Berker Peksag in :issue:`21717`.) +* The :meth:`~tarfile.TarFile.extractall` and :meth:`~tarfile.TarFile.extract` + methods now take a keyword parameter *numeric_only*. If set to ``True``, + the extracted files and directories will be owned by the numeric uid and gid + from the tarfile. If set to ``False`` (the default, and the behavior in + versions prior to 3.5), they will be owned bythe named user and group in the + tarfile. (Contributed by Michael Vogt and Eric Smith in :issue:`23193`.) + time ---- diff --git a/Lib/tarfile.py b/Lib/tarfile.py --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -1972,12 +1972,13 @@ self.members.append(tarinfo) - def extractall(self, path=".", members=None): + def extractall(self, path=".", members=None, numeric_owner=False): """Extract all members from the archive to the current working directory and set owner, modification time and permissions on directories afterwards. `path' specifies a different directory to extract to. `members' is optional and must be a subset of the - list returned by getmembers(). + list returned by getmembers(). If `numeric_owner` is True, only + the numbers for user/group names are used and not the names. """ directories = [] @@ -1991,7 +1992,8 @@ tarinfo = copy.copy(tarinfo) tarinfo.mode = 0o700 # Do not set_attrs directories, as we will do that further down - self.extract(tarinfo, path, set_attrs=not tarinfo.isdir()) + self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(), + numeric_owner=numeric_owner) # Reverse sort directories. directories.sort(key=lambda a: a.name) @@ -2001,7 +2003,7 @@ for tarinfo in directories: dirpath = os.path.join(path, tarinfo.name) try: - self.chown(tarinfo, dirpath) + self.chown(tarinfo, dirpath, numeric_owner=numeric_owner) self.utime(tarinfo, dirpath) self.chmod(tarinfo, dirpath) except ExtractError as e: @@ -2010,12 +2012,14 @@ else: self._dbg(1, "tarfile: %s" % e) - def extract(self, member, path="", set_attrs=True): + def extract(self, member, path="", set_attrs=True, numeric_owner=False): """Extract a member from the archive to the current working directory, using its full name. Its file information is extracted as accurately as possible. `member' may be a filename or a TarInfo object. You can specify a different directory using `path'. File attributes (owner, - mtime, mode) are set unless `set_attrs' is False. + mtime, mode) are set unless `set_attrs' is False. If `numeric_owner` + is True, only the numbers for user/group names are used and not + the names. """ self._check("r") @@ -2030,7 +2034,8 @@ try: self._extract_member(tarinfo, os.path.join(path, tarinfo.name), - set_attrs=set_attrs) + set_attrs=set_attrs, + numeric_owner=numeric_owner) except OSError as e: if self.errorlevel > 0: raise @@ -2076,7 +2081,8 @@ # blkdev, etc.), return None instead of a file object. return None - def _extract_member(self, tarinfo, targetpath, set_attrs=True): + def _extract_member(self, tarinfo, targetpath, set_attrs=True, + numeric_owner=False): """Extract the TarInfo object tarinfo to a physical file called targetpath. """ @@ -2114,7 +2120,7 @@ self.makefile(tarinfo, targetpath) if set_attrs: - self.chown(tarinfo, targetpath) + self.chown(tarinfo, targetpath, numeric_owner) if not tarinfo.issym(): self.chmod(tarinfo, targetpath) self.utime(tarinfo, targetpath) @@ -2203,19 +2209,24 @@ except KeyError: raise ExtractError("unable to resolve link inside archive") - def chown(self, tarinfo, targetpath): - """Set owner of targetpath according to tarinfo. + def chown(self, tarinfo, targetpath, numeric_owner=False): + """Set owner of targetpath according to tarinfo. If numeric_owner + is True, use .gid/.uid instead of .gname/.uname. """ if pwd and hasattr(os, "geteuid") and os.geteuid() == 0: # We have to be root to do so. - try: - g = grp.getgrnam(tarinfo.gname)[2] - except KeyError: + if numeric_owner: g = tarinfo.gid - try: - u = pwd.getpwnam(tarinfo.uname)[2] - except KeyError: u = tarinfo.uid + else: + try: + g = grp.getgrnam(tarinfo.gname)[2] + except KeyError: + g = tarinfo.gid + try: + u = pwd.getpwnam(tarinfo.uname)[2] + except KeyError: + u = tarinfo.uid try: if tarinfo.issym() and hasattr(os, "lchown"): os.lchown(targetpath, u, g) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -2,8 +2,10 @@ import os import io from hashlib import md5 +from contextlib import contextmanager import unittest +import unittest.mock import tarfile from test import support, script_helper @@ -2264,6 +2266,129 @@ self._test_partial_input("r:bz2") +def root_is_uid_gid_0(): + try: + import pwd, grp + except ImportError: + return False + if pwd.getpwuid(0)[0] != 'root': + return False + if grp.getgrgid(0)[0] != 'root': + return False + return True + + +class NumericOwnerTest(unittest.TestCase): + # mock the following: + # os.chown: so we can test what's being called + # os.chmod: so the modes are not actually changed. if they are, we can't + # delete the files/directories + # os.geteuid: so we can lie and say we're root (uid = 0) + + @staticmethod + def _make_test_archive(filename_1, dirname_1, filename_2): + # the file contents to write + fobj = io.BytesIO(b"content") + + # create a tar file with a file, a directory, and a file within that + # directory. Assign various .uid/.gid values to them + items = [(filename_1, 99, 98, tarfile.REGTYPE, fobj), + (dirname_1, 77, 76, tarfile.DIRTYPE, None), + (filename_2, 88, 87, tarfile.REGTYPE, fobj), + ] + with tarfile.open(tmpname, 'w') as tarfl: + for name, uid, gid, typ, contents in items: + t = tarfile.TarInfo(name) + t.uid = uid + t.gid = gid + t.uname = 'root' + t.gname = 'root' + t.type = typ + tarfl.addfile(t, contents) + + # return the full pathname to the tar file + return tmpname + + @staticmethod + @contextmanager + def _setup_test(mock_geteuid): + mock_geteuid.return_value = 0 # lie and say we're root + fname = 'numeric-owner-testfile' + dirname = 'dir' + + # the names we want stored in the tarfile + filename_1 = fname + dirname_1 = dirname + filename_2 = os.path.join(dirname, fname) + + # create the tarfile with the contents we're after + tar_filename = NumericOwnerTest._make_test_archive(filename_1, + dirname_1, + filename_2) + + # open the tarfile for reading. yield it and the names of the items + # we stored into the file + with tarfile.open(tar_filename) as tarfl: + yield tarfl, filename_1, dirname_1, filename_2 + + @unittest.mock.patch('os.chown') + @unittest.mock.patch('os.chmod') + @unittest.mock.patch('os.geteuid') + def test_extract_with_numeric_owner(self, mock_geteuid, mock_chmod, + mock_chown): + with self._setup_test(mock_geteuid) as (tarfl, filename_1, _, + filename_2): + tarfl.extract(filename_1, TEMPDIR, numeric_owner=True) + tarfl.extract(filename_2 , TEMPDIR, numeric_owner=True) + + # convert to filesystem paths + f_filename_1 = os.path.join(TEMPDIR, filename_1) + f_filename_2 = os.path.join(TEMPDIR, filename_2) + + mock_chown.assert_has_calls([unittest.mock.call(f_filename_1, 99, 98), + unittest.mock.call(f_filename_2, 88, 87), + ], + any_order=True) + + @unittest.mock.patch('os.chown') + @unittest.mock.patch('os.chmod') + @unittest.mock.patch('os.geteuid') + def test_extractall_with_numeric_owner(self, mock_geteuid, mock_chmod, + mock_chown): + with self._setup_test(mock_geteuid) as (tarfl, filename_1, dirname_1, + filename_2): + tarfl.extractall(TEMPDIR, numeric_owner=True) + + # convert to filesystem paths + f_filename_1 = os.path.join(TEMPDIR, filename_1) + f_dirname_1 = os.path.join(TEMPDIR, dirname_1) + f_filename_2 = os.path.join(TEMPDIR, filename_2) + + mock_chown.assert_has_calls([unittest.mock.call(f_filename_1, 99, 98), + unittest.mock.call(f_dirname_1, 77, 76), + unittest.mock.call(f_filename_2, 88, 87), + ], + any_order=True) + + # this test requires that uid=0 and gid=0 really be named 'root'. that's + # because the uname and gname in the test file are 'root', and extract() + # will look them up using pwd and grp to find their uid and gid. + @unittest.skipUnless(root_is_uid_gid_0(), + 'uid=0,gid=0 must be named "root"') + @unittest.mock.patch('os.chown') + @unittest.mock.patch('os.chmod') + @unittest.mock.patch('os.geteuid') + def test_extract_without_numeric_owner(self, mock_geteuid, mock_chmod, + mock_chown): + with self._setup_test(mock_geteuid) as (tarfl, filename_1, _, _): + tarfl.extract(filename_1, TEMPDIR, numeric_owner=False) + + # convert to filesystem paths + f_filename_1 = os.path.join(TEMPDIR, filename_1) + + mock_chown.assert_called_with(f_filename_1, 0, 0) + + def setUpModule(): support.unlink(TEMPDIR) os.makedirs(TEMPDIR) diff --git a/Misc/ACKS b/Misc/ACKS --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1456,6 +1456,7 @@ Pauli Virtanen Frank Visser Johannes Vogel +Michael Vogt Radu Voicilas Alex Volkov Martijn Vries