diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -367,7 +367,7 @@ available. -.. method:: TarFile.extractall(path=".", members=None) +.. method:: TarFile.extractall(path=".", members=None, numeric_owner=False) Extract all members from the archive to the current working directory or directory *path*. If optional *members* is given, it must be a subset of the @@ -377,6 +377,9 @@ reset each time a file is created in it. And, if a directory's permissions do not allow writing, extracting files to it will fail. + If *numeric_owner* is :const:`True`, only the (uid, gid) numbers are used to + set the owner/group for the extracted files. + .. warning:: Never extract archives from untrusted sources without prior inspection. @@ -385,13 +388,14 @@ dots ``".."``. -.. method:: TarFile.extract(member, path="", set_attrs=True) +.. method:: TarFile.extract(member, path="", set_attrs=True, numeric_owner=False) Extract a member from the archive to the current working directory, using its full name. Its file information is extracted as accurately as possible. *member* may be a filename or a :class:`TarInfo` object. You can specify a different directory using *path*. File attributes (owner, mtime, mode) are set unless - *set_attrs* is false. + *set_attrs* is false. If *numeric_owner* is :const:`True`, only the (uid, gid) + numbers are used to set the owner/group for the extracted files. .. note:: @@ -827,4 +831,3 @@ because all the metadata is stored using *UTF-8*. *encoding* is only used in the rare cases when binary pax headers are decoded or when strings with surrogate characters are stored. - diff --git a/Lib/tarfile.py b/Lib/tarfile.py --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -1972,12 +1972,13 @@ self.members.append(tarinfo) - def extractall(self, path=".", members=None): + def extractall(self, path=".", members=None, numeric_owner=False): """Extract all members from the archive to the current working directory and set owner, modification time and permissions on directories afterwards. `path' specifies a different directory to extract to. `members' is optional and must be a subset of the - list returned by getmembers(). + list returned by getmembers(). If `numeric_owner` is True, only + the numbers for user/group names are used and not the names. """ directories = [] @@ -1991,7 +1992,8 @@ tarinfo = copy.copy(tarinfo) tarinfo.mode = 0o700 # Do not set_attrs directories, as we will do that further down - self.extract(tarinfo, path, set_attrs=not tarinfo.isdir()) + self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(), + numeric_owner=numeric_owner) # Reverse sort directories. directories.sort(key=lambda a: a.name) @@ -2001,7 +2003,7 @@ for tarinfo in directories: dirpath = os.path.join(path, tarinfo.name) try: - self.chown(tarinfo, dirpath) + self.chown(tarinfo, dirpath, numeric_owner=numeric_owner) self.utime(tarinfo, dirpath) self.chmod(tarinfo, dirpath) except ExtractError as e: @@ -2010,12 +2012,14 @@ else: self._dbg(1, "tarfile: %s" % e) - def extract(self, member, path="", set_attrs=True): + def extract(self, member, path="", set_attrs=True, numeric_owner=False): """Extract a member from the archive to the current working directory, using its full name. Its file information is extracted as accurately as possible. `member' may be a filename or a TarInfo object. You can specify a different directory using `path'. File attributes (owner, - mtime, mode) are set unless `set_attrs' is False. + mtime, mode) are set unless `set_attrs' is False. If `numeric_owner` + is True, only the numbers for user/group names are used and not + the names. """ self._check("r") @@ -2030,7 +2034,8 @@ try: self._extract_member(tarinfo, os.path.join(path, tarinfo.name), - set_attrs=set_attrs) + set_attrs=set_attrs, + numeric_owner=numeric_owner) except OSError as e: if self.errorlevel > 0: raise @@ -2076,7 +2081,8 @@ # blkdev, etc.), return None instead of a file object. return None - def _extract_member(self, tarinfo, targetpath, set_attrs=True): + def _extract_member(self, tarinfo, targetpath, set_attrs=True, + numeric_owner=False): """Extract the TarInfo object tarinfo to a physical file called targetpath. """ @@ -2114,7 +2120,7 @@ self.makefile(tarinfo, targetpath) if set_attrs: - self.chown(tarinfo, targetpath) + self.chown(tarinfo, targetpath, numeric_owner) if not tarinfo.issym(): self.chmod(tarinfo, targetpath) self.utime(tarinfo, targetpath) @@ -2203,19 +2209,24 @@ except KeyError: raise ExtractError("unable to resolve link inside archive") - def chown(self, tarinfo, targetpath): - """Set owner of targetpath according to tarinfo. + def chown(self, tarinfo, targetpath, numeric_owner=False): + """Set owner of targetpath according to tarinfo. If numeric_owner + is True, use .gid/.uid instead of .gname/.uname. """ if pwd and hasattr(os, "geteuid") and os.geteuid() == 0: # We have to be root to do so. - try: - g = grp.getgrnam(tarinfo.gname)[2] - except KeyError: + if numeric_owner: g = tarinfo.gid - try: - u = pwd.getpwnam(tarinfo.uname)[2] - except KeyError: u = tarinfo.uid + else: + try: + g = grp.getgrnam(tarinfo.gname)[2] + except KeyError: + g = tarinfo.gid + try: + u = pwd.getpwnam(tarinfo.uname)[2] + except KeyError: + u = tarinfo.uid try: if tarinfo.issym() and hasattr(os, "lchown"): os.lchown(targetpath, u, g) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -4,6 +4,7 @@ from hashlib import md5 import unittest +import unittest.mock import tarfile from test import support, script_helper @@ -2264,6 +2265,92 @@ self._test_partial_input("r:bz2") +def root_is_uid_gid_0(): + try: + import pwd, grp + except ImportError: + return False + if pwd.getpwuid(0)[0] != "root": + return False + if grp.getgrgid(0)[0] != "root": + return False + return True + + +@unittest.skipUnless(root_is_uid_gid_0(), "uid=0,gid=0 must be named 'root'") +class NumericOwnerTest(unittest.TestCase): + + def _make_test_archive(self, fname, dirname): + fobj = io.BytesIO(b"content") + + # ceate a tar file with a file, a directory, and a file within that directory + with tarfile.open(tmpname, "w") as w: + t = tarfile.TarInfo(fname) + t.uid = 99 + t.gid = 98 + t.uname = 'root' + t.gname = 'root' + w.addfile(t, fobj) + + t = tarfile.TarInfo(dirname + '/') + t.uid = 77 + t.gid = 76 + t.uname = 'root' + t.gname = 'root' + t.type = tarfile.DIRTYPE + w.addfile(t) + + t = tarfile.TarInfo(dirname + '/' + fname) + t.uid = 88 + t.gid = 87 + t.uname = 'root' + t.gname = 'root' + w.addfile(t, fobj) + return tmpname + + @unittest.mock.patch("os.chown") + @unittest.mock.patch("os.geteuid") + def test_extract_with_numeric_owner(self, mock_geteuid, mock_chown): + mock_geteuid.return_value = 0 + fname = 'numeric-owner-testfile' + dirname = 'dir' + testtar = self._make_test_archive(fname, dirname) + with tarfile.open(testtar) as r: + r.extract(fname, TEMPDIR, numeric_owner=True) + r.extract(dirname + '/' + fname , TEMPDIR, numeric_owner=True) + mock_chown.assert_has_calls([unittest.mock.call(os.path.join(TEMPDIR, fname), 99, 98), + unittest.mock.call(os.path.join(TEMPDIR, dirname, fname), 88, 87), + ], + any_order=True) + + @unittest.mock.patch("os.chown") + @unittest.mock.patch("os.geteuid") + def test_extractall_with_numeric_owner(self, mock_geteuid, mock_chown): + mock_geteuid.return_value = 0 + fname = 'numeric-owner-testfile' + dirname = 'dir' + testtar = self._make_test_archive(fname, dirname) + with tarfile.open(testtar) as r: + r.extractall(TEMPDIR, numeric_owner=True) + targetpath = os.path.join(TEMPDIR, fname) + mock_chown.assert_has_calls([unittest.mock.call(os.path.join(TEMPDIR, fname), 99, 98), + unittest.mock.call(os.path.join(TEMPDIR, dirname), 77, 76), + unittest.mock.call(os.path.join(TEMPDIR, dirname, fname), 88, 87), + ], + any_order=True) + + @unittest.mock.patch("os.chown") + @unittest.mock.patch("os.geteuid") + def test_extract_without_numeric_owner(self, mock_geteuid, mock_chown): + mock_geteuid.return_value = 0 + fname = 'numeric-owner-testfile' + dirname = 'dir' + testtar = self._make_test_archive(fname, dirname) + with tarfile.open(testtar) as r: + r.extract(fname, TEMPDIR, numeric_owner=False) + mock_chown.assert_called_with(os.path.join(TEMPDIR, fname), 0, 0) + + def setUpModule(): support.unlink(TEMPDIR) os.makedirs(TEMPDIR) diff --git a/Misc/ACKS b/Misc/ACKS --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1451,6 +1451,7 @@ Pauli Virtanen Frank Visser Johannes Vogel +Michael Vogt Radu Voicilas Alex Volkov Martijn Vries