Index: Doc/library/tarfile.rst =================================================================== --- Doc/library/tarfile.rst (revision 57571) +++ Doc/library/tarfile.rst (working copy) @@ -168,6 +168,12 @@ :attr:`TarFile.errorlevel`\ ``== 2``. +.. exception:: SecurityError + + Is a subclass of :exc:`ExtractError` and is raised when insecure pathnames + are found on extraction. + + .. exception:: HeaderError Is raised by :meth:`frombuf` if the buffer it gets is invalid. @@ -327,16 +333,22 @@ available. -.. method:: TarFile.extractall([path[, members]]) +.. method:: TarFile.extractall([path[, members[, check_paths]]]) Extract all members from the archive to the current working directory or - directory *path*. If optional *members* is given, it must be a subset of the - list returned by :meth:`getmembers`. Directory information like owner, - modification time and permissions are set after all members have been extracted. - This is done to work around two problems: A directory's modification time is - reset each time a file is created in it. And, if a directory's permissions do - not allow writing, extracting files to it will fail. + directory *path*. If optional *members* is given, it must be an iterator of + :class:`TarInfo` objects (e.g. a subset of the list returned by + :meth:`getmembers`). If *check_paths* is :const:`True` (default), insecure + pathnames that are absolute or point to a destination outside of the + archive's scope are rejected. Depending on :attr:`TarFile.errorlevel` a + :exc:`SecurityError` is raised. + Directory information like owner, modification time and permissions are set + after all members have been extracted. This is done to work around two + problems: A directory's modification time is reset each time a file is + created in it. And, if a directory's permissions do not allow writing, + extracting files to it will fail. + .. versionadded:: 2.5 @@ -349,9 +361,8 @@ .. note:: - Because the :meth:`extract` method allows random access to a tar archive there - are some issues you must take care of yourself. See the description for - :meth:`extractall` above. + The :meth:`extract` method does not take care of several extraction issues. + In most cases you should consider using the :meth:`extractall` method. .. method:: TarFile.extractfile(member) Index: Lib/tarfile.py =================================================================== --- Lib/tarfile.py (revision 57571) +++ Lib/tarfile.py (working copy) @@ -340,6 +340,9 @@ class ExtractError(TarError): """General exception for extract errors.""" pass +class SecurityError(ExtractError): + """Exception for insecure pathnames.""" + pass class ReadError(TarError): """Exception for unreadble tar archives.""" pass @@ -2006,12 +2009,13 @@ self.members.append(tarinfo) - def extractall(self, path=".", members=None): + def extractall(self, path=".", members=None, check_paths=True): """Extract all members from the archive to the current working directory and set owner, modification time and permissions on directories afterwards. `path' specifies a different directory to extract to. `members' is optional and must be a subset of the - list returned by getmembers(). + list returned by getmembers(). If `check_paths' is True insecure + pathnames are not extracted. """ directories = [] @@ -2019,6 +2023,20 @@ members = self for tarinfo in members: + if check_paths: + try: + self._check_path(tarinfo.name) + if tarinfo.islnk(): + self._check_path(tarinfo.linkname) + if tarinfo.issym(): + self._check_path(os.path.join(tarinfo.name, tarinfo.linkname)) + except SecurityError, e: + if self.errorlevel > 1: + raise + else: + self._dbg(1, "tarfile: %s" % e) + continue + if tarinfo.isdir(): # Extract directory with a safe mode, so that # all files below can be extracted as well. @@ -2329,6 +2347,15 @@ #-------------------------------------------------------------------------- # Little helper methods: + def _check_path(self, path): + """Raise an SecurityError if `path' is an insecure pathname. + """ + path = normpath(path) + if path.startswith("/"): + raise SecurityError("found insecure absolute path %r" % path) + if path.startswith("../"): + raise SecurityError("found insecure relative path %r" % path) + def _getmember(self, name, tarinfo=None): """Find an archive member by name from bottom to top. If tarinfo is given, it is used as the starting point. Index: Lib/test/test_tarfile.py =================================================================== --- Lib/test/test_tarfile.py (revision 57571) +++ Lib/test/test_tarfile.py (working copy) @@ -1029,6 +1029,57 @@ tarinfo.tobuf(tarfile.PAX_FORMAT) +class SecurityTest(unittest.TestCase): + + class MockTarFile(tarfile.TarFile): + def _extract_member(self, tarinfo, path): + pass + + def setUp(self): + fobj = StringIO.StringIO() + self.tar = tarfile.open(fileobj=fobj, mode="w") + self.tar.addfile(tarfile.TarInfo("/foo")) + self.tar.addfile(tarfile.TarInfo("../../../foo")) + self.tar.addfile(tarfile.TarInfo("bar/../../foo")) + + names = ["bar1", "bar2", "bar3", "baz1", "baz2", "baz3"] + for tp in (tarfile.LNKTYPE, tarfile.SYMTYPE): + for target in ("/foo", "../../foo", "../foo"): + t = tarfile.TarInfo(names.pop(0)) + t.type = tp + t.linkname = target + self.tar.addfile(t) + self.tar.close() + + fobj.seek(0) + self.tar = self.MockTarFile(fileobj=fobj) + self.tar.errorlevel = 2 + + def tearDown(self): + self.tar.close() + + def _extract(self, name): + tarinfo = self.tar.getmember(name) + self.tar.extractall(members=[tarinfo]) + + def test_absolute_pathnames(self): + self.assertRaises(tarfile.ExtractError, self._extract, "/foo") + + def test_directory_traversal(self): + self.assertRaises(tarfile.ExtractError, self._extract, "../../../foo") + self.assertRaises(tarfile.ExtractError, self._extract, "bar/../../foo") + + def test_absolute_link(self): + self.assertRaises(tarfile.ExtractError, self._extract, "bar1") + self.assertRaises(tarfile.ExtractError, self._extract, "bar2") + self.assertRaises(tarfile.ExtractError, self._extract, "bar3") + + def test_absolute_symlink(self): + self.assertRaises(tarfile.ExtractError, self._extract, "baz1") + self.assertRaises(tarfile.ExtractError, self._extract, "baz2") + self._extract("baz3") + + class GzipMiscReadTest(MiscReadTest): tarname = gzipname mode = "r:gz" @@ -1079,6 +1130,7 @@ PaxUnicodeTest, AppendTest, LimitsTest, + SecurityTest, ] if hasattr(os, "link"):