# HG changeset patch # Parent a14012352f65731e697d2f9f813df9e35b315168 diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -122,6 +122,10 @@ | | writing. | +-------------+--------------------------------------------+ +.. function:: safe_open(name=None, mode='r', fileobj=None, bufsize=10240, \*\*kwargs) + + Return a :class:`SafeTarFile` object for the pathname *name*. For detailed + information, see :ref:`safetarfile-objects`. .. class:: TarFile @@ -129,6 +133,13 @@ better use :func:`tarfile.open` instead. See :ref:`tarfile-objects`. +.. class:: SafeTarFile + + Class for reading and writing tar archives in a safe manner. Do not use + this class directly, better use :func:`tarfile.safe_open` instead. See + :ref:`safetarfile-objects`. + + .. function:: is_tarfile(name) Return :const:`True` if *name* is a tar archive file, that the :mod:`tarfile` @@ -172,6 +183,24 @@ Is raised by :meth:`TarInfo.frombuf` if the buffer it gets is invalid. +.. exception:: SecurityError + + Is raised by :class:`SafeTarFile` during iteration if it encounters a bad + archive member. The member's :class:`TarInfo` object is stored in the + :attr:`tarinfo` attribute of the exception, the :attr:`warning` attribute + contains one of the constants described in the first part of + :ref:`safetarfile-configuration`. + + +.. exception:: LimitError + + Is raised by :class:`SafeTarFile` if one of the user-defined limits is + exceeded. The exception's :attr:`warning` attribute contains one of the + constants described in the second part of :ref:`safetarfile-configuration`. + It is a subclass of :exc:`SecurityError`, its :attr:`tarinfo` attribute is + always :const:`None`. + + Each of the following constants defines a tar archive format that the :mod:`tarfile` module is able to create. See section :ref:`tar-formats` for @@ -350,7 +379,8 @@ Never extract archives from untrusted sources without prior inspection. It is possible that files are created outside of *path*, e.g. members that have absolute filenames starting with ``"/"`` or filenames with two - dots ``".."``. + dots ``".."``. Use :class:`SafeTarFile` for processing archives in a safe + manner. .. method:: TarFile.extract(member, path="", set_attrs=True) @@ -439,6 +469,141 @@ +.. _safetarfile-objects: + +SafeTarFile Objects +------------------- + +In general, it is no good idea to extract tar archives from sources you do not +completely trust. Archives that were created carelessly or maliciously may +contain file system objects in configurations that pose a variety of risks to +the system if they are extracted, for example overwriting existing files in +unanticipated locations. See the warning for :meth:`TarFile.extractall`. + +The :class:`SafeTarFile` class is a replacement for the :class:`TarFile` class +that can be used identically but tries to safeguard against a number of +unwanted side-effects. :class:`SafeTarFile` does this by identifying bad +archives and preventing the bad parts from being extracted. The default +behaviour of the :class:`SafeTarFile` class is to raise a :exc:`SecurityError` +exception in case of a bad archive member or a :exc:`LimitError` in case of an +exceeded limit. + +.. note:: + + There is no additional benefit in using :class:`SafeTarFile` for the + creation of tar archives. + +.. versionadded:: 3.5 + Added the :class:`SafeTarFile` class. + +.. class:: SafeTarFile(..., ignore_warnings=None, max_files=100000, max_total=1073741824) + + :class:`SafeTarFile` offers a few additional keyword arguments to the + arguments it has in common with the :class:`TarFile` class: + + *ignore_warnings* takes a list of constants one for each warning that + you like to ignore, by default no warnings are ignored. See the first part + of :ref:`safetarfile-configuration` for the constants. + + *max_files* is the maximum allowed number of files stored in the tar + archive, default is ``100000``. To disable the limit, pass :const:`None` or + ``0``. + + *max_total* is the maximum allowed size in bytes that all files together may + occupy when extracted. This defaults to 1 GiB. To disable the limit, pass + :const:`None` or ``0``. + +.. method:: SafeTarFile.analyze() + + Check the archive for possible issues, and generate a 2-tuple for each + member consisting of the member's :class:`TarInfo` object and a :class:`set` + that is either empty (good) or contains one or more warnings described in + :ref:`safetarfile-configuration` (bad). No :exc:`SecurityError` exceptions + are raised. If a limit is exceeded a :exc:`LimitError` is raised. + +.. method:: SafeTarFile.filter() + + Return a generator that only produces :class:`TarInfo` objects that are not + marked as bad, e.g. to restore the good parts of an archive. However, if a + limit is exceeded a :exc:`LimitError` is raised. + +.. method:: SafeTarFile.is_safe() + + Analyze the archive and return :const:`True` if there were no issues found + and it should be safe to extract the archive to the file system. Neither + :exc:`SecurityError` nor :exc:`LimitError` will be raised. + + + +.. _safetarfile-configuration: + +SafeTarFile configuration +~~~~~~~~~~~~~~~~~~~~~~~~~ + +There are two different types of checks built into :class:`SafeTarFile`. The +first type takes care of archive members whose configuration poses a risk to +the system when they are extracted. Each of these checks can be switched off +by passing a list of the following constants as the *ignore_warnings* argument +to the :class:`SafeTarFile` constructor. These constants are also stored in +the :attr:`warning` attribute of a :exc:`SecurityError`. + +.. data:: WARN_ABSOLUTE_NAME + + An absolute pathname (names starting with a ``"/"``). + +.. data:: WARN_ABSOLUTE_NAME + + An absolute pathname (names starting with a ``"/"``). + +.. data:: WARN_RELATIVE_NAME + + A relative pathname (names starting with ``".."``) that breaks out of the + destination directory. + +.. data:: WARN_DUPLICATE_NAME + + A duplicate pathname. + +.. data:: WARN_ABSOLUTE_LINKNAME + + An absolute linkname. + +.. data:: WARN_RELATIVE_LINKNAME + + A relative linkname that breaks out of the destination directory. + +.. data:: WARN_SETUID_SET + + A regular file with a set-user-id permission bit set. + +.. data:: WARN_SETGID_SET + + A regular file with a set-group-id permission bit set. + +.. data:: WARN_CHARACTER_DEVICE + + A character device node. + +.. data:: WARN_BLOCK_DEVICE + + A block device node. + +The second type of check makes sure that the archive complies to a number of +user-defined limits, e.g. to prevent denial-of-service scenarios by excessive +use of memory or disk space. These limits can be configured using the keyword +arguments exclusive to the :class:`SafeTarFile` constructor. The following +constants are stored in the :attr:`warning` attribute of a :exc:`LimitError`. + +.. data:: LIMIT_MAX_FILES + + Maximum allowed number of files exceeded. + +.. data:: LIMIT_MAX_SIZE + + Maximum allowed total size of unpacked contents exceeded. + + + .. _tarinfo-objects: TarInfo Objects @@ -720,6 +885,21 @@ tar.add("foo", filter=reset) tar.close() +How to safely extract a tar archive from an untrusted source:: + + import tarfile + + with tarfile.safe_open("sample.tar", ignore_warnings={tarfile.WARN_DUPLICATE_NAME}) as tar: + # We don't care about duplicate archive members. + if not tar.is_safe(): + print("sample.tar has the following issues:") + for tarinfo, warnings in tar.analyze(): + print(tarinfo.name, ",".join(warnings)) + print("extracting the good parts") + tar.extractall(members=tar.filter()) + else: + tar.extractall() + .. _tar-formats: diff --git a/Lib/tarfile.py b/Lib/tarfile.py --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -137,6 +137,20 @@ "size": int } +# SafeTarFile-related string constants. +WARN_ABSOLUTE_NAME = "absolute name" +WARN_RELATIVE_NAME = "relative name" +WARN_DUPLICATE_NAME = "duplicate name" +WARN_ABSOLUTE_LINKNAME = "absolute linkname" +WARN_RELATIVE_LINKNAME = "relative linkname" +WARN_SETUID_SET = "setuid set" +WARN_SETGID_SET = "setgid set" +WARN_CHARACTER_DEVICE = "character device" +WARN_BLOCK_DEVICE = "block device" + +LIMIT_MAX_FILES = "file limit exceeded" +LIMIT_MAX_SIZE = "space limit exceeded" + #--------------------------------------------------------- # initialization #--------------------------------------------------------- @@ -295,6 +309,19 @@ class SubsequentHeaderError(HeaderError): """Exception for missing and invalid extended headers.""" pass +class SecurityError(TarError): + """Exception for potentially dangerous contents.""" + def __init__(self, tarinfo, warning): + self.tarinfo = tarinfo + self.warning = warning + def __str__(self): + return "%s: %s" % (self.tarinfo, self.warning) +class LimitError(SecurityError): + """Exception for an exceeded limit.""" + def __init__(self, warning): + super().__init__(None, warning) + def __str__(self): + return self.warning #--------------------------- # internal stream interface @@ -2406,6 +2433,124 @@ self.index += 1 return tarinfo + +class SafeTarFile(TarFile): + """A subclass of TarFile that safeguards against malicious data. + """ + + def __init__(self, *args, ignore_warnings=None, + max_files=100000, max_total=1024**3, **kwargs): + super().__init__(*args, **kwargs) + + if ignore_warnings: + self.ignore_warnings = set(ignore_warnings) + else: + self.ignore_warnings = set() + + self.max_files = max_files + self.max_total = max_total + + def __iter__(self): + """Safe iterator over the TarFile, that raises a SecurityError + exception on the first warning. + """ + for tarinfo, warnings in self.analyze(): + if warnings: + raise SecurityError(tarinfo, warnings.pop()) + yield tarinfo + + def analyze(self): + """Generate a list of (TarInfo, warnings) tuples. + """ + self.names = set() + self.total = 0 + + for tarinfo in super().__iter__(): + warnings = set(self.check_member(tarinfo)) + yield tarinfo, warnings - self.ignore_warnings + + def filter(self): + """Generate a list of good TarInfo objects. + """ + for tarinfo, warnings in self.analyze(): + if warnings: + continue + yield tarinfo + + def is_safe(self): + """Return True if the archive should be safe to extract. + """ + try: + for tarinfo, warnings in self.analyze(): + if warnings: + return False + else: + return True + + except LimitError: + return False + + def check_member(self, tarinfo): + """Check a single TarInfo object for problems. Override this in a + subclass if you want to add more checks. + """ + if self.max_files and len(self.members) == self.max_files: + raise LimitError(LIMIT_MAX_FILES) + + self.total += tarinfo.size + if self.max_total and self.total > self.max_total: + raise LimitError(LIMIT_MAX_SIZE) + + yield from self.check_all(tarinfo) + if tarinfo.issym(): + yield from self.check_symlink(tarinfo) + elif tarinfo.islnk(): + yield from self.check_link(tarinfo) + elif tarinfo.ischr() and tarinfo.isblk(): + yield from self.check_device(tarinfo) + + def check_all(self, tarinfo): + if os.path.isabs(tarinfo.name): + yield WARN_ABSOLUTE_NAME + + name = os.path.normpath(tarinfo.name) + if name.startswith("../"): + yield WARN_RELATIVE_NAME + + if tarinfo.name in self.names: + yield WARN_DUPLICATE_NAME + else: + self.names.add(tarinfo.name) + + if tarinfo.isreg() and tarinfo.mode & stat.S_ISUID: + yield WARN_SETUID_SET + + if tarinfo.isreg() and tarinfo.mode & stat.S_ISGID: + yield WARN_SETGID_SET + + def check_symlink(self, tarinfo): + if os.path.isabs(tarinfo.linkname): + yield WARN_ABSOLUTE_LINKNAME + + linkname = os.path.join(os.path.dirname(tarinfo.name), tarinfo.linkname) + linkname = os.path.normpath(linkname) + if linkname.startswith("../"): + yield WARN_RELATIVE_LINKNAME + + def check_link(self, tarinfo): + if os.path.isabs(tarinfo.linkname): + yield WARN_ABSOLUTE_LINKNAME + + linkname = os.path.normpath(tarinfo.linkname) + if linkname.startswith("../"): + yield WARN_RELATIVE_LINKNAME + + def check_device(self, tarinfo): + if tarinfo.ischr(): + yield WARN_CHARACTER_DEVICE + elif tarinfo.isblk(): + yield WARN_BLOCK_DEVICE + #-------------------- # exported functions #-------------------- @@ -2422,6 +2567,7 @@ bltn_open = open open = TarFile.open +safe_open = SafeTarFile.open def main(): @@ -2447,7 +2593,7 @@ if args.test: src = args.test if is_tarfile(src): - with open(src, 'r') as tar: + with SafeTarFile.open(src, 'r') as tar: tar.getmembers() print(tar.getmembers(), file=sys.stderr) if args.verbose: @@ -2458,7 +2604,7 @@ elif args.list: src = args.list if is_tarfile(src): - with TarFile.open(src, 'r:*') as tf: + with SafeTarFile.open(src, 'r:*') as tf: tf.list(verbose=args.verbose) else: parser.exit(1, '{!r} is not a tar archive.\n'.format(src)) @@ -2473,7 +2619,7 @@ parser.exit(1, parser.format_help()) if is_tarfile(src): - with TarFile.open(src, 'r:*') as tf: + with SafeTarFile.open(src, 'r:*') as tf: tf.extractall(path=curdir) if args.verbose: if curdir == '.': @@ -2504,7 +2650,7 @@ tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w' tar_files = args.create - with TarFile.open(tar_name, tar_mode) as tf: + with SafeTarFile.open(tar_name, tar_mode) as tf: for file_name in tar_files: tf.add(file_name)