Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(30244)

Delta Between Two Patch Sets: Lib/tarfile.py

Issue 23228: Crashes when tarfile contains a symlink and unpack directory contain it too
Left Patch Set: Created 4 years, 8 months ago
Right Patch Set: Created 3 years, 4 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « no previous file | Lib/test/test_tarfile.py » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT RIGHT
1 #!/usr/bin/env python3 1 #!/usr/bin/env python3
2 #------------------------------------------------------------------- 2 #-------------------------------------------------------------------
3 # tarfile.py 3 # tarfile.py
4 #------------------------------------------------------------------- 4 #-------------------------------------------------------------------
5 # Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de> 5 # Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
6 # All rights reserved. 6 # All rights reserved.
7 # 7 #
8 # Permission is hereby granted, free of charge, to any person 8 # Permission is hereby granted, free of charge, to any person
9 # obtaining a copy of this software and associated documentation 9 # obtaining a copy of this software and associated documentation
10 # files (the "Software"), to deal in the Software without 10 # files (the "Software"), to deal in the Software without
(...skipping 20 matching lines...) Expand all
31 31
32 version = "0.9.0" 32 version = "0.9.0"
33 __author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)" 33 __author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
34 __date__ = "$Date: 2011-02-25 17:42:01 +0200 (Fri, 25 Feb 2011) $" 34 __date__ = "$Date: 2011-02-25 17:42:01 +0200 (Fri, 25 Feb 2011) $"
35 __cvsid__ = "$Id: tarfile.py 88586 2011-02-25 15:42:01Z marc-andre.lemburg $" 35 __cvsid__ = "$Id: tarfile.py 88586 2011-02-25 15:42:01Z marc-andre.lemburg $"
36 __credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend." 36 __credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
37 37
38 #--------- 38 #---------
39 # Imports 39 # Imports
40 #--------- 40 #---------
41 from builtins import open as bltn_open
41 import sys 42 import sys
42 import os 43 import os
43 import io 44 import io
44 import shutil 45 import shutil
45 import stat 46 import stat
46 import time 47 import time
47 import struct 48 import struct
48 import copy 49 import copy
49 import re 50 import re
50 51
51 try: 52 try:
52 import grp, pwd 53 import grp, pwd
53 except ImportError: 54 except ImportError:
54 grp = pwd = None 55 grp = pwd = None
55 56
56 # os.symlink on Windows prior to 6.0 raises NotImplementedError 57 # os.symlink on Windows prior to 6.0 raises NotImplementedError
57 symlink_exception = (AttributeError, NotImplementedError) 58 symlink_exception = (AttributeError, NotImplementedError, OSError)
58 try:
59 # OSError (winerror=1314) will be raised if the caller does not hold the
60 # SeCreateSymbolicLinkPrivilege privilege
61 symlink_exception += (OSError,)
62 except NameError:
63 pass
64 59
65 # from tarfile import * 60 # from tarfile import *
66 __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"] 61 __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError",
62 "CompressionError", "StreamError", "ExtractError", "HeaderError",
63 "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT",
64 "DEFAULT_FORMAT", "open"]
67 65
68 #--------------------------------------------------------- 66 #---------------------------------------------------------
69 # tar constants 67 # tar constants
70 #--------------------------------------------------------- 68 #---------------------------------------------------------
71 NUL = b"\0" # the null character 69 NUL = b"\0" # the null character
72 BLOCKSIZE = 512 # length of processing blocks 70 BLOCKSIZE = 512 # length of processing blocks
73 RECORDSIZE = BLOCKSIZE * 20 # length of records 71 RECORDSIZE = BLOCKSIZE * 20 # length of records
74 GNU_MAGIC = b"ustar \0" # magic gnu tar string 72 GNU_MAGIC = b"ustar \0" # magic gnu tar string
75 POSIX_MAGIC = b"ustar\x0000" # magic posix tar string 73 POSIX_MAGIC = b"ustar\x0000" # magic posix tar string
76 74
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after
170 # itn() below. 168 # itn() below.
171 if s[0] in (0o200, 0o377): 169 if s[0] in (0o200, 0o377):
172 n = 0 170 n = 0
173 for i in range(len(s) - 1): 171 for i in range(len(s) - 1):
174 n <<= 8 172 n <<= 8
175 n += s[i + 1] 173 n += s[i + 1]
176 if s[0] == 0o377: 174 if s[0] == 0o377:
177 n = -(256 ** (len(s) - 1) - n) 175 n = -(256 ** (len(s) - 1) - n)
178 else: 176 else:
179 try: 177 try:
180 n = int(nts(s, "ascii", "strict") or "0", 8) 178 s = nts(s, "ascii", "strict")
179 n = int(s.strip() or "0", 8)
181 except ValueError: 180 except ValueError:
182 raise InvalidHeaderError("invalid header") 181 raise InvalidHeaderError("invalid header")
183 return n 182 return n
184 183
185 def itn(n, digits=8, format=DEFAULT_FORMAT): 184 def itn(n, digits=8, format=DEFAULT_FORMAT):
186 """Convert a python number to a number field. 185 """Convert a python number to a number field.
187 """ 186 """
188 # POSIX 1003.1-1988 requires numbers to be encoded as a string of 187 # POSIX 1003.1-1988 requires numbers to be encoded as a string of
189 # octal digits followed by a null-byte, this allows values up to 188 # octal digits followed by a null-byte, this allows values up to
190 # (8**(digits-1))-1. GNU tar allows storing numbers greater than 189 # (8**(digits-1))-1. GNU tar allows storing numbers greater than
(...skipping 25 matching lines...) Expand all
216 it was filled with spaces. According to the GNU tar sources, 215 it was filled with spaces. According to the GNU tar sources,
217 some tars (Sun and NeXT) calculate chksum with signed char, 216 some tars (Sun and NeXT) calculate chksum with signed char,
218 which will be different if there are chars in the buffer with 217 which will be different if there are chars in the buffer with
219 the high bit set. So we calculate two checksums, unsigned and 218 the high bit set. So we calculate two checksums, unsigned and
220 signed. 219 signed.
221 """ 220 """
222 unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf)) 221 unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf))
223 signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf)) 222 signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
224 return unsigned_chksum, signed_chksum 223 return unsigned_chksum, signed_chksum
225 224
226 def copyfileobj(src, dst, length=None): 225 def copyfileobj(src, dst, length=None, exception=OSError):
227 """Copy length bytes from fileobj src to fileobj dst. 226 """Copy length bytes from fileobj src to fileobj dst.
228 If length is None, copy the entire content. 227 If length is None, copy the entire content.
229 """ 228 """
230 if length == 0: 229 if length == 0:
231 return 230 return
232 if length is None: 231 if length is None:
233 shutil.copyfileobj(src, dst) 232 shutil.copyfileobj(src, dst)
234 return 233 return
235 234
236 BUFSIZE = 16 * 1024 235 BUFSIZE = 16 * 1024
237 blocks, remainder = divmod(length, BUFSIZE) 236 blocks, remainder = divmod(length, BUFSIZE)
238 for b in range(blocks): 237 for b in range(blocks):
239 buf = src.read(BUFSIZE) 238 buf = src.read(BUFSIZE)
240 if len(buf) < BUFSIZE: 239 if len(buf) < BUFSIZE:
241 raise OSError("end of file reached") 240 raise exception("unexpected end of data")
242 dst.write(buf) 241 dst.write(buf)
243 242
244 if remainder != 0: 243 if remainder != 0:
245 buf = src.read(remainder) 244 buf = src.read(remainder)
246 if len(buf) < remainder: 245 if len(buf) < remainder:
247 raise OSError("end of file reached") 246 raise exception("unexpected end of data")
248 dst.write(buf) 247 dst.write(buf)
249 return 248 return
250 249
251 def filemode(mode): 250 def filemode(mode):
252 """Deprecated in this location; use stat.filemode.""" 251 """Deprecated in this location; use stat.filemode."""
253 import warnings 252 import warnings
254 warnings.warn("deprecated in favor of stat.filemode", 253 warnings.warn("deprecated in favor of stat.filemode",
255 DeprecationWarning, 2) 254 DeprecationWarning, 2)
256 return stat.filemode(mode) 255 return stat.filemode(mode)
257 256
(...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after
441 self.fileobj.write(self.buf[:self.bufsize]) 440 self.fileobj.write(self.buf[:self.bufsize])
442 self.buf = self.buf[self.bufsize:] 441 self.buf = self.buf[self.bufsize:]
443 442
444 def close(self): 443 def close(self):
445 """Close the _Stream object. No operation should be 444 """Close the _Stream object. No operation should be
446 done on it afterwards. 445 done on it afterwards.
447 """ 446 """
448 if self.closed: 447 if self.closed:
449 return 448 return
450 449
451 if self.mode == "w" and self.comptype != "tar":
452 self.buf += self.cmp.flush()
453
454 if self.mode == "w" and self.buf:
455 self.fileobj.write(self.buf)
456 self.buf = b""
457 if self.comptype == "gz":
458 # The native zlib crc is an unsigned 32-bit integer, but
459 # the Python wrapper implicitly casts that to a signed C
460 # long. So, on a 32-bit box self.crc may "look negative",
461 # while the same crc on a 64-bit box may "look positive".
462 # To avoid irksome warnings from the `struct` module, force
463 # it to look positive on all boxes.
464 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
465 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
466
467 if not self._extfileobj:
468 self.fileobj.close()
469
470 self.closed = True 450 self.closed = True
451 try:
452 if self.mode == "w" and self.comptype != "tar":
453 self.buf += self.cmp.flush()
454
455 if self.mode == "w" and self.buf:
456 self.fileobj.write(self.buf)
457 self.buf = b""
458 if self.comptype == "gz":
459 self.fileobj.write(struct.pack("<L", self.crc))
460 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
461 finally:
462 if not self._extfileobj:
463 self.fileobj.close()
471 464
472 def _init_read_gz(self): 465 def _init_read_gz(self):
473 """Initialize for reading a gzip compressed fileobj. 466 """Initialize for reading a gzip compressed fileobj.
474 """ 467 """
475 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS) 468 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
476 self.dbuf = b"" 469 self.dbuf = b""
477 470
478 # taken from gzip.GzipFile with some alterations 471 # taken from gzip.GzipFile with some alterations
479 if self.__read(2) != b"\037\213": 472 if self.__read(2) != b"\037\213":
480 raise ReadError("not a gzip file") 473 raise ReadError("not a gzip file")
(...skipping 200 matching lines...) Expand 10 before | Expand all | Expand 10 after
681 data, start, stop, offset = self.map[self.map_index] 674 data, start, stop, offset = self.map[self.map_index]
682 if start <= self.position < stop: 675 if start <= self.position < stop:
683 break 676 break
684 else: 677 else:
685 self.map_index += 1 678 self.map_index += 1
686 if self.map_index == len(self.map): 679 if self.map_index == len(self.map):
687 self.map_index = 0 680 self.map_index = 0
688 length = min(size, stop - self.position) 681 length = min(size, stop - self.position)
689 if data: 682 if data:
690 self.fileobj.seek(offset + (self.position - start)) 683 self.fileobj.seek(offset + (self.position - start))
691 buf += self.fileobj.read(length) 684 b = self.fileobj.read(length)
685 if len(b) != length:
686 raise ReadError("unexpected end of data")
687 buf += b
692 else: 688 else:
693 buf += NUL * length 689 buf += NUL * length
694 size -= length 690 size -= length
695 self.position += length 691 self.position += length
696 return buf 692 return buf
697 693
698 def readinto(self, b): 694 def readinto(self, b):
699 buf = self.read(len(b)) 695 buf = self.read(len(b))
700 b[:len(buf)] = buf 696 b[:len(buf)] = buf
701 return len(buf) 697 return len(buf)
(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after
806 elif format == PAX_FORMAT: 802 elif format == PAX_FORMAT:
807 return self.create_pax_header(info, encoding) 803 return self.create_pax_header(info, encoding)
808 else: 804 else:
809 raise ValueError("invalid format") 805 raise ValueError("invalid format")
810 806
811 def create_ustar_header(self, info, encoding, errors): 807 def create_ustar_header(self, info, encoding, errors):
812 """Return the object as a ustar header block. 808 """Return the object as a ustar header block.
813 """ 809 """
814 info["magic"] = POSIX_MAGIC 810 info["magic"] = POSIX_MAGIC
815 811
816 if len(info["linkname"]) > LENGTH_LINK: 812 if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
817 raise ValueError("linkname is too long") 813 raise ValueError("linkname is too long")
818 814
819 if len(info["name"]) > LENGTH_NAME: 815 if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
820 info["prefix"], info["name"] = self._posix_split_name(info["name"]) 816 info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors)
821 817
822 return self._create_header(info, USTAR_FORMAT, encoding, errors) 818 return self._create_header(info, USTAR_FORMAT, encoding, errors)
823 819
824 def create_gnu_header(self, info, encoding, errors): 820 def create_gnu_header(self, info, encoding, errors):
825 """Return the object as a GNU header block sequence. 821 """Return the object as a GNU header block sequence.
826 """ 822 """
827 info["magic"] = GNU_MAGIC 823 info["magic"] = GNU_MAGIC
828 824
829 buf = b"" 825 buf = b""
830 if len(info["linkname"]) > LENGTH_LINK: 826 if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
831 buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors) 827 buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)
832 828
833 if len(info["name"]) > LENGTH_NAME: 829 if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
834 buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors) 830 buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)
835 831
836 return buf + self._create_header(info, GNU_FORMAT, encoding, errors) 832 return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
837 833
838 def create_pax_header(self, info, encoding): 834 def create_pax_header(self, info, encoding):
839 """Return the object as a ustar header block. If it cannot be 835 """Return the object as a ustar header block. If it cannot be
840 represented this way, prepend a pax extended header sequence 836 represented this way, prepend a pax extended header sequence
841 with supplement information. 837 with supplement information.
842 """ 838 """
843 info["magic"] = POSIX_MAGIC 839 info["magic"] = POSIX_MAGIC
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
883 buf = b"" 879 buf = b""
884 880
885 return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace") 881 return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
886 882
887 @classmethod 883 @classmethod
888 def create_pax_global_header(cls, pax_headers): 884 def create_pax_global_header(cls, pax_headers):
889 """Return the object as a pax global header block sequence. 885 """Return the object as a pax global header block sequence.
890 """ 886 """
891 return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8") 887 return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
892 888
893 def _posix_split_name(self, name): 889 def _posix_split_name(self, name, encoding, errors):
894 """Split a name longer than 100 chars into a prefix 890 """Split a name longer than 100 chars into a prefix
895 and a name part. 891 and a name part.
896 """ 892 """
897 prefix = name[:LENGTH_PREFIX + 1] 893 components = name.split("/")
898 while prefix and prefix[-1] != "/": 894 for i in range(1, len(components)):
899 prefix = prefix[:-1] 895 prefix = "/".join(components[:i])
900 896 name = "/".join(components[i:])
901 name = name[len(prefix):] 897 if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \
902 prefix = prefix[:-1] 898 len(name.encode(encoding, errors)) <= LENGTH_NAME:
903 899 break
904 if not prefix or len(name) > LENGTH_NAME: 900 else:
905 raise ValueError("name is too long") 901 raise ValueError("name is too long")
902
906 return prefix, name 903 return prefix, name
907 904
908 @staticmethod 905 @staticmethod
909 def _create_header(info, format, encoding, errors): 906 def _create_header(info, format, encoding, errors):
910 """Return a header block. info is a dictionary with file 907 """Return a header block. info is a dictionary with file
911 information, format must be one of the *_FORMAT constants. 908 information, format must be one of the *_FORMAT constants.
912 """ 909 """
913 parts = [ 910 parts = [
914 stn(info.get("name", ""), 100, encoding, errors), 911 stn(info.get("name", ""), 100, encoding, errors),
915 itn(info.get("mode", 0) & 0o7777, 8, format), 912 itn(info.get("mode", 0) & 0o7777, 8, format),
(...skipping 486 matching lines...) Expand 10 before | Expand all | Expand 10 after
1402 tarinfo=None, dereference=None, ignore_zeros=None, encoding=None, 1399 tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
1403 errors="surrogateescape", pax_headers=None, debug=None, errorlevel=None): 1400 errors="surrogateescape", pax_headers=None, debug=None, errorlevel=None):
1404 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to 1401 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1405 read from an existing archive, 'a' to append data to an existing 1402 read from an existing archive, 'a' to append data to an existing
1406 file or 'w' to create a new file overwriting an existing one. `mode' 1403 file or 'w' to create a new file overwriting an existing one. `mode'
1407 defaults to 'r'. 1404 defaults to 'r'.
1408 If `fileobj' is given, it is used for reading or writing data. If it 1405 If `fileobj' is given, it is used for reading or writing data. If it
1409 can be determined, `mode' is overridden by `fileobj's mode. 1406 can be determined, `mode' is overridden by `fileobj's mode.
1410 `fileobj' is not closed, when TarFile is closed. 1407 `fileobj' is not closed, when TarFile is closed.
1411 """ 1408 """
1412 modes = {"r": "rb", "a": "r+b", "w": "wb"} 1409 modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
1413 if mode not in modes: 1410 if mode not in modes:
1414 raise ValueError("mode must be 'r', 'a' or 'w'") 1411 raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
1415 self.mode = mode 1412 self.mode = mode
1416 self._mode = modes[mode] 1413 self._mode = modes[mode]
1417 1414
1418 if not fileobj: 1415 if not fileobj:
1419 if self.mode == "a" and not os.path.exists(name): 1416 if self.mode == "a" and not os.path.exists(name):
1420 # Create nonexistent files in append mode. 1417 # Create nonexistent files in append mode.
1421 self.mode = "w" 1418 self.mode = "w"
1422 self._mode = "wb" 1419 self._mode = "wb"
1423 fileobj = bltn_open(name, self._mode) 1420 fileobj = bltn_open(name, self._mode)
1424 self._extfileobj = False 1421 self._extfileobj = False
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
1476 self.fileobj.seek(self.offset) 1473 self.fileobj.seek(self.offset)
1477 try: 1474 try:
1478 tarinfo = self.tarinfo.fromtarfile(self) 1475 tarinfo = self.tarinfo.fromtarfile(self)
1479 self.members.append(tarinfo) 1476 self.members.append(tarinfo)
1480 except EOFHeaderError: 1477 except EOFHeaderError:
1481 self.fileobj.seek(self.offset) 1478 self.fileobj.seek(self.offset)
1482 break 1479 break
1483 except HeaderError as e: 1480 except HeaderError as e:
1484 raise ReadError(str(e)) 1481 raise ReadError(str(e))
1485 1482
1486 if self.mode in "aw": 1483 if self.mode in ("a", "w", "x"):
1487 self._loaded = True 1484 self._loaded = True
1488 1485
1489 if self.pax_headers: 1486 if self.pax_headers:
1490 buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy()) 1487 buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
1491 self.fileobj.write(buf) 1488 self.fileobj.write(buf)
1492 self.offset += len(buf) 1489 self.offset += len(buf)
1493 except: 1490 except:
1494 if not self._extfileobj: 1491 if not self._extfileobj:
1495 self.fileobj.close() 1492 self.fileobj.close()
1496 self.closed = True 1493 self.closed = True
(...skipping 19 matching lines...) Expand all
1516 'r' or 'r:*' open for reading with transparent compression 1513 'r' or 'r:*' open for reading with transparent compression
1517 'r:' open for reading exclusively uncompressed 1514 'r:' open for reading exclusively uncompressed
1518 'r:gz' open for reading with gzip compression 1515 'r:gz' open for reading with gzip compression
1519 'r:bz2' open for reading with bzip2 compression 1516 'r:bz2' open for reading with bzip2 compression
1520 'r:xz' open for reading with lzma compression 1517 'r:xz' open for reading with lzma compression
1521 'a' or 'a:' open for appending, creating the file if necessary 1518 'a' or 'a:' open for appending, creating the file if necessary
1522 'w' or 'w:' open for writing without compression 1519 'w' or 'w:' open for writing without compression
1523 'w:gz' open for writing with gzip compression 1520 'w:gz' open for writing with gzip compression
1524 'w:bz2' open for writing with bzip2 compression 1521 'w:bz2' open for writing with bzip2 compression
1525 'w:xz' open for writing with lzma compression 1522 'w:xz' open for writing with lzma compression
1523
1524 'x' or 'x:' create a tarfile exclusively without compression, raise
1525 an exception if the file is already created
1526 'x:gz' create a gzip compressed tarfile, raise an exception
1527 if the file is already created
1528 'x:bz2' create a bzip2 compressed tarfile, raise an exception
1529 if the file is already created
1530 'x:xz' create an lzma compressed tarfile, raise an exception
1531 if the file is already created
1526 1532
1527 'r|*' open a stream of tar blocks with transparent compression 1533 'r|*' open a stream of tar blocks with transparent compression
1528 'r|' open an uncompressed stream of tar blocks for reading 1534 'r|' open an uncompressed stream of tar blocks for reading
1529 'r|gz' open a gzip compressed stream of tar blocks 1535 'r|gz' open a gzip compressed stream of tar blocks
1530 'r|bz2' open a bzip2 compressed stream of tar blocks 1536 'r|bz2' open a bzip2 compressed stream of tar blocks
1531 'r|xz' open an lzma compressed stream of tar blocks 1537 'r|xz' open an lzma compressed stream of tar blocks
1532 'w|' open an uncompressed stream for writing 1538 'w|' open an uncompressed stream for writing
1533 'w|gz' open a gzip compressed stream for writing 1539 'w|gz' open a gzip compressed stream for writing
1534 'w|bz2' open a bzip2 compressed stream for writing 1540 'w|bz2' open a bzip2 compressed stream for writing
1535 'w|xz' open an lzma compressed stream for writing 1541 'w|xz' open an lzma compressed stream for writing
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
1575 1581
1576 stream = _Stream(name, filemode, comptype, fileobj, bufsize) 1582 stream = _Stream(name, filemode, comptype, fileobj, bufsize)
1577 try: 1583 try:
1578 t = cls(name, filemode, stream, **kwargs) 1584 t = cls(name, filemode, stream, **kwargs)
1579 except: 1585 except:
1580 stream.close() 1586 stream.close()
1581 raise 1587 raise
1582 t._extfileobj = False 1588 t._extfileobj = False
1583 return t 1589 return t
1584 1590
1585 elif mode in ("a", "w"): 1591 elif mode in ("a", "w", "x"):
1586 return cls.taropen(name, mode, fileobj, **kwargs) 1592 return cls.taropen(name, mode, fileobj, **kwargs)
1587 1593
1588 raise ValueError("undiscernible mode") 1594 raise ValueError("undiscernible mode")
1589 1595
1590 @classmethod 1596 @classmethod
1591 def taropen(cls, name, mode="r", fileobj=None, **kwargs): 1597 def taropen(cls, name, mode="r", fileobj=None, **kwargs):
1592 """Open uncompressed tar archive name for reading or writing. 1598 """Open uncompressed tar archive name for reading or writing.
1593 """ 1599 """
1594 if mode not in ("r", "a", "w"): 1600 if mode not in ("r", "a", "w", "x"):
1595 raise ValueError("mode must be 'r', 'a' or 'w'") 1601 raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
1596 return cls(name, mode, fileobj, **kwargs) 1602 return cls(name, mode, fileobj, **kwargs)
1597 1603
1598 @classmethod 1604 @classmethod
1599 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs): 1605 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1600 """Open gzip compressed tar archive name for reading or writing. 1606 """Open gzip compressed tar archive name for reading or writing.
1601 Appending is not allowed. 1607 Appending is not allowed.
1602 """ 1608 """
1603 if mode not in ("r", "w"): 1609 if mode not in ("r", "w", "x"):
1604 raise ValueError("mode must be 'r' or 'w'") 1610 raise ValueError("mode must be 'r', 'w' or 'x'")
1605 1611
1606 try: 1612 try:
1607 import gzip 1613 import gzip
1608 gzip.GzipFile 1614 gzip.GzipFile
1609 except (ImportError, AttributeError): 1615 except (ImportError, AttributeError):
1610 raise CompressionError("gzip module is not available") 1616 raise CompressionError("gzip module is not available")
1611 1617
1612 try: 1618 try:
1613 fileobj = gzip.GzipFile(name, mode + "b", compresslevel, fileobj) 1619 fileobj = gzip.GzipFile(name, mode + "b", compresslevel, fileobj)
1614 except OSError: 1620 except OSError:
(...skipping 12 matching lines...) Expand all
1627 fileobj.close() 1633 fileobj.close()
1628 raise 1634 raise
1629 t._extfileobj = False 1635 t._extfileobj = False
1630 return t 1636 return t
1631 1637
1632 @classmethod 1638 @classmethod
1633 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs): 1639 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1634 """Open bzip2 compressed tar archive name for reading or writing. 1640 """Open bzip2 compressed tar archive name for reading or writing.
1635 Appending is not allowed. 1641 Appending is not allowed.
1636 """ 1642 """
1637 if mode not in ("r", "w"): 1643 if mode not in ("r", "w", "x"):
1638 raise ValueError("mode must be 'r' or 'w'.") 1644 raise ValueError("mode must be 'r', 'w' or 'x'")
1639 1645
1640 try: 1646 try:
1641 import bz2 1647 import bz2
1642 except ImportError: 1648 except ImportError:
1643 raise CompressionError("bz2 module is not available") 1649 raise CompressionError("bz2 module is not available")
1644 1650
1645 fileobj = bz2.BZ2File(fileobj or name, mode, 1651 fileobj = bz2.BZ2File(fileobj or name, mode,
1646 compresslevel=compresslevel) 1652 compresslevel=compresslevel)
1647 1653
1648 try: 1654 try:
1649 t = cls.taropen(name, mode, fileobj, **kwargs) 1655 t = cls.taropen(name, mode, fileobj, **kwargs)
1650 except (OSError, EOFError): 1656 except (OSError, EOFError):
1651 fileobj.close() 1657 fileobj.close()
1652 if mode == 'r': 1658 if mode == 'r':
1653 raise ReadError("not a bzip2 file") 1659 raise ReadError("not a bzip2 file")
1654 raise 1660 raise
1655 except: 1661 except:
1656 fileobj.close() 1662 fileobj.close()
1657 raise 1663 raise
1658 t._extfileobj = False 1664 t._extfileobj = False
1659 return t 1665 return t
1660 1666
1661 @classmethod 1667 @classmethod
1662 def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs): 1668 def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
1663 """Open lzma compressed tar archive name for reading or writing. 1669 """Open lzma compressed tar archive name for reading or writing.
1664 Appending is not allowed. 1670 Appending is not allowed.
1665 """ 1671 """
1666 if mode not in ("r", "w"): 1672 if mode not in ("r", "w", "x"):
1667 raise ValueError("mode must be 'r' or 'w'") 1673 raise ValueError("mode must be 'r', 'w' or 'x'")
1668 1674
1669 try: 1675 try:
1670 import lzma 1676 import lzma
1671 except ImportError: 1677 except ImportError:
1672 raise CompressionError("lzma module is not available") 1678 raise CompressionError("lzma module is not available")
1673 1679
1674 fileobj = lzma.LZMAFile(fileobj or name, mode, preset=preset) 1680 fileobj = lzma.LZMAFile(fileobj or name, mode, preset=preset)
1675 1681
1676 try: 1682 try:
1677 t = cls.taropen(name, mode, fileobj, **kwargs) 1683 t = cls.taropen(name, mode, fileobj, **kwargs)
(...skipping 19 matching lines...) Expand all
1697 #-------------------------------------------------------------------------- 1703 #--------------------------------------------------------------------------
1698 # The public methods which TarFile provides: 1704 # The public methods which TarFile provides:
1699 1705
1700 def close(self): 1706 def close(self):
1701 """Close the TarFile. In write-mode, two finishing zero blocks are 1707 """Close the TarFile. In write-mode, two finishing zero blocks are
1702 appended to the archive. 1708 appended to the archive.
1703 """ 1709 """
1704 if self.closed: 1710 if self.closed:
1705 return 1711 return
1706 1712
1707 if self.mode in "aw":
1708 self.fileobj.write(NUL * (BLOCKSIZE * 2))
1709 self.offset += (BLOCKSIZE * 2)
1710 # fill up the end with zero-blocks
1711 # (like option -b20 for tar does)
1712 blocks, remainder = divmod(self.offset, RECORDSIZE)
1713 if remainder > 0:
1714 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1715
1716 if not self._extfileobj:
1717 self.fileobj.close()
1718 self.closed = True 1713 self.closed = True
1714 try:
1715 if self.mode in ("a", "w", "x"):
1716 self.fileobj.write(NUL * (BLOCKSIZE * 2))
1717 self.offset += (BLOCKSIZE * 2)
1718 # fill up the end with zero-blocks
1719 # (like option -b20 for tar does)
1720 blocks, remainder = divmod(self.offset, RECORDSIZE)
1721 if remainder > 0:
1722 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1723 finally:
1724 if not self._extfileobj:
1725 self.fileobj.close()
1719 1726
1720 def getmember(self, name): 1727 def getmember(self, name):
1721 """Return a TarInfo object for member `name'. If `name' can not be 1728 """Return a TarInfo object for member `name'. If `name' can not be
1722 found in the archive, KeyError is raised. If a member occurs more 1729 found in the archive, KeyError is raised. If a member occurs more
1723 than once in the archive, its last occurrence is assumed to be the 1730 than once in the archive, its last occurrence is assumed to be the
1724 most up-to-date version. 1731 most up-to-date version.
1725 """ 1732 """
1726 tarinfo = self._getmember(name) 1733 tarinfo = self._getmember(name)
1727 if tarinfo is None: 1734 if tarinfo is None:
1728 raise KeyError("filename %r not found" % name) 1735 raise KeyError("filename %r not found" % name)
1729 return tarinfo 1736 return tarinfo
1730 1737
1731 def getmembers(self): 1738 def getmembers(self):
1732 """Return the members of the archive as a list of TarInfo objects. The 1739 """Return the members of the archive as a list of TarInfo objects. The
1733 list has the same order as the members in the archive. 1740 list has the same order as the members in the archive.
1734 """ 1741 """
1735 self._check() 1742 self._check()
1736 if not self._loaded: # if we want to obtain a list of 1743 if not self._loaded: # if we want to obtain a list of
1737 self._load() # all members, we first have to 1744 self._load() # all members, we first have to
1738 # scan the whole archive. 1745 # scan the whole archive.
1739 return self.members 1746 return self.members
1740 1747
1741 def getnames(self): 1748 def getnames(self):
1742 """Return the members of the archive as a list of their names. It has 1749 """Return the members of the archive as a list of their names. It has
1743 the same order as the list returned by getmembers(). 1750 the same order as the list returned by getmembers().
1744 """ 1751 """
1745 return [tarinfo.name for tarinfo in self.getmembers()] 1752 return [tarinfo.name for tarinfo in self.getmembers()]
1746 1753
1747 def gettarinfo(self, name=None, arcname=None, fileobj=None): 1754 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1748 """Create a TarInfo object for either the file `name' or the file 1755 """Create a TarInfo object from the result of os.stat or equivalent
1749 object `fileobj' (using os.fstat on its file descriptor). You can 1756 on an existing file. The file is either named by `name', or
1750 modify some of the TarInfo's attributes before you add it using 1757 specified as a file object `fileobj' with a file descriptor. If
1751 addfile(). If given, `arcname' specifies an alternative name for the 1758 given, `arcname' specifies an alternative name for the file in the
1752 file in the archive. 1759 archive, otherwise, the name is taken from the 'name' attribute of
1753 """ 1760 'fileobj', or the 'name' argument. The name should be a text
1754 self._check("aw") 1761 string.
1762 """
1763 self._check("awx")
1755 1764
1756 # When fileobj is given, replace name by 1765 # When fileobj is given, replace name by
1757 # fileobj's real name. 1766 # fileobj's real name.
1758 if fileobj is not None: 1767 if fileobj is not None:
1759 name = fileobj.name 1768 name = fileobj.name
1760 1769
1761 # Building the name of the member in the archive. 1770 # Building the name of the member in the archive.
1762 # Backward slashes are converted to forward slashes, 1771 # Backward slashes are converted to forward slashes,
1763 # Absolute paths are turned to relative paths. 1772 # Absolute paths are turned to relative paths.
1764 if arcname is None: 1773 if arcname is None:
1765 arcname = name 1774 arcname = name
1766 drv, arcname = os.path.splitdrive(arcname) 1775 drv, arcname = os.path.splitdrive(arcname)
1767 arcname = arcname.replace(os.sep, "/") 1776 arcname = arcname.replace(os.sep, "/")
1768 arcname = arcname.lstrip("/") 1777 arcname = arcname.lstrip("/")
1769 1778
1770 # Now, fill the TarInfo object with 1779 # Now, fill the TarInfo object with
1771 # information specific for the file. 1780 # information specific for the file.
1772 tarinfo = self.tarinfo() 1781 tarinfo = self.tarinfo()
1773 tarinfo.tarfile = self 1782 tarinfo.tarfile = self # Not needed
1774 1783
1775 # Use os.stat or os.lstat, depending on platform 1784 # Use os.stat or os.lstat, depending on platform
1776 # and if symlinks shall be resolved. 1785 # and if symlinks shall be resolved.
1777 if fileobj is None: 1786 if fileobj is None:
1778 if hasattr(os, "lstat") and not self.dereference: 1787 if hasattr(os, "lstat") and not self.dereference:
1779 statres = os.lstat(name) 1788 statres = os.lstat(name)
1780 else: 1789 else:
1781 statres = os.stat(name) 1790 statres = os.stat(name)
1782 else: 1791 else:
1783 statres = os.fstat(fileobj.fileno()) 1792 statres = os.fstat(fileobj.fileno())
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after
1878 """Add the file `name' to the archive. `name' may be any type of file 1887 """Add the file `name' to the archive. `name' may be any type of file
1879 (directory, fifo, symbolic link, etc.). If given, `arcname' 1888 (directory, fifo, symbolic link, etc.). If given, `arcname'
1880 specifies an alternative name for the file in the archive. 1889 specifies an alternative name for the file in the archive.
1881 Directories are added recursively by default. This can be avoided by 1890 Directories are added recursively by default. This can be avoided by
1882 setting `recursive' to False. `exclude' is a function that should 1891 setting `recursive' to False. `exclude' is a function that should
1883 return True for each filename to be excluded. `filter' is a function 1892 return True for each filename to be excluded. `filter' is a function
1884 that expects a TarInfo object argument and returns the changed 1893 that expects a TarInfo object argument and returns the changed
1885 TarInfo object, if it returns None the TarInfo object will be 1894 TarInfo object, if it returns None the TarInfo object will be
1886 excluded from the archive. 1895 excluded from the archive.
1887 """ 1896 """
1888 self._check("aw") 1897 self._check("awx")
1889 1898
1890 if arcname is None: 1899 if arcname is None:
1891 arcname = name 1900 arcname = name
1892 1901
1893 # Exclude pathnames. 1902 # Exclude pathnames.
1894 if exclude is not None: 1903 if exclude is not None:
1895 import warnings 1904 import warnings
1896 warnings.warn("use the filter argument instead", 1905 warnings.warn("use the filter argument instead",
1897 DeprecationWarning, 2) 1906 DeprecationWarning, 2)
1898 if exclude(name): 1907 if exclude(name):
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
1930 if recursive: 1939 if recursive:
1931 for f in os.listdir(name): 1940 for f in os.listdir(name):
1932 self.add(os.path.join(name, f), os.path.join(arcname, f), 1941 self.add(os.path.join(name, f), os.path.join(arcname, f),
1933 recursive, exclude, filter=filter) 1942 recursive, exclude, filter=filter)
1934 1943
1935 else: 1944 else:
1936 self.addfile(tarinfo) 1945 self.addfile(tarinfo)
1937 1946
1938 def addfile(self, tarinfo, fileobj=None): 1947 def addfile(self, tarinfo, fileobj=None):
1939 """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is 1948 """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
1940 given, tarinfo.size bytes are read from it and added to the archive. 1949 given, it should be a binary file, and tarinfo.size bytes are read
1941 You can create TarInfo objects using gettarinfo(). 1950 from it and added to the archive. You can create TarInfo objects
1942 On Windows platforms, `fileobj' should always be opened with mode 1951 directly, or by using gettarinfo().
1943 'rb' to avoid irritation about the file size. 1952 """
1944 """ 1953 self._check("awx")
1945 self._check("aw")
1946 1954
1947 tarinfo = copy.copy(tarinfo) 1955 tarinfo = copy.copy(tarinfo)
1948 1956
1949 buf = tarinfo.tobuf(self.format, self.encoding, self.errors) 1957 buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
1950 self.fileobj.write(buf) 1958 self.fileobj.write(buf)
1951 self.offset += len(buf) 1959 self.offset += len(buf)
1952 1960
1953 # If there's data to follow, append it. 1961 # If there's data to follow, append it.
1954 if fileobj is not None: 1962 if fileobj is not None:
1955 copyfileobj(fileobj, self.fileobj, tarinfo.size) 1963 copyfileobj(fileobj, self.fileobj, tarinfo.size)
1956 blocks, remainder = divmod(tarinfo.size, BLOCKSIZE) 1964 blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
1957 if remainder > 0: 1965 if remainder > 0:
1958 self.fileobj.write(NUL * (BLOCKSIZE - remainder)) 1966 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
1959 blocks += 1 1967 blocks += 1
1960 self.offset += blocks * BLOCKSIZE 1968 self.offset += blocks * BLOCKSIZE
1961 1969
1962 self.members.append(tarinfo) 1970 self.members.append(tarinfo)
1963 1971
1964 def extractall(self, path=".", members=None): 1972 def extractall(self, path=".", members=None, *, numeric_owner=False):
1965 """Extract all members from the archive to the current working 1973 """Extract all members from the archive to the current working
1966 directory and set owner, modification time and permissions on 1974 directory and set owner, modification time and permissions on
1967 directories afterwards. `path' specifies a different directory 1975 directories afterwards. `path' specifies a different directory
1968 to extract to. `members' is optional and must be a subset of the 1976 to extract to. `members' is optional and must be a subset of the
1969 list returned by getmembers(). 1977 list returned by getmembers(). If `numeric_owner` is True, only
1978 the numbers for user/group names are used and not the names.
1970 """ 1979 """
1971 directories = [] 1980 directories = []
1972 1981
1973 if members is None: 1982 if members is None:
1974 members = self 1983 members = self
1975 1984
1976 for tarinfo in members: 1985 for tarinfo in members:
1977 if tarinfo.isdir(): 1986 if tarinfo.isdir():
1978 # Extract directories with a safe mode. 1987 # Extract directories with a safe mode.
1979 directories.append(tarinfo) 1988 directories.append(tarinfo)
1980 tarinfo = copy.copy(tarinfo) 1989 tarinfo = copy.copy(tarinfo)
1981 tarinfo.mode = 0o700 1990 tarinfo.mode = 0o700
1982 # Do not set_attrs directories, as we will do that further down 1991 # Do not set_attrs directories, as we will do that further down
1983 self.extract(tarinfo, path, set_attrs=not tarinfo.isdir()) 1992 self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(),
1993 numeric_owner=numeric_owner)
1984 1994
1985 # Reverse sort directories. 1995 # Reverse sort directories.
1986 directories.sort(key=lambda a: a.name) 1996 directories.sort(key=lambda a: a.name)
1987 directories.reverse() 1997 directories.reverse()
1988 1998
1989 # Set correct owner, mtime and filemode on directories. 1999 # Set correct owner, mtime and filemode on directories.
1990 for tarinfo in directories: 2000 for tarinfo in directories:
1991 dirpath = os.path.join(path, tarinfo.name) 2001 dirpath = os.path.join(path, tarinfo.name)
1992 try: 2002 try:
1993 self.chown(tarinfo, dirpath) 2003 self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
1994 self.utime(tarinfo, dirpath) 2004 self.utime(tarinfo, dirpath)
1995 self.chmod(tarinfo, dirpath) 2005 self.chmod(tarinfo, dirpath)
1996 except ExtractError as e: 2006 except ExtractError as e:
1997 if self.errorlevel > 1: 2007 if self.errorlevel > 1:
1998 raise 2008 raise
1999 else: 2009 else:
2000 self._dbg(1, "tarfile: %s" % e) 2010 self._dbg(1, "tarfile: %s" % e)
2001 2011
2002 def extract(self, member, path="", set_attrs=True): 2012 def extract(self, member, path="", set_attrs=True, *, numeric_owner=False):
2003 """Extract a member from the archive to the current working directory, 2013 """Extract a member from the archive to the current working directory,
2004 using its full name. Its file information is extracted as accurately 2014 using its full name. Its file information is extracted as accurately
2005 as possible. `member' may be a filename or a TarInfo object. You can 2015 as possible. `member' may be a filename or a TarInfo object. You can
2006 specify a different directory using `path'. File attributes (owner, 2016 specify a different directory using `path'. File attributes (owner,
2007 mtime, mode) are set unless `set_attrs' is False. 2017 mtime, mode) are set unless `set_attrs' is False. If `numeric_owner`
2018 is True, only the numbers for user/group names are used and not
2019 the names.
2008 """ 2020 """
2009 self._check("r") 2021 self._check("r")
2010 2022
2011 if isinstance(member, str): 2023 if isinstance(member, str):
2012 tarinfo = self.getmember(member) 2024 tarinfo = self.getmember(member)
2013 else: 2025 else:
2014 tarinfo = member 2026 tarinfo = member
2015 2027
2016 # Prepare the link target for makelink(). 2028 # Prepare the link target for makelink().
2017 if tarinfo.islnk(): 2029 if tarinfo.islnk():
2018 tarinfo._link_target = os.path.join(path, tarinfo.linkname) 2030 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
2019 2031
2020 try: 2032 try:
2021 self._extract_member(tarinfo, os.path.join(path, tarinfo.name), 2033 self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
2022 set_attrs=set_attrs) 2034 set_attrs=set_attrs,
2035 numeric_owner=numeric_owner)
2023 except OSError as e: 2036 except OSError as e:
2024 if self.errorlevel > 0: 2037 if self.errorlevel > 0:
2025 raise 2038 raise
2026 else: 2039 else:
2027 if e.filename is None: 2040 if e.filename is None:
2028 self._dbg(1, "tarfile: %s" % e.strerror) 2041 self._dbg(1, "tarfile: %s" % e.strerror)
2029 else: 2042 else:
2030 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename)) 2043 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
2031 except ExtractError as e: 2044 except ExtractError as e:
2032 if self.errorlevel > 1: 2045 if self.errorlevel > 1:
(...skipping 25 matching lines...) Expand all
2058 # stream of tar blocks. 2071 # stream of tar blocks.
2059 raise StreamError("cannot extract (sym)link as file object") 2072 raise StreamError("cannot extract (sym)link as file object")
2060 else: 2073 else:
2061 # A (sym)link's file object is its target's file object. 2074 # A (sym)link's file object is its target's file object.
2062 return self.extractfile(self._find_link_target(tarinfo)) 2075 return self.extractfile(self._find_link_target(tarinfo))
2063 else: 2076 else:
2064 # If there's no data associated with the member (directory, chrdev, 2077 # If there's no data associated with the member (directory, chrdev,
2065 # blkdev, etc.), return None instead of a file object. 2078 # blkdev, etc.), return None instead of a file object.
2066 return None 2079 return None
2067 2080
2068 def _extract_member(self, tarinfo, targetpath, set_attrs=True): 2081 def _extract_member(self, tarinfo, targetpath, set_attrs=True,
2082 numeric_owner=False):
2069 """Extract the TarInfo object tarinfo to a physical 2083 """Extract the TarInfo object tarinfo to a physical
2070 file called targetpath. 2084 file called targetpath.
2071 """ 2085 """
2072 # Fetch the TarInfo object for the given name 2086 # Fetch the TarInfo object for the given name
2073 # and build the destination pathname, replacing 2087 # and build the destination pathname, replacing
2074 # forward slashes to platform specific separators. 2088 # forward slashes to platform specific separators.
2075 targetpath = targetpath.rstrip("/") 2089 targetpath = targetpath.rstrip("/")
2076 targetpath = targetpath.replace("/", os.sep) 2090 targetpath = targetpath.replace("/", os.sep)
2077 2091
2078 # Create all upper directories. 2092 # Create all upper directories.
(...skipping 17 matching lines...) Expand all
2096 elif tarinfo.ischr() or tarinfo.isblk(): 2110 elif tarinfo.ischr() or tarinfo.isblk():
2097 self.makedev(tarinfo, targetpath) 2111 self.makedev(tarinfo, targetpath)
2098 elif tarinfo.islnk() or tarinfo.issym(): 2112 elif tarinfo.islnk() or tarinfo.issym():
2099 self.makelink(tarinfo, targetpath) 2113 self.makelink(tarinfo, targetpath)
2100 elif tarinfo.type not in SUPPORTED_TYPES: 2114 elif tarinfo.type not in SUPPORTED_TYPES:
2101 self.makeunknown(tarinfo, targetpath) 2115 self.makeunknown(tarinfo, targetpath)
2102 else: 2116 else:
2103 self.makefile(tarinfo, targetpath) 2117 self.makefile(tarinfo, targetpath)
2104 2118
2105 if set_attrs: 2119 if set_attrs:
2106 self.chown(tarinfo, targetpath) 2120 self.chown(tarinfo, targetpath, numeric_owner)
2107 if not tarinfo.issym(): 2121 if not tarinfo.issym():
2108 self.chmod(tarinfo, targetpath) 2122 self.chmod(tarinfo, targetpath)
2109 self.utime(tarinfo, targetpath) 2123 self.utime(tarinfo, targetpath)
2110 2124
2111 #-------------------------------------------------------------------------- 2125 #--------------------------------------------------------------------------
2112 # Below are the different file methods. They are called via 2126 # Below are the different file methods. They are called via
2113 # _extract_member() when extract() is called. They can be replaced in a 2127 # _extract_member() when extract() is called. They can be replaced in a
2114 # subclass to implement other functionality. 2128 # subclass to implement other functionality.
2115 2129
2116 def makedir(self, tarinfo, targetpath): 2130 def makedir(self, tarinfo, targetpath):
2117 """Make a directory called targetpath. 2131 """Make a directory called targetpath.
2118 """ 2132 """
2119 try: 2133 try:
2120 # Use a safe mode for the directory, the real mode is set 2134 # Use a safe mode for the directory, the real mode is set
2121 # later in _extract_member(). 2135 # later in _extract_member().
2122 os.mkdir(targetpath, 0o700) 2136 os.mkdir(targetpath, 0o700)
2123 except FileExistsError: 2137 except FileExistsError:
2124 pass 2138 pass
2125 2139
2126 def makefile(self, tarinfo, targetpath): 2140 def makefile(self, tarinfo, targetpath):
2127 """Make a file called targetpath. 2141 """Make a file called targetpath.
2128 """ 2142 """
2129 source = self.fileobj 2143 source = self.fileobj
2130 source.seek(tarinfo.offset_data) 2144 source.seek(tarinfo.offset_data)
2131 with bltn_open(targetpath, "wb") as target: 2145 with bltn_open(targetpath, "wb") as target:
2132 if tarinfo.sparse is not None: 2146 if tarinfo.sparse is not None:
2133 for offset, size in tarinfo.sparse: 2147 for offset, size in tarinfo.sparse:
2134 target.seek(offset) 2148 target.seek(offset)
2135 copyfileobj(source, target, size) 2149 copyfileobj(source, target, size, ReadError)
2136 else: 2150 else:
2137 copyfileobj(source, target, tarinfo.size) 2151 copyfileobj(source, target, tarinfo.size, ReadError)
2138 target.seek(tarinfo.size) 2152 target.seek(tarinfo.size)
2139 target.truncate() 2153 target.truncate()
2140 2154
2141 def makeunknown(self, tarinfo, targetpath): 2155 def makeunknown(self, tarinfo, targetpath):
2142 """Make a file from a TarInfo object with an unknown type 2156 """Make a file from a TarInfo object with an unknown type
2143 at targetpath. 2157 at targetpath.
2144 """ 2158 """
2145 self.makefile(tarinfo, targetpath) 2159 self.makefile(tarinfo, targetpath)
2146 self._dbg(1, "tarfile: Unknown file type %r, " \ 2160 self._dbg(1, "tarfile: Unknown file type %r, " \
2147 "extracted as regular file." % tarinfo.type) 2161 "extracted as regular file." % tarinfo.type)
(...skipping 22 matching lines...) Expand all
2170 os.makedev(tarinfo.devmajor, tarinfo.devminor)) 2184 os.makedev(tarinfo.devmajor, tarinfo.devminor))
2171 2185
2172 def makelink(self, tarinfo, targetpath): 2186 def makelink(self, tarinfo, targetpath):
2173 """Make a (symbolic) link called targetpath. If it cannot be created 2187 """Make a (symbolic) link called targetpath. If it cannot be created
2174 (platform limitation), we try to make a copy of the referenced file 2188 (platform limitation), we try to make a copy of the referenced file
2175 instead of a link. 2189 instead of a link.
2176 """ 2190 """
2177 try: 2191 try:
2178 # For systems that support symbolic and hard links. 2192 # For systems that support symbolic and hard links.
2179 if tarinfo.issym(): 2193 if tarinfo.issym():
2180 # a symlink with that name exists already, we need to replace
2181 # it with our version (see #37688)
2182 if os.path.lexists(targetpath):
2183 os.remove(targetpath)
2184 os.symlink(tarinfo.linkname, targetpath) 2194 os.symlink(tarinfo.linkname, targetpath)
2185 else: 2195 else:
2186 # See extract(). 2196 # See extract().
2187 if os.path.exists(tarinfo._link_target): 2197 if os.path.exists(tarinfo._link_target):
2188 os.link(tarinfo._link_target, targetpath) 2198 os.link(tarinfo._link_target, targetpath)
2189 else: 2199 else:
2190 self._extract_member(self._find_link_target(tarinfo), 2200 self._extract_member(self._find_link_target(tarinfo),
2191 targetpath) 2201 targetpath)
2192 except symlink_exception: 2202 except symlink_exception as e:
2203 # On Windows, OSError (winerror=1314) will be raised if the caller
2204 # does not hold the SeCreateSymbolicLinkPrivilege privilege.
2205 if getattr(e, "winerror", None) is None:
[Review comment] Martin Panter (2018/12/15 23:44:46): What about AttributeError and NotImplementedError, ... [comment truncated in the collapsed view]
2206 raise
2207
2193 try: 2208 try:
2194 self._extract_member(self._find_link_target(tarinfo), 2209 self._extract_member(self._find_link_target(tarinfo),
2195 targetpath) 2210 targetpath)
2196 except KeyError: 2211 except KeyError:
2197 raise ExtractError("unable to resolve link inside archive") 2212 raise ExtractError("unable to resolve link inside archive")
2198 2213
2199 def chown(self, tarinfo, targetpath): 2214 def chown(self, tarinfo, targetpath, numeric_owner):
2200 """Set owner of targetpath according to tarinfo. 2215 """Set owner of targetpath according to tarinfo. If numeric_owner
2216 is True, use .gid/.uid instead of .gname/.uname.
2201 """ 2217 """
2202 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0: 2218 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
2203 # We have to be root to do so. 2219 # We have to be root to do so.
2204 try: 2220 if numeric_owner:
2205 g = grp.getgrnam(tarinfo.gname)[2]
2206 except KeyError:
2207 g = tarinfo.gid 2221 g = tarinfo.gid
2208 try:
2209 u = pwd.getpwnam(tarinfo.uname)[2]
2210 except KeyError:
2211 u = tarinfo.uid 2222 u = tarinfo.uid
2223 else:
2224 try:
2225 g = grp.getgrnam(tarinfo.gname)[2]
2226 except KeyError:
2227 g = tarinfo.gid
2228 try:
2229 u = pwd.getpwnam(tarinfo.uname)[2]
2230 except KeyError:
2231 u = tarinfo.uid
2212 try: 2232 try:
2213 if tarinfo.issym() and hasattr(os, "lchown"): 2233 if tarinfo.issym() and hasattr(os, "lchown"):
2214 os.lchown(targetpath, u, g) 2234 os.lchown(targetpath, u, g)
2215 else: 2235 else:
2216 os.chown(targetpath, u, g) 2236 os.chown(targetpath, u, g)
2217 except OSError as e: 2237 except OSError as e:
2218 raise ExtractError("could not change owner") 2238 raise ExtractError("could not change owner")
2219 2239
2220 def chmod(self, tarinfo, targetpath): 2240 def chmod(self, tarinfo, targetpath):
2221 """Set file permissions of targetpath according to tarinfo. 2241 """Set file permissions of targetpath according to tarinfo.
(...skipping 19 matching lines...) Expand all
2241 """Return the next member of the archive as a TarInfo object, when 2261 """Return the next member of the archive as a TarInfo object, when
2242 TarFile is opened for reading. Return None if there is no more 2262 TarFile is opened for reading. Return None if there is no more
2243 available. 2263 available.
2244 """ 2264 """
2245 self._check("ra") 2265 self._check("ra")
2246 if self.firstmember is not None: 2266 if self.firstmember is not None:
2247 m = self.firstmember 2267 m = self.firstmember
2248 self.firstmember = None 2268 self.firstmember = None
2249 return m 2269 return m
2250 2270
2271 # Advance the file pointer.
2272 if self.offset != self.fileobj.tell():
2273 self.fileobj.seek(self.offset - 1)
2274 if not self.fileobj.read(1):
2275 raise ReadError("unexpected end of data")
2276
2251 # Read the next block. 2277 # Read the next block.
2252 self.fileobj.seek(self.offset)
2253 tarinfo = None 2278 tarinfo = None
2254 while True: 2279 while True:
2255 try: 2280 try:
2256 tarinfo = self.tarinfo.fromtarfile(self) 2281 tarinfo = self.tarinfo.fromtarfile(self)
2257 except EOFHeaderError as e: 2282 except EOFHeaderError as e:
2258 if self.ignore_zeros: 2283 if self.ignore_zeros:
2259 self._dbg(2, "0x%X: %s" % (self.offset, e)) 2284 self._dbg(2, "0x%X: %s" % (self.offset, e))
2260 self.offset += BLOCKSIZE 2285 self.offset += BLOCKSIZE
2261 continue 2286 continue
2262 except InvalidHeaderError as e: 2287 except InvalidHeaderError as e:
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
2344 2369
2345 member = self._getmember(linkname, tarinfo=limit, normalize=True) 2370 member = self._getmember(linkname, tarinfo=limit, normalize=True)
2346 if member is None: 2371 if member is None:
2347 raise KeyError("linkname %r not found" % linkname) 2372 raise KeyError("linkname %r not found" % linkname)
2348 return member 2373 return member
2349 2374
2350 def __iter__(self): 2375 def __iter__(self):
2351 """Provide an iterator object. 2376 """Provide an iterator object.
2352 """ 2377 """
2353 if self._loaded: 2378 if self._loaded:
2354 return iter(self.members) 2379 yield from self.members
2355 else: 2380 return
2356 return TarIter(self) 2381
2382 # Yield items using TarFile's next() method.
2383 # When all members have been read, set TarFile as _loaded.
2384 index = 0
2385 # Fix for SF #1100429: Under rare circumstances it can
2386 # happen that getmembers() is called during iteration,
2387 # which will have already exhausted the next() method.
2388 if self.firstmember is not None:
2389 tarinfo = self.next()
2390 index += 1
2391 yield tarinfo
2392
2393 while True:
2394 if index < len(self.members):
2395 tarinfo = self.members[index]
2396 elif not self._loaded:
2397 tarinfo = self.next()
2398 if not tarinfo:
2399 self._loaded = True
2400 return
2401 else:
2402 return
2403 index += 1
2404 yield tarinfo
2357 2405
2358 def _dbg(self, level, msg): 2406 def _dbg(self, level, msg):
2359 """Write debugging output to sys.stderr. 2407 """Write debugging output to sys.stderr.
2360 """ 2408 """
2361 if level <= self.debug: 2409 if level <= self.debug:
2362 print(msg, file=sys.stderr) 2410 print(msg, file=sys.stderr)
2363 2411
2364 def __enter__(self): 2412 def __enter__(self):
2365 self._check() 2413 self._check()
2366 return self 2414 return self
2367 2415
2368 def __exit__(self, type, value, traceback): 2416 def __exit__(self, type, value, traceback):
2369 if type is None: 2417 if type is None:
2370 self.close() 2418 self.close()
2371 else: 2419 else:
2372 # An exception occurred. We must not call close() because 2420 # An exception occurred. We must not call close() because
2373 # it would try to write end-of-archive blocks and padding. 2421 # it would try to write end-of-archive blocks and padding.
2374 if not self._extfileobj: 2422 if not self._extfileobj:
2375 self.fileobj.close() 2423 self.fileobj.close()
2376 self.closed = True 2424 self.closed = True
2377 # class TarFile
2378
2379 class TarIter:
2380 """Iterator Class.
2381
2382 for tarinfo in TarFile(...):
2383 suite...
2384 """
2385
2386 def __init__(self, tarfile):
2387 """Construct a TarIter object.
2388 """
2389 self.tarfile = tarfile
2390 self.index = 0
2391 def __iter__(self):
2392 """Return iterator object.
2393 """
2394 return self
2395 def __next__(self):
2396 """Return the next item using TarFile's next() method.
2397 When all members have been read, set TarFile as _loaded.
2398 """
2399 # Fix for SF #1100429: Under rare circumstances it can
2400 # happen that getmembers() is called during iteration,
2401 # which will cause TarIter to stop prematurely.
2402
2403 if self.index == 0 and self.tarfile.firstmember is not None:
2404 tarinfo = self.tarfile.next()
2405 elif self.index < len(self.tarfile.members):
2406 tarinfo = self.tarfile.members[self.index]
2407 elif not self.tarfile._loaded:
2408 tarinfo = self.tarfile.next()
2409 if not tarinfo:
2410 self.tarfile._loaded = True
2411 raise StopIteration
2412 else:
2413 raise StopIteration
2414 self.index += 1
2415 return tarinfo
2416 2425
2417 #-------------------- 2426 #--------------------
2418 # exported functions 2427 # exported functions
2419 #-------------------- 2428 #--------------------
2420 def is_tarfile(name): 2429 def is_tarfile(name):
2421 """Return True if name points to a tar archive that we 2430 """Return True if name points to a tar archive that we
2422 are able to handle, else return False. 2431 are able to handle, else return False.
2423 """ 2432 """
2424 try: 2433 try:
2425 t = open(name) 2434 t = open(name)
2426 t.close() 2435 t.close()
2427 return True 2436 return True
2428 except TarError: 2437 except TarError:
2429 return False 2438 return False
2430 2439
2431 bltn_open = open
2432 open = TarFile.open 2440 open = TarFile.open
2433 2441
2434 2442
2435 def main(): 2443 def main():
2436 import argparse 2444 import argparse
2437 2445
2438 description = 'A simple command line interface for tarfile module.' 2446 description = 'A simple command line interface for tarfile module.'
2439 parser = argparse.ArgumentParser(description=description) 2447 parser = argparse.ArgumentParser(description=description)
2440 parser.add_argument('-v', '--verbose', action='store_true', default=False, 2448 parser.add_argument('-v', '--verbose', action='store_true', default=False,
2441 help='Verbose output') 2449 help='Verbose output')
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
2491 'into {!r} directory.').format(src, curdir) 2499 'into {!r} directory.').format(src, curdir)
2492 print(msg) 2500 print(msg)
2493 else: 2501 else:
2494 parser.exit(1, '{!r} is not a tar archive.\n'.format(src)) 2502 parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
2495 2503
2496 elif args.create: 2504 elif args.create:
2497 tar_name = args.create.pop(0) 2505 tar_name = args.create.pop(0)
2498 _, ext = os.path.splitext(tar_name) 2506 _, ext = os.path.splitext(tar_name)
2499 compressions = { 2507 compressions = {
2500 # gz 2508 # gz
2501 'gz': 'gz', 2509 '.gz': 'gz',
2502 'tgz': 'gz', 2510 '.tgz': 'gz',
2503 # xz 2511 # xz
2504 'xz': 'xz', 2512 '.xz': 'xz',
2505 'txz': 'xz', 2513 '.txz': 'xz',
2506 # bz2 2514 # bz2
2507 'bz2': 'bz2', 2515 '.bz2': 'bz2',
2508 'tbz': 'bz2', 2516 '.tbz': 'bz2',
2509 'tbz2': 'bz2', 2517 '.tbz2': 'bz2',
2510 'tb2': 'bz2', 2518 '.tb2': 'bz2',
2511 } 2519 }
2512 tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w' 2520 tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w'
2513 tar_files = args.create 2521 tar_files = args.create
2514 2522
2515 with TarFile.open(tar_name, tar_mode) as tf: 2523 with TarFile.open(tar_name, tar_mode) as tf:
2516 for file_name in tar_files: 2524 for file_name in tar_files:
2517 tf.add(file_name) 2525 tf.add(file_name)
2518 2526
2519 if args.verbose: 2527 if args.verbose:
2520 print('{!r} file created.'.format(tar_name)) 2528 print('{!r} file created.'.format(tar_name))
2521 2529
2522 else: 2530 else:
2523 parser.exit(1, parser.format_help()) 2531 parser.exit(1, parser.format_help())
2524 2532
2525 if __name__ == '__main__': 2533 if __name__ == '__main__':
2526 main() 2534 main()
LEFTRIGHT

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+