diff -r 0399e842073a Lib/tarfile.py --- a/Lib/tarfile.py Wed Feb 19 23:06:41 2014 -0500 +++ b/Lib/tarfile.py Thu Feb 20 19:12:55 2014 +0800 @@ -47,6 +47,7 @@ import struct import copy import re +import errno try: import grp, pwd @@ -2085,6 +2086,16 @@ else: self._dbg(1, tarinfo.name) + if os.path.exists(targetpath) and \ + (not os.path.isdir(targetpath) or os.path.islink(targetpath)): + os.remove(targetpath) + else: + # File should be able to overwrite empty directory + try: + os.rmdir(targetpath) + except OSError: + pass + if tarinfo.isreg(): self.makefile(tarinfo, targetpath) elif tarinfo.isdir(): @@ -2175,7 +2186,10 @@ try: # For systems that support symbolic and hard links. if tarinfo.issym(): - os.symlink(tarinfo.linkname, targetpath) + if os.name == 'nt' and os.path.isdir(tarinfo.linkname): + os.symlink(tarinfo.linkname, targetpath, target_is_directory=True) + else: + os.symlink(tarinfo.linkname, targetpath) else: # See extract(). if os.path.exists(tarinfo._link_target): diff -r 0399e842073a Lib/test/test_tarfile.py --- a/Lib/test/test_tarfile.py Wed Feb 19 23:06:41 2014 -0500 +++ b/Lib/test/test_tarfile.py Thu Feb 20 19:12:55 2014 +0800 @@ -2,6 +2,7 @@ import os import io import shutil +import errno from hashlib import md5 import unittest @@ -1162,6 +1163,118 @@ self.assertEqual(t.name, cmp_path or path.replace(os.sep, "/")) + def test_extractall_current_directory(self): + tempdir = os.path.join(TEMPDIR, "testcurdir") + temparchive = os.path.join(TEMPDIR, "testcurdir.tar") + os.mkdir(tempdir) + try: + source_file = os.path.join(tempdir, 'source') + with open(source_file,'w') as f: + f.write('source file content\n') + tar = tarfile.open(temparchive,'w') + try: + # We don't want to add absolute path to tar file because + # we don't want nested directory after extracting the file. 
+ with support.change_cwd(tempdir): + tar.add('./') + finally: + tar.close() + os.remove(source_file) + tar = tarfile.open(temparchive,'r') + try: + tar.extractall(path=tempdir) + with open(source_file) as f: + self.assertEqual(f.read(), 'source file content\n') + except OSError: + self.fail("extractall failed with ./ file") + finally: + tar.close() + finally: + os.unlink(temparchive) + shutil.rmtree(tempdir) + + def test_extractall_non_directory_overwrites_directory(self): + tempdir = os.path.join(TEMPDIR, "testnotdiroverwritesdir") + temparchive = os.path.join(TEMPDIR, "testnotdiroverwritesdir.tar") + os.mkdir(tempdir) + try: + source_file = os.path.join(tempdir, 'source') + with open(source_file,'w') as f: + f.write('source file content\n') + tar = tarfile.open(temparchive,'w') + try: + # We don't want to add absolute path to tar file because + # we don't want nested directory after extracting the file. + with support.change_cwd(tempdir): + tar.add('source') + finally: + tar.close() + os.remove(source_file) + os.mkdir(source_file) + tar = tarfile.open(temparchive,'r') + try: + tar.extractall(path=tempdir) + # Non-directory file should be able to overwrite an empty + # directory + with open(source_file) as f: + self.assertEqual(f.read(), 'source file content\n') + except OSError: + self.fail("extractall failed with non-directory files") + finally: + tar.close() + os.remove(source_file) + os.mkdir(source_file) + os.mkdir(os.path.join(source_file, 'inner_source')) + tar = tarfile.open(temparchive,'r') + try: + tar.extractall(path=tempdir) + # Non-directory file should not be able to overwrite a + # non-empty directory + self.fail("extractall failed with file overwriting non-empty " + "directory") + # This should raise OSError: [Errno 21] Is a directory + except OSError as e: + self.assertEqual(e.errno, errno.EISDIR) + finally: + tar.close() + finally: + os.unlink(temparchive) + shutil.rmtree(tempdir) + + def test_extractall_not_empty_directory(self): + tempdir = 
os.path.join(TEMPDIR, "testnotemptydir") + temparchive = os.path.join(TEMPDIR, "testnotemptydir.tar") + os.mkdir(tempdir) + try: + dir_A = os.path.join(tempdir, 'A') + dir_B = os.path.join(tempdir, 'B') + dir_C = os.path.join(dir_A, 'C') + os.mkdir(dir_A) + os.mkdir(dir_B) + os.mkdir(dir_C) + tar = tarfile.open(temparchive,'w') + try: + # We don't want to add absolute path to tar file because + # we don't want nested directory after extracting the file. + with support.change_cwd(tempdir): + tar.add('A') + tar.add('B') + finally: + tar.close() + os.rmdir(dir_C) + dir_D = os.path.join(dir_A, 'D') + os.mkdir(dir_D) + tar = tarfile.open(temparchive,'r') + try: + tar.extractall(path=tempdir) + self.assertEqual(sorted(os.listdir(dir_A)), ['C', 'D']) + except OSError: + self.fail("extractall failed with not empty directory") + finally: + tar.close() + finally: + os.unlink(temparchive) + shutil.rmtree(tempdir) @support.skip_unless_symlink def test_extractall_symlinks(self): @@ -1170,20 +1283,52 @@ temparchive = os.path.join(TEMPDIR, "testsymlinks.tar") os.mkdir(tempdir) try: - source_file = os.path.join(tempdir,'source') - target_file = os.path.join(tempdir,'symlink') + # File symlink + source_file = os.path.join(tempdir, 'source') + other_source_file = os.path.join(tempdir, 'other_source') + target_file = os.path.join(tempdir, 'symlink') with open(source_file,'w') as f: - f.write('something\n') + f.write('source file content\n') + with open(other_source_file,'w') as f: + f.write('other source file content\n') os.symlink(source_file, target_file) + # Directory symlink + source_dir = os.path.join(tempdir, 'source_dir') + other_source_dir = os.path.join(tempdir, 'other_source_dir') + target_dir = os.path.join(tempdir, 'symlink_dir') + os.mkdir(source_dir) + os.mkdir(other_source_dir) + os.symlink(source_dir, target_dir) tar = tarfile.open(temparchive,'w') - tar.add(source_file) - tar.add(target_file) + # We don't want to add absolute path to tar file because + # we don't 
want nested directory after extracting the file: + # /home/user/python/symlink/home/user/python/symlink/file + with support.change_cwd(tempdir): + tar.add('source') + tar.add('symlink') + tar.add('source_dir') + tar.add('symlink_dir') tar.close() - # Let's extract it to the location which contains the symlink + # Point target_file to other_source_file and target_dir to + # other_source_dir to exercise overwriting behavior. + os.unlink(target_file) + os.symlink(other_source_file, target_file) + os.unlink(target_dir) + os.symlink(other_source_dir, target_dir) tar = tarfile.open(temparchive,'r') - # this should not raise OSError: [Errno 17] File exists try: + with open(target_file) as f: + self.assertEqual(f.read(), 'other source file content\n') + self.assertEqual(os.readlink(target_dir), other_source_dir) + # Let's extract it to the location which contains the symlink tar.extractall(path=tempdir) + # target_file that symlinked to other_source_file now symlinks + # back to source_file, and target_dir that symlinked to + # other_source_dir now symlinks back to source_dir. + with open(target_file) as f: + self.assertEqual(f.read(), 'source file content\n') + self.assertEqual(os.readlink(target_dir), source_dir) + # This should not raise OSError: [Errno 17] File exists except OSError: self.fail("extractall failed with symlinked files") finally: