"""Benchmark extraction speed of ``tarfile.TarFile`` against ``FastTarFile``.

Running this module as a script builds two identical-layout tar archives
filled with synthetic data in a temporary directory, extracts one with the
standard ``tarfile`` opener and the other with ``FastTarFile``, and prints the
elapsed time of each run together with their difference.
"""
import os
import tarfile
import tempfile
import time


class RandFile:
    """Minimal read-only file-like object that produces synthetic bytes.

    A single random byte is chosen at construction time and repeated, so the
    object can serve arbitrarily large payloads without holding them in
    memory. ``size`` tracks how many bytes are still left to be read.
    """

    def __init__(self, size):
        """Create a source that will yield ``size`` bytes in total."""
        self.size = size
        self.chr = os.urandom(1)

    def close(self):
        """Do nothing; present so the object can be used like a file."""
        pass

    def read(self, size=-1):
        """Return up to ``size`` bytes and decrease the remaining count.

        ``None`` or any negative ``size`` means "read everything that is
        left". Once the source is exhausted, ``b""`` is returned.
        """
        # Clamp the request to what is left. Treating every negative value
        # (and None) as "read all" keeps the remaining count from growing,
        # which would happen if a negative size were subtracted from it.
        if size is None or size < 0 or size > self.size:
            size = self.size
        self.size -= size
        return self.chr * size


# Module-level aliases for the tarfile helpers used by FastTarFile.makefile.
ReadError = tarfile.ReadError
copyfileobj = tarfile.copyfileobj


class FastTarFile(tarfile.TarFile):
    """``TarFile`` subclass whose ``makefile`` is the variant being timed.

    NOTE(review): the body looks equivalent to the standard library's own
    ``TarFile.makefile`` without an explicit copy buffer size - confirm
    against the stdlib version this benchmark is meant to be compared with.
    """

    def makefile(self, tarinfo, targetpath):
        """Make a file called targetpath.
        """
        source = self.fileobj
        # Position the archive stream at the start of this member's data.
        source.seek(tarinfo.offset_data)
        with open(targetpath, "wb") as target:
            if tarinfo.sparse is not None:
                # Sparse member: write each stored (offset, size) segment at
                # its offset, then fix the final length of the file.
                for offset, size in tarinfo.sparse:
                    target.seek(offset)
                    copyfileobj(source, target, size, ReadError)
                target.seek(tarinfo.size)
                target.truncate()
            else:
                copyfileobj(source, target, tarinfo.size, ReadError)


def get_fake_tarinfo(name, size):
    """Return a ``TarInfo`` called ``name`` that declares ``size`` data bytes."""
    tarinfo = tarfile.TarInfo(name)
    tarinfo.size = size
    return tarinfo


# Size in bytes of every generated archive member.
FILE_SIZE = 10*1024*1024
# Number of members written to each test archive.
NUM_FILES = 1024


def _generate_test_tars(test_tar, test_tar2):
    """Create both test archives, each with NUM_FILES synthetic members."""
    print('Generating Random Test Tar Files', end='')
    # Mode 'x' creates the archive exclusively: it fails if the path exists.
    with tarfile.open(test_tar, mode='x') as tar1, \
            tarfile.open(test_tar2, mode='x') as tar2:
        for i in range(NUM_FILES):
            print('.', end='', flush=True)
            tarinfo = get_fake_tarinfo(str(i), FILE_SIZE)
            # Each archive gets its own RandFile because reading consumes it.
            tar1.addfile(tarinfo, fileobj=RandFile(FILE_SIZE))
            tar2.addfile(tarinfo, fileobj=RandFile(FILE_SIZE))
    print()


def _time_extraction(opener, archive_path):
    """Return the seconds taken to open ``archive_path`` and extract it all.

    ``opener`` is a callable returning a context-managed tar object. The
    extraction target is a fresh temporary directory removed afterwards.
    """
    with tempfile.TemporaryDirectory() as tout:
        # perf_counter is monotonic, so the interval cannot be distorted by
        # system clock adjustments the way time.time() can.
        start = time.perf_counter()
        with opener(archive_path) as tar:
            tar.extractall(tout)
        return time.perf_counter() - start


def main():
    """Generate the archives, time both implementations, print the results."""
    with tempfile.TemporaryDirectory() as tdir:
        test_tar = os.path.join(tdir, 'test1.tar')
        test_tar2 = os.path.join(tdir, 'test2.tar')
        _generate_test_tars(test_tar, test_tar2)

        print('Testing Tar Impls')
        normal = _time_extraction(tarfile.open, test_tar2)
        print('Time Delta for TarFile:', normal)
        fast = _time_extraction(FastTarFile.open, test_tar)
        print('Time Delta for FastTarFile:', fast)

        # Guard the relative figure against a zero baseline measurement.
        ratio = 1 - fast/normal if normal else float('nan')
        print('Time Diff:', normal - fast, ratio)


if __name__ == '__main__':
    main()