import os import sys import time NFILE = 10**5 # x 2 (file + symlink) NSYMLINK = 10**2 NDIR = 10**4 assert NSYMLINK <= NFILE RUNS = 5 LOOPS = 5 # only used for cached def drop_caches(): print("drop disk caches") with open("/proc/sys/vm/drop_caches", "wb") as fp: fp.write(b'3\n') def test_listdir(stat): if stat: for name in os.listdir(): os.stat(name) else: for entry in os.listdir(): pass def test_scandir(stat): if stat: for entry in os.scandir(): entry.is_dir() else: for entry in os.scandir(): pass def bench(times, func, stat, cached): if cached: loops = LOOPS else: loops = 1 t0 = time.perf_counter() for loop in range(loops): if not cached: drop_caches() func(stat) t1 = time.perf_counter() dt = (t1 - t0) / loops times.append(dt) def count_entries(): nentries = 0 for entry in os.scandir(): nentries += 1 print("# entries: %s" % nentries) return nentries def create_dir(path): print("Directory: %s" % path) try: os.mkdir(path) except FileExistsError: print("ERROR: %s already exixts" % path) sys.exit(1) os.chdir(path) nentries = 0 t0 = time.monotonic() next_msg = t0 + 1.0 def progress(): nonlocal next_msg if time.monotonic() >= next_msg: next_msg = time.monotonic() + 1.0 print("... %s entries created" % nentries) print("Create %s files..." % NFILE) for index in range(NFILE): progress() filename = "file-%08d" % index open(filename, "xb").close() nentries += 1 print("Create %s symlinks to files..." % NSYMLINK) for index in range(NSYMLINK): progress() filename = "file-%08d" % index linkname = "symlink-%08d" % index os.symlink(filename, linkname) nentries += 1 print("Create %s directories..." % NDIR) for index in range(NDIR): progress() dirname = "dir-%08d" % index os.mkdir(dirname) nentries += 1 dt = time.monotonic() - t0 print("Creation took %.1f sec" % dt) count_entries() def runbench(path, stat, cached): dt_listdir = [] dt_scandir = [] if stat: print("Test listdir+stat vs scandir+is_dir") else: print("Test listdir vs scandir") if not cached: try: drop_caches() except PermissionError: print("Cannot flush disk caches: try to run this program as root") sys.exit(1) old_dir = os.getcwd() try: os.chdir(path) print("Directory: %s" % os.getcwd()) nentries = count_entries() print("Benchmark...") for run in range(RUNS): bench(dt_listdir, test_listdir, stat, cached) print("listdir: %.1f ms" % (dt_listdir[-1]*1e3)) bench(dt_scandir, test_scandir, stat, cached) print("scandir: %.1f ms" % (dt_scandir[-1]*1e3)) print("") finally: os.chdir(old_dir) best_listdir = min(dt_listdir) best_scandir = min(dt_scandir) worst_listdir = max(dt_listdir) worst_scandir = max(dt_scandir) print("Result:") print("listdir: min=%.1f ms (%.1f us per file), max=%.1f ms (%.1f us per file)" % (best_listdir*1e3, best_listdir/nentries*1e6, worst_listdir*1e3, worst_listdir/nentries*1e6)) print("scandir: min=%.1f ms (%.1f us per file), max=%.1f ms (%.1f us per file)" % (best_scandir*1e3, best_scandir/nentries*1e6, worst_scandir*1e3, worst_scandir/nentries*1e6)) print("scandir is %.1fx faster than listdir (worst: %.1fx, best: %.1fx faster)" % (best_listdir / best_scandir, best_listdir / worst_scandir, worst_listdir / best_scandir)) def main(): COMMANDS = ("create", "bench", "bench_nocache", "bench_nostat", "bench_nostat_nocache") if len(sys.argv) != 3 or sys.argv[1] not in COMMANDS: print("usage: %s COMMAND directory" % sys.argv[0]) print("Commands: %s" % ', '.join(COMMANDS)) sys.exit(1) command = sys.argv[1] path = sys.argv[2] if command == 'create': create_dir(path) elif command == "bench_nostat": runbench(path, False, True) elif command == "bench_nostat_nocache": runbench(path, True, False) elif command == "bench_nocache": runbench(path, True, False) elif command == "bench": runbench(path, True, True) else: print("invalid command!") if __name__ == "__main__": main()