diff --git a/Doc/library/os.rst b/Doc/library/os.rst --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -2208,18 +2208,21 @@ os.rmdir(os.path.join(root, name)) -.. function:: fwalk(top, topdown=True, onerror=None, followlinks=False) +.. function:: fwalk(top='.', topdown=True, onerror=None, followlinks=False, *, dir_fd=None) .. index:: single: directory; walking single: directory; traversal This behaves exactly like :func:`walk`, except that it yields a 4-tuple - ``(dirpath, dirnames, filenames, dirfd)``. + ``(dirpath, dirnames, filenames, dirfd)``, and it supports ``dir_fd``. *dirpath*, *dirnames* and *filenames* are identical to :func:`walk` output, and *dirfd* is a file descriptor referring to the directory *dirpath*. + This function always supports :ref:`paths relative to directory descriptors + <dir_fd>`. + .. note:: Since :func:`fwalk` yields file descriptors, those are only valid until diff --git a/Lib/os.py b/Lib/os.py --- a/Lib/os.py +++ b/Lib/os.py @@ -331,7 +331,16 @@ __all__.extend(["makedirs", "removedirs", "renames"]) -def walk(top, topdown=True, onerror=None, followlinks=False): +def _isdir_dir_fd(path, dir_fd): + """ + A substitute for os.path.isdir that supports dir_fd. + """ + try: + return st.S_ISDIR(stat(path, dir_fd=dir_fd).st_mode) + except FileNotFoundError: + return False + +def walk(top=".", topdown=True, onerror=None, followlinks=False, *, dir_fd=None): """Directory tree generator. For each directory in the directory tree rooted at top (including top @@ -395,36 +404,57 @@ # always suppressed the exception then, rather than blow up for a # minor reason when (say) a thousand readable directories are still # left to visit. That logic is copied here. + # + # Note that listdir and error are globals in this module due + # to earlier import-*. try: - # Note that listdir and error are globals in this module due - # to earlier import-*. 
- names = listdir(top) + if dir_fd is None: + close_me = None + names = listdir(top) + else: + close_me = open(top, O_RDONLY, dir_fd=dir_fd) + names = listdir(close_me) except error as err: + if close_me is not None: + close(close_me) if onerror is not None: onerror(err) return dirs, nondirs = [], [] - for name in names: - if isdir(join(top, name)): - dirs.append(name) - else: - nondirs.append(name) + if close_me is not None: + for name in names: + # name is in directory referenced by close_me! + # don't bother joining with top here. + if _isdir_dir_fd(name, close_me): + dirs.append(name) + else: + nondirs.append(name) + close(close_me) + else: + for name in names: + new_path = join(top, name) + if isdir(new_path): + dirs.append(name) + else: + nondirs.append(name) if topdown: yield top, dirs, nondirs for name in dirs: new_path = join(top, name) if followlinks or not islink(new_path): - yield from walk(new_path, topdown, onerror, followlinks) + yield from walk(new_path, topdown, onerror, followlinks, dir_fd=dir_fd) if not topdown: yield top, dirs, nondirs __all__.append("walk") +if (listdir in supports_fd) and ({open, stat} <= supports_dir_fd): + supports_dir_fd.add(walk) -if open in supports_dir_fd: +if {open, stat} <= supports_dir_fd and {listdir, stat} <= supports_fd: - def fwalk(top, topdown=True, onerror=None, followlinks=False): + def fwalk(top=".", topdown=True, onerror=None, followlinks=False, *, dir_fd=None): """Directory tree generator. This behaves exactly like walk(), except that it yields a 4-tuple @@ -434,9 +464,13 @@ `dirpath`, `dirnames` and `filenames` are identical to walk() output, and `dirfd` is a file descriptor referring to the directory `dirpath`. - The advantage of walkfd() over walk() is that it's safe against symlink + The advantage of fwalk() over walk() is that it's safe against symlink races (when followlinks is False). 
+ If dir_fd is not None, it should be a file descriptor open to a directory, + and top should be relative; top will then be relative to that directory. + (dir_fd is always supported for fwalk.) + Caution: Since fwalk() yields file descriptors, those are only valid until the next iteration step, so you should dup() them if you want to keep them @@ -455,11 +489,11 @@ """ # Note: To guard against symlink races, we use the standard # lstat()/open()/fstat() trick. - orig_st = lstat(top) - topfd = open(top, O_RDONLY) + orig_st = stat(top, follow_symlinks=False, dir_fd=dir_fd) + topfd = open(top, O_RDONLY, dir_fd=dir_fd) try: if (followlinks or (st.S_ISDIR(orig_st.st_mode) and - path.samestat(orig_st, fstat(topfd)))): + path.samestat(orig_st, stat(topfd)))): yield from _fwalk(topfd, top, topdown, onerror, followlinks) finally: close(topfd) @@ -502,7 +536,7 @@ onerror(err) return try: - if followlinks or path.samestat(orig_st, fstat(dirfd)): + if followlinks or path.samestat(orig_st, stat(dirfd)): dirpath = path.join(toppath, name) yield from _fwalk(dirfd, dirpath, topdown, onerror, followlinks) finally: diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -616,8 +616,9 @@ key = 'key=' self.assertRaises(OSError, os.environ.__delitem__, key) -class WalkTests(unittest.TestCase): - """Tests for os.walk().""" + +class SetupWalkTree(unittest.TestCase): + """Construct a file tree for os.walk and os.fwalk to examine.""" def setUp(self): import os @@ -636,24 +637,23 @@ # broken_link # TEST2/ # tmp4 a lone file - walk_path = join(support.TESTFN, "TEST1") - sub1_path = join(walk_path, "SUB1") - sub11_path = join(sub1_path, "SUB11") - sub2_path = join(walk_path, "SUB2") - tmp1_path = join(walk_path, "tmp1") - tmp2_path = join(sub1_path, "tmp2") - tmp3_path = join(sub2_path, "tmp3") - link_path = join(sub2_path, "link") - t2_path = join(support.TESTFN, "TEST2") - tmp4_path = join(support.TESTFN, "TEST2", "tmp4") - link_path = 
join(sub2_path, "link") - broken_link_path = join(sub2_path, "broken_link") + self.walk_path = walk_path = join(support.TESTFN, "TEST1") + self.sub1_path = sub1_path = join(walk_path, "SUB1") + self.sub11_path = join(sub1_path, "SUB11") + self.sub2_path = sub2_path = join(walk_path, "SUB2") + self.tmp1_path = join(walk_path, "tmp1") + self.tmp2_path = join(sub1_path, "tmp2") + self.tmp3_path = join(sub2_path, "tmp3") + self.t2_path = t2_path = join(support.TESTFN, "TEST2") + self.tmp4_path = join(support.TESTFN, "TEST2", "tmp4") + self.link_path = link_path = join(sub2_path, "link") + self.broken_link_path = broken_link_path = join(sub2_path, "broken_link") # Create stuff. - os.makedirs(sub11_path) - os.makedirs(sub2_path) - os.makedirs(t2_path) - for path in tmp1_path, tmp2_path, tmp3_path, tmp4_path: + os.makedirs(self.sub11_path) + os.makedirs(self.sub2_path) + os.makedirs(self.t2_path) + for path in self.tmp1_path, self.tmp2_path, self.tmp3_path, self.tmp4_path: f = open(path, "w") f.write("I'm " + path + " and proud of it. Blame test_os.\n") f.close() @@ -663,62 +663,11 @@ os.symlink(src, dest, True) else: symlink_to_dir = os.symlink - symlink_to_dir(os.path.abspath(t2_path), link_path) + symlink_to_dir(os.path.abspath(self.t2_path), link_path) symlink_to_dir('broken', broken_link_path) - sub2_tree = (sub2_path, ["link"], ["broken_link", "tmp3"]) + self.sub2_tree = (sub2_path, ["link"], ["broken_link", "tmp3"]) else: - sub2_tree = (sub2_path, [], ["tmp3"]) - - # Walk top-down. - all = list(os.walk(walk_path)) - self.assertEqual(len(all), 4) - # We can't know which order SUB1 and SUB2 will appear in. 
- # Not flipped: TESTFN, SUB1, SUB11, SUB2 - # flipped: TESTFN, SUB2, SUB1, SUB11 - flipped = all[0][1][0] != "SUB1" - all[0][1].sort() - all[3 - 2 * flipped][-1].sort() - self.assertEqual(all[0], (walk_path, ["SUB1", "SUB2"], ["tmp1"])) - self.assertEqual(all[1 + flipped], (sub1_path, ["SUB11"], ["tmp2"])) - self.assertEqual(all[2 + flipped], (sub11_path, [], [])) - self.assertEqual(all[3 - 2 * flipped], sub2_tree) - - # Prune the search. - all = [] - for root, dirs, files in os.walk(walk_path): - all.append((root, dirs, files)) - # Don't descend into SUB1. - if 'SUB1' in dirs: - # Note that this also mutates the dirs we appended to all! - dirs.remove('SUB1') - self.assertEqual(len(all), 2) - self.assertEqual(all[0], (walk_path, ["SUB2"], ["tmp1"])) - all[1][-1].sort() - self.assertEqual(all[1], sub2_tree) - - # Walk bottom-up. - all = list(os.walk(walk_path, topdown=False)) - self.assertEqual(len(all), 4) - # We can't know which order SUB1 and SUB2 will appear in. - # Not flipped: SUB11, SUB1, SUB2, TESTFN - # flipped: SUB2, SUB11, SUB1, TESTFN - flipped = all[3][1][0] != "SUB1" - all[3][1].sort() - all[2 - 2 * flipped][-1].sort() - self.assertEqual(all[3], (walk_path, ["SUB1", "SUB2"], ["tmp1"])) - self.assertEqual(all[flipped], (sub11_path, [], [])) - self.assertEqual(all[flipped + 1], (sub1_path, ["SUB11"], ["tmp2"])) - self.assertEqual(all[2 - 2 * flipped], sub2_tree) - - if support.can_symlink(): - # Walk, following symlinks. - for root, dirs, files in os.walk(walk_path, followlinks=True): - if root == link_path: - self.assertEqual(dirs, []) - self.assertEqual(files, ["tmp4"]) - break - else: - self.fail("Didn't follow symlink with followlinks=True") + self.sub2_tree = (sub2_path, [], ["tmp3"]) def tearDown(self): # Tear everything down. 
This is a decent use for bottom-up on @@ -736,24 +685,112 @@ os.remove(dirname) os.rmdir(support.TESTFN) +class WalkTests(SetupWalkTree): + + def test_walk_no_arguments(self): + self.assertTrue(any(os.walk())) + + def test_walk(self): + # Walk top-down. + all = list(os.walk(self.walk_path)) + self.assertEqual(len(all), 4) + # We can't know which order SUB1 and SUB2 will appear in. + # Not flipped: TESTFN, SUB1, SUB11, SUB2 + # flipped: TESTFN, SUB2, SUB1, SUB11 + flipped = all[0][1][0] != "SUB1" + all[0][1].sort() + all[3 - 2 * flipped][-1].sort() + self.assertEqual(all[0], (self.walk_path, ["SUB1", "SUB2"], ["tmp1"])) + self.assertEqual(all[1 + flipped], (self.sub1_path, ["SUB11"], ["tmp2"])) + self.assertEqual(all[2 + flipped], (self.sub11_path, [], [])) + self.assertEqual(all[3 - 2 * flipped], self.sub2_tree) + + # Prune the search. + all = [] + for root, dirs, files in os.walk(self.walk_path): + all.append((root, dirs, files)) + # Don't descend into SUB1. + if 'SUB1' in dirs: + # Note that this also mutates the dirs we appended to all! + dirs.remove('SUB1') + self.assertEqual(len(all), 2) + self.assertEqual(all[0], (self.walk_path, ["SUB2"], ["tmp1"])) + all[1][-1].sort() + self.assertEqual(all[1], self.sub2_tree) + + # Walk bottom-up. + all = list(os.walk(self.walk_path, topdown=False)) + self.assertEqual(len(all), 4) + # We can't know which order SUB1 and SUB2 will appear in. + # Not flipped: SUB11, SUB1, SUB2, TESTFN + # flipped: SUB2, SUB11, SUB1, TESTFN + flipped = all[3][1][0] != "SUB1" + all[3][1].sort() + all[2 - 2 * flipped][-1].sort() + self.assertEqual(all[3], (self.walk_path, ["SUB1", "SUB2"], ["tmp1"])) + self.assertEqual(all[flipped], (self.sub11_path, [], [])) + self.assertEqual(all[flipped + 1], (self.sub1_path, ["SUB11"], ["tmp2"])) + self.assertEqual(all[2 - 2 * flipped], self.sub2_tree) + + if support.can_symlink(): + # Walk, following symlinks. 
+ for root, dirs, files in os.walk(self.walk_path, followlinks=True): + if root == self.link_path: + self.assertEqual(dirs, []) + self.assertEqual(files, ["tmp4"]) + break + else: + self.fail("Didn't follow symlink with followlinks=True") + + @unittest.skipUnless(os.walk in os.supports_dir_fd, + "os.walk needs dir_fd for this test.") + def test_walk_dir_fd(self): + fd = os.open('.', os.O_RDONLY) + try: + list1 = list(os.walk(self.walk_path)) + list2 = list(os.walk(self.walk_path, dir_fd=fd)) + self.assertEqual(list1, list2) + finally: + if fd is not None: + os.close(fd) + @unittest.skipUnless(hasattr(os, 'fwalk'), "Test needs os.fwalk()") -class FwalkTests(WalkTests): +class FwalkTests(SetupWalkTree): """Tests for os.fwalk().""" - def test_compare_to_walk(self): - # compare with walk() results + def _compare_to_walk(self, walk_kwargs, fwalk_kwargs): + """ + compare with walk() results. + """ for topdown, followlinks in itertools.product((True, False), repeat=2): - args = support.TESTFN, topdown, None, followlinks + d = {'topdown': topdown, 'followlinks': followlinks} + walk_kwargs.update(d) + fwalk_kwargs.update(d) + expected = {} - for root, dirs, files in os.walk(*args): + for root, dirs, files in os.walk(**walk_kwargs): expected[root] = (set(dirs), set(files)) - for root, dirs, files, rootfd in os.fwalk(*args): + for root, dirs, files, rootfd in os.fwalk(**fwalk_kwargs): self.assertIn(root, expected) self.assertEqual(expected[root], (set(dirs), set(files))) - def test_dir_fd(self): + def test_fwalk_compare_to_walk(self): + kwargs = {'top': support.TESTFN} + self._compare_to_walk(kwargs, kwargs) + + def test_fwalk_use_dir_fd(self): + try: + fd = os.open(".", os.O_RDONLY) + walk_kwargs = {'top': support.TESTFN} + fwalk_kwargs = walk_kwargs.copy() + fwalk_kwargs['dir_fd'] = fd + self._compare_to_walk(walk_kwargs, fwalk_kwargs) + finally: + os.close(fd) + + def test_fwalk_yields_correct_dir_fd(self): # check returned file descriptors for topdown, followlinks in 
itertools.product((True, False), repeat=2): args = support.TESTFN, topdown, None, followlinks @@ -765,7 +802,7 @@ # check that listdir() returns consistent information self.assertEqual(set(os.listdir(rootfd)), set(dirs) | set(files)) - def test_fd_leak(self): + def test_fwalk_fd_leak(self): # Since we're opening a lot of FDs, we must be careful to avoid leaks: # we both check that calling fwalk() a large number of times doesn't # yield EMFILE, and that the minimum allocated FD hasn't changed.