diff -r ece75a3b942c Doc/library/glob.rst --- a/Doc/library/glob.rst Wed Dec 05 17:59:29 2012 +0200 +++ b/Doc/library/glob.rst Wed Dec 05 19:44:50 2012 +0200 @@ -23,7 +23,7 @@ For example, ``'[?]'`` matches the character ``'?'``. -.. function:: glob(pathname) +.. function:: glob(pathname, *, recursive=False) Return a possibly-empty list of path names that match *pathname*, which must be a string containing a path specification. *pathname* can be either absolute @@ -31,15 +31,23 @@ :file:`../../Tools/\*/\*.gif`), and can contain shell-style wildcards. Broken symlinks are included in the results (as in the shell). + If *recursive* is true, the pattern ``**`` will match any files and zero or + more directories and subdirectories. If the pattern is followed by an + ``os.sep``, only directories and subdirectories match. -.. function:: iglob(pathname) + .. versionchanged:: 3.4 + Support for recursive globs using ``**``. + + +.. function:: iglob(pathname, *, recursive=False) Return an :term:`iterator` which yields the same values as :func:`glob` without actually storing them all simultaneously. -For example, consider a directory containing only the following files: -:file:`1.gif`, :file:`2.txt`, and :file:`card.gif`. :func:`glob` will produce +For example, consider a directory containing the following files: +:file:`1.gif`, :file:`2.txt`, :file:`card.gif` and a subdirectory :file:`sub` +which contains only the file :file:`3.txt`. :func:`glob` will produce the following results. Notice how any leading components of the path are preserved. :: @@ -50,6 +58,10 @@ ['1.gif', 'card.gif'] >>> glob.glob('?.gif') ['1.gif'] + >>> glob.glob('**/*.txt', recursive=True) + ['2.txt', 'sub/3.txt'] + >>> glob.glob('./**/', recursive=True) + ['./', './sub/'] .. 
seealso:: diff -r ece75a3b942c Lib/glob.py --- a/Lib/glob.py Wed Dec 05 17:59:29 2012 +0200 +++ b/Lib/glob.py Wed Dec 05 19:44:50 2012 +0200 @@ -6,19 +6,23 @@ __all__ = ["glob", "iglob"] -def glob(pathname): +def glob(pathname, *, recursive=False): """Return a list of paths matching a pathname pattern. The pattern may contain simple shell-style wildcards a la fnmatch. + If recursive is true, the pattern '**' will match any files and zero or + more directories and subdirectories. """ - return list(iglob(pathname)) + return list(iglob(pathname, recursive=recursive)) -def iglob(pathname): +def iglob(pathname, *, recursive=False): """Return an iterator which yields the paths matching a pathname pattern. The pattern may contain simple shell-style wildcards a la fnmatch. + If recursive is true, the pattern '**' will match any files and zero or + more directories and subdirectories. """ if not has_magic(pathname): if os.path.lexists(pathname): @@ -26,14 +30,20 @@ return dirname, basename = os.path.split(pathname) if not dirname: - yield from glob1(None, basename) + if recursive and basename in ('**', b'**'): + yield from glob2(dirname, basename) + else: + yield from glob1(dirname, basename) return if has_magic(dirname): - dirs = iglob(dirname) + dirs = iglob(dirname, recursive=recursive) else: dirs = [dirname] if has_magic(basename): - glob_in_dir = glob1 + if recursive and basename in ('**', b'**'): + glob_in_dir = glob2 + else: + glob_in_dir = glob1 else: glob_in_dir = glob0 for dirname in dirs: @@ -54,8 +64,8 @@ names = os.listdir(dirname) except os.error: return [] - if pattern[0] != '.': - names = [x for x in names if x[0] != '.'] + if not _ishidden(pattern): + names = [x for x in names if not _ishidden(x)] return fnmatch.filter(names, pattern) def glob0(dirname, basename): @@ -69,6 +79,34 @@ return [basename] return [] +# This helper function recursively yields relative pathnames inside a literal +# directory. 
+ +def glob2(dirname, pattern): + assert pattern in ('**', b'**') + if dirname: + yield pattern[:0] + yield from _rlistdir(dirname) + +# Recursively yields relative pathnames inside a literal directory. + +def _rlistdir(dirname): + if not dirname: + if isinstance(dirname, bytes): + dirname = bytes(os.curdir, 'ASCII') + else: + dirname = os.curdir + try: + names = os.listdir(dirname) + except os.error: + return + for x in names: + if not _ishidden(x): + yield x + path = os.path.join(dirname, x) if dirname else x + for y in _rlistdir(path): + yield os.path.join(x, y) + magic_check = re.compile('[*?[]') magic_check_bytes = re.compile(b'[*?[]') @@ -79,3 +117,6 @@ else: match = magic_check.search(s) return match is not None + +def _ishidden(path): + return path[0] in ('.', b'.'[0]) diff -r ece75a3b942c Lib/test/test_glob.py --- a/Lib/test/test_glob.py Wed Dec 05 17:59:29 2012 +0200 +++ b/Lib/test/test_glob.py Wed Dec 05 19:44:50 2012 +0200 @@ -10,6 +10,9 @@ def norm(self, *parts): return os.path.normpath(os.path.join(self.tempdir, *parts)) + def joins(self, *tuples): + return [os.path.join(self.tempdir, *parts) for parts in tuples] + def mktemp(self, *parts): filename = self.norm(*parts) base, file = os.path.split(filename) @@ -25,25 +28,34 @@ self.mktemp('ZZZ') self.mktemp('a', 'bcd', 'EF') self.mktemp('a', 'bcd', 'efg', 'ha') + self.mktemp('.hidden') + self.mktemp('a', '.hidden') if can_symlink(): os.symlink(self.norm('broken'), self.norm('sym1')) - os.symlink(self.norm('broken'), self.norm('sym2')) + os.symlink('broken', self.norm('sym2')) + os.symlink(os.path.join('a', 'bcd'), self.norm('sym3')) def tearDown(self): shutil.rmtree(self.tempdir) - def glob(self, *parts): + def glob(self, *parts, **kwargs): if len(parts) == 1: pattern = parts[0] else: pattern = os.path.join(*parts) p = os.path.join(self.tempdir, pattern) - res = glob.glob(p) - self.assertEqual(list(glob.iglob(p)), res) + res = glob.glob(p, **kwargs) + self.assertEqual(list(glob.iglob(p, **kwargs)), res) 
+ bres = [x.encode() for x in res] + self.assertEqual(glob.glob(p.encode(), **kwargs), bres) + self.assertEqual(list(glob.iglob(p.encode(), **kwargs)), bres) return res def assertSequencesEqual_noorder(self, l1, l2): + l1 = list(l1) + l2 = list(l2) self.assertEqual(set(l1), set(l2)) + self.assertEqual(sorted(l1), sorted(l2)) def test_glob_literal(self): eq = self.assertSequencesEqual_noorder @@ -97,14 +109,83 @@ self.assertEqual(len(res), 1) # either of these results are reasonable self.assertIn(res[0], [self.tempdir, self.tempdir + os.sep]) + eq = self.assertSequencesEqual_noorder + eq(self.glob('a*', ''), [os.path.join(self.tempdir, x, '') + for x in ['a', 'aaa', 'aab']]) + + @skip_unless_symlink + def test_glob_symlinks(self): + eq = self.assertSequencesEqual_noorder + eq(self.glob('sym3'), [self.norm('sym3')]) + eq(self.glob('sym3', '*'), [self.norm('sym3', 'EF'), + self.norm('sym3', 'efg')]) + eq(self.glob('*', '*F'), [self.norm('aaa', 'zzzF'), + self.norm('aab', 'F'), self.norm('sym3', 'EF')]) @skip_unless_symlink def test_glob_broken_symlinks(self): eq = self.assertSequencesEqual_noorder - eq(self.glob('sym*'), [self.norm('sym1'), self.norm('sym2')]) + eq(self.glob('sym*'), [self.norm('sym1'), self.norm('sym2'), + self.norm('sym3')]) eq(self.glob('sym1'), [self.norm('sym1')]) eq(self.glob('sym2'), [self.norm('sym2')]) + def rglob(self, *parts, **kwargs): + return self.glob(*parts, recursive=True, **kwargs) + + def test_recursive_glob(self): + eq = self.assertSequencesEqual_noorder + #print([tuple(x.split('/')[1:]) for x in self.glob('**')]) + full = [('ZZZ',), + ('a',), ('a', 'D'), + ('a', 'bcd'), + ('a', 'bcd', 'EF'), + ('a', 'bcd', 'efg'), + ('a', 'bcd', 'efg', 'ha'), + ('aaa',), ('aaa', 'zzzF'), + ('aab',), ('aab', 'F'), + ('sym1',), ('sym2',), + ('sym3',), + ('sym3', 'EF'), + ('sym3', 'efg'), + ('sym3', 'efg', 'ha'), + ] + eq(self.rglob('**'), self.joins(('',), *full)) + eq(self.rglob('.', '**'), self.joins(('.',''), + *(('.',) + i for i in full))) + 
dirs = [('a', ''), ('a', 'bcd', ''), ('a', 'bcd', 'efg', ''), + ('aaa', ''), ('aab', ''), ('sym3', ''), ('sym3', 'efg', '')] + eq(self.rglob('**', ''), self.joins(('',), *dirs)) + + eq(self.rglob('a', '**'), self.joins( + ('a', ''), ('a', 'D'), ('a', 'bcd'), ('a', 'bcd', 'EF'), + ('a', 'bcd', 'efg'), ('a', 'bcd', 'efg', 'ha'))) + eq(self.rglob('a**'), self.joins(('a',), ('aaa',), ('aab',))) + eq(self.rglob('**', 'EF'), self.joins(('a', 'bcd', 'EF'), + ('sym3', 'EF'))) + eq(self.rglob('**', '*F'), self.joins( + ('a', 'bcd', 'EF'), ('aaa', 'zzzF'), ('aab', 'F'), + ('sym3', 'EF'))) + eq(self.rglob('**', '*F', ''), []) + eq(self.rglob('**', 'bcd', '*'), self.joins( + ('a', 'bcd', 'EF'), ('a', 'bcd', 'efg'))) + eq(self.rglob('a', '**', 'bcd'), self.joins(('a', 'bcd'))) + + predir = os.path.abspath(os.curdir) + try: + os.chdir(self.tempdir) + join = os.path.join + eq(glob.glob('**', recursive=True), [join(*i) for i in full]) + eq(glob.glob(join('**', ''), recursive=True), + [join(*i) for i in dirs]) + eq(glob.glob(join('**','zz*F'), recursive=True), + [join('aaa', 'zzzF')]) + eq(glob.glob('**zz*F', recursive=True), []) + eq(glob.glob(join('**', 'EF'), recursive=True), + [join('a', 'bcd', 'EF'), join('sym3', 'EF')]) + finally: + os.chdir(predir) + def test_main(): run_unittest(GlobTests)