diff -r 95379bf91df4 Doc/library/glob.rst --- a/Doc/library/glob.rst Wed Jan 02 12:32:10 2013 +0200 +++ b/Doc/library/glob.rst Wed Jan 02 14:29:43 2013 +0200 @@ -23,7 +23,7 @@ For example, ``'[?]'`` matches the character ``'?'``. -.. function:: glob(pathname) +.. function:: glob(pathname, *, recursive=False) Return a possibly-empty list of path names that match *pathname*, which must be a string containing a path specification. *pathname* can be either absolute @@ -31,15 +31,23 @@ :file:`../../Tools/\*/\*.gif`), and can contain shell-style wildcards. Broken symlinks are included in the results (as in the shell). + If *recursive* is true, the pattern ``**`` will match a files and zero or + more directories and subdirectories. If the pattern is followed by a + ``os.sep``, only directories and subdirectories match. -.. function:: iglob(pathname) + .. versionchanged:: 3.4 + Support for recursive globs using ``**``. + + +.. function:: iglob(pathname, recursive=False) Return an :term:`iterator` which yields the same values as :func:`glob` without actually storing them all simultaneously. -For example, consider a directory containing only the following files: -:file:`1.gif`, :file:`2.txt`, and :file:`card.gif`. :func:`glob` will produce +For example, consider a directory containing the following files: +:file:`1.gif`, :file:`2.txt`, :file:`card.gif` and a subdirectory :file:`sub` +which contains only the file :file:`3.txt`. :func:`glob` will produce the following results. Notice how any leading components of the path are preserved. :: @@ -50,6 +58,10 @@ ['1.gif', 'card.gif'] >>> glob.glob('?.gif') ['1.gif'] + >>> glob.glob('**/*.txt', recursive=True) + ['2.txt', 'sub/3.txt'] + >>> glob.glob('./**/', recursive=True) + ['./', './sub/'] .. seealso:: diff -r 95379bf91df4 Lib/glob.py --- a/Lib/glob.py Wed Jan 02 12:32:10 2013 +0200 +++ b/Lib/glob.py Wed Jan 02 14:29:43 2013 +0200 @@ -6,19 +6,23 @@ __all__ = ["glob", "iglob"] -def glob(pathname): +def glob(pathname, *, recursive=False): """Return a list of paths matching a pathname pattern. The pattern may contain simple shell-style wildcards a la fnmatch. + If recursive is true, the pattern '**' will match a files and zero or + more directories and subdirectories. """ - return list(iglob(pathname)) + return list(iglob(pathname, recursive=recursive)) -def iglob(pathname): +def iglob(pathname, *, recursive=False): """Return an iterator which yields the paths matching a pathname pattern. The pattern may contain simple shell-style wildcards a la fnmatch. + If recursive is true, the pattern '**' will match a files and zero or + more directories and subdirectories. """ if not has_magic(pathname): if os.path.lexists(pathname): @@ -26,17 +30,23 @@ return dirname, basename = os.path.split(pathname) if not dirname: - yield from glob1(None, basename) + if recursive and basename in ('**', b'**'): + yield from glob2(dirname, basename) + else: + yield from glob1(dirname, basename) return # `os.path.split()` returns the argument itself as a dirname if it is a # drive or UNC path. Prevent an infinite recursion if a drive or UNC path # contains magic characters (i.e. r'\\?\C:'). if dirname != pathname and has_magic(dirname): - dirs = iglob(dirname) + dirs = iglob(dirname, recursive=recursive) else: dirs = [dirname] if has_magic(basename): - glob_in_dir = glob1 + if recursive and basename in ('**', b'**'): + glob_in_dir = glob2 + else: + glob_in_dir = glob1 else: glob_in_dir = glob0 for dirname in dirs: @@ -72,6 +82,34 @@ return [basename] return [] +# This helper function recursively yields relative pathnames inside a literal +# directory. + +def glob2(dirname, pattern): + assert pattern in ('**', b'**') + if dirname: + yield pattern[:0] + yield from _rlistdir(dirname) + +# Recursively yields relative pathnames inside a literal directory. + +def _rlistdir(dirname): + if not dirname: + if isinstance(dirname, bytes): + dirname = bytes(os.curdir, 'ASCII') + else: + dirname = os.curdir + try: + names = os.listdir(dirname) + except os.error: + return + for x in names: + if not _ishidden(x): + yield x + path = os.path.join(dirname, x) if dirname else x + for y in _rlistdir(path): + yield os.path.join(x, y) + magic_check = re.compile('[*?[]') magic_check_bytes = re.compile(b'[*?[]') diff -r 95379bf91df4 Lib/test/test_glob.py --- a/Lib/test/test_glob.py Wed Jan 02 12:32:10 2013 +0200 +++ b/Lib/test/test_glob.py Wed Jan 02 14:29:43 2013 +0200 @@ -13,6 +13,9 @@ def norm(self, *parts): return os.path.normpath(os.path.join(self.tempdir, *parts)) + def joins(self, *tuples): + return [os.path.join(self.tempdir, *parts) for parts in tuples] + def mktemp(self, *parts): filename = self.norm(*parts) base, file = os.path.split(filename) @@ -38,17 +41,17 @@ def tearDown(self): shutil.rmtree(self.tempdir) - def glob(self, *parts): + def glob(self, *parts, **kwargs): if len(parts) == 1: pattern = parts[0] else: pattern = os.path.join(*parts) p = os.path.join(self.tempdir, pattern) - res = glob.glob(p) - self.assertEqual(list(glob.iglob(p)), res) + res = glob.glob(p, **kwargs) + self.assertEqual(list(glob.iglob(p, **kwargs)), res) bres = [os.fsencode(x) for x in res] - self.assertEqual(glob.glob(os.fsencode(p)), bres) - self.assertEqual(list(glob.iglob(os.fsencode(p))), bres) + self.assertEqual(glob.glob(os.fsencode(p), **kwargs), bres) + self.assertEqual(list(glob.iglob(os.fsencode(p), **kwargs)), bres) return res def assertSequencesEqual_noorder(self, l1, l2): @@ -169,6 +172,61 @@ eq(glob.glob('\\\\*\\*\\'), []) eq(glob.glob(b'\\\\*\\*\\'), []) + def rglob(self, *parts, **kwargs): + return self.glob(*parts, recursive=True, **kwargs) + + def test_recursive_glob(self): + eq = self.assertSequencesEqual_noorder + full = [('ZZZ',), + ('a',), ('a', 'D'), + ('a', 'bcd'), + ('a', 'bcd', 'EF'), + ('a', 'bcd', 'efg'), + ('a', 'bcd', 'efg', 'ha'), + ('aaa',), ('aaa', 'zzzF'), + ('aab',), ('aab', 'F'), + ('sym1',), ('sym2',), + ('sym3',), + ('sym3', 'EF'), + ('sym3', 'efg'), + ('sym3', 'efg', 'ha'), + ] + eq(self.rglob('**'), self.joins(('',), *full)) + eq(self.rglob('.', '**'), self.joins(('.',''), + *(('.',) + i for i in full))) + dirs = [('a', ''), ('a', 'bcd', ''), ('a', 'bcd', 'efg', ''), + ('aaa', ''), ('aab', ''), ('sym3', ''), ('sym3', 'efg', '')] + eq(self.rglob('**', ''), self.joins(('',), *dirs)) + + eq(self.rglob('a', '**'), self.joins( + ('a', ''), ('a', 'D'), ('a', 'bcd'), ('a', 'bcd', 'EF'), + ('a', 'bcd', 'efg'), ('a', 'bcd', 'efg', 'ha'))) + eq(self.rglob('a**'), self.joins(('a',), ('aaa',), ('aab',))) + eq(self.rglob('**', 'EF'), self.joins(('a', 'bcd', 'EF'), + ('sym3', 'EF'))) + eq(self.rglob('**', '*F'), self.joins( + ('a', 'bcd', 'EF'), ('aaa', 'zzzF'), ('aab', 'F'), + ('sym3', 'EF'))) + eq(self.rglob('**', '*F', ''), []) + eq(self.rglob('**', 'bcd', '*'), self.joins( + ('a', 'bcd', 'EF'), ('a', 'bcd', 'efg'))) + eq(self.rglob('a', '**', 'bcd'), self.joins(('a', 'bcd'))) + + predir = os.path.abspath(os.curdir) + try: + os.chdir(self.tempdir) + join = os.path.join + eq(glob.glob('**', recursive=True), [join(*i) for i in full]) + eq(glob.glob(join('**', ''), recursive=True), + [join(*i) for i in dirs]) + eq(glob.glob(join('**','zz*F'), recursive=True), + [join('aaa', 'zzzF')]) + eq(glob.glob('**zz*F', recursive=True), []) + eq(glob.glob(join('**', 'EF'), recursive=True), + [join('a', 'bcd', 'EF'), join('sym3', 'EF')]) + finally: + os.chdir(predir) + def test_main(): run_unittest(GlobTests)