diff -r c820aa9c0c00 Doc/library/glob.rst
--- a/Doc/library/glob.rst	Fri Apr 20 18:04:03 2012 -0400
+++ b/Doc/library/glob.rst	Wed Apr 25 13:11:07 2012 +0300
@@ -25,10 +25,14 @@
    Return a possibly-empty list of path names that match *pathname*, which must be
    a string containing a path specification. *pathname* can be either absolute
    (like :file:`/usr/src/Python-1.5/Makefile`) or relative (like
-   :file:`../../Tools/\*/\*.gif`), and can contain shell-style wildcards. Broken
+   :file:`../../Tools/\*/\*.gif`), and can contain shell-style wildcards.
+   ``**`` can be used to denote a recursive walk of a directory tree. Broken
    symlinks are included in the results (as in the shell).
 
+   .. versionchanged:: 3.3
+      Added support for recursive globs using ``**``.
+
 
 .. function:: iglob(pathname)
 
    Return an :term:`iterator` which yields the same values as :func:`glob`
diff -r c820aa9c0c00 Lib/glob.py
--- a/Lib/glob.py	Fri Apr 20 18:04:03 2012 -0400
+++ b/Lib/glob.py	Wed Apr 25 13:11:07 2012 +0300
@@ -6,6 +6,7 @@
 
 __all__ = ["glob", "iglob"]
 
+
 def glob(pathname):
     """Return a list of paths matching a pathname pattern.
 
@@ -14,11 +15,47 @@
     """
     return list(iglob(pathname))
 
+
 def iglob(pathname):
     """Return an iterator which yields the paths matching a pathname pattern.
 
     The pattern may contain simple shell-style wildcards a la fnmatch.
+    The pattern may also contain '**' to denote a recursion root: the
+    directories found at that point are walked recursively and the files
+    below them are matched against the remainder of the pattern.
+
+    """
+    # Use literals of the same type as pathname: the '**' handling must keep
+    # working for bytes patterns, and mixing str with bytes raises TypeError.
+    if isinstance(pathname, bytes):
+        recurse_marker, wildcard, sep = b'**', b'*', b'/'
+    else:
+        recurse_marker, wildcard, sep = '**', '*', '/'
+    if recurse_marker in pathname:
+        # More than one appearance of '**' is redundant as we would
+        # walk there anyway
+        recurse_root, pattern = pathname.split(recurse_marker, 1)
+        # Cases:
+        #   pattern is '', prefix a '*' to match anything
+        #   pattern is 'asdf/*asdf**/asdf', prefix a '*' to match any base
+        #       path.
+        #   pattern is '/asdf', in this case we started with 'base**/asdf' so
+        #       we don't want to require a level of separation
+        #       like 'base/*/asdf'.
+        # The result is the same for every directory, so compute it once
+        # instead of prepending another '*' on each loop iteration.
+        pattern = wildcard + pattern.lstrip(sep)
+        for path in _glob_simple(recurse_root + wildcard + sep):
+            yield from _rglob(os.path.join(path, pattern), path)
+    else:
+        yield from _glob_simple(pathname)
+
+
+def _glob_simple(pathname):
+    """Return an iterator which yields the paths matching a pathname pattern.
+
+    The pattern may contain simple shell-style wildcards a la fnmatch.
 
     """
     if not has_magic(pathname):
         if os.path.lexists(pathname):
@@ -26,26 +63,54 @@
         return
     dirname, basename = os.path.split(pathname)
     if not dirname:
-        for name in glob1(None, basename):
+        for name in _listdir_pattern(None, basename):
             yield name
         return
     if has_magic(dirname):
-        dirs = iglob(dirname)
+        dirs = _glob_simple(dirname)
     else:
         dirs = [dirname]
     if has_magic(basename):
-        glob_in_dir = glob1
+        glob_in_dir = _listdir_pattern
     else:
-        glob_in_dir = glob0
+        glob_in_dir = _listdir_basename
     for dirname in dirs:
         for name in glob_in_dir(dirname, basename):
             yield os.path.join(dirname, name)
 
-# These 2 helper functions non-recursively glob inside a literal directory.
-# They return a list of basenames. `glob1` accepts a pattern while `glob0`
-# takes a literal basename (so it only has to check for its existence).
 
-def glob1(dirname, pattern):
+def _rglob(pattern, dirbase):
+    """Recursively walk dirbase and yield the files matching pattern.
+
+    pattern is matched with fnmatch against the normalized path of every
+    non-directory entry that os.walk() reports below dirbase; directories
+    themselves are never yielded.
+
+    """
+    # Candidates are normalized before matching, so the pattern has to be
+    # normalized the same way or e.g. a leading './' could never match.
+    pattern = os.path.normpath(pattern)
+    for root, dirnames, filenames in os.walk(dirbase):
+        for fname in filenames:
+            fullpath = os.path.normpath(os.path.join(root, fname))
+            if fnmatch.fnmatch(fullpath, pattern):
+                yield fullpath
+
+
+# These 2 helper functions non-recursively glob inside a literal directory.
+# They return a list of basenames. `_listdir_pattern` accepts a pattern while
+# `_listdir_basename` takes a literal basename (so it only has to check for
+# its existence).  `glob1` and `glob0` are their previous names, kept as thin
+# wrappers so that code calling them directly keeps working.
+def glob1(dirname, pattern):
+    return _listdir_pattern(dirname, pattern)
+
+
+def glob0(dirname, basename):
+    return _listdir_basename(dirname, basename)
+
+
+def _listdir_pattern(dirname, pattern):
     if not dirname:
         if isinstance(pattern, bytes):
             dirname = bytes(os.curdir, 'ASCII')
@@ -59,7 +124,8 @@
         names = [x for x in names if x[0] != '.']
     return fnmatch.filter(names, pattern)
 
-def glob0(dirname, basename):
-    if basename == '':
+
+def _listdir_basename(dirname, basename):
+    if not basename:
         # `os.path.split()` returns an empty basename for paths ending with a
         # directory separator.  'q*x/' should match only directories.
@@ -74,6 +140,7 @@
 magic_check = re.compile('[*?[]')
 magic_check_bytes = re.compile(b'[*?[]')
 
+
 def has_magic(s):
     if isinstance(s, bytes):
         match = magic_check_bytes.search(s)
diff -r c820aa9c0c00 Lib/test/test_glob.py
--- a/Lib/test/test_glob.py	Fri Apr 20 18:04:03 2012 -0400
+++ b/Lib/test/test_glob.py	Wed Apr 25 13:11:07 2012 +0300
@@ -25,6 +25,7 @@
         self.mktemp('ZZZ')
         self.mktemp('a', 'bcd', 'EF')
         self.mktemp('a', 'bcd', 'efg', 'ha')
+        self.mktemp('r', 'EF')
         if can_symlink():
             os.symlink(self.norm('broken'), self.norm('sym1'))
             os.symlink(self.norm('broken'), self.norm('sym2'))
@@ -105,6 +106,53 @@
         eq(self.glob('sym1'), [self.norm('sym1')])
         eq(self.glob('sym2'), [self.norm('sym2')])
 
+    def test_rglob(self):
+        eq = self.assertSequencesEqual_noorder
+        fs = ('aab', 'F'), ('aaa', 'zzzF'), ('a', 'bcd', 'EF'), ('r', 'EF')
+        expected = [os.path.abspath(self.norm(*i)) for i in fs]
+        res = glob.glob(os.path.join(self.tempdir, '**F'))
+        eq(expected, [os.path.abspath(i) for i in res])
+
+        # bytes patterns recurse the same way and yield bytes paths
+        res = glob.glob(os.fsencode(os.path.join(self.tempdir, '**F')))
+        eq([os.fsencode(i) for i in expected],
+           [os.path.abspath(i) for i in res])
+
+        bcds = ('a', 'bcd', 'EF'), ('a', 'bcd', 'efg', 'ha')
+        expected = [os.path.abspath(self.norm(*i)) for i in bcds]
+        pat = os.path.join(self.tempdir, '**/bcd/*')
+        res = glob.glob(pat)
+        eq(expected, [os.path.abspath(i) for i in res])
+
+        expected = []
+        pat = os.path.join(self.tempdir, 'a/**/bcd')
+        res = glob.glob(pat)
+        eq(expected, [os.path.abspath(i) for i in res])
+
+        predir = os.path.abspath(os.curdir)
+        try:
+            os.chdir(self.norm('.'))
+            expected = [os.path.join('aaa', 'zzzF')]
+            res = glob.glob('**zz*F')
+            eq(expected, res)
+
+            # a leading './' must not prevent the same match
+            res = glob.glob(os.path.join(os.curdir, '**zz*F'))
+            eq(expected, res)
+
+            efs = ('r', 'EF'), ('a', 'bcd', 'EF')
+            expected = [os.path.join(*i) for i in efs]
+            res = glob.glob('**EF')
+            eq(expected, res)
+
+            # no '**' here, so nothing recurses down to the deeper
+            # os.path.join('a', 'bcd', 'efg', 'ha')
+            expected = []
+            res = glob.glob('*efg/ha')
+            eq(expected, res)
+        finally:
+            os.chdir(predir)
+
 
 def test_main():
     run_unittest(GlobTests)