Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(3035)

Side by Side Diff: Lib/glob.py

Issue 13968: Add a recursive function to the glob package
Patch Set: Created 1 year, 1 month ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
OLDNEW
1 """Filename globbing utility.""" 1 """Filename globbing utility."""
2 2
3 import os 3 import os
4 import re 4 import re
5 import fnmatch 5 import fnmatch
6 6
7 __all__ = ["glob", "iglob"] 7 __all__ = ["glob", "iglob"]
8
8 9
9 def glob(pathname): 10 def glob(pathname):
10 """Return a list of paths matching a pathname pattern. 11 """Return a list of paths matching a pathname pattern.
11 12
12 The pattern may contain simple shell-style wildcards a la fnmatch. 13 The pattern may contain simple shell-style wildcards a la fnmatch.
13 14
storchaka 2012/11/04 18:43:53 The pattern may also contain '**' to denote a recursion root.
14 """ 15 """
15 return list(iglob(pathname)) 16 return list(iglob(pathname))
16 17
18
17 def iglob(pathname): 19 def iglob(pathname):
20 """Return an iterator which yields the paths matching a pathname pattern.
21
22 The pattern may contain simple shell-style wildcards a la fnmatch.
23
24 The pattern may also contain '**' to denote a recursion root.
25
26 """
27 if '**' in pathname:
28 # More than one appearance of '**' is redundant as we would
29 # walk there anyway
30 recurse_root, pattern = pathname.split('**', 1)
storchaka 2012/11/04 18:43:53 What if ** happened inside a component name? I.e.
31 recurse_root += '*/'
32 recurse_dirs = _glob_simple(recurse_root)
33 for path in recurse_dirs:
34 # Cases:
35 # pattern is '', prefix a '*' to match anything
36 # pattern is 'asdf/*asdf**/asdf', prefix a '*' to match any base
37 # path.
38 # pattern is '/asdf', in this case we started with 'base**/asdf' so
39 # we don't want to require a level of separation
40 # like 'base/*/asdf'.
41 pattern = '*' + pattern.lstrip('/')
storchaka 2012/11/04 18:43:53 This can be moved outside the loop.
42 yield from _rglob(os.path.join(path, pattern), path)
43 else:
44 yield from _glob_simple(pathname)
45
46
47 def _glob_simple(pathname):
18 """Return an iterator which yields the paths matching a pathname pattern. 48 """Return an iterator which yields the paths matching a pathname pattern.
19 49
20 The pattern may contain simple shell-style wildcards a la fnmatch. 50 The pattern may contain simple shell-style wildcards a la fnmatch.
21 51
22 """ 52 """
23 if not has_magic(pathname): 53 if not has_magic(pathname):
24 if os.path.lexists(pathname): 54 if os.path.lexists(pathname):
25 yield pathname 55 yield pathname
26 return 56 return
27 dirname, basename = os.path.split(pathname) 57 dirname, basename = os.path.split(pathname)
28 if not dirname: 58 if not dirname:
29 for name in glob1(None, basename): 59 for name in _listdir_pattern(None, basename):
30 yield name 60 yield name
31 return 61 return
32 if has_magic(dirname): 62 if has_magic(dirname):
33 dirs = iglob(dirname) 63 dirs = _glob_simple(dirname)
34 else: 64 else:
35 dirs = [dirname] 65 dirs = [dirname]
36 if has_magic(basename): 66 if has_magic(basename):
37 glob_in_dir = glob1 67 glob_in_dir = _listdir_pattern
38 else: 68 else:
39 glob_in_dir = glob0 69 glob_in_dir = _listdir_basename
40 for dirname in dirs: 70 for dirname in dirs:
41 for name in glob_in_dir(dirname, basename): 71 for name in glob_in_dir(dirname, basename):
42 yield os.path.join(dirname, name) 72 yield os.path.join(dirname, name)
43 73
44 # These 2 helper functions non-recursively glob inside a literal directory.
storchaka 2012/11/04 18:43:53 Why did you remove this comment?
45 # They return a list of basenames. `glob1` accepts a pattern while `glob0`
46 # takes a literal basename (so it only has to check for its existence).
47 74
48 def glob1(dirname, pattern): 75 def _rglob(pattern, dirbase):
76 """Recursively walk dirbase and yield pattern matches"""
77 for root, dirnames, filenames in os.walk(dirbase):
78 for fname in filenames:
79 fullpath = os.path.normpath(os.path.join(root, fname))
storchaka 2012/11/04 18:43:53 os.path.normpath('///etc') is '/etc', but glob.glo
80 if fnmatch.fnmatch(fullpath, pattern):
81 yield fullpath
82
83
84 def _listdir_pattern(dirname, pattern):
49 if not dirname: 85 if not dirname:
50 if isinstance(pattern, bytes): 86 if isinstance(pattern, bytes):
51 dirname = bytes(os.curdir, 'ASCII') 87 dirname = bytes(os.curdir, 'ASCII')
52 else: 88 else:
53 dirname = os.curdir 89 dirname = os.curdir
54 try: 90 try:
55 names = os.listdir(dirname) 91 names = os.listdir(dirname)
56 except os.error: 92 except os.error:
57 return [] 93 return []
58 if pattern[0] != '.': 94 if pattern[0] != '.':
59 names = [x for x in names if x[0] != '.'] 95 names = [x for x in names if x[0] != '.']
60 return fnmatch.filter(names, pattern) 96 return fnmatch.filter(names, pattern)
61 97
62 def glob0(dirname, basename): 98
99 def _listdir_basename(dirname, basename):
63 if basename == '': 100 if basename == '':
64 # `os.path.split()` returns an empty basename for paths ending with a 101 # `os.path.split()` returns an empty basename for paths ending with a
65 # directory separator. 'q*x/' should match only directories. 102 # directory separator. 'q*x/' should match only directories.
66 if os.path.isdir(dirname): 103 if os.path.isdir(dirname):
67 return [basename] 104 return [basename]
68 else: 105 else:
69 if os.path.lexists(os.path.join(dirname, basename)): 106 if os.path.lexists(os.path.join(dirname, basename)):
70 return [basename] 107 return [basename]
71 return [] 108 return []
72 109
73 110
74 magic_check = re.compile('[*?[]') 111 magic_check = re.compile('[*?[]')
75 magic_check_bytes = re.compile(b'[*?[]') 112 magic_check_bytes = re.compile(b'[*?[]')
113
76 114
77 def has_magic(s): 115 def has_magic(s):
78 if isinstance(s, bytes): 116 if isinstance(s, bytes):
79 match = magic_check_bytes.search(s) 117 match = magic_check_bytes.search(s)
80 else: 118 else:
81 match = magic_check.search(s) 119 match = magic_check.search(s)
82 return match is not None 120 return match is not None
OLDNEW
« Doc/library/glob.rst ('K') | « Doc/library/glob.rst ('k') | Lib/test/test_glob.py » ('j') | no next file with comments »

RSS Feeds Recent Issues | This issue
This is Rietveld cbc36f91f3f7