| Left: | ||
| Right: |
| OLD | NEW |
|---|---|
| 1 """Filename globbing utility.""" | 1 """Filename globbing utility.""" |
| 2 | 2 |
| 3 import os | 3 import os |
| 4 import re | 4 import re |
| 5 import fnmatch | 5 import fnmatch |
| 6 | 6 |
| 7 __all__ = ["glob", "iglob"] | 7 __all__ = ["glob", "iglob"] |
| 8 | |
| 8 | 9 |
| 9 def glob(pathname): | 10 def glob(pathname): |
| 10 """Return a list of paths matching a pathname pattern. | 11 """Return a list of paths matching a pathname pattern. |
| 11 | 12 |
| 12 The pattern may contain simple shell-style wildcards a la fnmatch. | 13 The pattern may contain simple shell-style wildcards a la fnmatch. |
| 13 | 14 |
|
storchaka
2012/11/04 18:43:53
The pattern may also contain '**' to denote a recu
| |
| 14 """ | 15 """ |
| 15 return list(iglob(pathname)) | 16 return list(iglob(pathname)) |
| 16 | 17 |
| 18 | |
| 17 def iglob(pathname): | 19 def iglob(pathname): |
| 20 """Return an iterator which yields the paths matching a pathname pattern. | |
| 21 | |
| 22 The pattern may contain simple shell-style wildcards a la fnmatch. | |
| 23 | |
| 24 The pattern may also contain '**' to denote a recursion root. | |
| 25 | |
| 26 """ | |
| 27 if '**' in pathname: | |
| 28 # More than one appearance of '**' is redundant as we would | |
| 29 # walk there anyway | |
| 30 recurse_root, pattern = pathname.split('**', 1) | |
|
storchaka
2012/11/04 18:43:53
What if ** appears inside a component name? I.e.
| |
| 31 recurse_root += '*/' | |
| 32 recurse_dirs = _glob_simple(recurse_root) | |
| 33 for path in recurse_dirs: | |
| 34 # Cases: | |
| 35 # pattern is '', prefix a '*' to match anything | |
| 36 # pattern is 'asdf/*asdf**/asdf', prefix a '*' to match any base | |
| 37 # path. | |
| 38 # pattern is '/asdf', in this case we started with 'base**/asdf' so | |
| 39 # we don't want to require a level of separation | |
| 40 # like 'base/*/asdf'. | |
| 41 pattern = '*' + pattern.lstrip('/') | |
|
storchaka
2012/11/04 18:43:53
This can be moved outside the loop.
| |
| 42 yield from _rglob(os.path.join(path, pattern), path) | |
| 43 else: | |
| 44 yield from _glob_simple(pathname) | |
| 45 | |
| 46 | |
| 47 def _glob_simple(pathname): | |
| 18 """Return an iterator which yields the paths matching a pathname pattern. | 48 """Return an iterator which yields the paths matching a pathname pattern. |
| 19 | 49 |
| 20 The pattern may contain simple shell-style wildcards a la fnmatch. | 50 The pattern may contain simple shell-style wildcards a la fnmatch. |
| 21 | 51 |
| 22 """ | 52 """ |
| 23 if not has_magic(pathname): | 53 if not has_magic(pathname): |
| 24 if os.path.lexists(pathname): | 54 if os.path.lexists(pathname): |
| 25 yield pathname | 55 yield pathname |
| 26 return | 56 return |
| 27 dirname, basename = os.path.split(pathname) | 57 dirname, basename = os.path.split(pathname) |
| 28 if not dirname: | 58 if not dirname: |
| 29 for name in glob1(None, basename): | 59 for name in _listdir_pattern(None, basename): |
| 30 yield name | 60 yield name |
| 31 return | 61 return |
| 32 if has_magic(dirname): | 62 if has_magic(dirname): |
| 33 dirs = iglob(dirname) | 63 dirs = _glob_simple(dirname) |
| 34 else: | 64 else: |
| 35 dirs = [dirname] | 65 dirs = [dirname] |
| 36 if has_magic(basename): | 66 if has_magic(basename): |
| 37 glob_in_dir = glob1 | 67 glob_in_dir = _listdir_pattern |
| 38 else: | 68 else: |
| 39 glob_in_dir = glob0 | 69 glob_in_dir = _listdir_basename |
| 40 for dirname in dirs: | 70 for dirname in dirs: |
| 41 for name in glob_in_dir(dirname, basename): | 71 for name in glob_in_dir(dirname, basename): |
| 42 yield os.path.join(dirname, name) | 72 yield os.path.join(dirname, name) |
| 43 | 73 |
| 44 # These 2 helper functions non-recursively glob inside a literal directory. | |
|
storchaka
2012/11/04 18:43:53
Why did you remove this comment?
| |
| 45 # They return a list of basenames. `glob1` accepts a pattern while `glob0` | |
| 46 # takes a literal basename (so it only has to check for its existence). | |
| 47 | 74 |
| 48 def glob1(dirname, pattern): | 75 def _rglob(pattern, dirbase): |
| 76 """Recursively walk dirbase and yield pattern matches""" | |
| 77 for root, dirnames, filenames in os.walk(dirbase): | |
| 78 for fname in filenames: | |
| 79 fullpath = os.path.normpath(os.path.join(root, fname)) | |
|
storchaka
2012/11/04 18:43:53
os.path.normpath('///etc') is '/etc', but glob.glo
| |
| 80 if fnmatch.fnmatch(fullpath, pattern): | |
| 81 yield fullpath | |
| 82 | |
| 83 | |
| 84 def _listdir_pattern(dirname, pattern): | |
| 49 if not dirname: | 85 if not dirname: |
| 50 if isinstance(pattern, bytes): | 86 if isinstance(pattern, bytes): |
| 51 dirname = bytes(os.curdir, 'ASCII') | 87 dirname = bytes(os.curdir, 'ASCII') |
| 52 else: | 88 else: |
| 53 dirname = os.curdir | 89 dirname = os.curdir |
| 54 try: | 90 try: |
| 55 names = os.listdir(dirname) | 91 names = os.listdir(dirname) |
| 56 except os.error: | 92 except os.error: |
| 57 return [] | 93 return [] |
| 58 if pattern[0] != '.': | 94 if pattern[0] != '.': |
| 59 names = [x for x in names if x[0] != '.'] | 95 names = [x for x in names if x[0] != '.'] |
| 60 return fnmatch.filter(names, pattern) | 96 return fnmatch.filter(names, pattern) |
| 61 | 97 |
| 62 def glob0(dirname, basename): | 98 |
| 99 def _listdir_basename(dirname, basename): | |
| 63 if basename == '': | 100 if basename == '': |
| 64 # `os.path.split()` returns an empty basename for paths ending with a | 101 # `os.path.split()` returns an empty basename for paths ending with a |
| 65 # directory separator. 'q*x/' should match only directories. | 102 # directory separator. 'q*x/' should match only directories. |
| 66 if os.path.isdir(dirname): | 103 if os.path.isdir(dirname): |
| 67 return [basename] | 104 return [basename] |
| 68 else: | 105 else: |
| 69 if os.path.lexists(os.path.join(dirname, basename)): | 106 if os.path.lexists(os.path.join(dirname, basename)): |
| 70 return [basename] | 107 return [basename] |
| 71 return [] | 108 return [] |
| 72 | 109 |
| 73 | 110 |
| 74 magic_check = re.compile('[*?[]') | 111 magic_check = re.compile('[*?[]') |
| 75 magic_check_bytes = re.compile(b'[*?[]') | 112 magic_check_bytes = re.compile(b'[*?[]') |
| 113 | |
| 76 | 114 |
| 77 def has_magic(s): | 115 def has_magic(s): |
| 78 if isinstance(s, bytes): | 116 if isinstance(s, bytes): |
| 79 match = magic_check_bytes.search(s) | 117 match = magic_check_bytes.search(s) |
| 80 else: | 118 else: |
| 81 match = magic_check.search(s) | 119 match = magic_check.search(s) |
| 82 return match is not None | 120 return match is not None |
| OLD | NEW |