Message147128
I should probably update that posted recipe to my latest version (which adds "excluded_files" and "excluded_dirs" parameters).
However, since I've lately been dealing with remote filesystems where os.listdir() and os.stat() calls from the local machine aren't possible, I also think we may need to reconsider how this is structured and look at building a more effective pipeline model that permits more efficient modes of interaction.
Let's take 'os.walk' as the base primitive - the basis of the pipeline will always be an iterator that produces 3-tuples of a directory path, a list of subdirectory names and a list of file names. The filtering pipeline elements require that the underlying walk be performed with "topdown=True", so that it pays attention to in-place changes to the subdirectory list.
Then consider the following possible pipeline elements:
def filter_dirs(walk_iter, *include_filters, exclude_filters=()):
    """Prune the subdirectory lists produced by an ``os.walk``-style iterator.

    ``walk_iter`` must yield ``(dirpath, subdirs, files)`` 3-tuples. Each
    ``subdirs`` list is modified *in place*, so a top-down ``os.walk``
    underneath will not descend into the directories removed here.

    A subdirectory name is kept when it matches at least one pattern in
    ``include_filters`` and no pattern in ``exclude_filters``. Patterns are
    matched with ``fnmatch`` (expected to be ``fnmatch.fnmatch``). Note that
    with no include patterns at all, every subdirectory is removed.

    ``exclude_filters`` is an iterable of patterns; a single string is
    treated as one pattern rather than being iterated character by character.

    Yields the same 3-tuples, with ``subdirs`` filtered.
    """
    if isinstance(exclude_filters, str):
        exclude_filters = (exclude_filters,)

    def should_include(dirname):
        return any(fnmatch(dirname, include) for include in include_filters)

    def should_exclude(dirname):
        # Match against the *exclude* pattern; the previous code referenced
        # an undefined name here and raised NameError on the first check.
        return any(fnmatch(dirname, exclude) for exclude in exclude_filters)

    for dirpath, subdirs, files in walk_iter:
        # Slice assignment keeps the same list object the underlying walk holds
        subdirs[:] = [subdir for subdir in subdirs
                      if should_include(subdir) and not should_exclude(subdir)]
        yield dirpath, subdirs, files
def filter_files(walk_iter, *include_filters, exclude_filters=()):
    """Filter the file lists produced by an ``os.walk``-style iterator.

    ``walk_iter`` must yield ``(dirpath, subdirs, files)`` 3-tuples. Each
    ``files`` list is modified *in place*; ``subdirs`` is passed through
    untouched, so the set of directories visited is not affected.

    A file name is kept when it matches at least one pattern in
    ``include_filters`` and no pattern in ``exclude_filters``. Patterns are
    matched with ``fnmatch`` (expected to be ``fnmatch.fnmatch``). Note that
    with no include patterns at all, every file is removed.

    ``exclude_filters`` is an iterable of patterns; a single string is
    treated as one pattern rather than being iterated character by character.

    Yields the same 3-tuples, with ``files`` filtered.
    """
    if isinstance(exclude_filters, str):
        exclude_filters = (exclude_filters,)

    def should_include(fname):
        return any(fnmatch(fname, include) for include in include_filters)

    def should_exclude(fname):
        # Match against the *exclude* pattern; the previous code referenced
        # an undefined name here and raised NameError on the first check.
        return any(fnmatch(fname, exclude) for exclude in exclude_filters)

    for dirpath, subdirs, files in walk_iter:
        files[:] = [fname for fname in files
                    if should_include(fname) and not should_exclude(fname)]
        yield dirpath, subdirs, files
def limit_depth(walk_iter, depth):
    """Limit how far below the starting directory a top-down walk descends.

    ``walk_iter`` must yield ``(dirpath, subdirs, files)`` 3-tuples, as a
    top-down ``os.walk`` does. The first entry is taken as the starting
    directory (depth 0). Any directory at ``depth`` levels below it (or
    deeper) has its ``subdirs`` list emptied in place, which stops the
    underlying walk from descending further. A ``depth`` of 0 therefore
    yields only the starting directory.

    The list is pruned *before* the entry is yielded, so consumers never see
    subdirectories that will not be visited.

    Raises ``ValueError`` if ``depth`` is negative. Because this is a
    generator, the check runs when iteration starts, not at call time.
    """
    if depth < 0:
        msg = "Depth limit less than 0 ({!r} provided)"
        raise ValueError(msg.format(depth))
    sep = os.sep
    initial_depth = None
    # A single loop also works for plain iterables (e.g. a list), which the
    # earlier two-loop form would have restarted from the beginning.
    for dirpath, subdirs, files in walk_iter:
        # Ignore trailing separators: "top/" and the filesystem root would
        # otherwise be counted one level deeper than they really are,
        # letting the walk run one level past the requested limit.
        path_depth = dirpath.rstrip(sep).count(sep)
        if initial_depth is None:
            initial_depth = path_depth
        if path_depth - initial_depth >= depth:
            subdirs[:] = []
        yield dirpath, subdirs, files
def detect_symlink_loops(walk_iter, onloop=None):
    """Detect symlinked directories that lead back towards the walk's root.

    ``walk_iter`` must yield ``(dirpath, subdirs, files)`` 3-tuples, as a
    top-down ``os.walk(..., followlinks=True)`` does. The first entry is
    taken as the starting directory and is always yielded unchecked.

    For every later ``dirpath`` that is itself a symbolic link, its resolved
    path is compared component by component with its nominal position under
    the resolved starting directory. If no component differs over their
    common length, the link is treated as a loop and ``onloop(dirpath)`` is
    called:

    * if ``onloop`` returns a false value, the directory's ``subdirs`` list
      is emptied in place and the entry is not yielded;
    * if it returns a true value, the entry is yielded unchanged.

    The default ``onloop`` writes a warning to ``sys.stderr`` and returns
    ``None``, so looping links are skipped.
    """
    if onloop is None:
        def onloop(path):
            msg = "Symlink {!r} refers to a parent directory, skipping\n"
            sys.stderr.write(msg.format(path))
            sys.stderr.flush()

    # Previously undefined in this function, which raised NameError as soon
    # as the first symlinked directory was encountered.
    sep = os.sep
    # Ensure the second loop resumes after the first entry even when a
    # re-iterable (e.g. a list) is passed in.
    walk_iter = iter(walk_iter)

    for top, subdirs, files in walk_iter:
        real_top = os.path.abspath(os.path.realpath(top))
        yield top, subdirs, files
        break  # only the first entry is needed to establish the root

    for dirpath, subdirs, files in walk_iter:
        if os.path.islink(dirpath):
            # We just descended into a directory via a symbolic link.
            # Check if we're referring to a directory that is
            # a parent of our nominal directory.
            relative = os.path.relpath(dirpath, top)
            nominal_path = os.path.join(real_top, relative)
            real_path = os.path.abspath(os.path.realpath(dirpath))
            path_fragments = zip(nominal_path.split(sep), real_path.split(sep))
            is_loop = all(nominal == real for nominal, real in path_fragments)
            if is_loop and not onloop(dirpath):
                # Stop the underlying walk from following the loop and
                # hide this entry from downstream consumers.
                subdirs[:] = []
                continue
        yield dirpath, subdirs, files
And pipeline terminators:
def walk_dirs(walk_iter):
    """Yield only the directory path of each ``(dirpath, subdirs, files)`` entry."""
    yield from (dir_path for dir_path, _subdirs, _files in walk_iter)
def walk_files(walk_iter):
    """Yield the joined path of every file in each ``(dirpath, subdirs, files)`` entry.

    Directories themselves are not yielded; each file name is joined to its
    containing ``dirpath`` with ``os.path.join``.
    """
    for dir_path, _subdirs, file_names in walk_iter:
        yield from (os.path.join(dir_path, file_name) for file_name in file_names)
def walk_all(walk_iter):
    """Yield each directory path followed by the joined paths of its files.

    For every ``(dirpath, subdirs, files)`` entry, ``dirpath`` is yielded
    first, then each file name joined to it with ``os.path.join``.
    """
    for dir_path, _subdirs, file_names in walk_iter:
        yield dir_path
        yield from (os.path.join(dir_path, file_name) for file_name in file_names)
The pipeline terminators could then be combined with ordinary iterable consumers like comprehensions:
# followlinks is an os.walk() option; os.path.abspath() only takes the path
base_walk = detect_symlink_loops(os.walk(os.path.abspath(base_dir), followlinks=True))
depth_limited_walk = limit_depth(base_walk, 2)
filtered_walk = filter_dirs(filter_files(depth_limited_walk, "*.py"), "*.pyp")
# Map every directory and file path that survives the pipeline to its stat result
tree_info = {path: os.stat(path) for path in walk_all(filtered_walk)}
|
Date |
User |
Action |
Args |
2011-11-06 00:43:33 | ncoghlan | set | recipients:
+ ncoghlan, vstinner, eric.araujo |
2011-11-06 00:43:33 | ncoghlan | set | messageid: <1320540213.89.0.365463113335.issue13229@psf.upfronthosting.co.za> |
2011-11-06 00:43:33 | ncoghlan | link | issue13229 messages |
2011-11-06 00:43:32 | ncoghlan | create | |
|