diff -r 69f90308e327 Doc/library/glob.rst --- a/Doc/library/glob.rst Mon Jan 11 09:21:02 2016 +0200 +++ b/Doc/library/glob.rst Mon Jan 11 10:25:57 2016 +0200 @@ -15,7 +15,7 @@ The :mod:`glob` module finds all the pat according to the rules used by the Unix shell, although results are returned in arbitrary order. No tilde expansion is done, but ``*``, ``?``, and character ranges expressed with ``[]`` will be correctly matched. This is done by using -the :func:`os.listdir` and :func:`fnmatch.fnmatch` functions in concert, and +the :func:`os.scandir` and :func:`fnmatch.fnmatch` functions in concert, and not by actually invoking a subshell. Note that unlike :func:`fnmatch.fnmatch`, :mod:`glob` treats filenames beginning with a dot (``.``) as special cases. (For tilde and shell variable expansion, use :func:`os.path.expanduser` and diff -r 69f90308e327 Doc/whatsnew/3.6.rst --- a/Doc/whatsnew/3.6.rst Mon Jan 11 09:21:02 2016 +0200 +++ b/Doc/whatsnew/3.6.rst Mon Jan 11 10:25:57 2016 +0200 @@ -167,6 +167,10 @@ Optimizations * Optimize :meth:`bytes.fromhex` and :meth:`bytearray.fromhex`: they are now between 2x and 3.5x faster. (Contributed by Victor Stinner in :issue:`25401`). +* Optimized :func:`~glob.glob` and :func:`~glob.iglob` functions in the + :mod:`glob` module; they are now about 3--6 times faster. + (Contributed by Serhiy Storchaka in :issue:`25596`). + Build and C API Changes ======================= diff -r 69f90308e327 Lib/glob.py --- a/Lib/glob.py Mon Jan 11 09:21:02 2016 +0200 +++ b/Lib/glob.py Mon Jan 11 10:25:57 2016 +0200 @@ -30,15 +30,16 @@ def iglob(pathname, *, recursive=False): If recursive is true, the pattern '**' will match any files and zero or more directories and subdirectories. """ - it = _iglob(pathname, recursive) + it = _iglob(pathname, recursive, False) if recursive and _isrecursive(pathname): s = next(it) # skip empty string assert not s return it -def _iglob(pathname, recursive): +def _iglob(pathname, recursive, dironly): dirname, basename = os.path.split(pathname) if not has_magic(pathname): + assert not dironly if basename: if os.path.lexists(pathname): yield pathname @@ -49,47 +50,39 @@ def _iglob(pathname, recursive): return if not dirname: if recursive and _isrecursive(basename): - yield from glob2(dirname, basename) + yield from _glob2(dirname, basename, dironly) else: - yield from glob1(dirname, basename) + yield from _glob1(dirname, basename, dironly) return # `os.path.split()` returns the argument itself as a dirname if it is a # drive or UNC path. Prevent an infinite recursion if a drive or UNC path # contains magic characters (i.e. r'\\?\C:'). if dirname != pathname and has_magic(dirname): - dirs = _iglob(dirname, recursive) + dirs = _iglob(dirname, recursive, True) else: dirs = [dirname] if has_magic(basename): if recursive and _isrecursive(basename): - glob_in_dir = glob2 + glob_in_dir = _glob2 else: - glob_in_dir = glob1 + glob_in_dir = _glob1 else: - glob_in_dir = glob0 + glob_in_dir = _glob0 for dirname in dirs: - for name in glob_in_dir(dirname, basename): + for name in glob_in_dir(dirname, basename, dironly): yield os.path.join(dirname, name) # These 2 helper functions non-recursively glob inside a literal directory. -# They return a list of basenames. `glob1` accepts a pattern while `glob0` +# They return a list of basenames. _glob1 accepts a pattern while _glob0 # takes a literal basename (so it only has to check for its existence). -def glob1(dirname, pattern): - if not dirname: - if isinstance(pattern, bytes): - dirname = bytes(os.curdir, 'ASCII') - else: - dirname = os.curdir - try: - names = os.listdir(dirname) - except OSError: - return [] +def _glob1(dirname, pattern, dironly): + names = list(_iterdir(dirname, dironly)) if not _ishidden(pattern): - names = [x for x in names if not _ishidden(x)] + names = (x for x in names if not _ishidden(x)) return fnmatch.filter(names, pattern) -def glob0(dirname, basename): +def _glob0(dirname, basename, dironly): if not basename: # `os.path.split()` returns an empty basename for paths ending with a # directory separator. 'q*x/' should match only directories. @@ -100,30 +93,48 @@ def glob0(dirname, basename): return [basename] return [] +# Following functions are not public but can be used by third-party code. + +def glob0(dirname, pattern): + return _glob0(dirname, pattern, False) + +def glob1(dirname, pattern): + return _glob1(dirname, pattern, False) + # This helper function recursively yields relative pathnames inside a literal # directory. -def glob2(dirname, pattern): +def _glob2(dirname, pattern, dironly): assert _isrecursive(pattern) yield pattern[:0] - yield from _rlistdir(dirname) + yield from _rlistdir(dirname, dironly) -# Recursively yields relative pathnames inside a literal directory. -def _rlistdir(dirname): +# If dironly is false, yields all file names inside a directory. +# If dironly is true, yields only directory names. +def _iterdir(dirname, dironly): if not dirname: if isinstance(dirname, bytes): dirname = bytes(os.curdir, 'ASCII') else: dirname = os.curdir try: - names = os.listdir(dirname) - except os.error: + for entry in os.scandir(dirname): + try: + if not dironly or entry.is_dir(): + yield entry.name + except OSError: + pass + except OSError: return + +# Recursively yields relative pathnames inside a literal directory. +def _rlistdir(dirname, dironly): + names = list(_iterdir(dirname, dironly)) for x in names: if not _ishidden(x): yield x path = os.path.join(dirname, x) if dirname else x - for y in _rlistdir(path): + for y in _rlistdir(path, dironly): yield os.path.join(x, y)