# HG changeset patch # User Tim Golden # Date 1352128429 0 # Node ID 4f62bdd4391d9fe2c53e22663e36e121aa29d449 # Parent 48228fb874c199b58fc65ce4eb11df66c78de606 * * * [PATCH 1/2] Curly brace expansion in glob. From 8b5a26c00a7eb423d794154c494289345552a3cc Mon Sep 17 00:00:00 2001 The glob module is now capable of performing brace expansion before filtering the resulting list with fnmatch. Braces may be nested, and the expansion is performed in the same cases as for UNIX shells. For example, the following patterns will be expanded: - {foo,bar} will be expanded to ['foo', 'bar'] - {foo,{bar,baz}} will be expanded to ['foo', 'bar', 'baz'] However, the following patterns will be left unexpanded: - foo{} - foo{bar - foo{bar} - foobar} --- Doc/library/glob.rst | 10 ++++--- Lib/glob.py | 69 +++++++++++++++++++++++++++++++++++------------- Lib/test/test_glob.py | 37 ++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 23 deletions(-) diff --git a/Doc/library/glob.rst b/Doc/library/glob.rst --- a/Doc/library/glob.rst +++ b/Doc/library/glob.rst @@ -10,16 +10,13 @@ **Source code:** :source:`Lib/glob.py` -------------- - The :mod:`glob` module finds all the pathnames matching a specified pattern according to the rules used by the Unix shell. No tilde expansion is done, but -``*``, ``?``, and character ranges expressed with ``[]`` will be correctly -matched. This is done by using the :func:`os.listdir` and -:func:`fnmatch.fnmatch` functions in concert, and not by actually invoking a -subshell. (For tilde and shell variable expansion, use +``*``, ``?``, character ranges expressed with ``[]`` and list of options +expressed with ``{}`` will be correctly matched. This is done by using the +:func:`os.listdir` and :func:`fnmatch.fnmatch` functions in concert, and not by +actually invoking a subshell. (For tilde and shell variable expansion, use :func:`os.path.expanduser` and :func:`os.path.expandvars`.) - - .. 
function:: glob(pathname) Return a possibly-empty list of path names that match *pathname*, which must be @@ -47,8 +44,8 @@ ['1.gif', 'card.gif'] >>> glob.glob('?.gif') ['1.gif'] - - + >>> glob.glob('?.{gif,txt}') + ['1.gif', '2.txt'] .. seealso:: Module :mod:`fnmatch` diff --git a/Lib/glob.py b/Lib/glob.py --- a/Lib/glob.py +++ b/Lib/glob.py @@ -14,6 +14,7 @@ """ return list(iglob(pathname)) + def iglob(pathname): """Return an iterator which yields the paths matching a pathname pattern. @@ -24,21 +25,24 @@ if os.path.lexists(pathname): yield pathname return - dirname, basename = os.path.split(pathname) - if not dirname: - yield from glob1(None, basename) - return - if has_magic(dirname): - dirs = iglob(dirname) - else: - dirs = [dirname] - if has_magic(basename): - glob_in_dir = glob1 - else: - glob_in_dir = glob0 - for dirname in dirs: - for name in glob_in_dir(dirname, basename): - yield os.path.join(dirname, name) + pathnames = expand_braces(pathname) + for pathname in pathnames: + dirname, basename = os.path.split(pathname) + if not dirname: + yield from glob1(None, basename) + return + + if has_magic(dirname): + dirs = iglob(dirname) + else: + dirs = [dirname] + if has_magic(basename): + glob_in_dir = glob1 + else: + glob_in_dir = glob0 + for dirname in dirs: + for name in glob_in_dir(dirname, basename): + yield os.path.join(dirname, name) # These 2 helper functions non-recursively glob inside a literal directory. # They return a list of basenames. 
`glob1` accepts a pattern while `glob0` @@ -70,12 +74,31 @@ return [] -magic_check = re.compile('[*?[]') -magic_check_bytes = re.compile(b'[*?[]') - +magic_check = re.compile('[*?[{]') +magic_check_bytes = re.compile(b'[*?[{]') def has_magic(s): if isinstance(s, bytes): match = magic_check_bytes.search(s) else: match = magic_check.search(s) return match is not None + +brace_matcher = re.compile(r'.*(\{.+?[^\\]\})') +def expand_braces(text): + res = set() + + match = brace_matcher.search(text) + if match is not None: + sub = match.group(1) + open_brace, close_brace = match.span(1) + if "," in sub: + for pat in sub.strip('{}').split(','): + res.update(expand_braces(text[:open_brace] + pat + text[close_brace:])) + + else: + res.update(expand_braces(text[:open_brace] + sub.replace('}', '\\}') + text[close_brace:])) + + else: + res.add(text.replace('\\}', '}')) + + return res diff --git a/Lib/test/test_glob.py b/Lib/test/test_glob.py --- a/Lib/test/test_glob.py +++ b/Lib/test/test_glob.py @@ -5,7 +5,7 @@ import os import shutil -class GlobTests(unittest.TestCase): +class GlobTestsBase(unittest.TestCase): def norm(self, *parts): return os.path.normpath(os.path.join(self.tempdir, *parts)) @@ -45,6 +45,8 @@ def assertSequencesEqual_noorder(self, l1, l2): self.assertEqual(set(l1), set(l2)) +class GlobTests(GlobTestsBase): + def test_glob_literal(self): eq = self.assertSequencesEqual_noorder eq(self.glob('a'), [self.norm('a')]) @@ -105,9 +107,67 @@ eq(self.glob('sym1'), [self.norm('sym1')]) eq(self.glob('sym2'), [self.norm('sym2')]) +class GlobBracesTests(GlobTestsBase): + + def setUp(self): + super(GlobBracesTests, self).setUp() + self.mktemp('c{}d') + self.mktemp('c{deg') + self.mktemp('c{dfg') + self.mktemp('cd{f}g') + self.mktemp('ce{f}g') + self.mktemp('cdf}g') + self.mktemp('cef}g') + + def match_pattern_with_results(self, patterns, paths): + expected = [self.norm(path) for path in [os.path.join(*parts) for parts in paths]] + actual = [os.path.normpath(g) for g in 
self.glob(*patterns)] + self.assertSequencesEqual_noorder(actual, expected) + + def test_two_terms(self): + self.match_pattern_with_results(['a{aa,ab}'], [["aaa"], ["aab"]]) + + def test_missing_first_plus_nested(self): + self.match_pattern_with_results(['a{,a{a,b}}'], [['a'], ['aaa'], ['aab']]) + + def test_one_subpath_with_two_file_terms(self): + self.match_pattern_with_results(['a', '{D,bcd}'], [['a', 'D'], ['a', 'bcd']]) + + def test_two_subpath_terms_with_two_file_terms(self): + self.match_pattern_with_results(['{aaa,aab}', '{F,zzzF}'], [('aaa', 'zzzF'), ('aab', 'F')]) + + def test_two_subpath_terms_with_wildcard_file_term(self): + self.match_pattern_with_results(['aa{a,b}', '*F'], [('aaa', 'zzzF'), ('aab', 'F')]) + + def test_wildcard_subpath_with_file_missing_first_term(self): + self.match_pattern_with_results(['aa?', '{,zzz}F'], [('aaa', 'zzzF'), ('aab', 'F')]) + + # + # Edge cases where braces should not be expanded + # + def test_empty_braces(self): + self.assertSequencesEqual_noorder(self.glob('c{}d'), [self.norm('c{}d')]) + + def test_missing_end_brace(self): + self.assertSequencesEqual_noorder(self.glob('c{d{e,f}g'), map(self.norm, ['c{deg', 'c{dfg'])) + + def test_second_brace_one_term(self): + self.assertSequencesEqual_noorder(self.glob('c{d,e}{f}g'), map(self.norm, ['cd{f}g', 'ce{f}g'])) + + def test_outer_term_missing_first_brace(self): + self.assertSequencesEqual_noorder(self.glob('c{d,e}f}g'), map(self.norm, ['cdf}g', 'cef}g'])) + + # + # Braces containing folder separators + # + def test_embedded_separator1(self): + self.match_pattern_with_results(['a/{D,bcd/{EF,efg}}'], [('a', 'D'), ('a', 'bcd', 'EF'), ('a', 'bcd', 'efg')]) + + def test_embedded_separator2(self): + self.match_pattern_with_results(['aa{a/zzz,b/}F'], [('aaa', 'zzzF'), ('aab', 'F')]) def test_main(): - run_unittest(GlobTests) + run_unittest(GlobTests, GlobBracesTests) if __name__ == "__main__":