From 8b5a26c00a7eb423d794154c494289345552a3cc Mon Sep 17 00:00:00 2001 From: Mathieu Bridon Date: Sat, 5 Mar 2011 11:13:39 +0800 Subject: [PATCH 1/2] Curly brace expansion in glob. The glob module is now capable of performing brace expansion before filtering the resulting list with fnmatch. Braces may be nested, and the expansion is performed in the same cases as for UNIX shells. For example, the following patterns will be expanded: - {foo,bar} will be expanded to ['foo', 'bar'] - {foo,{bar,baz}} will be expanded to ['foo', 'bar', 'baz'] However, the following patterns will be left unexpanded: - foo{} - foo{bar - foo{bar} - foobar} --- Doc/library/glob.rst | 10 ++++--- Lib/glob.py | 69 +++++++++++++++++++++++++++++++++++------------- Lib/test/test_glob.py | 37 ++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 23 deletions(-) diff --git a/Doc/library/glob.rst b/Doc/library/glob.rst index 3d31c11..e695a4a 100644 --- a/Doc/library/glob.rst +++ b/Doc/library/glob.rst @@ -13,10 +13,10 @@ The :mod:`glob` module finds all the pathnames matching a specified pattern according to the rules used by the Unix shell. No tilde expansion is done, but -``*``, ``?``, and character ranges expressed with ``[]`` will be correctly -matched. This is done by using the :func:`os.listdir` and -:func:`fnmatch.fnmatch` functions in concert, and not by actually invoking a -subshell. (For tilde and shell variable expansion, use +``*``, ``?``, character ranges expressed with ``[]`` and list of options +expressed with ``{}`` will be correctly matched. This is done by using the +:func:`os.listdir` and :func:`fnmatch.fnmatch` functions in concert, and not by +actually invoking a subshell. (For tilde and shell variable expansion, use :func:`os.path.expanduser` and :func:`os.path.expandvars`.) @@ -47,6 +47,8 @@ preserved. :: ['1.gif', 'card.gif'] >>> glob.glob('?.gif') ['1.gif'] + >>> glob.glob('?.{gif,txt}') + ['1.gif', '2.txt'] ..
seealso:: diff --git a/Lib/glob.py b/Lib/glob.py index c5f5f69..41b67a3 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -25,28 +25,33 @@ def iglob(pathname): if os.path.lexists(pathname): yield pathname return - dirname, basename = os.path.split(pathname) - if not dirname: - for name in glob1(None, basename): - yield name - return - if has_magic(dirname): - dirs = iglob(dirname) - else: - dirs = [dirname] - if has_magic(basename): - glob_in_dir = glob1 - else: - glob_in_dir = glob0 - for dirname in dirs: - for name in glob_in_dir(dirname, basename): - yield os.path.join(dirname, name) + + pathnames = expand_braces(pathname) + for pathname in pathnames: + dirname, basename = os.path.split(pathname) + if not dirname: + for name in glob1(None, basename): + yield name + else: + if has_magic(dirname): + dirs = iglob(dirname) + else: + dirs = [dirname] + if has_magic(basename): + glob_in_dir = glob1 + else: + glob_in_dir = glob0 + for dirname in dirs: + for name in glob_in_dir(dirname, basename): + yield os.path.join(dirname, name) # These 2 helper functions non-recursively glob inside a literal directory. # They return a list of basenames. `glob1` accepts a pattern while `glob0` # takes a literal basename (so it only has to check for its existence). 
def glob1(dirname, pattern): + res = list() + if not dirname: if isinstance(pattern, bytes): dirname = bytes(os.curdir, 'ASCII') @@ -58,7 +63,9 @@ def glob1(dirname, pattern): return [] if pattern[0] != '.': names = [x for x in names if x[0] != '.'] - return fnmatch.filter(names, pattern) + res.extend(fnmatch.filter(names, pattern)) + + return res def glob0(dirname, basename): if basename == '': @@ -72,8 +79,8 @@ def glob0(dirname, basename): return [] -magic_check = re.compile('[*?[]') -magic_check_bytes = re.compile(b'[*?[]') +magic_check = re.compile('[*?[{]') +magic_check_bytes = re.compile(b'[*?[{]') def has_magic(s): if isinstance(s, bytes): @@ -81,3 +88,27 @@ def has_magic(s): else: match = magic_check.search(s) return match is not None + +def expand_braces(orig): + r = r'.*(\{.+?[^\\]\})' + p = re.compile(r) + + s = orig[:] + res = list() + + m = p.search(s) + if m is not None: + sub = m.group(1) + open_brace = s.find(sub) + close_brace = open_brace + len(sub) - 1 + if sub.find(',') != -1: + for pat in sub.strip('{}').split(','): + res.extend(expand_braces(s[:open_brace] + pat + s[close_brace+1:])) + + else: + res.extend(expand_braces(s[:open_brace] + sub.replace('}', '\\}') + s[close_brace+1:])) + + else: + res.append(s.replace('\\}', '}')) + + return list(set(res)) diff --git a/Lib/test/test_glob.py b/Lib/test/test_glob.py index 1560a6b..e64a933 100644 --- a/Lib/test/test_glob.py +++ b/Lib/test/test_glob.py @@ -25,6 +25,13 @@ class GlobTests(unittest.TestCase): self.mktemp('ZZZ') self.mktemp('a', 'bcd', 'EF') self.mktemp('a', 'bcd', 'efg', 'ha') + self.mktemp('c{}d') + self.mktemp('c{deg') + self.mktemp('c{dfg') + self.mktemp('cd{f}g') + self.mktemp('ce{f}g') + self.mktemp('cdf}g') + self.mktemp('cef}g') if can_symlink(): os.symlink(self.norm('broken'), self.norm('sym1')) os.symlink(self.norm('broken'), self.norm('sym2')) @@ -105,6 +112,36 @@ class GlobTests(unittest.TestCase): eq(self.glob('sym1'), [self.norm('sym1')]) eq(self.glob('sym2'), 
[self.norm('sym2')]) + def test_glob_curly_braces(self): + eq = self.assertSequencesEqual_noorder + eq(self.glob('a{aa,ab}'), map(self.norm, ['aaa', 'aab'])) + eq(self.glob('a{,a{a,b}}'), map(self.norm, ['a', 'aaa', 'aab'])) + eq(self.glob('a', '{D,bcd}'), map(self.norm, [os.path.join('a', 'D'), + os.path.join('a', 'bcd')])) + eq(self.glob('{aaa,aab}', '{F,zzzF}'), map(self.norm, + [os.path.join('aaa', 'zzzF'), + os.path.join('aab', 'F')])) + eq(self.glob('aa{a,b}', '*F'), map(self.norm, + [os.path.join('aaa', 'zzzF'), + os.path.join('aab', 'F')])) + eq(self.glob('aa?', '{,zzz}F'), map(self.norm, + [os.path.join('aaa', 'zzzF'), + os.path.join('aab', 'F')])) + + # test expansion with folder separators inside the braces + eq(self.glob('a/{D,bcd/{EF,efg}}'), map(self.norm, + [os.path.join('a', 'D'), + os.path.join('a', 'bcd', 'EF'), + os.path.join('a', 'bcd', 'efg')])) + eq(self.glob('aa{a/zzz,b/}F'), map(self.norm, + [os.path.join('aaa', 'zzzF'), + os.path.join('aab', 'F')])) + + # test some edge cases where braces must not be expanded + eq(self.glob('c{}d'), [self.norm('c{}d')]) + eq(self.glob('c{d{e,f}g'), map(self.norm, ['c{deg', 'c{dfg'])) + eq(self.glob('c{d,e}{f}g'), map(self.norm, ['cd{f}g', 'ce{f}g'])) + eq(self.glob('c{d,e}f}g'), map(self.norm, ['cdf}g', 'cef}g'])) def test_main(): run_unittest(GlobTests) -- 1.7.4