diff --git a/Doc/library/filecmp.rst b/Doc/library/filecmp.rst --- a/Doc/library/filecmp.rst +++ b/Doc/library/filecmp.rst @@ -66,16 +66,26 @@ The :class:`dircmp` class ------------------------- -.. class:: dircmp(a, b, ignore=None, hide=None) +.. class:: dircmp(a, b, ignore=None, hide=None, match=None) - Construct a new directory comparison object, to compare the directories *a* - and *b*. *ignore* is a list of names to ignore, and defaults to - :attr:`filecmp.DEFAULT_IGNORES`. *hide* is a list of names to hide, and - defaults to ``[os.curdir, os.pardir]``. + Construct a new directory comparison object, to compare the + directories *a* and *b*. *ignore* is an iterable of name patterns + to ignore, and defaults to :attr:`filecmp.DEFAULT_IGNORES`. *hide* + is an iterable of additional name patterns to ignore, and defaults + to ``[os.curdir, os.pardir]``. + + *match* is a function used to compare the names of directories and + files to the patterns provided in *ignore* and *hide*. A directory + entry *name* is ignored if ``match(name, pattern)`` evaluates to + ``True`` for any *pattern* in *ignore* or *hide*. If *match* is + ``None``, standard object equality testing is used. The :class:`dircmp` class compares files by doing *shallow* comparisons as described for :func:`filecmp.cmp`. + .. versionchanged:: 3.5 + Added the *match* parameter. + The :class:`dircmp` class provides the following methods: .. method:: report() @@ -182,9 +192,11 @@ Here is a simplified example of using the ``subdirs`` attribute to search -recursively through two directories to show common different files:: +recursively through two directories to show the common files that differ, +ignoring files ending with ``.tmp``:: >>> from filecmp import dircmp + >>> from fnmatch import fnmatch >>> def print_diff_files(dcmp): ... for name in dcmp.diff_files: ... print("diff_file %s found in %s and %s" % (name, dcmp.left, @@ -192,6 +204,7 @@ ... for sub_dcmp in dcmp.subdirs.values(): ... print_diff_files(sub_dcmp) ... 
- >>> dcmp = dircmp('dir1', 'dir2') # doctest: +SKIP + >>> dcmp = dircmp('dir1', 'dir2', ignore=('*.tmp',), + ... match=fnmatch) # doctest: +SKIP >>> print_diff_files(dcmp) # doctest: +SKIP diff --git a/Lib/filecmp.py b/Lib/filecmp.py --- a/Lib/filecmp.py +++ b/Lib/filecmp.py @@ -12,7 +12,7 @@ import os import stat -from itertools import filterfalse +from itertools import filterfalse, chain __all__ = ['clear_cache', 'cmp', 'dircmp', 'cmpfiles', 'DEFAULT_IGNORES'] @@ -86,12 +86,14 @@ class dircmp: """A class that manages the comparison of 2 directories. - dircmp(a, b, ignore=None, hide=None) + dircmp(a, b, ignore=None, hide=None, match=None) A and B are directories. - IGNORE is a list of names to ignore, - defaults to DEFAULT_IGNORES. - HIDE is a list of names to hide, - defaults to [os.curdir, os.pardir]. + IGNORE and HIDE are iterables of name patterns to ignore. + IGNORE defaults to DEFAULT_IGNORES, and HIDE defaults to + [os.curdir, os.pardir]. + MATCH is the function used to compare directory entry + names to the HIDE and IGNORE patterns. It defaults to + string comparison. High level usage: x = dircmp(dir1, dir2) @@ -117,7 +119,7 @@ subdirs: a dictionary of dircmp objects, keyed by names in common_dirs. 
""" - def __init__(self, a, b, ignore=None, hide=None): # Initialize + def __init__(self, a, b, ignore=None, hide=None, match=None): # Initialize self.left = a self.right = b if hide is None: @@ -128,12 +130,14 @@ self.ignore = DEFAULT_IGNORES else: self.ignore = ignore + self.match = match def phase0(self): # Compare everything except common subdirectories - self.left_list = _filter(os.listdir(self.left), - self.hide+self.ignore) - self.right_list = _filter(os.listdir(self.right), - self.hide+self.ignore) + patterns = set(chain(self.hide, self.ignore)) + self.left_list = _filter(os.listdir(self.left), patterns, + self.match) + self.right_list = _filter(os.listdir(self.right), patterns, + self.match) self.left_list.sort() self.right_list.sort() @@ -191,7 +195,8 @@ for x in self.common_dirs: a_x = os.path.join(self.left, x) b_x = os.path.join(self.right, x) - self.subdirs[x] = dircmp(a_x, b_x, self.ignore, self.hide) + self.subdirs[x] = dircmp(a_x, b_x, self.ignore, self.hide, + self.match) def phase4_closure(self): # Recursively call phase4() on subdirectories self.phase4() @@ -281,10 +286,21 @@ return 2 -# Return a copy with items that occur in skip removed. -# -def _filter(flist, skip): - return list(filterfalse(skip.__contains__, flist)) +def _filter(flist, skip, match=None): + """Return items in *flist* that don't match *skip* + + Matching is done using *match*. If *match* is `None`, + equality comparison is used. + """ + if match is None: + return list(filterfalse(skip.__contains__, flist)) + + def _match_any(name): + for pattern in skip: + if match(name, pattern): + return True + return False + return list(filterfalse(_match_any, flist)) # Demonstration and testing. 
diff --git a/Lib/test/test_filecmp.py b/Lib/test/test_filecmp.py --- a/Lib/test/test_filecmp.py +++ b/Lib/test/test_filecmp.py @@ -1,6 +1,7 @@ import os, filecmp, shutil, tempfile import unittest from test import support +import fnmatch class FileCompareTestCase(unittest.TestCase): def setUp(self): @@ -145,6 +146,50 @@ self.assertEqual(d.same_files, ['file']) self.assertEqual(d.diff_files, ['file2']) + def _assert_no_differences(self, d): + self.assertEqual(d.left_list, ['file']) + self.assertListEqual(d.right_list, ['file', 'file2']) + self.assertEqual(d.common, ['file']) + self.assertEqual(d.left_only, []) + self.assertEqual(d.right_only, ['file2']) + self.assertEqual(d.same_files, ['file']) + self.assertEqual(d.diff_files, []) + + def test_dircmp_ignore_fnmatch(self): + with open(os.path.join(self.dir, 'file2.tmp'), 'w') as fh: + fh.write('Ignored contents.\n') + + left_dir, right_dir = self.dir, self.dir_diff + d = filecmp.dircmp(self.dir, self.dir_diff, ignore=('*.tmp',), + match=fnmatch.fnmatch) + self._assert_no_differences(d) + + def test_dircmp_ignore_eq(self): + with open(os.path.join(self.dir, 'file2.tmp'), 'w') as fh: + fh.write('Ignored contents.\n') + + left_dir, right_dir = self.dir, self.dir_diff + d = filecmp.dircmp(self.dir, self.dir_diff, ignore=('file2.tmp',), + match=None) + self._assert_no_differences(d) + + def test_dircmp_hide_fnmatch(self): + with open(os.path.join(self.dir, 'file2.tmp'), 'w') as fh: + fh.write('Ignored contents.\n') + + left_dir, right_dir = self.dir, self.dir_diff + d = filecmp.dircmp(self.dir, self.dir_diff, hide=('*.tmp',), + match=fnmatch.fnmatch) + self._assert_no_differences(d) + + def test_dircmp_hide_eq(self): + with open(os.path.join(self.dir, 'file2.tmp'), 'w') as fh: + fh.write('Ignored contents.\n') + + left_dir, right_dir = self.dir, self.dir_diff + d = filecmp.dircmp(self.dir, self.dir_diff, hide=('file2.tmp',), + match=None) + self._assert_no_differences(d) def test_main(): 
support.run_unittest(FileCompareTestCase, DirCompareTestCase) diff --git a/Misc/ACKS b/Misc/ACKS --- a/Misc/ACKS +++ b/Misc/ACKS @@ -20,6 +20,7 @@ Ali Afshar Jim Ahlstrom Farhan Ahmad +Michael Amrhein Matthew Ahrens Nir Aides Yaniv Aknin @@ -923,6 +924,7 @@ Charles-François Natali Vilmos Nebehaj Fredrik Nehr +Oliver Nelson Tony Nelson Trent Nelson Chad Netzer