diff -r d891ad8aeb80 Doc/library/os.path.rst
--- a/Doc/library/os.path.rst Sat Jul 12 18:26:03 2014 +0300
+++ b/Doc/library/os.path.rst Sun Jul 13 21:30:38 2014 +0300
@@ -66,11 +66,24 @@
    empty string (``''``).
 
 
+.. function:: commonpath(paths)
+
+   Return the longest common sub-path of each pathname in the sequence
+   *paths*.  Raise :exc:`ValueError` if *paths* contains both absolute and
+   relative pathnames, if the paths are on different drives, or if *paths*
+   is empty.  Unlike :func:`commonprefix`, this returns a valid path.
+
+   Availability: Unix, Windows.
+
+   .. versionadded:: 3.5
+
+
 .. function:: commonprefix(list)
 
-   Return the longest path prefix (taken character-by-character) that is a prefix
-   of all paths in *list*.  If *list* is empty, return the empty string (``''``).
-   Note that this may return invalid paths because it works a character at a time.
+   Return the longest path prefix (taken character-by-character) that is a
+   prefix of all paths in *list*.  If *list* is empty, return the empty string
+   (``''``).  Note that this may return invalid paths because it works a
+   character at a time.  To obtain a valid path, see :func:`commonpath`.
 
 
 .. function:: dirname(path)
diff -r d891ad8aeb80 Lib/ntpath.py
--- a/Lib/ntpath.py Sat Jul 12 18:26:03 2014 +0300
+++ b/Lib/ntpath.py Sun Jul 13 21:30:38 2014 +0300
@@ -17,7 +17,7 @@
            "ismount", "expanduser","expandvars","normpath","abspath",
            "splitunc","curdir","pardir","sep","pathsep","defpath","altsep",
            "extsep","devnull","realpath","supports_unicode_filenames","relpath",
-           "samefile", "sameopenfile", "samestat",]
+           "samefile", "sameopenfile", "samestat", "commonpath"]
 
 # strings representing various path-related bits and pieces
 # These are primarily for export; internally, they are hardcoded.
@@ -597,6 +597,74 @@
     return join(*rel_list)
 
 
+# Return the longest common sub-path of the sequence of paths given as input.
+# The function is case-insensitive and 'separator-insensitive', i.e. if the
+# only difference between two paths is the use of '\' versus '/' as separator,
+# they are deemed to be equal.
+#
+# However, the returned path will have the standard '\' separator (even if the
+# given paths had the alternative '/' separator) and will have the case of the
+# first path given in the sequence. Additionally, any trailing separator is
+# stripped from the returned path.
+
+def commonpath(paths):
+    """Given a sequence of path names, returns the longest common sub-path."""
+
+    if not paths:
+        raise ValueError('commonpath() arg is an empty sequence')
+
+    if isinstance(paths[0], bytes):
+        sep = b'\\'
+        altsep = b'/'
+        curdir = b'.'
+    else:
+        sep = '\\'
+        altsep = '/'
+        curdir = '.'
+
+    try:
+        # replace() itself raises TypeError on mixed str/bytes input, so it
+        # has to run inside the try block to get the clearer message below.
+        drivesplits = [splitdrive(p.replace(altsep, sep).lower())
+                       for p in paths]
+        split_paths = [p.split(sep) for d, p in drivesplits]
+    except TypeError:
+        valid_types = all(isinstance(p, (str, bytes)) for p in paths)
+        if valid_types:
+            # Must have a mixture of text and binary data.
+            raise TypeError("Can't mix strings and bytes in paths") from None
+        raise
+
+    if len(set(p[:1] == sep for d, p in drivesplits)) != 1:
+        raise ValueError("Can't mix absolute and relative paths")
+
+    # Check that all drive letters or UNC paths match. The check is made only
+    # now otherwise type errors for mixing strings and bytes would not be
+    # caught.
+    if len(set(d for d, p in drivesplits)) != 1:
+        raise ValueError("Paths don't have the same drive")
+
+    drive, path = splitdrive(paths[0].replace(altsep, sep))
+    common = path.split(sep)
+    common = [c for c in common if c and c != curdir]
+
+    split_paths = [[c for c in s if c and c != curdir] for s in split_paths]
+    s1 = min(split_paths)
+    s2 = max(split_paths)
+    for i, c in enumerate(s1):
+        if c != s2[i]:
+            common = common[:i]
+            break
+    else:
+        common = common[:len(s1)]
+
+    prefix = drive + sep if isabs(paths[0]) else drive
+    if not common:
+        return prefix
+    else:
+        return prefix + sep.join(common)
+
+
 # determine if two files are in fact the same file
 try:
     # GetFinalPathNameByHandle is available starting with Windows 6.0.
diff -r d891ad8aeb80 Lib/posixpath.py
--- a/Lib/posixpath.py Sat Jul 12 18:26:03 2014 +0300
+++ b/Lib/posixpath.py Sun Jul 13 21:30:38 2014 +0300
@@ -22,7 +22,8 @@
            "ismount", "expanduser","expandvars","normpath","abspath",
            "samefile","sameopenfile","samestat",
            "curdir","pardir","sep","pathsep","defpath","altsep","extsep",
-           "devnull","realpath","supports_unicode_filenames","relpath"]
+           "devnull","realpath","supports_unicode_filenames","relpath",
+           "commonpath"]
 
 # Strings representing various path-related bits and pieces.
 # These are primarily for export; internally, they are hardcoded.
@@ -457,3 +458,49 @@
     if not rel_list:
         return curdir
     return join(*rel_list)
+
+
+# Return the longest common sub-path of the sequence of paths given as input.
+# The paths are not normalized before comparing them (this is the
+# responsibility of the caller). Any trailing separator is stripped from the
+# returned path.
+
+def commonpath(paths):
+    """Given a sequence of path names, returns the longest common sub-path."""
+
+    if not paths:
+        raise ValueError('commonpath() arg is an empty sequence')
+
+    if isinstance(paths[0], bytes):
+        sep = b'/'
+        curdir = b'.'
+    else:
+        sep = '/'
+        curdir = '.'
+
+    try:
+        split_paths = [path.split(sep) for path in paths]
+    except TypeError:
+        valid_types = all(isinstance(p, (str, bytes)) for p in paths)
+        if valid_types:
+            # Must have a mixture of text and binary data
+            raise TypeError("Can't mix strings and bytes in paths") from None
+        raise
+
+    if len(set(p[:1] == sep for p in paths)) != 1:
+        raise ValueError("Can't mix absolute and relative paths")
+
+    split_paths = [[c for c in s if c and c != curdir] for s in split_paths]
+    s1 = min(split_paths)
+    s2 = max(split_paths)
+    common = s1
+    for i, c in enumerate(s1):
+        if c != s2[i]:
+            common = s1[:i]
+            break
+
+    prefix = sep if isabs(paths[0]) else sep[:0]
+    if not common:
+        return prefix
+    else:
+        return prefix + sep.join(common)
diff -r d891ad8aeb80 Lib/test/test_ntpath.py
--- a/Lib/test/test_ntpath.py Sat Jul 12 18:26:03 2014 +0300
+++ b/Lib/test/test_ntpath.py Sun Jul 13 21:30:38 2014 +0300
@@ -328,6 +328,75 @@
         tester('ntpath.relpath("/a/b", "/a/b")', '.')
         tester('ntpath.relpath("c:/foo", "C:/FOO")', '.')
 
+    def test_commonpath(self):
+        def check(paths, expected):
+            tester(('ntpath.commonpath(%r)' % paths).replace('\\\\', '\\'),
+                   expected)
+        def check_error(exc, paths):
+            self.assertRaises(exc, ntpath.commonpath, paths)
+            self.assertRaises(exc, ntpath.commonpath,
+                              [os.fsencode(p) for p in paths])
+
+        self.assertRaises(ValueError, ntpath.commonpath, [])
+        check_error(ValueError, ['C:\\Program Files', 'Program Files'])
+        check_error(ValueError, ['C:\\Program Files', 'C:Program Files'])
+        check_error(ValueError, ['\\Program Files', 'Program Files'])
+        check_error(ValueError, ['Program Files', 'C:\\Program Files'])
+        check(['C:\\Program Files'], 'C:\\Program Files')
+        check(['C:\\Program Files', 'C:\\Program Files'], 'C:\\Program Files')
+        check(['C:\\Program Files\\', 'C:\\Program Files'],
+              'C:\\Program Files')
+        check(['C:\\Program Files\\', 'C:\\Program Files\\'],
+              'C:\\Program Files')
+        check(['C:\\\\Program Files', 'C:\\Program Files\\\\'],
+              'C:\\Program Files')
+        check(['C:\\.\\Program Files', 'C:\\Program Files\\.'],
+              'C:\\Program Files')
+        check(['C:\\', 'C:\\bin'], 'C:\\')
+        check(['C:\\Program Files', 'C:\\bin'], 'C:\\')
+        check(['C:\\Program Files', 'C:\\Program Files\\Bar'],
+              'C:\\Program Files')
+        check(['C:\\Program Files\\Foo', 'C:\\Program Files\\Bar'],
+              'C:\\Program Files')
+        check(['C:\\Program Files', 'C:\\Projects'], 'C:\\')
+        check(['C:\\Program Files\\', 'C:\\Projects'], 'C:\\')
+
+        check(['C:\\Program Files\\Foo', 'C:/Program Files/Bar'],
+              'C:\\Program Files')
+        check(['C:\\Program Files\\Foo', 'c:/program files/bar'],
+              'C:\\Program Files')
+        check(['c:/program files/bar', 'C:\\Program Files\\Foo'],
+              'c:\\program files')
+
+        check_error(ValueError, ['C:\\Program Files', 'D:\\Program Files'])
+
+        check(['spam'], 'spam')
+        check(['spam', 'spam'], 'spam')
+        check(['spam', 'alot'], '')
+        check(['and\\jam', 'and\\spam'], 'and')
+        check(['and\\\\jam', 'and\\spam\\\\'], 'and')
+        check(['and\\.\\jam', '.\\and\\spam'], 'and')
+        check(['and\\jam', 'and\\spam', 'alot'], '')
+        check(['and\\jam', 'and\\spam', 'and'], 'and')
+        check(['C:and\\jam', 'C:and\\spam'], 'C:and')
+
+        check([''], '')
+        check(['', 'spam\\alot'], '')
+        check_error(ValueError, ['', '\\spam\\alot'])
+
+        self.assertRaises(TypeError, ntpath.commonpath,
+                          [b'C:\\Program Files', 'C:\\Program Files\\Foo'])
+        self.assertRaises(TypeError, ntpath.commonpath,
+                          [b'C:\\Program Files', 'Program Files\\Foo'])
+        self.assertRaises(TypeError, ntpath.commonpath,
+                          [b'Program Files', 'C:\\Program Files\\Foo'])
+        self.assertRaises(TypeError, ntpath.commonpath,
+                          ['C:\\Program Files', b'C:\\Program Files\\Foo'])
+        self.assertRaises(TypeError, ntpath.commonpath,
+                          ['C:\\Program Files', b'Program Files\\Foo'])
+        self.assertRaises(TypeError, ntpath.commonpath,
+                          ['Program Files', b'C:\\Program Files\\Foo'])
+
     def test_sameopenfile(self):
         with TemporaryFile() as tf1, TemporaryFile() as tf2:
             # Make sure the same file is really the same
diff -r d891ad8aeb80 Lib/test/test_posixpath.py
--- a/Lib/test/test_posixpath.py Sat Jul 12 18:26:03 2014 +0300
+++ b/Lib/test/test_posixpath.py Sun Jul 13 21:30:38 2014 +0300
@@ -538,6 +538,60 @@
         finally:
             os.getcwdb = real_getcwdb
 
+    def test_commonpath(self):
+        def check(paths, expected):
+            self.assertEqual(posixpath.commonpath(paths), expected)
+            self.assertEqual(posixpath.commonpath([os.fsencode(p) for p in paths]),
+                             os.fsencode(expected))
+        def check_error(exc, paths):
+            self.assertRaises(exc, posixpath.commonpath, paths)
+            self.assertRaises(exc, posixpath.commonpath,
+                              [os.fsencode(p) for p in paths])
+
+        self.assertRaises(ValueError, posixpath.commonpath, [])
+        check_error(ValueError, ['/usr', 'usr'])
+        check_error(ValueError, ['usr', '/usr'])
+
+        check(['/usr/local'], '/usr/local')
+        check(['/usr/local', '/usr/local'], '/usr/local')
+        check(['/usr/local/', '/usr/local'], '/usr/local')
+        check(['/usr/local/', '/usr/local/'], '/usr/local')
+        check(['/usr//local', '//usr/local'], '/usr/local')
+        check(['/usr/./local', '/./usr/local'], '/usr/local')
+        check(['/', '/dev'], '/')
+        check(['/usr', '/dev'], '/')
+        check(['/usr/lib/', '/usr/lib/python3'], '/usr/lib')
+        check(['/usr/lib/', '/usr/lib64/'], '/usr')
+
+        check(['/usr/lib', '/usr/lib64'], '/usr')
+        check(['/usr/lib/', '/usr/lib64'], '/usr')
+
+        check(['spam'], 'spam')
+        check(['spam', 'spam'], 'spam')
+        check(['spam', 'alot'], '')
+        check(['and/jam', 'and/spam'], 'and')
+        check(['and//jam', 'and/spam//'], 'and')
+        check(['and/./jam', './and/spam'], 'and')
+        check(['and/jam', 'and/spam', 'alot'], '')
+        check(['and/jam', 'and/spam', 'and'], 'and')
+
+        check([''], '')
+        check(['', 'spam/alot'], '')
+        check_error(ValueError, ['', '/spam/alot'])
+
+        self.assertRaises(TypeError, posixpath.commonpath,
+                          [b'/usr/lib/', '/usr/lib/python3'])
+        self.assertRaises(TypeError, posixpath.commonpath,
+                          [b'/usr/lib/', 'usr/lib/python3'])
+        self.assertRaises(TypeError, posixpath.commonpath,
+                          [b'usr/lib/', '/usr/lib/python3'])
+        self.assertRaises(TypeError, posixpath.commonpath,
+                          ['/usr/lib/', b'/usr/lib/python3'])
+        self.assertRaises(TypeError, posixpath.commonpath,
+                          ['/usr/lib/', b'usr/lib/python3'])
+        self.assertRaises(TypeError, posixpath.commonpath,
+                          ['usr/lib/', b'/usr/lib/python3'])
+
 
 class PosixCommonTest(test_genericpath.CommonTest, unittest.TestCase):
     pathmodule = posixpath