diff -r 5d6658b0eca8 Doc/library/os.path.rst --- a/Doc/library/os.path.rst Mon Apr 20 03:34:29 2009 -0700 +++ b/Doc/library/os.path.rst Mon Apr 20 03:35:39 2009 -0700 @@ -23,11 +23,6 @@ their parameters. The result is an object of the same type, if a path or file name is returned. -.. warning:: - - On Windows, many of these functions do not properly support UNC pathnames. - :func:`splitunc` and :func:`ismount` do handle them correctly. - .. note:: @@ -267,10 +262,20 @@ .. function:: splitdrive(path) Split the pathname *path* into a pair ``(drive, tail)`` where *drive* is either - a drive specification or the empty string. On systems which do not use drive + a mount point or the empty string. On systems which do not use drive specifications, *drive* will always be the empty string. In all cases, ``drive + tail`` will be the same as *path*. + On Windows, splits a pathname into drive/UNC sharepoint and relative path. + + If the path contains a drive letter, drive will contain everything + up to and including the colon. + e.g. ``splitdrive("c:/dir")`` returns ``("c:", "/dir")`` + + If the path contains a UNC path, drive will contain the host name + and share, up to but not including the fourth separator. + e.g. ``splitdrive("//host/computer/dir")`` returns ``("//host/computer", "/dir")`` + .. function:: splitext(path) @@ -282,12 +287,16 @@ .. function:: splitunc(path) + .. deprecated:: 3.1 + Use *splitdrive* instead. + Split the pathname *path* into a pair ``(unc, rest)`` so that *unc* is the UNC mount point (such as ``r'\\host\mount'``), if present, and *rest* the rest of the path (such as ``r'\path\file.ext'``). For paths containing drive letters, *unc* will always be the empty string. Availability: Windows. + .. 
data:: supports_unicode_filenames True if arbitrary Unicode strings can be used as file names (within limitations diff -r 5d6658b0eca8 Lib/ntpath.py --- a/Lib/ntpath.py Mon Apr 20 03:34:29 2009 -0700 +++ b/Lib/ntpath.py Mon Apr 20 03:35:39 2009 -0700 @@ -10,6 +10,7 @@ import stat import genericpath from genericpath import * +import warnings __all__ = ["normcase","isabs","join","splitdrive","split","splitext", "basename","dirname","commonprefix","getsize","getmtime", @@ -34,6 +35,12 @@ altsep = '/' devnull = 'nul' +def _get_empty(path): + if isinstance(path, bytes): + return b'' + else: + return '' + def _get_sep(path): if isinstance(path, bytes): return b'\\' @@ -76,50 +83,67 @@ # Return whether a path is absolute. -# Trivial in Posix, harder on the Mac or MS-DOS. -# For DOS it is absolute if it starts with a slash or backslash (current +# Trivial in Posix, harder on Windows. +# For Windows it is absolute if it starts with a slash or backslash (current # volume), or if a pathname after the volume letter and colon / UNC resource # starts with a slash or backslash. def isabs(s): """Test whether a path is absolute""" s = splitdrive(s)[1] - return len(s) > 0 and s[:1] in _get_bothseps(s) + return s and s[0] in _get_bothseps(s) # Join two (or more) paths. -def join(a, *p): +def join(path, *p): """Join two or more pathname components, inserting "\\" as needed. If any component is an absolute path, all previous path components will be discarded.""" - sep = _get_sep(a) - seps = _get_bothseps(a) - colon = _get_colon(a) - path = a + sep = _get_sep(path) + seps = _get_bothseps(path) + colon = _get_colon(path) for b in p: - b_wins = 0 # set to 1 iff b makes path irrelevant + b_wins = False # set to True iff b makes path irrelevant if not path: - b_wins = 1 + b_wins = True elif isabs(b): # This probably wipes out path so far. However, it's more - # complicated if path begins with a drive letter: + # complicated if path begins with a drive letter. 
You get a+b + # (minus redundant slashes) in these four cases: # 1. join('c:', '/a') == 'c:/a' - # 2. join('c:/', '/a') == 'c:/a' - # But - # 3. join('c:/a', '/b') == '/b' - # 4. join('c:', 'd:/') = 'd:/' - # 5. join('c:/', 'd:/') = 'd:/' - if path[1:2] != colon or b[1:2] == colon: - # Path doesn't start with a drive letter, or cases 4 and 5. - b_wins = 1 + # 2. join('//computer/share', '/a') == '//computer/share/a' + # 3. join('c:/', '/a') == 'c:/a' + # 4. join('//computer/share/', '/a') == '//computer/share/a' + # But b wins in all of these cases: + # 5. join('c:/a', '/b') == '/b' + # 6. join('//computer/share/a', '/b') == '/b' + # 7. join('c:', 'd:/') == 'd:/' + # 8. join('c:', '//computer/share/') == '//computer/share/' + # 9. join('//computer/share', 'd:/') == 'd:/' + # 10. join('//computer/share', '//computer/share/') == '//computer/share/' + # 11. join('c:/', 'd:/') == 'd:/' + # 12. join('c:/', '//computer/share/') == '//computer/share/' + # 13. join('//computer/share/', 'd:/') == 'd:/' + # 14. join('//computer/share/', '//computer/share/') == '//computer/share/' + b_prefix, b_rest = splitdrive(b) - # Else path has a drive letter, and b doesn't but is absolute. - elif len(path) > 3 or (len(path) == 3 and - path[-1:] not in seps): - # case 3 - b_wins = 1 + # if b has a prefix, it always wins. + if b_prefix: + b_wins = True + else: + # b doesn't have a prefix. + # but isabs(b) returned true. + # and therefore b_rest[0] must be a slash. + # (but let's check that.) + assert(b_rest and b_rest[0] in seps) + + # so, b still wins if path has a rest that's more than a sep. + # you get a+b if path_rest is empty or only has a sep. + # (see cases 1-4 for times when b loses.) + path_rest = splitdrive(path)[1] + b_wins = path_rest and path_rest not in seps if b_wins: path = b @@ -152,22 +176,58 @@ # colon) and the path specification. # It is always true that drivespec + pathspec == p def splitdrive(p): - """Split a pathname into drive and path specifiers. 
Returns a 2-tuple -"(drive,path)"; either part may be empty""" - if p[1:2] == _get_colon(p): - return p[0:2], p[2:] - return p[:0], p + """Split a pathname into drive/UNC sharepoint and relative path specifiers. +Returns a 2-tuple (drive_or_unc, path); either part may be empty. + +If you assign + result = splitdrive(p) +It is always true that: + result[0] + result[1] == p + +If the path contained a drive letter, drive_or_unc will contain everything +up to and including the colon. e.g. splitdrive("c:/dir") returns ("c:", "/dir") + +If the path contained a UNC path, the drive_or_unc will contain the host name +and share up to but not including the fourth separator. +e.g. splitdrive("//host/computer/dir") returns ("//host/computer", "/dir") + +Paths cannot contain both a drive letter and a UNC path. +""" + empty = _get_empty(p) + if len(p) > 1: + sep = _get_sep(p) + normp = normcase(p) + if normp[0:2] == sep*2: + # is a UNC path: + # vvvvvvvvvvvvvvvvvvvv equivalent to drive letter + # \\machine\mountpoint\directories... + # directory ^^^^^^^^^^^^^^^ + index = normp.find(sep, 2) + if index == -1: + return empty, p + index = normp.find(sep, index + 1) + if index == -1: + index = len(p) + return p[:index], p[index:] + if normp[1:2] == _get_colon(p): + return p[:2], p[2:] + return empty, p # Parse UNC paths def splitunc(p): - """Split a pathname into UNC mount point and relative path specifiers. + """Deprecated since Python 3.1. Please use splitdrive() instead; + it now handles UNC paths. + + Split a pathname into UNC mount point and relative path specifiers. Return a 2-tuple (unc, rest); either part may be empty. If unc is not empty, it has the form '//host/mount' (or similar using backslashes). unc+rest is always the input path. Paths containing drive letters never have an UNC part. 
""" + warnings.warn("ntpath.splitunc is deprecated, use ntpath.splitdrive instead", + DeprecationWarning) sep = _get_sep(p) if not p[1:2]: return p[:0], p # Drive letter present @@ -256,12 +316,11 @@ def ismount(path): """Test whether a path is a mount point (defined as root of drive)""" - unc, rest = splitunc(path) seps = _get_bothseps(path) - if unc: - return rest in p[:0] + seps - p = splitdrive(path)[1] - return len(p) == 1 and p[0] in seps + root, rest = splitdrive(path) + if root and root[0] in seps: + return (not rest) or (rest in seps) + return rest in seps # Expand paths beginning with '~' or '~user'. @@ -442,28 +501,16 @@ def normpath(path): """Normalize path, eliminating double slashes, etc.""" sep = _get_sep(path) + altsep = _get_altsep(path) dotdot = _get_dot(path) * 2 path = path.replace(_get_altsep(path), sep) prefix, path = splitdrive(path) - # We need to be careful here. If the prefix is empty, and the path starts - # with a backslash, it could either be an absolute path on the current - # drive (\dir1\dir2\file) or a UNC filename (\\server\mount\dir1\file). It - # is therefore imperative NOT to collapse multiple backslashes blindly in - # that case. - # The code below preserves multiple backslashes when there is no drive - # letter. This means that the invalid filename \\\a\b is preserved - # unchanged, where a\\\b is normalised to a\b. It's not clear that there - # is any better behaviour for such edge cases. 
- if not prefix: - # No drive letter - preserve initial backslashes - while path[:1] == sep: - prefix = prefix + sep - path = path[1:] - else: - # We have a drive letter - collapse initial backslashes - if path.startswith(sep): - prefix = prefix + sep - path = path.lstrip(sep) + + # collapse initial backslashes + if path.startswith(sep): + prefix = prefix + sep + path = path.lstrip(sep) + comps = path.split(sep) i = 0 while i < len(comps): @@ -528,22 +575,22 @@ if not path: raise ValueError("no path specified") - start_list = abspath(start).split(sep) - path_list = abspath(path).split(sep) - if start_list[0].lower() != path_list[0].lower(): - unc_path, rest = splitunc(path) - unc_start, rest = splitunc(start) - if bool(unc_path) ^ bool(unc_start): - raise ValueError("Cannot mix UNC and non-UNC paths (%s and %s)" - % (path, start)) - else: - raise ValueError("path is on drive %s, start on drive %s" - % (path_list[0], start_list[0])) + + start_abs = abspath(start) + path_abs = abspath(path) + start_drive, start_rest = splitdrive(start_abs) + path_drive, path_rest = splitdrive(path_abs) + if normcase(start_drive) != normcase(path_drive): + error = "path is on mount '{0}', start on mount '{1}'".format( + path_drive, start_drive) + raise ValueError(error) + + start_list = start_rest.lower().split(sep) + path_list = path_rest.lower().split(sep) # Work out how much of the filepath is shared by start and path. 
for i in range(min(len(start_list), len(path_list))): - if start_list[i].lower() != path_list[i].lower(): + if start_list[i] != path_list[i]: break - else: i += 1 if isinstance(path, bytes): diff -r 5d6658b0eca8 Lib/test/test_ntpath.py --- a/Lib/test/test_ntpath.py Mon Apr 20 03:34:29 2009 -0700 +++ b/Lib/test/test_ntpath.py Mon Apr 20 03:35:39 2009 -0700 @@ -30,6 +30,7 @@ raise TestFailed("%s should return: %s but returned: %s" \ %(str(fn), str(wantResult), repr(gotResult))) + class TestNtpath(unittest.TestCase): def test_splitext(self): tester('ntpath.splitext("foo.ext")', ('foo', '.ext')) @@ -48,11 +49,9 @@ ('c:', '\\foo\\bar')) tester('ntpath.splitdrive("c:/foo/bar")', ('c:', '/foo/bar')) - - def test_splitunc(self): - tester('ntpath.splitunc("\\\\conky\\mountpoint\\foo\\bar")', + tester('ntpath.splitdrive("\\\\conky\\mountpoint\\foo\\bar")', ('\\\\conky\\mountpoint', '\\foo\\bar')) - tester('ntpath.splitunc("//conky/mountpoint/foo/bar")', + tester('ntpath.splitdrive("//conky/mountpoint/foo/bar")', ('//conky/mountpoint', '/foo/bar')) def test_split(self): @@ -62,10 +61,10 @@ tester('ntpath.split("c:\\")', ('c:\\', '')) tester('ntpath.split("\\\\conky\\mountpoint\\")', - ('\\\\conky\\mountpoint', '')) + ('\\\\conky\\mountpoint\\', '')) tester('ntpath.split("c:/")', ('c:/', '')) - tester('ntpath.split("//conky/mountpoint/")', ('//conky/mountpoint', '')) + tester('ntpath.split("//conky/mountpoint/")', ('//conky/mountpoint/', '')) def test_isabs(self): tester('ntpath.isabs("c:\\")', 1) @@ -116,6 +115,33 @@ tester("ntpath.join('a\\', '')", 'a\\') tester("ntpath.join('a\\', '', '', '', '')", 'a\\') + # from comment in ntpath.join + tester("ntpath.join('c:', '/a')", 'c:/a') + tester("ntpath.join('//computer/share', '/a')", '//computer/share/a') + tester("ntpath.join('c:/', '/a')", 'c:/a') + tester("ntpath.join('//computer/share/', '/a')", '//computer/share/a') + tester("ntpath.join('c:/a', '/b')", '/b') + tester("ntpath.join('//computer/share/a', '/b')", '/b') + 
tester("ntpath.join('c:', 'd:/')", 'd:/') + tester("ntpath.join('c:', '//computer/share/')", '//computer/share/') + tester("ntpath.join('//computer/share', 'd:/')", 'd:/') + tester("ntpath.join('//computer/share', '//computer/share/')", '//computer/share/') + tester("ntpath.join('c:/', 'd:/')", 'd:/') + tester("ntpath.join('c:/', '//computer/share/')", '//computer/share/') + tester("ntpath.join('//computer/share/', 'd:/')", 'd:/') + tester("ntpath.join('//computer/share/', '//computer/share/')", '//computer/share/') + + tester("ntpath.join('c:', '//computer/share/')", '//computer/share/') + tester("ntpath.join('c:/', '//computer/share/')", '//computer/share/') + tester("ntpath.join('c:/', '//computer/share/a/b')", '//computer/share/a/b') + + tester("ntpath.join('\\\\computer\\share\\', 'a', 'b')", '\\\\computer\\share\\a\\b') + tester("ntpath.join('\\\\computer\\share', 'a', 'b')", '\\\\computer\\share\\a\\b') + tester("ntpath.join('\\\\computer\\share', 'a\\b')", '\\\\computer\\share\\a\\b') + tester("ntpath.join('//computer/share/', 'a', 'b')", '//computer/share/a\\b') + tester("ntpath.join('//computer/share', 'a', 'b')", '//computer/share\\a\\b') + tester("ntpath.join('//computer/share', 'a/b')", '//computer/share\\a/b') + def test_normpath(self): tester("ntpath.normpath('A//////././//.//B')", r'A\B') tester("ntpath.normpath('A/./B')", r'A\B') @@ -178,10 +204,9 @@ # from any platform. try: import nt + tester('ntpath.abspath("C:\\")', "C:\\") except ImportError: pass - else: - tester('ntpath.abspath("C:\\")', "C:\\") def test_relpath(self): currentdir = os.path.split(os.getcwd())[-1]