diff -r 09fa20da2872 Doc/library/os.path.rst --- a/Doc/library/os.path.rst Mon Apr 20 01:19:55 2015 -0700 +++ b/Doc/library/os.path.rst Mon Apr 20 12:02:35 2015 +0300 @@ -223,7 +223,7 @@ the :mod:`glob` module.) Join one or more path components intelligently. The return value is the concatenation of *path* and any members of *\*paths* with exactly one - directory separator (``os.sep``) following each non-empty part except the + directory separator (:attr:`os.sep`) following each non-empty part except the last, meaning that the result will only end in a separator if the last part is empty. If a component is an absolute path, all previous components are thrown away and joining continues from the absolute path @@ -319,8 +319,8 @@ the :mod:`glob` module.) *path* is empty, both *head* and *tail* are empty. Trailing slashes are stripped from *head* unless it is the root (one or more slashes only). In all cases, ``join(head, tail)`` returns a path to the same location as *path* - (but the strings may differ). Also see the functions :func:`dirname` and - :func:`basename`. + (but the strings may differ). Also see the functions :func:`dirname`, + :func:`basename` and :func:`splitpath`. .. function:: splitdrive(path) @@ -349,6 +349,49 @@ the :mod:`glob` module.) returns ``('.cshrc', '')``. +.. function:: splitpath(path) + + Split the pathname *path* into a list of components using the same + algorithm as :func:`split`. Equivalent to:: + + def splitpath(path): + head, tail = split(path) + if head == path: + return [head] + return splitpath(head) + [tail] + + * The first element is the root component (for an absolute path), the bare + drive (for a drive-relative path) or an empty string (otherwise). + + * The last element is an empty string if the path name ended in a directory + separator, except when the path is a root directory. + + * ``join(*splitpath(path))`` returns a path to the same location as *path* + (but the strings may differ). 
+ + Examples on Windows:: + + >>> splitpath('C:\\Program Files\\Python\\python.exe') + ['C:\\', 'Program Files', 'Python', 'python.exe'] + >>> splitpath('C:Program Files\\Python\\python.exe') + ['C:', 'Program Files', 'Python', 'python.exe'] + >>> splitpath('Program Files\\Python\\python.exe') + ['', 'Program Files', 'Python', 'python.exe'] + >>> splitpath('C:\\Program Files\\Python\\') + ['C:\\', 'Program Files', 'Python', ''] + + Examples on Unix:: + + >>> splitpath('/usr/bin/python') + ['/', 'usr', 'bin', 'python'] + >>> splitpath('usr/bin/python') + ['', 'usr', 'bin', 'python'] + >>> splitpath('/usr/bin/') + ['/', 'usr', 'bin', ''] + + .. versionadded:: 3.5 + + .. function:: splitunc(path) .. deprecated:: 3.1 diff -r 09fa20da2872 Lib/genericpath.py --- a/Lib/genericpath.py Mon Apr 20 01:19:55 2015 -0700 +++ b/Lib/genericpath.py Mon Apr 20 12:02:35 2015 +0300 @@ -131,6 +131,19 @@ def _splitext(p, sep, altsep, extsep): return p, p[:0] +def _splitpath(p, split): + """Split the pathname into a list of components.""" + # Inefficient generic implementation + result = [] + while True: + head, tail = split(p) + if head == p: + result.append(head) + break + result.append(tail) + p = head + return result[::-1] + def _check_arg_types(funcname, *args): hasstr = hasbytes = False for s in args: diff -r 09fa20da2872 Lib/macpath.py --- a/Lib/macpath.py Mon Apr 20 01:19:55 2015 -0700 +++ b/Lib/macpath.py Mon Apr 20 12:02:35 2015 +0300 @@ -10,7 +10,7 @@ from genericpath import * "getatime","getctime", "islink","exists","lexists","isdir","isfile", "expanduser","expandvars","normpath","abspath", "curdir","pardir","sep","pathsep","defpath","altsep","extsep", - "devnull","realpath","supports_unicode_filenames"] + "devnull","realpath","supports_unicode_filenames", "splitpath"] # strings representing various path-related bits and pieces # These are primarily for export; internally, they are hardcoded. 
@@ -86,6 +86,11 @@ def split(s): return path, file +def splitpath(p): + return genericpath._splitpath(p, split) +splitpath.__doc__ = genericpath._splitpath.__doc__ + + def splitext(p): if isinstance(p, bytes): return genericpath._splitext(p, b':', altsep, b'.') diff -r 09fa20da2872 Lib/ntpath.py --- a/Lib/ntpath.py Mon Apr 20 01:19:55 2015 -0700 +++ b/Lib/ntpath.py Mon Apr 20 12:02:35 2015 +0300 @@ -5,6 +5,7 @@ Instead of importing this module directl module as os.path. """ +import re import os import sys import stat @@ -17,7 +18,7 @@ from genericpath import * "ismount", "expanduser","expandvars","normpath","abspath", "splitunc","curdir","pardir","sep","pathsep","defpath","altsep", "extsep","devnull","realpath","supports_unicode_filenames","relpath", - "samefile", "sameopenfile", "samestat", "commonpath"] + "samefile", "sameopenfile", "samestat", "commonpath", "splitpath"] # strings representing various path-related bits and pieces # These are primarily for export; internally, they are hardcoded. @@ -32,6 +33,9 @@ if 'ce' in sys.builtin_module_names: defpath = '\\Windows' devnull = 'nul' +seps_re = re.compile(r'[\\/]+') +bseps_re = re.compile(br'[\\/]+') + def _get_bothseps(path): if isinstance(path, bytes): return b'\\/' @@ -210,6 +214,22 @@ def split(p): return d + head, tail +def splitpath(p): + if isinstance(p, bytes): + seps = bseps_re + else: + seps = seps_re + anchor, path = splitdrive(p) + m = seps.match(path) + if m is not None: + anchor += m.group() + path = path[m.end():] + if not path: + return [anchor] + return [anchor] + seps.split(path) +splitpath.__doc__ = genericpath._splitpath.__doc__ + + # Split a path in root and extension. # The extension is everything starting at the last dot in the last # pathname component; the root is everything before that. 
diff -r 09fa20da2872 Lib/posixpath.py --- a/Lib/posixpath.py Mon Apr 20 01:19:55 2015 -0700 +++ b/Lib/posixpath.py Mon Apr 20 12:02:35 2015 +0300 @@ -23,7 +23,7 @@ from genericpath import * "samefile","sameopenfile","samestat", "curdir","pardir","sep","pathsep","defpath","altsep","extsep", "devnull","realpath","supports_unicode_filenames","relpath", - "commonpath"] + "commonpath", "splitpath"] # Strings representing various path-related bits and pieces. # These are primarily for export; internally, they are hardcoded. @@ -105,6 +105,22 @@ def split(p): return head, tail +def splitpath(p): + sep = _get_sep(p) + sepc = sep[0] + for i, c in enumerate(p): + if c != sepc: + break + else: + return [p] + result = [p[:i]] + result.extend(filter(None, p[i:].split(sep))) + if p[-1] == sepc: + result.append(p[:0]) + return result +splitpath.__doc__ = genericpath._splitpath.__doc__ + + # Split a path in root and extension. # The extension is everything starting at the last dot in the last # pathname component; the root is everything before that. 
diff -r 09fa20da2872 Lib/test/test_macpath.py --- a/Lib/test/test_macpath.py Mon Apr 20 01:19:55 2015 -0700 +++ b/Lib/test/test_macpath.py Mon Apr 20 12:02:35 2015 +0300 @@ -1,4 +1,6 @@ +import genericpath import macpath +import os from test import support, test_genericpath import unittest @@ -46,6 +48,29 @@ class MacPathTestCase(unittest.TestCase) self.assertEqual(split(b":conky:mountpoint:"), (b':conky:mountpoint', b'')) + def test_splitpath(self): + def check(path, expected): + self.assertEqual(genericpath._splitpath(path, macpath.split), + expected) + self.assertEqual(genericpath._splitpath(os.fsencode(path), + macpath.split), + [os.fsencode(p) for p in expected]) + self.assertEqual(macpath.splitpath(path), expected) + self.assertEqual(macpath.splitpath(os.fsencode(path)), + [os.fsencode(p) for p in expected]) + + check('foo', ['', 'foo']) + check(':foo', ['', 'foo']) + check('foo:', ['foo:']) + check('foo:bar', ['foo:', 'bar']) + check('foo::bar', ['foo:', 'bar']) + check('foo:::bar', ['foo:', '', 'bar']) + check('foo:bar:', ['foo:', 'bar', '']) + check('conky:mountpoint:foo:bar', + ['conky:', 'mountpoint', 'foo', 'bar']) + check(':', ['', '']) + check(':conky:mountpoint:', ['', 'conky', 'mountpoint', '']) + def test_join(self): join = macpath.join self.assertEqual(join('a', 'b'), ':a:b') diff -r 09fa20da2872 Lib/test/test_ntpath.py --- a/Lib/test/test_ntpath.py Mon Apr 20 01:19:55 2015 -0700 +++ b/Lib/test/test_ntpath.py Mon Apr 20 12:02:35 2015 +0300 @@ -1,3 +1,4 @@ +import genericpath import ntpath import os import sys @@ -31,6 +32,8 @@ def tester(fn, wantResult): wantResult = os.fsencode(wantResult) elif isinstance(wantResult, tuple): wantResult = tuple(os.fsencode(r) for r in wantResult) + elif isinstance(wantResult, list): + wantResult = [os.fsencode(r) for r in wantResult] gotResult = eval(fn) if wantResult != gotResult: @@ -107,6 +110,54 @@ class TestNtpath(unittest.TestCase): tester('ntpath.split("c:/")', ('c:/', '')) 
tester('ntpath.split("//conky/mountpoint/")', ('//conky/mountpoint/', '')) + def test_splitpath(self): + def check(path, expected): + tester(('genericpath._splitpath(%r, ntpath.split)' % path) + .replace('\\\\', '\\'), expected) + tester(('ntpath.splitpath(%r)' % path).replace('\\\\', '\\'), + expected) + + check('foo\\bar', ['', 'foo', 'bar']) + check('foo/bar', ['', 'foo', 'bar']) + check('', ['']) + check('foo\\bar\\', ['', 'foo', 'bar', '']) + check('foo/bar/', ['', 'foo', 'bar', '']) + check('c:foo\\bar', ['c:', 'foo', 'bar']) + check('c:foo/bar', ['c:', 'foo', 'bar']) + check('c:', ['c:']) + + check('\\foo\\bar', ['\\', 'foo', 'bar']) + check('/foo/bar', ['/', 'foo', 'bar']) + check('\\', ['\\']) + check('/', ['/']) + check('c:\\foo\\bar', ['c:\\', 'foo', 'bar']) + check('c:/foo/bar', ['c:/', 'foo', 'bar']) + check('c:\\', ['c:\\']) + check('c:/', ['c:/']) + check('c:\\\\foo\\bar', ['c:\\\\', 'foo', 'bar']) + check('c://foo/bar', ['c://', 'foo', 'bar']) + + check('\\\\conky\\mountpoint\\foo\\bar', + ['\\\\conky\\mountpoint\\', 'foo', 'bar']) + check('//conky/mountpoint/foo/bar', + ['//conky/mountpoint/', 'foo', 'bar']) + check('\\\\conky\\mountpoint\\', ['\\\\conky\\mountpoint\\']) + check('//conky/mountpoint/', ['//conky/mountpoint/']) + check('\\\\\\conky\\mountpoint\\foo\\bar', + ['\\\\\\', 'conky', 'mountpoint', 'foo', 'bar']) + check('///conky/mountpoint/foo/bar', + ['///', 'conky', 'mountpoint', 'foo', 'bar']) + check('\\\\conky\\\\mountpoint\\foo\\bar', + ['\\\\', 'conky', 'mountpoint', 'foo', 'bar']) + check('//conky//mountpoint/foo/bar', + ['//', 'conky', 'mountpoint', 'foo', 'bar']) + + def test_splitpath_long_path(self): + self.assertEqual(ntpath.splitpath('c:' + '/spam' * 10**5), + ['c:/'] + ['spam'] * 10**5) + self.assertEqual(ntpath.splitpath(b'c:' + b'/spam' * 10**5), + [b'c:/'] + [b'spam'] * 10**5) + def test_isabs(self): tester('ntpath.isabs("c:\\")', 1) tester('ntpath.isabs("\\\\conky\\mountpoint\\")', 1) diff -r 09fa20da2872 
Lib/test/test_posixpath.py --- a/Lib/test/test_posixpath.py Mon Apr 20 01:19:55 2015 -0700 +++ b/Lib/test/test_posixpath.py Mon Apr 20 12:02:35 2015 +0300 @@ -1,3 +1,4 @@ +import genericpath import itertools import os import posixpath @@ -70,6 +71,32 @@ class PosixPathTest(unittest.TestCase): self.assertEqual(posixpath.split(b"////foo"), (b"////", b"foo")) self.assertEqual(posixpath.split(b"//foo//bar"), (b"//foo", b"bar")) + def test_splitpath(self): + def check(path, expected): + self.assertEqual(genericpath._splitpath(path, posixpath.split), + expected) + self.assertEqual(genericpath._splitpath(os.fsencode(path), + posixpath.split), + [os.fsencode(p) for p in expected]) + self.assertEqual(posixpath.splitpath(path), expected) + self.assertEqual(posixpath.splitpath(os.fsencode(path)), + [os.fsencode(p) for p in expected]) + + check('foo/bar', ['', 'foo', 'bar']) + check('foo/bar/', ['', 'foo', 'bar', '']) + check('', ['']) + check('.', ['', '.']) + check('/foo/bar', ['/', 'foo', 'bar']) + check('/', ['/']) + check('////foo/bar', ['////', 'foo', 'bar']) + check('//foo//bar', ['//', 'foo', 'bar']) + + def test_splitpath_long_path(self): + self.assertEqual(list(posixpath.splitpath('/spam' * 10**5)), + ['/'] + ['spam'] * 10**5) + self.assertEqual(list(posixpath.splitpath(b'/spam' * 10**5)), + [b'/'] + [b'spam'] * 10**5) + def splitextTest(self, path, filename, ext): self.assertEqual(posixpath.splitext(path), (filename, ext)) self.assertEqual(posixpath.splitext("/" + path), ("/" + filename, ext))