diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst
index 92631b2..323d0bf 100644
--- a/Doc/library/os.path.rst
+++ b/Doc/library/os.path.rst
@@ -244,8 +244,11 @@ the :mod:`glob` module.)
 .. function:: realpath(path)
 
    Return the canonical path of the specified filename, eliminating any symbolic
-   links encountered in the path (if they are supported by the operating system).
+   links encountered in the path (if they are supported by the operating
+   system).
 
+   .. versionchanged:: 3.5
+      Symbolic links are now resolved on Windows.
 
 .. function:: relpath(path, start=os.curdir)
 
diff --git a/Lib/ntpath.py b/Lib/ntpath.py
index ee1a215..9cf1cd0 100644
--- a/Lib/ntpath.py
+++ b/Lib/ntpath.py
@@ -539,8 +539,132 @@ else:  # use native Windows method on Windows
             path = os.getcwd()
         return normpath(path)
 
-# realpath is a no-op on systems without islink support
-realpath = abspath
+try:
+    # GetFinalPathNameByHandle is available starting with Windows 6.0.
+    # Older Windows versions and non-Windows OS'es fall back to abspath.
+    if sys.getwindowsversion()[:2] >= (6, 0):
+        from nt import _getfinalpathname
+    else:
+        raise ImportError
+except (AttributeError, ImportError):
+    # realpath is a no-op on systems without _getfinalpathname support.
+    realpath = abspath
+else:
+    def realpath(path):
+        """Return the canonical path of `path`, resolving symbolic links."""
+        if isinstance(path, str):
+            prefix = '\\\\?\\'
+            unc_prefix = prefix + 'UNC'
+            new_unc_prefix = '\\'
+            cwd = os.getcwd()
+        else:
+            prefix = b'\\\\?\\'
+            unc_prefix = prefix + b'UNC'
+            new_unc_prefix = b'\\'
+            cwd = os.getcwdb()
+        had_prefix = path.startswith(prefix)
+        path, _ = _resolve_path(cwd, path, {})
+        # The path returned by _getfinalpathname will always start with \\?\ -
+        # strip off that prefix unless it was already provided on the original
+        # path.
+        if not had_prefix:
+            # For UNC paths, the prefix will actually be \\?\UNC - handle that
+            # case as well.
+            if path.startswith(unc_prefix):
+                path = new_unc_prefix + path[len(unc_prefix):]
+            elif path.startswith(prefix):
+                path = path[len(prefix):]
+        return path
+
+    def _resolve_path(path, rest, seen):
+        """Resolve `rest` relative to `path`, following symbolic links.
+
+        Return a (resolved_path, ok) tuple.  `ok` is False when a symbolic
+        link loop prevented full resolution.  `seen` maps the normcased
+        path of each link visited so far to its resolved target, or to None
+        while that link is still being resolved.
+        """
+        # Windows normalizes the path before resolving symlinks; be sure to
+        # follow the same behavior.
+        rest = normpath(rest)
+
+        if isinstance(rest, str):
+            sep = '\\'
+        else:
+            sep = b'\\'
+
+        if isabs(rest):
+            drive, rest = splitdrive(rest)
+            if not drive:
+                # A rooted path without a drive (such as the target of a
+                # root-relative symlink) stays on the drive of `path`.
+                drive = splitdrive(path)[0]
+            path = drive + sep
+            rest = rest[1:]
+
+        while rest:
+            name, _, rest = rest.partition(sep)
+            new_path = join(path, name) if path else name
+            if exists(new_path):
+                if not rest:
+                    # The whole path exists.  Resolve it using the OS.
+                    path = _getfinalpathname(new_path)
+                else:
+                    # The OS can resolve `new_path`; keep traversing the path.
+                    path = new_path
+            elif not lexists(new_path):
+                # `new_path` does not exist on the filesystem at all.  Use the
+                # OS to resolve `path`, if it exists, and then append the
+                # remainder.
+                if exists(path):
+                    path = _getfinalpathname(path)
+                rest = join(name, rest) if rest else name
+                return join(path, rest), True
+            else:
+                # We have a symbolic link that the OS cannot resolve. Try to
+                # resolve it ourselves.
+
+                # On Windows, symbolic link resolution can be partially or
+                # fully disabled [1]. The end result of a disabled symlink
+                # appears the same as a broken symlink (lexists() returns True
+                # but exists() returns False). And in both cases, the link can
+                # still be read using readlink(). Call stat() and check the
+                # resulting error code to ensure we don't circumvent the
+                # Windows symbolic link restrictions.
+                # [1] https://technet.microsoft.com/en-us/library/cc754077.aspx
+                try:
+                    os.stat(new_path)
+                except OSError as e:
+                    # WinError 1463: The symbolic link cannot be followed
+                    # because its type is disabled.
+                    if e.winerror == 1463:
+                        raise
+
+                key = normcase(new_path)
+                if key in seen:
+                    # This link has already been seen; try to use the
+                    # previously resolved value.
+                    path = seen[key]
+                    if path is None:
+                        # It has not yet been resolved, which means we must
+                        # have a symbolic link loop. Return what we have
+                        # resolved so far plus the remainder of the path.
+                        return ((join(new_path, rest) if rest else new_path),
+                                False)
+                else:
+                    # Mark this link as in the process of being resolved.
+                    seen[key] = None
+                    # Try to resolve it.
+                    path, ok = _resolve_path(path, os.readlink(new_path), seen)
+                    if ok:
+                        # Resolution succeeded; store the resolved value.
+                        seen[key] = path
+                    else:
+                        # Resolution failed; punt.
+                        return (join(path, rest) if rest else path), False
+        return path, True
+
+
 # Win9x family and earlier have no Unicode filename support.
 supports_unicode_filenames = (hasattr(sys, "getwindowsversion") and
                               sys.getwindowsversion()[3] >= 2)
@@ -589,23 +713,6 @@ def relpath(path, start=None):
         raise
 
 
-# determine if two files are in fact the same file
-try:
-    # GetFinalPathNameByHandle is available starting with Windows 6.0.
-    # Windows XP and non-Windows OS'es will mock _getfinalpathname.
-    if sys.getwindowsversion()[:2] >= (6, 0):
-        from nt import _getfinalpathname
-    else:
-        raise ImportError
-except (AttributeError, ImportError):
-    # On Windows XP and earlier, two files are the same if their absolute
-    # pathnames are the same.
-    # Non-Windows operating systems fake this method with an XP
-    # approximation.
-    def _getfinalpathname(f):
-        return normcase(abspath(f))
-
-
 try:
     # The genericpath.isdir implementation uses os.stat and checks the mode
     # attribute to tell whether or not the path is a directory.
diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py
index c8d84a7..2192541 100644
--- a/Lib/test/test_ntpath.py
+++ b/Lib/test/test_ntpath.py
@@ -3,11 +3,23 @@ import os
 import sys
 import unittest
 import warnings
 from test.support import TestFailed
 from test import support, test_genericpath
 from tempfile import TemporaryFile
 
 
+ABSTFN = os.path.abspath(support.TESTFN)
+ABSTFNB = os.fsencode(ABSTFN)
+
+
+try:
+    ntpath._getfinalpathname
+except AttributeError:
+    HAVE_GETFINALPATHNAME = False
+else:
+    HAVE_GETFINALPATHNAME = True
+
+
 def tester(fn, wantResult):
     fn = fn.replace("\\", "\\\\")
     gotResult = eval(fn)
@@ -39,6 +51,15 @@ def tester(fn, wantResult):
 
 
 class TestNtpath(unittest.TestCase):
+
+    def setUp(self):
+        self.tearDown()
+
+    def tearDown(self):
+        for suffix in ["", "1", "2", "a", "c", "y"]:
+            support.rmtree(support.TESTFN + suffix)
+            support.unlink(support.TESTFN + suffix)
+
     def test_splitext(self):
         tester('ntpath.splitext("foo.ext")', ('foo', '.ext'))
         tester('ntpath.splitext("/foo/foo.ext")', ('/foo/foo', '.ext'))
@@ -213,6 +234,148 @@ class TestNtpath(unittest.TestCase):
         tester("ntpath.normpath('\\\\.\\NUL')", r'\\.\NUL')
         tester("ntpath.normpath('\\\\?\\D:/XY\\Z')", r'\\?\D:/XY\Z')
 
+    def test_realpath_curdir(self):
+        tester("ntpath.realpath('.')", os.getcwd())
+        tester("ntpath.realpath('./.')", os.getcwd())
+        tester("ntpath.realpath('/'.join(['.'] * 100))", os.getcwd())
+        tester("ntpath.realpath('.\\.')", os.getcwd())
+        tester("ntpath.realpath('\\'.join(['.'] * 100))", os.getcwd())
+
+    def test_realpath_pardir(self):
+        tester("ntpath.realpath('..')", os.path.dirname(os.getcwd()))
+        tester("ntpath.realpath('../..')",
+               os.path.dirname(os.path.dirname(os.getcwd())))
+        tester("ntpath.realpath('/'.join(['..'] * 50))",
+               os.path.splitdrive(os.getcwd())[0] + '\\')
+        tester("ntpath.realpath('..\\..')",
+               os.path.dirname(os.path.dirname(os.getcwd())))
+        tester("ntpath.realpath('\\'.join(['..'] * 50))",
+               os.path.splitdrive(os.getcwd())[0] + '\\')
+
+    @support.skip_unless_symlink
+    @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
+    def test_realpath_basic(self):
+        try:
+            os.symlink(ABSTFN + "1", ABSTFN)
+            self.assertEqual(ntpath.realpath(ABSTFN), ABSTFN + "1")
+            self.assertEqual(ntpath.realpath(ABSTFNB),
+                             os.fsencode(ABSTFN + "1"))
+        finally:
+            support.unlink(ABSTFN)
+
+    @support.skip_unless_symlink
+    @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
+    def test_realpath_relative(self):
+        try:
+            os.symlink(os.path.relpath(ABSTFN + "1"), ABSTFN)
+            self.assertEqual(ntpath.realpath(ABSTFN), ABSTFN + "1")
+        finally:
+            support.unlink(ABSTFN)
+
+    @support.skip_unless_symlink
+    @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
+    def test_realpath_broken_symlinks(self):
+        # Record the starting directory before entering the try block so the
+        # finally clause can always restore it.
+        old_path = os.path.abspath('.')
+        try:
+            os.mkdir(ABSTFN)
+            os.chdir(ABSTFN)
+            os.mkdir("subdir")
+            os.chdir("subdir")
+            os.symlink(".", "recursive")
+            os.symlink("..", "parent")
+            os.chdir("..")
+            os.symlink(".", "self")
+            os.symlink("missing", "broken")
+            os.symlink(r"broken\bar", "broken1")
+            os.symlink(r"self\self\broken", "broken2")
+            os.symlink(r"subdir\parent\subdir\parent\broken", "broken3")
+            os.symlink(ABSTFN + r"\broken", "broken4")
+            os.symlink(r"recursive\..\broken", "broken5")
+
+            self.assertEqual(ntpath.realpath("broken"),
+                             ABSTFN + r"\missing")
+            self.assertEqual(ntpath.realpath(r"broken\foo"),
+                             ABSTFN + r"\missing\foo")
+            self.assertEqual(ntpath.realpath(r"broken1"),
+                             ABSTFN + r"\missing\bar")
+            self.assertEqual(ntpath.realpath(r"broken1\baz"),
+                             ABSTFN + r"\missing\bar\baz")
+            self.assertEqual(ntpath.realpath("broken2"),
+                             ABSTFN + r"\missing")
+            self.assertEqual(ntpath.realpath("broken3"),
+                             ABSTFN + r"\missing")
+            self.assertEqual(ntpath.realpath("broken4"),
+                             ABSTFN + r"\missing")
+            self.assertEqual(ntpath.realpath("broken5"),
+                             ABSTFN + r"\missing")
+
+            self.assertEqual(ntpath.realpath(b"broken"),
+                             ABSTFNB + rb"\missing")
+            self.assertEqual(ntpath.realpath(rb"broken\foo"),
+                             ABSTFNB + rb"\missing\foo")
+            self.assertEqual(ntpath.realpath(rb"broken1"),
+                             ABSTFNB + rb"\missing\bar")
+            self.assertEqual(ntpath.realpath(rb"broken1\baz"),
+                             ABSTFNB + rb"\missing\bar\baz")
+            self.assertEqual(ntpath.realpath(b"broken2"),
+                             ABSTFNB + rb"\missing")
+            self.assertEqual(ntpath.realpath(rb"broken3"),
+                             ABSTFNB + rb"\missing")
+            self.assertEqual(ntpath.realpath(b"broken4"),
+                             ABSTFNB + rb"\missing")
+            self.assertEqual(ntpath.realpath(b"broken5"),
+                             ABSTFNB + rb"\missing")
+        finally:
+            os.chdir(old_path)
+            support.rmtree(ABSTFN)
+
+    @support.skip_unless_symlink
+    @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
+    def test_realpath_symlink_loops(self):
+        # Bug #930024, return the path unchanged if we get into an infinite
+        # symlink loop.
+        old_path = os.path.abspath('.')
+        try:
+            os.symlink(ABSTFN, ABSTFN)
+            self.assertEqual(ntpath.realpath(ABSTFN), ABSTFN)
+
+            os.symlink(ABSTFN + "1", ABSTFN + "2")
+            os.symlink(ABSTFN + "2", ABSTFN + "1")
+            self.assertEqual(ntpath.realpath(ABSTFN + "1"), ABSTFN + "1")
+            self.assertEqual(ntpath.realpath(ABSTFN + "2"), ABSTFN + "2")
+
+            self.assertEqual(ntpath.realpath(ABSTFN + "1\\x"), ABSTFN + "1\\x")
+            self.assertEqual(ntpath.realpath(ABSTFN + "1\\.."),
+                             os.path.dirname(ABSTFN))
+            self.assertEqual(ntpath.realpath(ABSTFN + "1\\..\\x"),
+                             os.path.dirname(ABSTFN) + "\\x")
+            os.symlink(ABSTFN + "x", ABSTFN + "y")
+            self.assertEqual(ntpath.realpath(ABSTFN + "1\\..\\" +
+                                             os.path.basename(ABSTFN) + "y"),
+                             ABSTFN + "x")
+            self.assertEqual(ntpath.realpath(ABSTFN + "1\\..\\" +
+                                             os.path.basename(ABSTFN) + "1"),
+                             ABSTFN + "1")
+
+            os.symlink(os.path.basename(ABSTFN) + "a\\b", ABSTFN + "a")
+            self.assertEqual(ntpath.realpath(ABSTFN + "a"), ABSTFN + "a\\b")
+
+            os.symlink("..\\" + os.path.basename(os.path.dirname(ABSTFN)) +
+                       "\\" + os.path.basename(ABSTFN) + "c", ABSTFN + "c")
+            self.assertEqual(ntpath.realpath(ABSTFN + "c"), ABSTFN + "c")
+
+            # Test using relative path as well.
+            os.chdir(os.path.dirname(ABSTFN))
+            self.assertEqual(ntpath.realpath(os.path.basename(ABSTFN)), ABSTFN)
+        finally:
+            os.chdir(old_path)
+            support.unlink(ABSTFN)
+            support.unlink(ABSTFN + "1")
+            support.unlink(ABSTFN + "2")
+            support.unlink(ABSTFN + "y")
+            support.unlink(ABSTFN + "c")
+            support.unlink(ABSTFN + "a")
+
     def test_expandvars(self):
         with support.EnvironmentVarGuard() as env:
             env.clear()
diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py
index e28a3db..e28a50a 100644
--- a/Lib/test/test_os.py
+++ b/Lib/test/test_os.py
@@ -2568,10 +2568,7 @@ class OSErrorTests(unittest.TestCase):
         if hasattr(os, "lchmod"):
             funcs.append((self.filenames, os.lchmod, 0o777))
         if hasattr(os, "readlink"):
-            if sys.platform == "win32":
-                funcs.append((self.unicode_filenames, os.readlink,))
-            else:
-                funcs.append((self.filenames, os.readlink,))
+            funcs.append((self.filenames, os.readlink,))
 
         for filenames, func, *func_args in funcs:
             for name in filenames:
diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c
index 118a380..347bc9c 100644
--- a/Modules/posixmodule.c
+++ b/Modules/posixmodule.c
@@ -1586,12 +1586,12 @@ attributes_from_dir_w(LPCWSTR pszFile, BY_HANDLE_FILE_INFORMATION *info, ULONG *
 static int has_GetFinalPathNameByHandle = -1;
 static DWORD (CALLBACK *Py_GetFinalPathNameByHandleW)(HANDLE, LPWSTR, DWORD,
                                                       DWORD);
+static DWORD (CALLBACK *Py_GetFinalPathNameByHandleA)(HANDLE, LPSTR, DWORD,
+                                                      DWORD);
 static int
 check_GetFinalPathNameByHandle()
 {
     HINSTANCE hKernel32;
-    DWORD (CALLBACK *Py_GetFinalPathNameByHandleA)(HANDLE, LPSTR, DWORD,
-                                                   DWORD);
 
     /* only recheck */
     if (-1 == has_GetFinalPathNameByHandle)
@@ -4847,7 +4847,7 @@ posix__getfullpathname(PyObject *self, PyObject *args)
 /*[clinic input]
 os._getfinalpathname
 
-    path: unicode
+    path : path_t
     /
 
 A helper function for samepath on windows.
@@ -4863,81 +4863,131 @@ PyDoc_STRVAR(os__getfinalpathname__doc__,
     {"_getfinalpathname", (PyCFunction)os__getfinalpathname, METH_VARARGS, os__getfinalpathname__doc__},
 
 static PyObject *
-os__getfinalpathname_impl(PyModuleDef *module, PyObject *path);
+os__getfinalpathname_impl(PyModuleDef *module, path_t *path);
 
 static PyObject *
 os__getfinalpathname(PyModuleDef *module, PyObject *args)
 {
     PyObject *return_value = NULL;
-    PyObject *path;
+    path_t path = PATH_T_INITIALIZE("_getfinalpathname", "path", 0, 0);
 
     if (!PyArg_ParseTuple(args,
-        "U:_getfinalpathname",
-        &path))
+        "O&:_getfinalpathname",
+        path_converter, &path))
         goto exit;
-    return_value = os__getfinalpathname_impl(module, path);
+    return_value = os__getfinalpathname_impl(module, &path);
 
 exit:
+    /* Cleanup for path */
+    path_cleanup(&path);
+
     return return_value;
 }
 
 static PyObject *
-os__getfinalpathname_impl(PyModuleDef *module, PyObject *path)
-/*[clinic end generated code: output=4563c6eacf1b0881 input=71d5e89334891bf4]*/
+os__getfinalpathname_impl(PyModuleDef *module, path_t *path)
+/*[clinic end generated code: output=73d616048bd8be5d input=bb30e839f01d1d5f]*/
 {
     HANDLE hFile;
     int buf_size;
-    wchar_t *target_path;
+    char *narrow_buffer = NULL;
+    wchar_t *wide_buffer = NULL;
     int result_length;
     PyObject *result;
-    wchar_t *path_wchar;
-
-    path_wchar = PyUnicode_AsUnicode(path);
-    if (path_wchar == NULL)
-        return NULL;
 
-    if(!check_GetFinalPathNameByHandle()) {
-        /* If the OS doesn't have GetFinalPathNameByHandle, return a
-           NotImplementedError. */
+    if (!check_GetFinalPathNameByHandle()) {
         return PyErr_Format(PyExc_NotImplementedError,
             "GetFinalPathNameByHandle not available on this platform");
     }
 
-    hFile = CreateFileW(
-        path_wchar,
-        0, /* desired access */
-        0, /* share mode */
-        NULL, /* security attributes */
-        OPEN_EXISTING,
-        /* FILE_FLAG_BACKUP_SEMANTICS is required to open a directory */
-        FILE_FLAG_BACKUP_SEMANTICS,
-        NULL);
-
-    if(hFile == INVALID_HANDLE_VALUE)
-        return win32_error_object("CreateFileW", path);
+    if (path->wide)
+        hFile = CreateFileW(
+            path->wide,
+            0, /* desired access */
+            0, /* share mode */
+            NULL, /* security attributes */
+            OPEN_EXISTING,
+            /* FILE_FLAG_BACKUP_SEMANTICS is required to open a directory */
+            FILE_FLAG_BACKUP_SEMANTICS,
+            NULL);
+    else
+        hFile = CreateFileA(
+            path->narrow,
+            0, /* desired access */
+            0, /* share mode */
+            NULL, /* security attributes */
+            OPEN_EXISTING,
+            /* FILE_FLAG_BACKUP_SEMANTICS is required to open a directory */
+            FILE_FLAG_BACKUP_SEMANTICS,
+            NULL);
+
+    if (hFile == INVALID_HANDLE_VALUE)
+        return path_error(path);
 
     /* We have a good handle to the target, use it to determine the
        target path name. */
-    buf_size = Py_GetFinalPathNameByHandleW(hFile, 0, 0, VOLUME_NAME_NT);
+    /* Size the buffer for the VOLUME_NAME_DOS form that is fetched below. */
+    if (path->wide)
+        buf_size = Py_GetFinalPathNameByHandleW(hFile, 0, 0, VOLUME_NAME_DOS);
+    else
+        buf_size = Py_GetFinalPathNameByHandleA(hFile, 0, 0, VOLUME_NAME_DOS);
 
-    if(!buf_size)
-        return win32_error_object("GetFinalPathNameByHandle", path);
+    if (!buf_size) {
+        /* Set the exception before CloseHandle() can change the last error. */
+        path_error(path);
+        CloseHandle(hFile);
+        return NULL;
+    }
 
-    target_path = (wchar_t *)PyMem_Malloc((buf_size+1)*sizeof(wchar_t));
-    if(!target_path)
-        return PyErr_NoMemory();
+    if (path->wide) {
+        wide_buffer = (wchar_t *)PyMem_Malloc((buf_size + 1)
+                                              * sizeof(wchar_t));
+        if (!wide_buffer) {
+            CloseHandle(hFile);
+            return PyErr_NoMemory();
+        }
 
-    result_length = Py_GetFinalPathNameByHandleW(hFile, target_path,
-                                                 buf_size, VOLUME_NAME_DOS);
-    if(!result_length)
-        return win32_error_object("GetFinalPathNamyByHandle", path);
+        result_length = Py_GetFinalPathNameByHandleW(hFile, wide_buffer,
+                                                     buf_size,
+                                                     VOLUME_NAME_DOS);
+    }
+    else {
+        narrow_buffer = (char *)PyMem_Malloc((buf_size + 1) * sizeof(char));
+        if (!narrow_buffer) {
+            CloseHandle(hFile);
+            return PyErr_NoMemory();
+        }
+
+        result_length = Py_GetFinalPathNameByHandleA(hFile, narrow_buffer,
+                                                     buf_size,
+                                                     VOLUME_NAME_DOS);
+    }
+
+    if (!result_length) {
+        path_error(path);
+        PyMem_Free(wide_buffer);
+        PyMem_Free(narrow_buffer);
+        CloseHandle(hFile);
+        return NULL;
+    }
 
-    if(!CloseHandle(hFile))
-        return win32_error_object("CloseHandle", path);
+    if (!CloseHandle(hFile)) {
+        path_error(path);
+        PyMem_Free(wide_buffer);
+        PyMem_Free(narrow_buffer);
+        return NULL;
+    }
 
-    target_path[result_length] = 0;
-    result = PyUnicode_FromWideChar(target_path, result_length);
-    PyMem_Free(target_path);
+    if (path->wide) {
+        wide_buffer[result_length] = 0;
+        result = PyUnicode_FromWideChar(wide_buffer, result_length);
+        PyMem_Free(wide_buffer);
+    }
+    else {
+        narrow_buffer[result_length] = 0;
+        result = PyBytes_FromStringAndSize(narrow_buffer, result_length);
+        PyMem_Free(narrow_buffer);
+    }
     return result;
 
 }
@@ -10056,11 +10106,11 @@ exit:
 static PyObject *
 win_readlink(PyObject *self, PyObject *args, PyObject *kwargs)
 {
-    wchar_t *path;
+    path_t path = PATH_T_INITIALIZE("readlink", "path", 0, 0);
     DWORD n_bytes_returned;
     DWORD io_result;
-    PyObject *po, *result;
-        int dir_fd;
+    PyObject *unicode_result, *result = NULL;
+    int dir_fd;
     HANDLE reparse_point_handle;
 
     char target_buffer[MAXIMUM_REPARSE_DATA_BUFFER_SIZE];
@@ -10069,33 +10119,40 @@ win_readlink(PyObject *self, PyObject *args, PyObject *kwargs)
 
     static char *keywords[] = {"path", "dir_fd", NULL};
 
-    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "U|$O&:readlink", keywords,
-                          &po,
-                          dir_fd_unavailable, &dir_fd
-                          ))
-        return NULL;
-
-    path = PyUnicode_AsUnicode(po);
-    if (path == NULL)
-        return NULL;
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O&|$O&:readlink", keywords,
+                                     path_converter, &path,
+                                     dir_fd_unavailable, &dir_fd))
+        goto exit;
 
     /* First get a handle to the reparse point */
     Py_BEGIN_ALLOW_THREADS
-    reparse_point_handle = CreateFileW(
-        path,
-        0,
-        0,
-        0,
-        OPEN_EXISTING,
-        FILE_FLAG_OPEN_REPARSE_POINT|FILE_FLAG_BACKUP_SEMANTICS,
-        0);
+    if (path.wide)
+        reparse_point_handle = CreateFileW(
+            path.wide,
+            0,
+            0,
+            0,
+            OPEN_EXISTING,
+            FILE_FLAG_OPEN_REPARSE_POINT | FILE_FLAG_BACKUP_SEMANTICS,
+            0);
+    else
+        reparse_point_handle = CreateFileA(
+            path.narrow,
+            0,
+            0,
+            0,
+            OPEN_EXISTING,
+            FILE_FLAG_OPEN_REPARSE_POINT | FILE_FLAG_BACKUP_SEMANTICS,
+            0);
     Py_END_ALLOW_THREADS
 
-    if (reparse_point_handle==INVALID_HANDLE_VALUE)
-        return win32_error_object("readlink", po);
+    if (reparse_point_handle == INVALID_HANDLE_VALUE) {
+        result = path_error(&path);
+        goto exit;
+    }
 
     Py_BEGIN_ALLOW_THREADS
-    /* New call DeviceIoControl to read the reparse point */
+    /* Now call DeviceIoControl to read the reparse point */
     io_result = DeviceIoControl(
         reparse_point_handle,
         FSCTL_GET_REPARSE_POINT,
@@ -10107,20 +10164,32 @@ win_readlink(PyObject *self, PyObject *args, PyObject *kwargs)
     CloseHandle(reparse_point_handle);
     Py_END_ALLOW_THREADS
 
-    if (io_result==0)
-        return win32_error_object("readlink", po);
+    if (io_result == 0) {
+        result = path_error(&path);
+        goto exit;
+    }
 
     if (rdb->ReparseTag != IO_REPARSE_TAG_SYMLINK)
     {
         PyErr_SetString(PyExc_ValueError,
                 "not a symbolic link");
-        return NULL;
+        goto exit;
     }
     print_name = rdb->SymbolicLinkReparseBuffer.PathBuffer +
                  rdb->SymbolicLinkReparseBuffer.PrintNameOffset;
 
-    result = PyUnicode_FromWideChar(print_name,
-                    rdb->SymbolicLinkReparseBuffer.PrintNameLength/2);
+    unicode_result = PyUnicode_FromWideChar(print_name,
+                    rdb->SymbolicLinkReparseBuffer.PrintNameLength / 2);
+    if (unicode_result) {
+        if (path.wide)
+            result = unicode_result;
+        else {
+            result = PyUnicode_EncodeFSDefault(unicode_result);
+            Py_DECREF(unicode_result);
+        }
+    }
+exit:
+    path_cleanup(&path);
     return result;
 }
 