diff --git a/Include/pyerrors.h b/Include/pyerrors.h
index fb6281c..ce72f19 100644
--- a/Include/pyerrors.h
+++ b/Include/pyerrors.h
@@ -251,6 +251,8 @@ PyAPI_FUNC(PyObject *) PyErr_SetFromWindowsErrWithUnicodeFilename(
     int, const Py_UNICODE *);
 #endif
 PyAPI_FUNC(PyObject *) PyErr_SetFromWindowsErr(int);
+PyAPI_FUNC(PyObject *) PyErr_SetFromWindowsErrWithFilenameObject(
+    int, PyObject *);
 PyAPI_FUNC(PyObject *) PyErr_SetExcFromWindowsErrWithFilenameObject(
     PyObject *,int, PyObject *);
 PyAPI_FUNC(PyObject *) PyErr_SetExcFromWindowsErrWithFilename(
diff --git a/Lib/ntpath.py b/Lib/ntpath.py
index 826be87..a86ca01 100644
--- a/Lib/ntpath.py
+++ b/Lib/ntpath.py
@@ -596,8 +596,6 @@ else:  # use native Windows method on Windows
             path = os.getcwd()
         return normpath(path)
 
-# realpath is a no-op on systems without islink support
-realpath = abspath
 # Win9x family and earlier have no Unicode filename support.
 supports_unicode_filenames = (hasattr(sys, "getwindowsversion") and
                               sys.getwindowsversion()[3] >= 2)
@@ -646,19 +644,74 @@ try:
     # Windows XP and non-Windows OS'es will mock _getfinalpathname.
     if sys.getwindowsversion()[:2] >= (6, 0):
         from nt import _getfinalpathname
+        def realpath(path):
+            """Return the canonical path, resolving symbolic links."""
+            path = abspath(path)
+            prefix = '\\\\?\\'
+            unc_prefix = prefix + 'UNC'
+            new_unc_prefix = '\\'
+            if not isinstance(path, str):
+                fs_encoding = sys.getfilesystemencoding()
+                prefix = prefix.encode(fs_encoding)
+                unc_prefix = unc_prefix.encode(fs_encoding)
+                new_unc_prefix = new_unc_prefix.encode(fs_encoding)
+            had_prefix = path.startswith(prefix)
+            tail_path = None
+            history = set()
+            while not exists(path):
+                # strip off and save any trailing part of the path that doesn't
+                # actually exist on the filesystem
+                while not lexists(path):
+                    head, tail = split(path)
+                    tail_path = join(tail, tail_path) if tail_path else tail
+                    if head == path:
+                        # the path doesn't exist at all - simply normalize it
+                        # and return
+                        return normpath(join(head, tail_path))
+                    path = head
+                if not exists(path):
+                    # the symbolic link is broken, but we can use os.readlink
+                    # to determine what it points to
+                    key = normcase(path)
+                    if key in history:
+                        raise IOError(None, 'Cannot resolve recursive symlink',
+                                      path)
+                    history.add(key)
+                    path = join(dirname(path), os.readlink(path))
+            path = _getfinalpathname(path)
+            # the path returned by _getfinalpathname will always start with
+            # \\?\ - strip off that prefix unless it was already provided on
+            # the original path
+            if not had_prefix:
+                # for UNC paths, the prefix will actually be \\?\UNC - handle
+                # that case as well
+                if path.startswith(unc_prefix):
+                    path = new_unc_prefix + path[len(unc_prefix):]
+                else:
+                    path = path[len(prefix):]
+            # return the final pathname, plus any trailing part which does not
+            # exist on the filesystem
+            return join(path, normpath(tail_path)) if tail_path else path
     else:
         raise ImportError
 except (AttributeError, ImportError):
-    # On Windows XP and earlier, two files are the same if their absolute
-    # pathnames are the same.
+    # On Windows XP and earlier, two files are the same if their normalized
+    # absolute pathnames are the same.
     # Non-Windows operating systems fake this method with an XP
     # approximation.
-    def _getfinalpathname(f):
-        return normcase(abspath(f))
+    def _getfinalpathname(path, force_unicode=False):
+        if force_unicode and not isinstance(path, str):
+            path = bytes(path).decode(sys.getfilesystemencoding())
+        return normcase(normpath(abspath(path)))
+    # realpath is a no-op on systems without islink support
+    realpath = abspath
 
 def samefile(f1, f2):
     "Test whether two pathnames reference the same actual file"
-    return _getfinalpathname(f1) == _getfinalpathname(f2)
+    # force _getfinalpathname to always use Unicode even if bytes objects were
+    # supplied to prevent running into path length limitations with the Windows
+    # ASCII API
+    return _getfinalpathname(f1, True) == _getfinalpathname(f2, True)
 
 
 try:
diff --git a/Misc/ACKS b/Misc/ACKS
index 9c2483c..b2e03f1 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -409,6 +409,7 @@ Manus Hand
 Milton L. Hankins
 Stephen Hansen
 Barry Hantman
+Daniel Harding
 Lynda Hardman
 Derek Harland
 Jason Harper
diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c
index 8b8fc8f..bdd7f8a 100644
--- a/Modules/posixmodule.c
+++ b/Modules/posixmodule.c
@@ -593,6 +593,63 @@ win32_decode_filename(PyObject *obj)
     }
     return unicode;
 }
+
+typedef struct
+{
+    /* False if the buffer is raw bytes (in the system code page); true if the
+       buffer is in wide characters (UTF-16). */
+    int is_unicode;
+    union {
+        void *raw;
+        CHAR *bytes;
+        WCHAR *utf16;
+    } buffer;
+    /* The Python object from which the data was derived.  May be NULL. */
+    PyObject *source;
+} WindowsString;
+
+static int
+WindowsStringConverter(PyObject* arg, void* addr) {
+    WindowsString *output;
+    PyObject *bytes;
+    Py_ssize_t length;
+
+    output = (WindowsString*)addr;
+    if (arg == NULL)
+    {
+        PyMem_Free(output->buffer.raw);
+        return 1;
+    }
+    if (PyUnicode_Check(arg)) {
+        output->is_unicode = 1;
+        output->buffer.utf16 = PyUnicode_AsWideCharString(arg, NULL);
+        if (!output->buffer.utf16)
+            return 0;
+    }
+    else
+    {
+        bytes = PyBytes_FromObject(arg);
+        if (!bytes)
+            return 0;
+        if (win32_warn_bytes_api()) {
+            Py_DECREF(bytes);
+            return 0;
+        }
+        output->is_unicode = 0;
+        length = PyBytes_GET_SIZE(bytes);
+        output->buffer.raw = PyMem_Malloc(length + 1);
+        if (!output->buffer.raw) {
+            Py_DECREF(bytes);
+            PyErr_NoMemory();
+            return 0;
+        }
+        memcpy(output->buffer.raw, PyBytes_AS_STRING(bytes), length + 1);
+        Py_DECREF(bytes);
+    }
+    output->source = arg;
+    return Py_CLEANUP_SUPPORTED;
+}
+
 #endif /* MS_WINDOWS */
 
 /* Return a dictionary corresponding to the POSIX environment table */
@@ -759,6 +816,27 @@ win32_error_unicode(char* function, wchar_t* filename)
 }
 
 static PyObject *
+win32_error_string(char *function, WindowsString *filename)
+{
+    /* XXX - see win32_error for comments on 'function' */
+    errno = GetLastError();
+    if (filename)
+    {
+        if (filename->source)
+            return PyErr_SetFromWindowsErrWithFilenameObject(errno,
+                                                             filename->source);
+        else if (filename->is_unicode)
+            return PyErr_SetFromWindowsErrWithUnicodeFilename(errno,
+                                                    filename->buffer.utf16);
+        else
+            return PyErr_SetFromWindowsErrWithFilename(errno,
+                                                       filename->buffer.bytes);
+    }
+    else
+        return PyErr_SetFromWindowsErr(errno);
+}
+
+static PyObject *
 win32_error_object(char* function, PyObject* filename)
 {
     /* XXX - see win32_error for comments on 'function' */
@@ -1161,28 +1239,32 @@ attributes_from_dir_w(LPCWSTR pszFile, BY_HANDLE_FILE_INFORMATION *info, ULONG *
 }
 
 /* Grab GetFinalPathNameByHandle dynamically from kernel32 */
-static int has_GetFinalPathNameByHandle = 0;
+/* Track status of loading GetFinalPathNameByHandle functions: will be 0 if
+   loading has not been attempted yet, 1 if the functions were loaded
+   successfully, and -1 if they could not be loaded */
+static int loaded_GetFinalPathNameByHandle = 0;
+static DWORD (CALLBACK *Py_GetFinalPathNameByHandleA)(HANDLE, LPSTR, DWORD,
+                                                      DWORD);
 static DWORD (CALLBACK *Py_GetFinalPathNameByHandleW)(HANDLE, LPWSTR, DWORD,
                                                       DWORD);
 static int
 check_GetFinalPathNameByHandle()
 {
     HINSTANCE hKernel32;
-    DWORD (CALLBACK *Py_GetFinalPathNameByHandleA)(HANDLE, LPSTR, DWORD,
-                                                   DWORD);
-
     /* only recheck */
-    if (!has_GetFinalPathNameByHandle)
+    if (!loaded_GetFinalPathNameByHandle)
     {
         hKernel32 = GetModuleHandleW(L"KERNEL32");
         *(FARPROC*)&Py_GetFinalPathNameByHandleA = GetProcAddress(hKernel32,
                                                 "GetFinalPathNameByHandleA");
         *(FARPROC*)&Py_GetFinalPathNameByHandleW = GetProcAddress(hKernel32,
                                                 "GetFinalPathNameByHandleW");
-        has_GetFinalPathNameByHandle = Py_GetFinalPathNameByHandleA &&
-                                       Py_GetFinalPathNameByHandleW;
+        if (Py_GetFinalPathNameByHandleA && Py_GetFinalPathNameByHandleW)
+            loaded_GetFinalPathNameByHandle = 1;
+        else
+            loaded_GetFinalPathNameByHandle = -1;
     }
-    return has_GetFinalPathNameByHandle;
+    return loaded_GetFinalPathNameByHandle > 0;
 }
 
 static BOOL
@@ -3047,69 +3129,147 @@ posix__getfullpathname(PyObject *self, PyObject *args)
     return PyBytes_FromString(outbuf);
 } /* end of posix__getfullpathname */
 
+static HANDLE
+CreateFileWS(WindowsString *wsFileName, DWORD dwDesiredAccess,
+             DWORD dwShareMode, LPSECURITY_ATTRIBUTES lpSecurityAttributes,
+             DWORD dwCreationDisposition, DWORD dwFlagsAndAttributes,
+             HANDLE hTemplateFile) {
+    HANDLE result;
+    Py_BEGIN_ALLOW_THREADS
+    if (wsFileName->is_unicode)
+        result = CreateFileW(wsFileName->buffer.utf16, dwDesiredAccess,
+                             dwShareMode, lpSecurityAttributes,
+                             dwCreationDisposition, dwFlagsAndAttributes,
+                             hTemplateFile);
+    else
+        result = CreateFileA(wsFileName->buffer.bytes, dwDesiredAccess,
+                             dwShareMode, lpSecurityAttributes,
+                             dwCreationDisposition, dwFlagsAndAttributes,
+                             hTemplateFile);
+    Py_END_ALLOW_THREADS
+    return result;
+}
 
-
-/* A helper function for samepath on windows */
+/* A helper function for realpath and samepath on Windows */
 static PyObject *
 posix__getfinalpathname(PyObject *self, PyObject *args)
 {
+    WindowsString path;
+    PyObject *force_unicode = NULL, *result = NULL;
     HANDLE hFile;
-    int buf_size;
-    wchar_t *target_path;
-    int result_length;
-    PyObject *po, *result;
-    wchar_t *path;
+    int use_unicode;
+    DWORD buffer_size, result_length;
+    void *result_buffer;
 
-    if (!PyArg_ParseTuple(args, "U|:_getfinalpathname", &po))
-        return NULL;
-    path = PyUnicode_AsUnicode(po);
-    if (path == NULL)
+    if (!PyArg_ParseTuple(args, "O&|O:_getfinalpathname",
+                          WindowsStringConverter, &path, &force_unicode))
         return NULL;
 
-    if(!check_GetFinalPathNameByHandle()) {
+    if (force_unicode)
+    {
+        use_unicode = PyObject_IsTrue(force_unicode);
+        if (use_unicode == -1)
+            goto exit1;
+    }
+    else
+        use_unicode = path.is_unicode;
+
+    if (!check_GetFinalPathNameByHandle()) {
         /* If the OS doesn't have GetFinalPathNameByHandle, return a
            NotImplementedError. */
-        return PyErr_Format(PyExc_NotImplementedError,
+        PyErr_Format(PyExc_NotImplementedError,
             "GetFinalPathNameByHandle not available on this platform");
+        goto exit1;
     }
 
-    hFile = CreateFileW(
-        path,
-        0, /* desired access */
-        0, /* share mode */
+    hFile = CreateFileWS(
+        &path,
+        FILE_READ_ATTRIBUTES, /* desired access */
+        FILE_SHARE_READ, /* share mode */
         NULL, /* security attributes */
         OPEN_EXISTING,
         /* FILE_FLAG_BACKUP_SEMANTICS is required to open a directory */
-        FILE_FLAG_BACKUP_SEMANTICS,
+        FILE_ATTRIBUTE_NORMAL|FILE_FLAG_BACKUP_SEMANTICS,
         NULL);
 
-    if(hFile == INVALID_HANDLE_VALUE)
-        return win32_error_object("CreateFileW", po);
+    if (hFile == INVALID_HANDLE_VALUE)
+    {
+        win32_error_string("CreateFile", &path);
+        goto exit1; /* no handle was opened, so skip CloseHandle */
+    }
 
     /* We have a good handle to the target, use it to determine the
        target path name. */
-    buf_size = Py_GetFinalPathNameByHandleW(hFile, 0, 0, VOLUME_NAME_NT);
+    Py_BEGIN_ALLOW_THREADS
+    if (use_unicode)
+    {
+        /* Because the supplied buffer has insufficient space, result_length
+           *will* include space for the terminating null character. */
+        result_length = Py_GetFinalPathNameByHandleW(hFile, NULL, 0,
+                                                     VOLUME_NAME_DOS);
+        buffer_size = result_length * sizeof(WCHAR);
+    }
+    else
+    {
+        result_length = Py_GetFinalPathNameByHandleA(hFile, NULL, 0,
+                                                     VOLUME_NAME_DOS);
+        /* Contrary to documentation, the return value of
+           GetFinalPathNameByHandleA (at least on Windows 7) does not include
+           space for the terminating null character when the supplied buffer
+           has insufficient space.  Allocate an extra character to compensate
+           for this. */
+        buffer_size = (result_length + 1) * sizeof(CHAR);
+    }
+    Py_END_ALLOW_THREADS
 
-    if(!buf_size)
-        return win32_error_object("GetFinalPathNameByHandle", po);
+    if (!result_length)
+    {
+        win32_error_string("GetFinalPathNameByHandle", &path);
+        goto exit2;
+    }
 
-    target_path = (wchar_t *)malloc((buf_size+1)*sizeof(wchar_t));
-    if(!target_path)
-        return PyErr_NoMemory();
+    result_buffer = malloc(buffer_size);
+    if (!result_buffer)
+    {
+        PyErr_NoMemory();
+        goto exit2;
+    }
 
-    result_length = Py_GetFinalPathNameByHandleW(hFile, target_path,
-                                                 buf_size, VOLUME_NAME_DOS);
-    if(!result_length)
-        return win32_error_object("GetFinalPathNamyByHandle", po);
+    Py_BEGIN_ALLOW_THREADS
+    if (use_unicode)
+        /* The buffer size is passed in WCHARs, not bytes.  On success,
+           result_length does *not* count the terminating null character. */
+        result_length = Py_GetFinalPathNameByHandleW(hFile, result_buffer,
+                                               buffer_size / sizeof(WCHAR),
+                                               VOLUME_NAME_DOS);
+    else
+        result_length = Py_GetFinalPathNameByHandleA(hFile, result_buffer,
+                                                     buffer_size,
+                                                     VOLUME_NAME_DOS);
+    Py_END_ALLOW_THREADS
 
-    if(!CloseHandle(hFile))
-        return win32_error_object("CloseHandle", po);
+    if (!result_length)
+    {
+        win32_error_string("GetFinalPathNameByHandle", &path);
+        goto exit3;
+    }
 
-    target_path[result_length] = 0;
-    result = PyUnicode_FromWideChar(target_path, result_length);
-    free(target_path);
+    if (use_unicode)
+        result = PyUnicode_FromWideChar(result_buffer, result_length);
+    else
+        /* Because (according to the Windows documentation) Windows 7 behaves
+           differently than Windows Vista regarding whether or not the return
+           value from GetFinalPathNameByHandleA includes the terminating null
+           character, don't try to use PyBytes_FromStringAndSize, but fall back
+           to PyBytes_FromString. */
+        result = PyBytes_FromString(result_buffer);
+exit3:
+    free(result_buffer);
+exit2:
+    CloseHandle(hFile);
+exit1:
+    PyMem_Free(path.buffer.raw);
     return result;
-
 } /* end of posix__getfinalpathname */
 
 static PyObject *
diff --git a/Python/errors.c b/Python/errors.c
index 626b16e..db2d344 100644
--- a/Python/errors.c
+++ b/Python/errors.c
@@ -591,6 +591,15 @@ PyObject *PyErr_SetFromWindowsErr(int ierr)
     return PyErr_SetExcFromWindowsErrWithFilename(PyExc_WindowsError,
                                                   ierr, NULL);
 }
+
+PyObject *PyErr_SetFromWindowsErrWithFilenameObject(
+    int ierr,
+    PyObject *filenameObject)
+{
+    return PyErr_SetExcFromWindowsErrWithFilenameObject(PyExc_WindowsError,
+                                                        ierr, filenameObject);
+}
+
 PyObject *PyErr_SetFromWindowsErrWithFilename(
     int ierr,
     const char *filename)