diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -103,10 +103,6 @@ typedef wchar_t Py_UNICODE; # endif #endif -#if defined(MS_WINDOWS) -# define HAVE_MBCS -#endif - #ifdef HAVE_WCHAR_H /* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */ # ifdef _HAVE_BSDI @@ -1449,7 +1445,7 @@ PyAPI_FUNC(PyObject *) PyUnicode_Transla ); #endif -#ifdef HAVE_MBCS +#ifdef MS_WINDOWS /* --- MBCS codecs for Windows -------------------------------------------- */ @@ -1492,7 +1488,14 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeCo const char *errors /* error handling */ ); -#endif /* HAVE_MBCS */ +PyAPI_FUNC(PyObject*) _Py_EncodeCodePage( + int code_page, /* code page number */ + const wchar_t *str, /* string */ + Py_ssize_t len, /* string length */ + const char *errors /* error handling */ + ); + +#endif /* MS_WINDOWS */ /* --- Decimal Encoder ---------------------------------------------------- */ diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -1604,6 +1604,15 @@ class ExtendedAttributeTests(unittest.Te self._check_xattrs(getxattr, setxattr, removexattr, listxattr) +@unittest.skipUnless(sys.platform == "win32", "Win32 specific tests") +class Win32UnicodeTests(unittest.TestCase): + def test_undecodable_names(self): + parent = os.getcwd() + parent_bytes = os.fsencode(parent) + with support.temp_cwd(support.TESTFN_UNENCODABLE): + self.assertRaises(UnicodeEncodeError, os.getcwdb) + self.assertRaises(UnicodeEncodeError, os.listdir, parent_bytes) + @support.reap_threads def test_main(): support.run_unittest( @@ -1628,6 +1637,7 @@ def test_main(): TestSendfile, ProgramPriorityTests, ExtendedAttributeTests, + Win32UnicodeTests, ) if __name__ == "__main__": diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c --- a/Modules/_codecsmodule.c +++ b/Modules/_codecsmodule.c @@ -587,7 +587,7 @@ charmap_decode(PyObject *self, return 
codec_tuple(unicode, pbuf.len); } -#ifdef HAVE_MBCS +#ifdef MS_WINDOWS static PyObject * mbcs_decode(PyObject *self, @@ -637,7 +637,7 @@ code_page_decode(PyObject *self, return codec_tuple(decoded, consumed); } -#endif /* HAVE_MBCS */ +#endif /* MS_WINDOWS */ /* --- Encoder ------------------------------------------------------------ */ @@ -1011,7 +1011,7 @@ charmap_build(PyObject *self, PyObject * return PyUnicode_BuildEncodingMap(map); } -#ifdef HAVE_MBCS +#ifdef MS_WINDOWS static PyObject * mbcs_encode(PyObject *self, @@ -1059,7 +1059,7 @@ code_page_encode(PyObject *self, return v; } -#endif /* HAVE_MBCS */ +#endif /* MS_WINDOWS */ /* --- Error handler registry --------------------------------------------- */ @@ -1146,7 +1146,7 @@ static PyMethodDef _codecs_functions[] = {"charmap_decode", charmap_decode, METH_VARARGS}, {"charmap_build", charmap_build, METH_VARARGS}, {"readbuffer_encode", readbuffer_encode, METH_VARARGS}, -#ifdef HAVE_MBCS +#ifdef MS_WINDOWS {"mbcs_encode", mbcs_encode, METH_VARARGS}, {"mbcs_decode", mbcs_decode, METH_VARARGS}, {"code_page_encode", code_page_encode, METH_VARARGS}, diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -375,6 +375,14 @@ static int win32_can_symlink = 0; #endif #endif +#ifdef MS_WINDOWS +static PyObject* +encode_filename(const wchar_t *str, Py_ssize_t len) +{ + return _Py_EncodeCodePage(CP_ACP, str, len, NULL); +} +#endif + /* A helper used by a number of POSIX-only functions */ #ifndef MS_WINDOWS static int @@ -682,7 +690,7 @@ posix_error_with_allocated_filename(PyOb #ifdef MS_WINDOWS static PyObject * -win32_error(char* function, const char* filename) +win32_error(const char* function, const char* filename) { /* XXX We should pass the function name along in the future. (winreg.c also wants to pass the function name.)
@@ -2356,39 +2364,44 @@ posix_lchown(PyObject *self, PyObject *a static PyObject * posix_getcwd(int use_bytes) { - char buf[1026]; - char *res; - #ifdef MS_WINDOWS - if (!use_bytes) { - wchar_t wbuf[1026]; - wchar_t *wbuf2 = wbuf; - PyObject *resobj; - DWORD len; - Py_BEGIN_ALLOW_THREADS - len = GetCurrentDirectoryW(sizeof wbuf/ sizeof wbuf[0], wbuf); - /* If the buffer is large enough, len does not include the - terminating \0. If the buffer is too small, len includes - the space needed for the terminator. */ - if (len >= sizeof wbuf/ sizeof wbuf[0]) { - wbuf2 = malloc(len * sizeof(wchar_t)); - if (wbuf2) - len = GetCurrentDirectoryW(len, wbuf2); - } - Py_END_ALLOW_THREADS + wchar_t wbuf[1026]; + wchar_t *wbuf2 = wbuf; + PyObject *resobj; + DWORD len; + const char *funcname = use_bytes?"getcwdb":"getcwdu"; + + Py_BEGIN_ALLOW_THREADS + len = GetCurrentDirectoryW(Py_ARRAY_LENGTH(wbuf), wbuf); + /* If the buffer is large enough, len does not include the + terminating \0. If the buffer is too small, len includes + the space needed for the terminator.
*/ + if (len >= Py_ARRAY_LENGTH(wbuf)) { + wbuf2 = malloc(len * sizeof(wchar_t)); - if (!wbuf2) { - PyErr_NoMemory(); - return NULL; - } - if (!len) { - if (wbuf2 != wbuf) free(wbuf2); - return win32_error("getcwdu", NULL); - } + if (wbuf2) + len = GetCurrentDirectoryW(len, wbuf2); + } + Py_END_ALLOW_THREADS + /* Report allocation failure only once the GIL is held again. */ + if (!wbuf2) { + PyErr_NoMemory(); + return NULL; + } + if (!len) { + if (wbuf2 != wbuf) + free(wbuf2); + return win32_error(funcname, NULL); + } + if (use_bytes) + resobj = encode_filename(wbuf2, len); + else resobj = PyUnicode_FromWideChar(wbuf2, len); - if (wbuf2 != wbuf) free(wbuf2); - return resobj; - } -#endif + if (wbuf2 != wbuf) + free(wbuf2); + return resobj; +#else + char buf[1026]; + char *res; Py_BEGIN_ALLOW_THREADS #if defined(PYOS_OS2) && defined(PYCC_GCC) @@ -2401,7 +2414,9 @@ posix_getcwd(int use_bytes) return posix_error(); if (use_bytes) return PyBytes_FromStringAndSize(buf, strlen(buf)); - return PyUnicode_DecodeFSDefault(buf); + else + return PyUnicode_DecodeFSDefault(buf); +#endif } PyDoc_STRVAR(posix_getcwd__doc__, @@ -2511,140 +2526,74 @@ posix_listdir(PyObject *self, PyObject * in separate files instead of having them all here... */ #if defined(MS_WINDOWS) && !defined(HAVE_OPENDIR) - PyObject *d, *v; + PyObject *d = NULL, *v; HANDLE hFindFile; BOOL result; - WIN32_FIND_DATA FileData; - PyObject *opath; + PyObject *opath = NULL; char namebuf[MAX_PATH+5]; /* Overallocate for \\*.*\0 */ char *bufptr = namebuf; Py_ssize_t len = sizeof(namebuf)-5; /* only claim to have space for MAX_PATH */ - + int use_bytes; PyObject *po = NULL; + WIN32_FIND_DATAW wFileData; + wchar_t *wnamebuf = NULL, *po_wchars; + if (PyArg_ParseTuple(args, "|U:listdir", &po)) { - WIN32_FIND_DATAW wFileData; - wchar_t *wnamebuf, *po_wchars; - - if (po == NULL) { /* Default arg: "."
*/ - po_wchars = L"."; - len = 1; - } else { - po_wchars = PyUnicode_AsUnicodeAndSize(po, &len); - if (po_wchars == NULL) - return NULL; - } - /* Overallocate for \\*.*\0 */ - wnamebuf = malloc((len + 5) * sizeof(wchar_t)); - if (!wnamebuf) { - PyErr_NoMemory(); + use_bytes = 0; + } + else { + /* Drop the "|U" parsing error: a bytes path is also valid. */ + PyErr_Clear(); + if (!PyArg_ParseTuple(args, "O&:listdir", + PyUnicode_FSDecoder, &po)) return NULL; - } - wcscpy(wnamebuf, po_wchars); - if (len > 0) { - wchar_t wch = wnamebuf[len-1]; - if (wch != L'/' && wch != L'\\' && wch != L':') - wnamebuf[len++] = L'\\'; - wcscpy(wnamebuf + len, L"*.*"); - } - if ((d = PyList_New(0)) == NULL) { - free(wnamebuf); - return NULL; - } - Py_BEGIN_ALLOW_THREADS - hFindFile = FindFirstFileW(wnamebuf, &wFileData); - Py_END_ALLOW_THREADS - if (hFindFile == INVALID_HANDLE_VALUE) { - int error = GetLastError(); - if (error == ERROR_FILE_NOT_FOUND) { - free(wnamebuf); - return d; - } - Py_DECREF(d); - win32_error_unicode("FindFirstFileW", wnamebuf); - free(wnamebuf); - return NULL; - } - do { - /* Skip over . and .. */ - if (wcscmp(wFileData.cFileName, L".") != 0 && - wcscmp(wFileData.cFileName, L"..") != 0) { - v = PyUnicode_FromUnicode(wFileData.cFileName, wcslen(wFileData.cFileName)); - if (v == NULL) { - Py_DECREF(d); - d = NULL; - break; - } - if (PyList_Append(d, v) != 0) { - Py_DECREF(v); - Py_DECREF(d); - d = NULL; - break; - } - Py_DECREF(v); - } - Py_BEGIN_ALLOW_THREADS - result = FindNextFileW(hFindFile, &wFileData); - Py_END_ALLOW_THREADS - /* FindNextFile sets error to ERROR_NO_MORE_FILES if - it got to the end of the directory.
*/ - if (!result && GetLastError() != ERROR_NO_MORE_FILES) { - Py_DECREF(d); - win32_error_unicode("FindNextFileW", wnamebuf); - FindClose(hFindFile); - free(wnamebuf); - return NULL; - } - } while (result == TRUE); - - if (FindClose(hFindFile) == FALSE) { - Py_DECREF(d); - win32_error_unicode("FindClose", wnamebuf); - free(wnamebuf); - return NULL; - } - free(wnamebuf); - return d; - } - /* Drop the argument parsing error as narrow strings - are also valid. */ - PyErr_Clear(); - - if (!PyArg_ParseTuple(args, "O&:listdir", - PyUnicode_FSConverter, &opath)) - return NULL; - if (PyBytes_GET_SIZE(opath)+1 > MAX_PATH) { - PyErr_SetString(PyExc_ValueError, "path too long"); - Py_DECREF(opath); - return NULL; - } - strcpy(namebuf, PyBytes_AsString(opath)); - len = PyObject_Size(opath); - Py_DECREF(opath); + use_bytes = 1; + } + + if (po == NULL) { /* Default arg: "." */ + po_wchars = L"."; + len = 1; + } else { + po_wchars = PyUnicode_AsUnicodeAndSize(po, &len); + if (po_wchars == NULL) + goto error; + } + /* Overallocate for \\*.*\0 */ + wnamebuf = malloc((len + 5) * sizeof(wchar_t)); + if (!wnamebuf) { + PyErr_NoMemory(); + goto error; + } + wcscpy(wnamebuf, po_wchars); if (len > 0) { - char ch = namebuf[len-1]; - if (ch != SEP && ch != ALTSEP && ch != ':') - namebuf[len++] = '/'; - strcpy(namebuf + len, "*.*"); - } - - if ((d = PyList_New(0)) == NULL) - return NULL; - - Py_BEGIN_ALLOW_THREADS - hFindFile = FindFirstFile(namebuf, &FileData); + wchar_t wch = wnamebuf[len-1]; + if (wch != L'/' && wch != L'\\' && wch != L':') + wnamebuf[len++] = L'\\'; + wcscpy(wnamebuf + len, L"*.*"); + } + d = PyList_New(0); + if (d == NULL) + goto error; + Py_BEGIN_ALLOW_THREADS + hFindFile = FindFirstFileW(wnamebuf, &wFileData); Py_END_ALLOW_THREADS if (hFindFile == INVALID_HANDLE_VALUE) { int error = GetLastError(); if (error == ERROR_FILE_NOT_FOUND) - return d; - Py_DECREF(d); - return win32_error("FindFirstFile", namebuf); + goto done; + win32_error_unicode("FindFirstFileW",
wnamebuf); + goto error; } do { /* Skip over . and .. */ - if (strcmp(FileData.cFileName, ".") != 0 && - strcmp(FileData.cFileName, "..") != 0) { - v = PyBytes_FromString(FileData.cFileName); + if (wcscmp(wFileData.cFileName, L".") != 0 && + wcscmp(wFileData.cFileName, L"..") != 0) + { + len = wcslen(wFileData.cFileName); + if (use_bytes) + v = encode_filename(wFileData.cFileName, len); + else + v = PyUnicode_FromUnicode(wFileData.cFileName, len); if (v == NULL) { Py_DECREF(d); d = NULL; @@ -2659,23 +2608,30 @@ posix_listdir(PyObject *self, PyObject * Py_DECREF(v); } Py_BEGIN_ALLOW_THREADS - result = FindNextFile(hFindFile, &FileData); + result = FindNextFileW(hFindFile, &wFileData); Py_END_ALLOW_THREADS /* FindNextFile sets error to ERROR_NO_MORE_FILES if it got to the end of the directory. */ if (!result && GetLastError() != ERROR_NO_MORE_FILES) { - Py_DECREF(d); - win32_error("FindNextFile", namebuf); + win32_error_unicode("FindNextFileW", wnamebuf); FindClose(hFindFile); - return NULL; + goto error; } } while (result == TRUE); if (FindClose(hFindFile) == FALSE) { - Py_DECREF(d); - return win32_error("FindClose", namebuf); - } - + win32_error_unicode("FindClose", wnamebuf); + goto error; + } + goto done; + +error: + Py_CLEAR(d); +done: + if (wnamebuf) + free(wnamebuf); + if (use_bytes) + Py_DECREF(po); return d; #elif defined(PYOS_OS2) diff --git a/Modules/timemodule.c b/Modules/timemodule.c --- a/Modules/timemodule.c +++ b/Modules/timemodule.c @@ -43,7 +43,7 @@ static long main_thread; #endif /* MS_WINDOWS */ #endif /* !__WATCOMC__ || __QNX__ */ -#if defined(HAVE_MBCS) +#if defined(MS_WINDOWS) # define TZNAME_ENCODING "mbcs" #else # define TZNAME_ENCODING "utf-8" diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -429,7 +429,7 @@ _PyUnicode_CheckConsistency(void *op, in } #endif -#ifdef HAVE_MBCS +#ifdef MS_WINDOWS static OSVERSIONINFOEX winver; #endif @@ -2887,7 +2887,7 @@
PyUnicode_Decode(const char *s, (strcmp(lower, "latin1") == 0) || (strcmp(lower, "iso-8859-1") == 0)) return PyUnicode_DecodeLatin1(s, size, errors); -#ifdef HAVE_MBCS +#ifdef MS_WINDOWS else if (strcmp(lower, "mbcs") == 0) return PyUnicode_DecodeMBCS(s, size, errors); #endif @@ -3034,7 +3034,7 @@ PyUnicode_AsEncodedObject(PyObject *unic PyObject * PyUnicode_EncodeFSDefault(PyObject *unicode) { -#ifdef HAVE_MBCS +#ifdef MS_WINDOWS return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(unicode), PyUnicode_GET_SIZE(unicode), NULL); @@ -3121,7 +3121,7 @@ PyUnicode_AsEncodedString(PyObject *unic (strcmp(lower, "latin1") == 0) || (strcmp(lower, "iso-8859-1") == 0)) return _PyUnicode_AsLatin1String(unicode, errors); -#ifdef HAVE_MBCS +#ifdef MS_WINDOWS else if (strcmp(lower, "mbcs") == 0) return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(unicode), PyUnicode_GET_SIZE(unicode), @@ -3206,7 +3206,7 @@ PyUnicode_DecodeFSDefault(const char *s) PyObject* PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size) { -#ifdef HAVE_MBCS +#ifdef MS_WINDOWS return PyUnicode_DecodeMBCS(s, size, NULL); #elif defined(__APPLE__) return PyUnicode_DecodeUTF8(s, size, "surrogateescape"); @@ -3318,6 +3318,10 @@ PyUnicode_FSDecoder(PyObject* arg, void* return 0; } } + if (PyUnicode_READY(output) < 0) { + Py_DECREF(output); + return 0; + } if (findchar(PyUnicode_DATA(output), PyUnicode_KIND(output), PyUnicode_GET_LENGTH(output), 0, 1) >= 0) { PyErr_SetString(PyExc_TypeError, "embedded NUL character"); @@ -6893,7 +6897,7 @@ PyUnicode_AsASCIIString(PyObject *unicod return _PyUnicode_AsASCIIString(unicode, NULL); } -#ifdef HAVE_MBCS +#ifdef MS_WINDOWS /* --- MBCS codecs for Windows -------------------------------------------- */ @@ -7382,7 +7386,6 @@ encode_code_page_errors(UINT code_page, PyObject *exc = NULL; PyObject *encoding_obj = NULL; char *encoding; - int err; Py_ssize_t startpos, newpos, newoutsize; PyObject *rep; int ret = -1; @@ -7569,39 +7572,37 @@ encode_code_page_chunk(UINT code_page, 
P return done; } -static PyObject * -encode_code_page(int code_page, - const Py_UNICODE *p, Py_ssize_t size, - const char *errors) +PyObject* +_Py_EncodeCodePage(int code_page, + const wchar_t *str, Py_ssize_t size, + const char *errors) { PyObject *outbytes = NULL; int ret; + int chunk; if (code_page < 0) { PyErr_SetString(PyExc_ValueError, "invalid code page number"); return NULL; } + do + { #ifdef NEED_RETRY - retry: - if (size > INT_MAX) - ret = encode_code_page_chunk(code_page, &outbytes, p, INT_MAX, errors); - else -#endif - ret = encode_code_page_chunk(code_page, &outbytes, p, (int)size, errors); - - if (ret < 0) { - Py_XDECREF(outbytes); - return NULL; - } - -#ifdef NEED_RETRY - if (size > INT_MAX) { - p += INT_MAX; - size -= INT_MAX; - goto retry; - } -#endif + if (size > INT_MAX) + chunk = INT_MAX; + else +#endif + chunk = (int)size; + ret = encode_code_page_chunk(code_page, &outbytes, str, chunk, errors); + if (ret < 0) { + Py_XDECREF(outbytes); + return NULL; + } + + str += chunk; + size -= chunk; + } while (0 < size); return outbytes; } @@ -7611,7 +7612,7 @@ PyUnicode_EncodeMBCS(const Py_UNICODE *p Py_ssize_t size, const char *errors) { - return encode_code_page(CP_ACP, p, size, errors); + return _Py_EncodeCodePage(CP_ACP, p, size, errors); } PyObject * @@ -7621,10 +7622,11 @@ PyUnicode_EncodeCodePage(int code_page, { const Py_UNICODE *p; Py_ssize_t size; + p = PyUnicode_AsUnicodeAndSize(unicode, &size); if (p == NULL) return NULL; - return encode_code_page(code_page, p, size, errors); + return _Py_EncodeCodePage(code_page, p, size, errors); } PyObject * @@ -7641,7 +7643,7 @@ PyUnicode_AsMBCSString(PyObject *unicode #undef NEED_RETRY -#endif /* HAVE_MBCS */ +#endif /* MS_WINDOWS */ /* --- Character Mapping Codec -------------------------------------------- */ @@ -13928,7 +13930,7 @@ int _PyUnicode_Init(void) PyType_Ready(&EncodingMapType); -#ifdef HAVE_MBCS +#ifdef MS_WINDOWS winver.dwOSVersionInfoSize = sizeof(winver); if 
(!GetVersionEx((OSVERSIONINFO*)&winver)) { PyErr_SetFromWindowsErr(0); diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -21,7 +21,7 @@ Don't forget to modify PyUnicode_DecodeFSDefault() if you touch any of the values for Py_FileSystemDefaultEncoding! */ -#ifdef HAVE_MBCS +#ifdef MS_WINDOWS const char *Py_FileSystemDefaultEncoding = "mbcs"; int Py_HasFileSystemDefaultEncoding = 1; #elif defined(__APPLE__)