Encode and decode file system paths with the "surrogateescape" error handler.

Overview (one entry per file touched below):

  Python/pythonrun.c
      Move the block that sets Py_FileSystemDefaultEncoding from
      get_codeset() so that it runs right after _PyExc_Init() and before
      _PySys_Init(), instead of after _PyImportHooks_Init().
  Python/import.c
      Pass "surrogateescape" instead of NULL as the error handler when
      encoding with Py_FileSystemDefaultEncoding.  NullImporter_init()
      now parses its argument with PyUnicode_FSConverter (a bytes object
      released with Py_DECREF) instead of the "es" format.
  Objects/unicodeobject.c
      When encoding is "utf-8" and errors is "surrogateescape", call
      PyUnicode_EncodeUTF8() directly instead of PyCodec_Encode()
      (two call sites).
  Modules/getpath.c
      Add _Py_wchar2char() and use it in _wstat() and _Py_wreadlink() in
      place of PATH_MAX buffers filled by wcstombs(); use _Py_char2wchar()
      in _wgetcwd(); tag the remaining mbstowcs() calls with FIXME.
  Modules/zipimport.c
      zipimporter_init() parses its path with PyUnicode_FSConverter.
  Modules/python.c
      Rename the static char2wchar() to the non-static _Py_char2wchar().

Review fixes folded into this revision of the patch:

  1. _Py_wchar2char(): only U+DC80..U+DCFF are turned back into raw
     bytes (value - 0xdc00).  The earlier test (0xd800..0xdfff, minus
     0xd800) also swallowed every other surrogate; U+D800, for example,
     became a NUL byte in the middle of the result.
  2. _Py_wchar2char(): wctomb() returns an int, so its failure value is
     tested with "< 0" rather than by comparing against (size_t)-1.
  3. _wgetcwd(): fail when size <= wcslen(wfname).  wcscpy() also writes
     the terminating L'\0', so a result of exactly `size` characters
     overflowed buf by one element.

Open points (NOTE(review), not changed here):

  a. _wstat() and _Py_wreadlink() call PyMem_Free() between the system
     call and the caller's inspection of errno; confirm that
     PyMem_Free() cannot modify errno.
  b. _Py_char2wchar() is assumed to return memory that PyMem_Free() may
     release and to map undecodable bytes to U+DC80..U+DCFF; its body is
     not part of this patch, so confirm against Modules/python.c.

Hunk line counts are the same as in the previous revision.  Indentation
inside the hunks is normalized to four spaces per level, so apply with
whitespace-insensitive matching (for example `patch -p0 -l`).

Index: Python/pythonrun.c
===================================================================
--- Python/pythonrun.c	(révision 79453)
+++ Python/pythonrun.c	(copie de travail)
@@ -240,6 +240,22 @@
     /* initialize builtin exceptions */
     _PyExc_Init();
 
+#if defined(HAVE_LANGINFO_H) && defined(CODESET)
+    /* On Unix, set the file system encoding according to the
+       user's preference, if the CODESET names a well-known
+       Python codec, and Py_FileSystemDefaultEncoding isn't
+       initialized by other means. Also set the encoding of
+       stdin and stdout if these are terminals. */
+
+    codeset = get_codeset();
+    if (codeset) {
+        if (!Py_FileSystemDefaultEncoding)
+            Py_FileSystemDefaultEncoding = codeset;
+        else
+            free(codeset);
+    }
+#endif
+
     sysmod = _PySys_Init();
     if (sysmod == NULL)
         Py_FatalError("Py_Initialize: can't initialize sys");
@@ -264,22 +280,6 @@
 
     _PyImportHooks_Init();
 
-#if defined(HAVE_LANGINFO_H) && defined(CODESET)
-    /* On Unix, set the file system encoding according to the
-       user's preference, if the CODESET names a well-known
-       Python codec, and Py_FileSystemDefaultEncoding isn't
-       initialized by other means. Also set the encoding of
-       stdin and stdout if these are terminals. */
-
-    codeset = get_codeset();
-    if (codeset) {
-        if (!Py_FileSystemDefaultEncoding)
-            Py_FileSystemDefaultEncoding = codeset;
-        else
-            free(codeset);
-    }
-#endif
-
     if (install_sigs)
         initsigs(); /* Signal handling stuff, including initintr() */
 
Index: Python/import.c
===================================================================
--- Python/import.c	(révision 79453)
+++ Python/import.c	(copie de travail)
@@ -1388,7 +1388,7 @@
             return NULL;
         if (PyUnicode_Check(v)) {
             v = PyUnicode_AsEncodedString(v,
-                Py_FileSystemDefaultEncoding, NULL);
+                Py_FileSystemDefaultEncoding, "surrogateescape");
             if (v == NULL)
                 return NULL;
         }
@@ -3284,19 +3284,23 @@
 static int
 NullImporter_init(NullImporter *self, PyObject *args, PyObject *kwds)
 {
+    PyObject *opath;
     char *path;
     Py_ssize_t pathlen;
 
     if (!_PyArg_NoKeywords("NullImporter()", kwds))
         return -1;
 
-    if (!PyArg_ParseTuple(args, "es:NullImporter",
-                          Py_FileSystemDefaultEncoding, &path))
+    if (!PyArg_ParseTuple(args, "O&:NullImporter",
+                          PyUnicode_FSConverter, &opath))
         return -1;
 
-    pathlen = strlen(path);
+    assert(PyBytes_Check(opath));
+
+    path = PyBytes_AS_STRING(opath);
+    pathlen = PyBytes_GET_SIZE(opath);
     if (pathlen == 0) {
-        PyMem_Free(path);
+        Py_DECREF(opath);
         PyErr_SetString(PyExc_ImportError, "empty pathname");
         return -1;
     } else {
@@ -3305,7 +3309,7 @@
         int rv;
 
         rv = stat(path, &statbuf);
-        PyMem_Free(path);
+        Py_DECREF(opath);
         if (rv == 0) {
             /* it exists */
             if (S_ISDIR(statbuf.st_mode)) {
@@ -3322,7 +3326,7 @@
          * "e:\\shared\\" and "\\\\whiterab-c2znlh\\shared" as dirs.
          */
         rv = GetFileAttributesA(path);
-        PyMem_Free(path);
+        Py_DECREF(opath);
         if (rv != INVALID_FILE_ATTRIBUTES) {
             /* it exists */
             if (rv & FILE_ATTRIBUTE_DIRECTORY) {
Index: Objects/unicodeobject.c
===================================================================
--- Objects/unicodeobject.c	(révision 79453)
+++ Objects/unicodeobject.c	(copie de travail)
@@ -1443,6 +1443,14 @@
     if (encoding == NULL)
         encoding = PyUnicode_GetDefaultEncoding();
 
+    if (errors != NULL
+        && strcmp(encoding, "utf-8") == 0
+        && strcmp(errors, "surrogateescape") == 0)
+        return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
+                                    PyUnicode_GET_SIZE(unicode),
+                                    errors);
+
+
     /* Encode via the codec registry */
     v = PyCodec_Encode(unicode, encoding, errors);
     if (v == NULL)
@@ -1492,8 +1500,14 @@
                  strcmp(encoding, Py_FileSystemDefaultEncoding) == 0 &&
                  !PyThreadState_GET()->interp->codecs_initialized)
             return PyUnicode_AsASCIIString(unicode);
+    } else if (strcmp(errors, "surrogateescape") == 0) {
+        if (strcmp(encoding, "utf-8") == 0)
+            return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
+                                        PyUnicode_GET_SIZE(unicode),
+                                        errors);
     }
+
 
     /* Encode via the codec registry */
     v = PyCodec_Encode(unicode, encoding, errors);
     if (v == NULL)
Index: Modules/getpath.c
===================================================================
--- Modules/getpath.c	(révision 79453)
+++ Modules/getpath.c	(copie de travail)
@@ -131,6 +131,53 @@
 static wchar_t *module_search_path = NULL;
 static wchar_t *lib_python = L"lib/python" VERSION;
 
+char*
+_Py_wchar2char(const wchar_t* arg)
+{
+    char* res;
+    size_t i, argsize;
+    size_t len, allocated;
+    int converted;
+    const wchar_t *in;
+    char* out;
+
+    argsize = wcslen(arg);
+    allocated = argsize * MB_CUR_MAX;
+    if (allocated / MB_CUR_MAX != argsize)
+        return NULL;
+    res = PyMem_Malloc(allocated + 1);
+    if (res == NULL)
+        return NULL;
+
+    len = allocated;
+    in = arg;
+    out = res;
+    for (i=0; i < argsize; i++) {
+        if (len < MB_CUR_MAX)
+            goto error;
+        if (*in >= 0xdc80 && *in <= 0xdcff) {
+            *out = *in - 0xdc00;    /* escaped raw byte 0x80..0xff */
+            out++;
+            len--;
+            ++in;
+        } else {
+            converted = wctomb(out, *in);
+            if (converted < 0) {    /* wctomb() returns -1 on failure */
+                goto error;
+            }
+            len -= converted;
+            out += converted;
+            ++in;
+        }
+    }
+    *out = '\0';
+    return res;
+
+error:
+    PyMem_Free(res);
+    return NULL;
+}
+
 /* In principle, this should use HAVE__WSTAT, and _wstat
    should be detected by autoconf. However, no current
    POSIX system provides that function, so testing for
@@ -139,28 +186,45 @@
 static int
 _wstat(const wchar_t* path, struct stat *buf)
 {
-    char fname[PATH_MAX];
-    size_t res = wcstombs(fname, path, sizeof(fname));
-    if (res == (size_t)-1) {
+    char *fname;
+    int res;
+    fname = _Py_wchar2char(path);
+    if (fname == NULL) {
         errno = EINVAL;
         return -1;
     }
-    return stat(fname, buf);
+    res = stat(fname, buf);
+    PyMem_Free(fname);
+    return res;
 }
 #endif
 
 #ifndef MS_WINDOWS
+wchar_t* _Py_char2wchar(char* arg);
+#endif
+
+#ifndef MS_WINDOWS
 static wchar_t*
 _wgetcwd(wchar_t *buf, size_t size)
 {
     char fname[PATH_MAX];
+    wchar_t* wfname;
     if (getcwd(fname, PATH_MAX) == NULL)
         return NULL;
-    if (mbstowcs(buf, fname, size) >= size) {
+    wfname = _Py_char2wchar(fname);
+    if (wfname == NULL)
+        goto error;
+    if (size <= wcslen(wfname))    /* wcscpy() also writes L'\0' */
+        goto error;
+    wcscpy(buf, wfname);
+    PyMem_Free(wfname);
+    return buf;
+
+error:
+    if (wfname != NULL)
+        PyMem_Free(wfname);
         errno = ERANGE;
         return NULL;
-    }
-    return buf;
 }
 #endif
 
@@ -169,14 +233,16 @@
 _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
 {
     char cbuf[PATH_MAX];
-    char cpath[PATH_MAX];
+    char *cpath;
     int res;
-    size_t r1 = wcstombs(cpath, path, PATH_MAX);
-    if (r1 == (size_t)-1 || r1 >= PATH_MAX) {
+    size_t r1;
+    cpath = _Py_wchar2char(path);
+    if (cpath == NULL) {
         errno = EINVAL;
         return -1;
     }
     res = (int)readlink(cpath, cbuf, PATH_MAX);
+    PyMem_Free(cpath);
     if (res == -1)
         return -1;
     if (res == PATH_MAX) {
@@ -184,6 +250,7 @@
         return -1;
     }
     cbuf[res] = '\0'; /* buf will be null terminated */
+    /* FIXME: use _Py_char2wchar */
     r1 = mbstowcs(buf, cbuf, bufsiz);
     if (r1 == -1) {
         errno = EINVAL;
@@ -461,6 +528,7 @@
 #endif
 
     if (_path) {
+        /* FIXME: use _Py_char2wchar */
         size_t r = mbstowcs(wpath, _path, MAXPATHLEN+1);
         path = wpath;
         if (r == (size_t)-1 || r > MAXPATHLEN) {
@@ -488,6 +556,7 @@
      * absolutize() should help us out below
      */
     else if(0 == _NSGetExecutablePath(execpath, &nsexeclength) && execpath[0] == SEP) {
+        /* FIXME: use _Py_char2wchar */
         size_t r = mbstowcs(progpath, execpath, MAXPATHLEN+1);
         if (r == (size_t)-1 || r > MAXPATHLEN) {
             /* Could not convert execpath, or it's too long. */
@@ -626,6 +695,7 @@
     bufsz = 0;
 
     if (_rtpypath) {
+        /* FIXME: use _Py_char2wchar */
         size_t s = mbstowcs(rtpypath, _rtpypath, sizeof(rtpypath)/sizeof(wchar_t));
         if (s == (size_t)-1 || s >=sizeof(rtpypath))
             /* XXX deal with errors more gracefully */
Index: Modules/zipimport.c
===================================================================
--- Modules/zipimport.c	(révision 79453)
+++ Modules/zipimport.c	(copie de travail)
@@ -60,26 +60,33 @@
 static int
 zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds)
 {
+    PyObject *opath;
     char *path, *p, *prefix, buf[MAXPATHLEN+2];
     size_t len;
 
     if (!_PyArg_NoKeywords("zipimporter()", kwds))
         return -1;
 
-    if (!PyArg_ParseTuple(args, "s:zipimporter", &path))
+    if (!PyArg_ParseTuple(args, "O&:zipimporter", PyUnicode_FSConverter, &opath))
         return -1;
 
-    len = strlen(path);
+    assert(PyBytes_Check(opath));
+
+    path = PyBytes_AS_STRING(opath);
+    len = PyBytes_GET_SIZE(opath);
     if (len == 0) {
         PyErr_SetString(ZipImportError, "archive path is empty");
+        Py_DECREF(opath);
         return -1;
     }
     if (len >= MAXPATHLEN) {
         PyErr_SetString(ZipImportError,
                         "archive path too long");
+        Py_DECREF(opath);
         return -1;
     }
     strcpy(buf, path);
+    Py_DECREF(opath);
 
 #ifdef ALTSEP
     for (p = buf; *p; p++) {
Index: Modules/python.c
===================================================================
--- Modules/python.c	(révision 79453)
+++ Modules/python.c	(copie de travail)
@@ -14,8 +14,8 @@
     return Py_Main(argc, argv);
 }
 #else
-static wchar_t*
-char2wchar(char* arg)
+wchar_t*
+_Py_char2wchar(char* arg)
 {
     wchar_t *res;
 #ifdef HAVE_BROKEN_MBSTOWCS
@@ -143,7 +143,7 @@
     oldloc = strdup(setlocale(LC_ALL, NULL));
     setlocale(LC_ALL, "");
     for (i = 0; i < argc; i++) {
-        argv_copy2[i] = argv_copy[i] = char2wchar(argv[i]);
+        argv_copy2[i] = argv_copy[i] = _Py_char2wchar(argv[i]);
         if (!argv_copy[i])
             return 1;
     }