Index: Python/errors.c =================================================================== --- Python/errors.c (révision 79902) +++ Python/errors.c (copie de travail) @@ -446,7 +446,7 @@ PyObject * PyErr_SetFromErrnoWithFilename(PyObject *exc, const char *filename) { - PyObject *name = filename ? PyUnicode_FromString(filename) : NULL; + PyObject *name = filename ? PyUnicode_DecodeFSDefault(filename) : NULL; PyObject *result = PyErr_SetFromErrnoWithFilenameObject(exc, name); Py_XDECREF(name); return result; Index: Python/ceval.c =================================================================== --- Python/ceval.c (révision 79902) +++ Python/ceval.c (copie de travail) @@ -1176,6 +1176,8 @@ #endif #if defined(Py_DEBUG) || defined(LLTRACE) filename = _PyUnicode_AsString(co->co_filename); + if (filename == NULL) + PyErr_Clear(); #endif why = WHY_NOT; Index: Python/traceback.c =================================================================== --- Python/traceback.c (révision 79902) +++ Python/traceback.c (copie de travail) @@ -138,7 +138,7 @@ { int i; int fd = -1; - PyObject *v; + PyObject *v, *v2; Py_ssize_t _npath; int npath; size_t taillen; @@ -169,10 +169,21 @@ } if (!PyUnicode_Check(v)) continue; - path = _PyUnicode_AsStringAndSize(v, &len); + + v2 = PyUnicode_AsEncodedString(v, + Py_FileSystemDefaultEncoding, "surrogateescape"); /* bytes form of the entry, owned by this loop iteration */ + if (!v2) { + PyErr_Clear(); /* skip this entry without leaving the encode error pending */ + continue; + } + path = PyBytes_AS_STRING(v2); + len = PyBytes_GET_SIZE(v2); - if (len + 1 + (Py_ssize_t)taillen >= (Py_ssize_t)namelen - 1) - continue; /* Too long */ + if (len + 1 + (Py_ssize_t)taillen >= (Py_ssize_t)namelen - 1) { + Py_DECREF(v2); /* release the encoded path before skipping, otherwise it leaks */ + continue; /* Too long */ + } strcpy(namebuf, path); + Py_DECREF(v2); if (strlen(namebuf) != len) continue; /* v contains '\0' */ if (len > 0 && namebuf[len-1] != SEP) @@ -291,13 +302,21 @@ { int err = 0; char linebuf[2000]; + PyObject *line; if (filename == NULL || name == NULL) return -1; /* This is needed by Emacs' compile command */ #define FMT " File \"%.500s\", line %d, in %.500s\n" PyOS_snprintf(linebuf, sizeof(linebuf), FMT, filename, lineno, name); - err = PyFile_WriteString(linebuf, f); 
+ line = PyUnicode_DecodeUTF8(linebuf, strlen(linebuf), "backslashreplace"); + if (line != NULL) { + err = PyFile_WriteObject(line, f, Py_PRINT_RAW); + Py_DECREF(line); + } else { + err = 1; + } if (err != 0) return err; return _Py_DisplaySourceLine(f, filename, lineno, 4); @@ -306,6 +321,7 @@ static int tb_printinternal(PyTracebackObject *tb, PyObject *f, long limit) { + PyObject *filename; int err = 0; long depth = 0; PyTracebackObject *tb1 = tb; @@ -315,11 +331,17 @@ } while (tb != NULL && err == 0) { if (depth <= limit) { - err = tb_displayline(f, - _PyUnicode_AsString( - tb->tb_frame->f_code->co_filename), - tb->tb_lineno, - _PyUnicode_AsString(tb->tb_frame->f_code->co_name)); + filename = PyUnicode_AsEncodedString(tb->tb_frame->f_code->co_filename, + Py_FileSystemDefaultEncoding, "surrogateescape"); /* bytes copy of co_filename for the char* based tb_displayline() */ + if (filename != NULL) { + err = tb_displayline(f, + PyBytes_AS_STRING(filename), + tb->tb_lineno, + _PyUnicode_AsString(tb->tb_frame->f_code->co_name)); + Py_DECREF(filename); + } else { + err = 1; + } } depth--; tb = tb->tb_next; Index: Python/ast.c =================================================================== --- Python/ast.c (révision 79902) +++ Python/ast.c (copie de travail) @@ -101,7 +101,7 @@ static void ast_error_finish(const char *filename) { - PyObject *type, *value, *tback, *errstr, *loc, *tmp; + PyObject *type, *value, *tback, *errstr, *loc, *tmp, *filename_obj; long lineno; assert(PyErr_Occurred()); @@ -125,8 +125,15 @@ Py_INCREF(Py_None); loc = Py_None; } - tmp = Py_BuildValue("(zlOO)", filename, lineno, Py_None, loc); + filename_obj = PyUnicode_DecodeFSDefault(filename); + if (filename_obj == NULL) { + Py_DECREF(loc); /* loc is an owned reference at this point; release it on this early exit too */ + Py_DECREF(errstr); + return; + } + tmp = Py_BuildValue("(OlOO)", filename_obj, lineno, Py_None, loc); Py_DECREF(loc); + Py_DECREF(filename_obj); if (!tmp) { Py_DECREF(errstr); return; Index: Python/import.c =================================================================== --- Python/import.c (révision 79902) +++ Python/import.c (copie de 
travail) @@ -546,6 +546,7 @@ _PyImport_FixupExtension(PyObject *mod, char *name, char *filename) { PyObject *modules, *dict; + PyObject *filename_obj; /* filename decoded to an object, used as the key into extensions */ struct PyModuleDef *def; if (extensions == NULL) { extensions = PyDict_New(); @@ -583,7 +584,11 @@ if (def->m_base.m_copy == NULL) return -1; } - PyDict_SetItemString(extensions, filename, (PyObject*)def); + filename_obj = PyUnicode_DecodeFSDefault(filename); /* decode the char* filename before using it as a dict key */ + if (filename_obj == NULL) + return -1; + PyDict_SetItem(extensions, filename_obj, (PyObject*)def); /* NOTE(review): the result of PyDict_SetItem is not checked here */ + Py_DECREF(filename_obj); return 0; } @@ -591,10 +596,15 @@ _PyImport_FindExtension(char *name, char *filename) { PyObject *mod, *mdict; + PyObject *filename_obj; /* decoded filename, only needed for the dict lookup */ PyModuleDef* def; if (extensions == NULL) return NULL; - def = (PyModuleDef*)PyDict_GetItemString(extensions, filename); + filename_obj = PyUnicode_DecodeFSDefault(filename); + if (filename_obj == NULL) + return NULL; + def = (PyModuleDef*)PyDict_GetItem(extensions, filename_obj); /* look up the module definition by the decoded filename */ + Py_DECREF(filename_obj); if (def == NULL) return NULL; if (def->m_size == -1) { @@ -1015,6 +1025,8 @@ PyCodeObject *co; PyObject *m; + + if (fstat(fileno(fp), &st) != 0) { PyErr_Format(PyExc_RuntimeError, "unable to get file status from '%s'", @@ -1084,7 +1096,8 @@ len = strlen(file); /* match '*.py?' 
*/ if (len > MAXPATHLEN || PyOS_strnicmp(&file[len-4], ".py", 3) != 0) { - return PyUnicode_DecodeFSDefault(file); + u = PyUnicode_DecodeFSDefault(file); + return u; } strncpy(py, file, len-1); @@ -1388,7 +1401,7 @@ return NULL; if (PyUnicode_Check(v)) { v = PyUnicode_AsEncodedString(v, - Py_FileSystemDefaultEncoding, NULL); + Py_FileSystemDefaultEncoding, "surrogateescape"); /* was NULL (default error handling); now encodes with the surrogateescape handler */ if (v == NULL) return NULL; } @@ -3284,19 +3297,23 @@ static int NullImporter_init(NullImporter *self, PyObject *args, PyObject *kwds) { + PyObject *opath; /* converter result that backs the path pointer below */ char *path; Py_ssize_t pathlen; if (!_PyArg_NoKeywords("NullImporter()", kwds)) return -1; - if (!PyArg_ParseTuple(args, "es:NullImporter", - Py_FileSystemDefaultEncoding, &path)) + if (!PyArg_ParseTuple(args, "O&:NullImporter", + PyUnicode_FSConverter, &opath)) return -1; - pathlen = strlen(path); + assert(PyBytes_Check(opath)); /* the code below uses the PyBytes_* macros on opath */ + + path = PyBytes_AS_STRING(opath); /* path is taken from opath, so opath is released only after the last use of path */ + pathlen = PyBytes_GET_SIZE(opath); if (pathlen == 0) { - PyMem_Free(path); + Py_DECREF(opath); PyErr_SetString(PyExc_ImportError, "empty pathname"); return -1; } else { @@ -3305,7 +3322,7 @@ int rv; rv = stat(path, &statbuf); - PyMem_Free(path); + Py_DECREF(opath); /* path is not used after this point in the visible code */ if (rv == 0) { /* it exists */ if (S_ISDIR(statbuf.st_mode)) { @@ -3322,7 +3339,7 @@ * "e:\\shared\\" and "\\\\whiterab-c2znlh\\shared" as dirs. */ rv = GetFileAttributesA(path); - PyMem_Free(path); + Py_DECREF(opath); if (rv != INVALID_FILE_ATTRIBUTES) { /* it exists */ if (rv & FILE_ATTRIBUTE_DIRECTORY) { Index: Python/_warnings.c =================================================================== --- Python/_warnings.c (révision 79902) +++ Python/_warnings.c (copie de travail) @@ -498,6 +498,9 @@ /* Setup filename. 
*/ *filename = PyDict_GetItemString(globals, "__file__"); if (*filename != NULL) { + + /* FIXME: Don't use _PyUnicode_AsString. Until the block under "#if 0" is rewritten without it, that block is compiled out and __file__ is used unchanged (see the #else branch). */ +#if 0 Py_ssize_t len = PyUnicode_GetSize(*filename); const char *file_str = _PyUnicode_AsString(*filename); if (file_str == NULL || (len < 0 && PyErr_Occurred())) @@ -517,6 +520,10 @@ } else Py_INCREF(*filename); +#else + Py_INCREF(*filename); +#endif + } else { const char *module_str = _PyUnicode_AsString(*module); Index: Objects/unicodeobject.c =================================================================== --- Objects/unicodeobject.c (révision 79902) +++ Objects/unicodeobject.c (copie de travail) @@ -1445,6 +1445,14 @@ if (encoding == NULL) encoding = PyUnicode_GetDefaultEncoding(); + if (errors != NULL + && strcmp(encoding, "utf-8") == 0 + && strcmp(errors, "surrogateescape") == 0) + return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode), + PyUnicode_GET_SIZE(unicode), + errors); /* shortcut: utf-8 with surrogateescape is encoded directly, without going through PyCodec_Encode below */ + + /* Encode via the codec registry */ v = PyCodec_Encode(unicode, encoding, errors); if (v == NULL) @@ -1494,8 +1502,14 @@ strcmp(encoding, Py_FileSystemDefaultEncoding) == 0 && !PyThreadState_GET()->interp->codecs_initialized) return PyUnicode_AsASCIIString(unicode); + } else if (strcmp(errors, "surrogateescape") == 0) { + if (strcmp(encoding, "utf-8") == 0) + return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode), + PyUnicode_GET_SIZE(unicode), + errors); /* same shortcut as above: only the exact encoding name "utf-8" matches */ } + /* Encode via the codec registry */ v = PyCodec_Encode(unicode, encoding, errors); if (v == NULL) @@ -1594,19 +1608,19 @@ if (Py_FileSystemDefaultEncoding) { #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) if (strcmp(Py_FileSystemDefaultEncoding, "mbcs") == 0) { - return PyUnicode_DecodeMBCS(s, size, "replace"); + return PyUnicode_DecodeMBCS(s, size, "surrogateescape"); } #elif defined(__APPLE__) if (strcmp(Py_FileSystemDefaultEncoding, "utf-8") == 0) { - return PyUnicode_DecodeUTF8(s, size, "replace"); + return PyUnicode_DecodeUTF8(s, size, "surrogateescape"); } #endif return 
PyUnicode_Decode(s, size, Py_FileSystemDefaultEncoding, - "replace"); + "surrogateescape"); } else { - return PyUnicode_DecodeUTF8(s, size, "replace"); + return PyUnicode_DecodeUTF8(s, size, "surrogateescape"); } } Index: Objects/fileobject.c =================================================================== --- Objects/fileobject.c (révision 79902) +++ Objects/fileobject.c (copie de travail) @@ -41,10 +41,10 @@ if (stream == NULL) return NULL; if (name != NULL) { - nameobj = PyUnicode_FromString(name); - if (nameobj == NULL) + nameobj = PyUnicode_DecodeFSDefault(name); + if (nameobj == NULL) { PyErr_Clear(); - else { + } else { if (PyObject_SetAttrString(stream, "name", nameobj) < 0) PyErr_Clear(); Py_DECREF(nameobj); Index: Parser/tokenizer.c =================================================================== --- Parser/tokenizer.c (révision 79902) +++ Parser/tokenizer.c (copie de travail) @@ -467,10 +467,16 @@ if (io == NULL) goto cleanup; - if (tok->filename) - stream = PyObject_CallMethod(io, "open", "ssis", - tok->filename, "r", -1, enc); - else + if (tok->filename) { + PyObject *filename = PyUnicode_DecodeFSDefault(tok->filename); /* pass io.open() a decoded filename object instead of a char* */ + if (filename != NULL) { + stream = PyObject_CallMethod(io, "open", "Osis", + filename, "r", -1, enc); + Py_DECREF(filename); + } else { + stream = NULL; + } + } else stream = PyObject_CallMethod(io, "open", "isisOOO", fileno(tok->fp), "r", -1, enc, Py_None, Py_None, Py_False); if (stream == NULL) Index: Lib/unittest/runner.py =================================================================== --- Lib/unittest/runner.py (révision 79902) +++ Lib/unittest/runner.py (copie de travail) @@ -113,7 +113,9 @@ self.stream.writeln(self.separator1) self.stream.writeln("%s: %s" % (flavour,self.getDescription(test))) self.stream.writeln(self.separator2) - self.stream.writeln("%s" % err) + err_str = str(err) + err_str = err_str.encode("ascii", "backslashreplace").decode("ascii") # escape non-ASCII characters; operate on the str() result, not on err itself + self.stream.writeln(err_str) class TextTestRunner(object): Index: 
Lib/test/test_subprocess.py =================================================================== --- Lib/test/test_subprocess.py (révision 79902) +++ Lib/test/test_subprocess.py (copie de travail) @@ -689,7 +689,7 @@ # args is a string fd, fname = mkstemp() # reopen in text mode - with open(fd, "w") as fobj: + with open(fd, "w", encoding="utf8", errors="surrogateescape") as fobj: fobj.write("#!/bin/sh\n") fobj.write("exec '%s' -c 'import sys; sys.exit(47)'\n" % sys.executable) @@ -732,7 +732,7 @@ # call() function with string argument on UNIX fd, fname = mkstemp() # reopen in text mode - with open(fd, "w") as fobj: + with open(fd, "w", encoding="utf8", errors="surrogateescape") as fobj: fobj.write("#!/bin/sh\n") fobj.write("exec '%s' -c 'import sys; sys.exit(47)'\n" % sys.executable) Index: Modules/getpath.c =================================================================== --- Modules/getpath.c (révision 79902) +++ Modules/getpath.c (copie de travail) @@ -131,6 +131,57 @@ static wchar_t *module_search_path = NULL; static wchar_t *lib_python = L"lib/python" VERSION; +/* Convert the wide string arg to a char string allocated with PyMem_Malloc(), + converting each character with wctomb(). Code points U+DC80..U+DCFF + (surrogateescape convention, PEP 383) are written as the single bytes 0x80..0xFF. + Returns NULL if the size computation overflows, allocation fails or a character cannot be converted. */ +char* +_Py_wchar2char(const wchar_t* arg) +{ + char* res; + size_t i, argsize; + size_t len, allocated; + int converted; + const wchar_t *in; + char* out; + + argsize = wcslen(arg); + allocated = argsize * MB_CUR_MAX; /* worst case: every character needs MB_CUR_MAX bytes */ + if (allocated / MB_CUR_MAX != argsize) + return NULL; + res = PyMem_Malloc(allocated + 1); + if (res == NULL) + return NULL; + + len = allocated; + in = arg; + out = res; + for (i=0; i < argsize; i++) { + if (len < MB_CUR_MAX) + goto error; + if (*in >= 0xdc80 && *in <= 0xdcff) { /* escaped raw byte: only this sub-range maps back to a byte; other surrogates go through wctomb() */ + *out = (char)(*in - 0xdc00); + out++; + len--; + ++in; + } else { + converted = wctomb(out, *in); + if (converted < 0) { /* wctomb() returns -1 when the character cannot be represented */ + goto error; + } + len -= converted; + out += converted; + ++in; + } + } + *out = '\0'; + return res; + +error: + PyMem_Free(res); + return NULL; +} + /* In principle, this should use HAVE__WSTAT, and _wstat should be detected by autoconf. 
However, no current POSIX system provides that function, so testing for @@ -139,28 +186,45 @@ static int _wstat(const wchar_t* path, struct stat *buf) { - char fname[PATH_MAX]; - size_t res = wcstombs(fname, path, sizeof(fname)); - if (res == (size_t)-1) { + char *fname; + int res; + fname = _Py_wchar2char(path); /* heap copy of path as a char string; freed below */ + if (fname == NULL) { errno = EINVAL; return -1; } - return stat(fname, buf); + res = stat(fname, buf); + PyMem_Free(fname); + return res; } #endif #ifndef MS_WINDOWS +wchar_t* _Py_char2wchar(char* arg); +#endif + +#ifndef MS_WINDOWS static wchar_t* _wgetcwd(wchar_t *buf, size_t size) { char fname[PATH_MAX]; + wchar_t* wfname; if (getcwd(fname, PATH_MAX) == NULL) return NULL; - if (mbstowcs(buf, fname, size) >= size) { + wfname = _Py_char2wchar(fname); + if (wfname == NULL) + goto error; + if (size <= wcslen(wfname)) /* wcscpy() also writes the terminating null, so the length must be strictly less than size */ + goto error; + wcscpy(buf, wfname); + PyMem_Free(wfname); + return buf; + +error: + if (wfname != NULL) + PyMem_Free(wfname); errno = ERANGE; return NULL; - } - return buf; } #endif @@ -169,14 +233,16 @@ _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz) { char cbuf[PATH_MAX]; - char cpath[PATH_MAX]; + char *cpath; int res; - size_t r1 = wcstombs(cpath, path, PATH_MAX); - if (r1 == (size_t)-1 || r1 >= PATH_MAX) { + size_t r1; + cpath = _Py_wchar2char(path); + if (cpath == NULL) { errno = EINVAL; return -1; } res = (int)readlink(cpath, cbuf, PATH_MAX); + PyMem_Free(cpath); if (res == -1) return -1; if (res == PATH_MAX) { @@ -184,6 +250,7 @@ return -1; } cbuf[res] = '\0'; /* buf will be null terminated */ + /* FIXME: use _Py_char2wchar */ r1 = mbstowcs(buf, cbuf, bufsiz); if (r1 == -1) { errno = EINVAL; @@ -461,6 +528,7 @@ #endif if (_path) { + /* FIXME: use _Py_char2wchar */ size_t r = mbstowcs(wpath, _path, MAXPATHLEN+1); path = wpath; if (r == (size_t)-1 || r > MAXPATHLEN) { @@ -488,6 +556,7 @@ * absolutize() should help us out below */ else if(0 == _NSGetExecutablePath(execpath, &nsexeclength) && execpath[0] == SEP) { + /* 
FIXME: use _Py_char2wchar */ size_t r = mbstowcs(progpath, execpath, MAXPATHLEN+1); if (r == (size_t)-1 || r > MAXPATHLEN) { /* Could not convert execpath, or it's too long. */ @@ -626,6 +695,7 @@ bufsz = 0; if (_rtpypath) { + /* FIXME: use _Py_char2wchar */ size_t s = mbstowcs(rtpypath, _rtpypath, sizeof(rtpypath)/sizeof(wchar_t)); if (s == (size_t)-1 || s >=sizeof(rtpypath)) /* XXX deal with errors more gracefully */ Index: Modules/zipimport.c =================================================================== --- Modules/zipimport.c (révision 79902) +++ Modules/zipimport.c (copie de travail) @@ -60,26 +60,33 @@ static int zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds) { + PyObject *opath; /* converter result; released on every exit path below once path is no longer needed */ char *path, *p, *prefix, buf[MAXPATHLEN+2]; size_t len; if (!_PyArg_NoKeywords("zipimporter()", kwds)) return -1; - if (!PyArg_ParseTuple(args, "s:zipimporter", &path)) + if (!PyArg_ParseTuple(args, "O&:zipimporter", PyUnicode_FSConverter, &opath)) return -1; - len = strlen(path); + assert(PyBytes_Check(opath)); /* the PyBytes_* macros below require a bytes object */ + + path = PyBytes_AS_STRING(opath); + len = PyBytes_GET_SIZE(opath); if (len == 0) { PyErr_SetString(ZipImportError, "archive path is empty"); + Py_DECREF(opath); return -1; } if (len >= MAXPATHLEN) { PyErr_SetString(ZipImportError, "archive path too long"); + Py_DECREF(opath); return -1; } strcpy(buf, path); + Py_DECREF(opath); /* buf now holds its own copy of the path, so opath can be released */ #ifdef ALTSEP for (p = buf; *p; p++) { Index: Modules/python.c =================================================================== --- Modules/python.c (révision 79902) +++ Modules/python.c (copie de travail) @@ -14,8 +14,8 @@ return Py_Main(argc, argv); } #else -static wchar_t* -char2wchar(char* arg) +wchar_t* +_Py_char2wchar(char* arg) { wchar_t *res; #ifdef HAVE_BROKEN_MBSTOWCS @@ -143,7 +143,7 @@ oldloc = strdup(setlocale(LC_ALL, NULL)); setlocale(LC_ALL, ""); for (i = 0; i < argc; i++) { - argv_copy2[i] = argv_copy[i] = char2wchar(argv[i]); + argv_copy2[i] = argv_copy[i] = _Py_char2wchar(argv[i]); /* renamed and no longer static, so it has external linkage */ if (!argv_copy[i]) 
return 1; } Index: Modules/posixmodule.c =================================================================== --- Modules/posixmodule.c (révision 79902) +++ Modules/posixmodule.c (copie de travail) @@ -2689,15 +2689,20 @@ if (!PyArg_ParseTuple(args, "u:system", &command)) return NULL; #else + PyObject *command_obj; char *command; - if (!PyArg_ParseTuple(args, "s:system", &command)) + if (!PyArg_ParseTuple(args, "O&:system", PyUnicode_FSConverter, &command_obj)) return NULL; + command = bytes2str(command_obj, 1); /* obtain the char* while the GIL is still held; command_obj is a Python object */ #endif Py_BEGIN_ALLOW_THREADS #ifdef MS_WINDOWS sts = _wsystem(command); #else sts = system(command); #endif Py_END_ALLOW_THREADS +#ifndef MS_WINDOWS + release_bytes(command_obj); /* NOTE(review): presumably drops the reference to command_obj, so it runs only after the GIL is re-acquired */ +#endif return PyLong_FromLong(sts); Index: Modules/_posixsubprocess.c =================================================================== --- Modules/_posixsubprocess.c (révision 79902) +++ Modules/_posixsubprocess.c (copie de travail) @@ -177,6 +177,7 @@ int p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite; int errpipe_read, errpipe_write, close_fds, restore_signals; int call_setsid; + PyObject *cwd_obj, *cwd_obj2 = NULL; const char *cwd; pid_t pid; int need_to_reenable_gc = 0; @@ -184,13 +185,14 @@ Py_ssize_t arg_num; if (!PyArg_ParseTuple( - args, "OOOzOiiiiiiiiiiO:fork_exec", - &process_args, &executable_list, &py_close_fds, &cwd, &env_list, + args, "OOOOOiiiiiiiiiiO:fork_exec", + &process_args, &executable_list, &py_close_fds, + &cwd_obj, &env_list, &p2cread, &p2cwrite, &c2pread, &c2pwrite, &errread, &errwrite, &errpipe_read, &errpipe_write, &restore_signals, &call_setsid, &preexec_fn)) return NULL; - + close_fds = PyObject_IsTrue(py_close_fds); if (close_fds && errpipe_write < 3) { /* precondition */ PyErr_SetString(PyExc_ValueError, "errpipe_write must be >= 3"); @@ -266,6 +268,16 @@ _PyImport_AcquireLock(); } + if (cwd_obj != Py_None) + { + if (PyUnicode_FSConverter(cwd_obj, &cwd_obj2) == 0) + goto cleanup; + assert(PyBytes_Check(cwd_obj2)); + cwd = PyBytes_AsString(cwd_obj2); /* cwd_obj2 must stay alive until cwd is no longer used */ + } else { + cwd 
= NULL; + } + pid = fork(); if (pid == 0) { /* Child process */ @@ -291,6 +303,12 @@ _exit(255); return NULL; /* Dead code to avoid a potential compiler warning. */ } + { + int fork_errno = errno; /* releasing the object may free memory; keep the errno left by fork() for the check below */ + Py_XDECREF(cwd_obj2); /* cwd_obj2 is NULL when cwd was None */ + errno = fork_errno; + } + if (pid == -1) { /* Capture the errno exception before errno can be clobbered. */ PyErr_SetFromErrno(PyExc_OSError);