Index: Python/errors.c =================================================================== --- Python/errors.c (révision 80231) +++ Python/errors.c (copie de travail) @@ -446,7 +446,7 @@ PyObject * PyErr_SetFromErrnoWithFilename(PyObject *exc, const char *filename) { - PyObject *name = filename ? PyUnicode_FromString(filename) : NULL; + PyObject *name = filename ? PyUnicode_DecodeFSDefault(filename) : NULL; PyObject *result = PyErr_SetFromErrnoWithFilenameObject(exc, name); Py_XDECREF(name); return result; @@ -798,7 +798,7 @@ Py_DECREF(tmp); } if (filename != NULL) { - tmp = PyUnicode_FromString(filename); + tmp = PyUnicode_DecodeFSDefault(filename); if (tmp == NULL) PyErr_Clear(); else { Index: Python/ceval.c =================================================================== --- Python/ceval.c (révision 80231) +++ Python/ceval.c (copie de travail) @@ -1175,7 +1175,9 @@ lltrace = PyDict_GetItemString(f->f_globals, "__lltrace__") != NULL; #endif #if defined(Py_DEBUG) || defined(LLTRACE) - filename = _PyUnicode_AsString(co->co_filename); + /*filename = _PyUnicode_AsString(co->co_filename); + if (filename == NULL) + PyErr_Clear();*/ #endif why = WHY_NOT; Index: Python/traceback.c =================================================================== --- Python/traceback.c (révision 80231) +++ Python/traceback.c (copie de travail) @@ -138,7 +138,7 @@ { int i; int fd = -1; - PyObject *v; + PyObject *v, *v2; Py_ssize_t _npath; int npath; size_t taillen; @@ -169,10 +169,17 @@ } if (!PyUnicode_Check(v)) continue; - path = _PyUnicode_AsStringAndSize(v, &len); + + v2 = PyUnicode_AsEncodedString(v, + Py_FileSystemDefaultEncoding, "surrogateescape"); + if (!v2) + continue; + path = PyBytes_AS_STRING(v2); + len = PyBytes_GET_SIZE(v2); if (len + 1 + (Py_ssize_t)taillen >= (Py_ssize_t)namelen - 1) continue; /* Too long */ strcpy(namebuf, path); + Py_DECREF(v2); if (strlen(namebuf) != len) continue; /* v contains '\0' */ if (len > 0 && namebuf[len-1] != SEP) @@ -291,13 +298,20 @@ { int err = 0; char linebuf[2000]; + PyObject *line; if (filename == NULL || name == NULL) return -1; /* This is needed by Emacs' compile command */ -#define FMT " File \"%.500s\", line %d, in %.500s\n" +#define FMT " File \"%s\", line %d, in %.500s\n" PyOS_snprintf(linebuf, sizeof(linebuf), FMT, filename, lineno, name); - err = PyFile_WriteString(linebuf, f); + line = PyUnicode_DecodeASCII(linebuf, strlen(linebuf), "replace"); + if (line != NULL) { + err = PyFile_WriteObject(line, f, Py_PRINT_RAW); + Py_DECREF(line); + } else { + err = 1; + } if (err != 0) return err; return _Py_DisplaySourceLine(f, filename, lineno, 4); @@ -306,6 +320,7 @@ static int tb_printinternal(PyTracebackObject *tb, PyObject *f, long limit) { + PyObject *filename; int err = 0; long depth = 0; PyTracebackObject *tb1 = tb; @@ -315,11 +330,17 @@ } while (tb != NULL && err == 0) { if (depth <= limit) { - err = tb_displayline(f, - _PyUnicode_AsString( - tb->tb_frame->f_code->co_filename), - tb->tb_lineno, - _PyUnicode_AsString(tb->tb_frame->f_code->co_name)); + filename = PyUnicode_AsEncodedString(tb->tb_frame->f_code->co_filename, + Py_FileSystemDefaultEncoding, "surrogateescape"); + if (filename != NULL) { + err = tb_displayline(f, + PyBytes_AS_STRING(filename), + tb->tb_lineno, + _PyUnicode_AsString(tb->tb_frame->f_code->co_name)); + Py_DECREF(filename); + } else { + err = 1; + } } depth--; tb = tb->tb_next; Index: Python/ast.c =================================================================== --- Python/ast.c (révision 80231) +++ Python/ast.c (copie de travail) @@ -101,7 +101,7 @@ static void ast_error_finish(const char *filename) { - PyObject *type, *value, *tback, *errstr, *loc, *tmp; + PyObject *type, *value, *tback, *errstr, *loc, *tmp, *filename_obj; long lineno; assert(PyErr_Occurred()); @@ -125,8 +125,14 @@ Py_INCREF(Py_None); loc = Py_None; } - tmp = Py_BuildValue("(zlOO)", filename, lineno, Py_None, loc); + filename_obj = PyUnicode_DecodeFSDefault(filename); + if (filename_obj == NULL) { + Py_DECREF(errstr); + return; + } + tmp = Py_BuildValue("(OlOO)", filename_obj, lineno, Py_None, loc); Py_DECREF(loc); + Py_DECREF(filename_obj); if (!tmp) { Py_DECREF(errstr); return; Index: Python/pythonrun.c =================================================================== --- Python/pythonrun.c (révision 80231) +++ Python/pythonrun.c (copie de travail) @@ -1345,7 +1345,11 @@ if (PyLong_Check(value)) exitcode = (int)PyLong_AsLong(value); else { - PyObject_Print(value, stderr, Py_PRINT_RAW); + PyObject *sys_stderr = PySys_GetObject("stderr"); + if (sys_stderr == Py_None || sys_stderr == NULL) + PyObject_Print(value, stderr, Py_PRINT_RAW); + else + PyFile_WriteObject(value, sys_stderr, Py_PRINT_RAW); PySys_WriteStderr("\n"); exitcode = 1; } @@ -1895,8 +1899,8 @@ static void err_input(perrdetail *err) { - PyObject *v, *w, *errtype, *errtext; - PyObject* u = NULL; + PyObject *v, *w, *errtype, *errtext, *filename; + PyObject *msg_obj = NULL; char *msg = NULL; errtype = PyExc_SyntaxError; switch (err->error) { @@ -1952,14 +1956,9 @@ case E_DECODE: { PyObject *type, *value, *tb; PyErr_Fetch(&type, &value, &tb); - if (value != NULL) { - u = PyObject_Str(value); - if (u != NULL) { - msg = _PyUnicode_AsString(u); - } - } - if (msg == NULL) - msg = "unknown decode error"; + msg = "unknown decode error"; + if (value != NULL) + msg_obj = PyObject_Str(value); Py_XDECREF(type); Py_XDECREF(value); Py_XDECREF(tb); @@ -1986,16 +1985,24 @@ errtext = PyUnicode_DecodeUTF8(err->text, strlen(err->text), "replace"); } - v = Py_BuildValue("(ziiN)", err->filename, + filename = PyUnicode_DecodeFSDefault(err->filename); + if (filename == NULL) + goto cleanup; + v = Py_BuildValue("(OiiN)", filename, err->lineno, err->offset, errtext); - w = NULL; - if (v != NULL) - w = Py_BuildValue("(sO)", msg, v); - Py_XDECREF(u); + Py_DECREF(filename); + if (v != NULL) { + if (msg_obj) + w = Py_BuildValue("(OO)", msg_obj, v); + else + w = Py_BuildValue("(sO)", msg, v); + } else + w = NULL; Py_XDECREF(v); PyErr_SetObject(errtype, w); Py_XDECREF(w); cleanup: + Py_XDECREF(msg_obj); if (err->text != NULL) { PyObject_FREE(err->text); err->text = NULL; Index: Python/import.c =================================================================== --- Python/import.c (révision 80231) +++ Python/import.c (copie de travail) @@ -568,6 +568,7 @@ _PyImport_FixupExtension(PyObject *mod, char *name, char *filename) { PyObject *modules, *dict; + PyObject *filename_obj; struct PyModuleDef *def; if (extensions == NULL) { extensions = PyDict_New(); @@ -605,7 +606,11 @@ if (def->m_base.m_copy == NULL) return -1; } - PyDict_SetItemString(extensions, filename, (PyObject*)def); + filename_obj = PyUnicode_DecodeFSDefault(filename); + if (filename_obj == NULL) + return -1; + PyDict_SetItem(extensions, filename_obj, (PyObject*)def); + Py_DECREF(filename_obj); return 0; } @@ -613,10 +618,15 @@ _PyImport_FindExtension(char *name, char *filename) { PyObject *mod, *mdict; + PyObject *filename_obj; PyModuleDef* def; if (extensions == NULL) return NULL; - def = (PyModuleDef*)PyDict_GetItemString(extensions, filename); + filename_obj = PyUnicode_DecodeFSDefault(filename); + if (filename_obj == NULL) + return NULL; + def = (PyModuleDef*)PyDict_GetItem(extensions, filename_obj); + Py_DECREF(filename_obj); if (def == NULL) return NULL; if (def->m_size == -1) { @@ -1323,7 +1333,7 @@ len = strlen(file); /* match '*.py?' */ if (len > MAXPATHLEN || PyOS_strnicmp(&file[len-4], ".py", 3) != 0) { - return PyUnicode_DecodeFSDefault(file); + return PyUnicode_DecodeFSDefault(file); } /* Start by trying to turn PEP 3147 path into source path. If that @@ -1634,7 +1644,7 @@ return NULL; if (PyUnicode_Check(v)) { v = PyUnicode_AsEncodedString(v, - Py_FileSystemDefaultEncoding, NULL); + Py_FileSystemDefaultEncoding, "surrogateescape"); if (v == NULL) return NULL; } @@ -1706,8 +1716,8 @@ else { char warnstr[MAXPATHLEN+80]; sprintf(warnstr, "Not importing directory " - "'%.*s': missing __init__.py", - MAXPATHLEN, buf); + "'%.1000s': missing __init__.py", + buf); if (PyErr_WarnEx(PyExc_ImportWarning, warnstr, 1)) { return NULL; @@ -3321,23 +3331,30 @@ { char *name; char *pathname; + PyObject *pathname_obj; PyObject *fob = NULL; PyObject *m; FILE *fp = NULL; - if (!PyArg_ParseTuple(args, "ses|O:load_dynamic", + if (!PyArg_ParseTuple(args, "sO&|O:load_dynamic", &name, - Py_FileSystemDefaultEncoding, &pathname, + PyUnicode_FSConverter, &pathname_obj, &fob)) return NULL; + + if (PyBytes_Check(pathname_obj)) + pathname = PyBytes_AS_STRING(pathname_obj); + else + pathname = PyByteArray_AsString(pathname_obj); + if (fob) { fp = get_file(pathname, fob, "r"); if (fp == NULL) { - PyMem_Free(pathname); + Py_DECREF(pathname_obj); return NULL; } } m = _PyImport_LoadDynamicModule(name, pathname, fp); - PyMem_Free(pathname); + Py_DECREF(pathname_obj); if (fp) fclose(fp); return m; @@ -3375,15 +3392,16 @@ char *name; PyObject *fob; char *pathname; + PyObject *pathname_obj = NULL; PyObject * ret; char *suffix; /* Unused */ char *mode; int type; FILE *fp; - if (!PyArg_ParseTuple(args, "sOes(ssi):load_module", + if (!PyArg_ParseTuple(args, "sOO&(ssi):load_module", &name, &fob, - Py_FileSystemDefaultEncoding, &pathname, + PyUnicode_FSConverter, &pathname_obj, &suffix, &mode, &type)) return NULL; if (*mode) { @@ -3394,7 +3412,7 @@ if (!(*mode == 'r' || *mode == 'U') || strchr(mode, '+')) { PyErr_Format(PyExc_ValueError, "invalid file open mode %.200s", mode); - PyMem_Free(pathname); + Py_DECREF(pathname_obj); return NULL; } } @@ -3403,12 +3421,16 @@ else { fp = get_file(NULL, fob, mode); if (fp == NULL) { - PyMem_Free(pathname); + Py_DECREF(pathname_obj); return NULL; } } + if (PyBytes_Check(pathname_obj)) + pathname = PyBytes_AS_STRING(pathname_obj); + else + pathname = PyByteArray_AsString(pathname_obj); ret = load_module(name, fp, pathname, type, NULL); - PyMem_Free(pathname); + Py_DECREF(pathname_obj); if (fp) fclose(fp); return ret; @@ -3454,27 +3476,34 @@ static char *kwlist[] = {"path", "debug_override", NULL}; char buf[MAXPATHLEN+1]; + PyObject *pathname_obj; char *pathname, *cpathname; PyObject *debug_override = Py_None; int debug = !Py_OptimizeFlag; if (!PyArg_ParseTupleAndKeywords( - args, kws, "es|O", kwlist, - Py_FileSystemDefaultEncoding, &pathname, &debug_override)) + args, kws, "O&|O", kwlist, + PyUnicode_FSConverter, &pathname_obj, &debug_override)) return NULL; if (debug_override != Py_None) - if ((debug = PyObject_IsTrue(debug_override)) < 0) + if ((debug = PyObject_IsTrue(debug_override)) < 0) { + Py_DECREF(pathname_obj); return NULL; + } + if (PyBytes_Check(pathname_obj)) + pathname = PyBytes_AS_STRING(pathname_obj); + else + pathname = PyByteArray_AsString(pathname_obj); cpathname = make_compiled_pathname(pathname, buf, MAXPATHLEN+1, debug); - PyMem_Free(pathname); + Py_DECREF(pathname_obj); if (cpathname == NULL) { PyErr_Format(PyExc_SystemError, "path buffer too short"); return NULL; } - return PyUnicode_FromString(buf); + return PyUnicode_DecodeFSDefault(buf); } PyDoc_STRVAR(doc_cache_from_source, @@ -3620,19 +3649,26 @@ static int NullImporter_init(NullImporter *self, PyObject *args, PyObject *kwds) { + PyObject *opath; char *path; Py_ssize_t pathlen; if (!_PyArg_NoKeywords("NullImporter()", kwds)) return -1; - if (!PyArg_ParseTuple(args, "es:NullImporter", - Py_FileSystemDefaultEncoding, &path)) + if (!PyArg_ParseTuple(args, "O&:NullImporter", + PyUnicode_FSConverter, &opath)) return -1; - pathlen = strlen(path); + if (PyBytes_Check(opath)) { + path = PyBytes_AS_STRING(opath); + pathlen = PyBytes_GET_SIZE(opath); + } else { + path = PyByteArray_AsString(opath); + pathlen = PyByteArray_GET_SIZE(opath); + } if (pathlen == 0) { - PyMem_Free(path); + Py_DECREF(opath); PyErr_SetString(PyExc_ImportError, "empty pathname"); return -1; } else { @@ -3641,7 +3677,7 @@ int rv; rv = stat(path, &statbuf); - PyMem_Free(path); + Py_DECREF(opath); if (rv == 0) { /* it exists */ if (S_ISDIR(statbuf.st_mode)) { @@ -3658,7 +3694,7 @@ * "e:\\shared\\" and "\\\\whiterab-c2znlh\\shared" as dirs. */ rv = GetFileAttributesA(path); - PyMem_Free(path); + Py_DECREF(opath); if (rv != INVALID_FILE_ATTRIBUTES) { /* it exists */ if (rv & FILE_ATTRIBUTE_DIRECTORY) { Index: Python/compile.c =================================================================== --- Python/compile.c (révision 80231) +++ Python/compile.c (copie de travail) @@ -3363,14 +3363,24 @@ { PyObject *loc; PyObject *u = NULL, *v = NULL; + PyObject *filename = NULL; loc = PyErr_ProgramText(c->c_filename, c->u->u_lineno); if (!loc) { Py_INCREF(Py_None); loc = Py_None; } - u = Py_BuildValue("(ziOO)", c->c_filename, c->u->u_lineno, + if (c->c_filename) { + filename = PyUnicode_DecodeFSDefault(c->c_filename); + if (filename == NULL) + goto exit; + } else { + Py_INCREF(Py_None); + filename = Py_None; + } + u = Py_BuildValue("(OiOO)", filename, c->u->u_lineno, Py_None, loc); + Py_DECREF(filename); if (!u) goto exit; v = Py_BuildValue("(zO)", errstr, u); Index: Python/bltinmodule.c =================================================================== --- Python/bltinmodule.c (révision 80231) +++ Python/bltinmodule.c (copie de travail) @@ -536,6 +536,7 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds) { char *str; + PyObject *filename_obj = NULL; char *filename; char *startstr; int mode = -1; @@ -547,11 +548,17 @@ static char *kwlist[] = {"source", "filename", "mode", "flags", "dont_inherit", NULL}; int start[] = {Py_file_input, Py_eval_input, Py_single_input}; + PyObject *result; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oss|ii:compile", - kwlist, &cmd, &filename, &startstr, + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&s|ii:compile", + kwlist, &cmd, PyUnicode_FSConverter, &filename_obj, &startstr, &supplied_flags, &dont_inherit)) return NULL; + + if (PyBytes_Check(filename_obj)) + filename = PyBytes_AsString(filename_obj); + else + filename = PyByteArray_AsString(filename_obj); cf.cf_flags = supplied_flags | PyCF_SOURCE_IS_UTF8; @@ -560,7 +567,7 @@ { PyErr_SetString(PyExc_ValueError, "compile(): unrecognised flags"); - return NULL; + goto error; } /* XXX Warn if (supplied_flags & PyCF_MASK_OBSOLETE) != 0? */ @@ -577,14 +584,13 @@ else { PyErr_SetString(PyExc_ValueError, "compile() arg 3 must be 'exec', 'eval' or 'single'"); - return NULL; + goto error; } is_ast = PyAST_Check(cmd); if (is_ast == -1) - return NULL; + goto error; if (is_ast) { - PyObject *result; if (supplied_flags & PyCF_ONLY_AST) { Py_INCREF(cmd); result = cmd; @@ -597,20 +603,27 @@ mod = PyAST_obj2mod(cmd, arena, mode); if (mod == NULL) { PyArena_Free(arena); - return NULL; + goto error; } result = (PyObject*)PyAST_Compile(mod, filename, &cf, arena); PyArena_Free(arena); } - return result; + goto finally; } str = source_as_string(cmd, "compile", "string, bytes, AST or code", &cf); if (str == NULL) - return NULL; + goto error; - return Py_CompileStringFlags(str, filename, start[mode], &cf); + result = Py_CompileStringFlags(str, filename, start[mode], &cf); + goto finally; + +error: + result = NULL; +finally: + Py_DECREF(filename_obj); + return result; } PyDoc_STRVAR(compile_doc, Index: Python/_warnings.c =================================================================== --- Python/_warnings.c (révision 80231) +++ Python/_warnings.c (copie de travail) @@ -246,6 +246,7 @@ PyObject *f_stderr; PyObject *name; char lineno_str[128]; + PyObject *filename_obj; PyOS_snprintf(lineno_str, sizeof(lineno_str), ":%d: ", lineno); @@ -281,10 +282,18 @@ PyFile_WriteString(source_line_str, f_stderr); PyFile_WriteString("\n", f_stderr); } - else - if (_Py_DisplaySourceLine(f_stderr, _PyUnicode_AsString(filename), - lineno, 2) < 0) - return; + else { + int err; + filename_obj = PyUnicode_AsEncodedString(filename, + Py_FileSystemDefaultEncoding, "surrogateescape"); + if (filename_obj == NULL) + return; + err = _Py_DisplaySourceLine(f_stderr, PyBytes_AsString(filename_obj), + lineno, 2); + Py_DECREF(filename_obj); + if (err < 0) + return; + } PyErr_Clear(); } @@ -498,6 +507,9 @@ /* Setup filename. */ *filename = PyDict_GetItemString(globals, "__file__"); if (*filename != NULL) { + + /* FIXME: Don't use _PyUnicode_AsString */ +#if 0 Py_ssize_t len = PyUnicode_GetSize(*filename); const char *file_str = _PyUnicode_AsString(*filename); if (file_str == NULL || (len < 0 && PyErr_Occurred())) @@ -517,6 +529,10 @@ } else Py_INCREF(*filename); +#else + Py_INCREF(*filename); +#endif + } else { const char *module_str = _PyUnicode_AsString(*module); @@ -717,7 +733,7 @@ PyErr_WarnEx(PyObject *category, const char *text, Py_ssize_t stack_level) { PyObject *res; - PyObject *message = PyUnicode_FromString(text); + PyObject *message = PyUnicode_DecodeUTF8(text, strlen(text), "replace"); if (message == NULL) return -1; Index: Include/moduleobject.h =================================================================== --- Include/moduleobject.h (révision 80231) +++ Include/moduleobject.h (copie de travail) @@ -15,6 +15,7 @@ PyAPI_FUNC(PyObject *) PyModule_New(const char *); PyAPI_FUNC(PyObject *) PyModule_GetDict(PyObject *); PyAPI_FUNC(const char *) PyModule_GetName(PyObject *); +PyAPI_FUNC(PyObject *) PyModule_GetFilenameObject(PyObject *); PyAPI_FUNC(const char *) PyModule_GetFilename(PyObject *); PyAPI_FUNC(void) _PyModule_Clear(PyObject *); PyAPI_FUNC(struct PyModuleDef*) PyModule_GetDef(PyObject*); Index: Objects/codeobject.c =================================================================== --- Objects/codeobject.c (révision 80231) +++ Objects/codeobject.c (copie de travail) @@ -345,8 +345,13 @@ if (co->co_firstlineno != 0) lineno = co->co_firstlineno; - if (co->co_filename && PyUnicode_Check(co->co_filename)) + if (co->co_filename && PyUnicode_Check(co->co_filename)) { filename = _PyUnicode_AsString(co->co_filename); + if (filename == NULL) { + PyErr_Clear(); + filename = "???"; + } + } return PyUnicode_FromFormat( "", co->co_name, co, filename, lineno); Index: Objects/unicodeobject.c =================================================================== --- Objects/unicodeobject.c (révision 80231) +++ Objects/unicodeobject.c (copie de travail) @@ -1445,6 +1445,14 @@ if (encoding == NULL) encoding = PyUnicode_GetDefaultEncoding(); + if (errors != NULL + && strcmp(encoding, "utf-8") == 0 + && strcmp(errors, "surrogateescape") == 0) + return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode), + PyUnicode_GET_SIZE(unicode), + errors); + + /* Encode via the codec registry */ v = PyCodec_Encode(unicode, encoding, errors); if (v == NULL) @@ -1494,8 +1502,14 @@ strcmp(encoding, Py_FileSystemDefaultEncoding) == 0 && !PyThreadState_GET()->interp->codecs_initialized) return PyUnicode_AsASCIIString(unicode); + } else if (strcmp(errors, "surrogateescape") == 0) { + if (strcmp(encoding, "utf-8") == 0) + return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode), + PyUnicode_GET_SIZE(unicode), + errors); } + /* Encode via the codec registry */ v = PyCodec_Encode(unicode, encoding, errors); if (v == NULL) @@ -1594,19 +1608,19 @@ if (Py_FileSystemDefaultEncoding) { #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) if (strcmp(Py_FileSystemDefaultEncoding, "mbcs") == 0) { - return PyUnicode_DecodeMBCS(s, size, "replace"); + return PyUnicode_DecodeMBCS(s, size, "surrogateescape"); } #elif defined(__APPLE__) if (strcmp(Py_FileSystemDefaultEncoding, "utf-8") == 0) { - return PyUnicode_DecodeUTF8(s, size, "replace"); + return PyUnicode_DecodeUTF8(s, size, "surrogateescape"); } #endif return PyUnicode_Decode(s, size, Py_FileSystemDefaultEncoding, - "replace"); + "surrogateescape"); } else { - return PyUnicode_DecodeUTF8(s, size, "replace"); + return PyUnicode_DecodeUTF8(s, size, "surrogateescape"); } } Index: Objects/fileobject.c =================================================================== --- Objects/fileobject.c (révision 80231) +++ Objects/fileobject.c (copie de travail) @@ -41,10 +41,10 @@ if (stream == NULL) return NULL; if (name != NULL) { - nameobj = PyUnicode_FromString(name); - if (nameobj == NULL) + nameobj = PyUnicode_DecodeFSDefault(name); + if (nameobj == NULL) { PyErr_Clear(); - else { + } else { if (PyObject_SetAttrString(stream, "name", nameobj) < 0) PyErr_Clear(); Py_DECREF(nameobj); Index: Objects/moduleobject.c =================================================================== --- Objects/moduleobject.c (révision 80231) +++ Objects/moduleobject.c (copie de travail) @@ -191,8 +191,8 @@ return _PyUnicode_AsString(nameobj); } -const char * -PyModule_GetFilename(PyObject *m) +PyObject* +PyModule_GetFilenameObject(PyObject *m) { PyObject *d; PyObject *fileobj; @@ -208,6 +208,16 @@ PyErr_SetString(PyExc_SystemError, "module filename missing"); return NULL; } + return fileobj; +} + +const char * +PyModule_GetFilename(PyObject *m) +{ + PyObject *fileobj; + fileobj = PyModule_GetFilenameObject(m); + if (fileobj == NULL) + return NULL; return _PyUnicode_AsString(fileobj); } @@ -327,19 +337,19 @@ module_repr(PyModuleObject *m) { const char *name; - const char *filename; + PyObject *fileobj; name = PyModule_GetName((PyObject *)m); if (name == NULL) { PyErr_Clear(); name = "?"; } - filename = PyModule_GetFilename((PyObject *)m); - if (filename == NULL) { + fileobj = PyModule_GetFilenameObject((PyObject *)m); + if (fileobj == NULL) { PyErr_Clear(); return PyUnicode_FromFormat("", name); } - return PyUnicode_FromFormat("", name, filename); + return PyUnicode_FromFormat("", name, fileobj); } static int Index: Objects/exceptions.c =================================================================== --- Objects/exceptions.c (révision 80231) +++ Objects/exceptions.c (copie de travail) @@ -959,20 +959,26 @@ /* This is called "my_basename" instead of just "basename" to avoid name conflicts with glibc; basename is already prototyped if _GNU_SOURCE is defined, and Python does define that. */ -static char * -my_basename(char *name) +static PyObject* +my_basename(PyObject *name) { - char *cp = name; - char *result = name; + Py_UNICODE *u, *result; - if (name == NULL) - return "???"; - while (*cp != '\0') { - if (*cp == SEP) - result = cp + 1; - ++cp; + u = PyUnicode_AS_UNICODE(name); + result = u; + while (*u != '\0') { + if (*u == SEP) + result = u + 1; + ++u; } - return result; + u = PyUnicode_AS_UNICODE(name); + if (result != u) { + return PyUnicode_FromUnicode(result, + PyUnicode_GET_SIZE(name) - (result - u)); + } else { + Py_INCREF(name); + return name; + } } @@ -980,7 +986,8 @@ SyntaxError_str(PySyntaxErrorObject *self) { int have_lineno = 0; - char *filename = 0; + PyObject *filename = NULL; + PyObject *result; /* Below, we always ignore overflow errors, just printing -1. Still, we cannot allow an OverflowError to be raised, so we need to call PyLong_AsLongAndOverflow. */ @@ -990,7 +997,11 @@ lineno here */ if (self->filename && PyUnicode_Check(self->filename)) { - filename = _PyUnicode_AsString(self->filename); + filename = my_basename(self->filename); + if (filename == NULL) + return NULL; + } else { + filename = NULL; } have_lineno = (self->lineno != NULL) && PyLong_CheckExact(self->lineno); @@ -998,18 +1009,20 @@ return PyObject_Str(self->msg ? self->msg : Py_None); if (filename && have_lineno) - return PyUnicode_FromFormat("%S (%s, line %ld)", + result = PyUnicode_FromFormat("%S (%U, line %ld)", self->msg ? self->msg : Py_None, - my_basename(filename), + filename, PyLong_AsLongAndOverflow(self->lineno, &overflow)); else if (filename) - return PyUnicode_FromFormat("%S (%s)", + result = PyUnicode_FromFormat("%S (%U)", self->msg ? self->msg : Py_None, - my_basename(filename)); + filename); else /* only have_lineno */ - return PyUnicode_FromFormat("%S (line %ld)", + result = PyUnicode_FromFormat("%S (line %ld)", self->msg ? self->msg : Py_None, PyLong_AsLongAndOverflow(self->lineno, &overflow)); + Py_XDECREF(filename); + return result; } static PyMemberDef SyntaxError_members[] = { Index: Parser/tokenizer.c =================================================================== --- Parser/tokenizer.c (révision 80231) +++ Parser/tokenizer.c (copie de travail) @@ -467,10 +467,16 @@ if (io == NULL) goto cleanup; - if (tok->filename) - stream = PyObject_CallMethod(io, "open", "ssis", - tok->filename, "r", -1, enc); - else + if (tok->filename) { + PyObject *filename = PyUnicode_DecodeFSDefault(tok->filename); + if (filename != NULL) { + stream = PyObject_CallMethod(io, "open", "Osis", + filename, "r", -1, enc); + Py_DECREF(filename); + } else { + stream = NULL; + } + } else stream = PyObject_CallMethod(io, "open", "isisOOO", fileno(tok->fp), "r", -1, enc, Py_None, Py_None, Py_False); if (stream == NULL) @@ -580,16 +586,14 @@ } } if (badchar) { - char buf[500]; /* Need to add 1 to the line number, since this line has not been counted, yet. */ - sprintf(buf, + PyErr_Format(PyExc_SyntaxError, "Non-UTF-8 code starting with '\\x%.2x' " "in file %.200s on line %i, " "but no encoding declared; " "see http://python.org/dev/peps/pep-0263/ for details", badchar, tok->filename, tok->lineno + 1); - PyErr_SetString(PyExc_SyntaxError, buf); return error_ret(tok); } #endif Index: Doc/library/tarfile.rst =================================================================== --- Doc/library/tarfile.rst (révision 80231) +++ Doc/library/tarfile.rst (copie de travail) @@ -218,7 +218,7 @@ .. versionadded:: 3.2 Added support for the context manager protocol. -.. class:: TarFile(name=None, mode='r', fileobj=None, format=DEFAULT_FORMAT, tarinfo=TarInfo, dereference=False, ignore_zeros=False, encoding=ENCODING, errors=None, pax_headers=None, debug=0, errorlevel=0) +.. class:: TarFile(name=None, mode='r', fileobj=None, format=DEFAULT_FORMAT, tarinfo=TarInfo, dereference=False, ignore_zeros=False, encoding=ENCODING, errors='surrogateescape', pax_headers=None, debug=0, errorlevel=0) All following arguments are optional and can be accessed as instance attributes as well. @@ -264,8 +264,7 @@ The *encoding* and *errors* arguments define the character encoding to be used for reading or writing the archive and how conversion errors are going - to be handled. The default settings will work for most users. - See section :ref:`tar-unicode` for in-depth information. + to be handled. See section :ref:`tar-unicode` for in-depth information. The *pax_headers* argument is an optional dictionary of strings which will be added as a pax global header if *format* is :const:`PAX_FORMAT`. @@ -449,7 +448,7 @@ a :class:`TarInfo` object. -.. method:: TarInfo.tobuf(format=DEFAULT_FORMAT, encoding=ENCODING, errors='strict') +.. method:: TarInfo.tobuf(format=DEFAULT_FORMAT, encoding=ENCODING, errors='surrogateescape') Create a string buffer from a :class:`TarInfo` object. For information on the arguments see the constructor of the :class:`TarFile` class. @@ -701,11 +700,9 @@ appropriately, this conversion may fail. The *errors* argument defines how characters are treated that cannot be -converted. Possible values are listed in section :ref:`codec-base-classes`. In -read mode the default scheme is ``'replace'``. This avoids unexpected -:exc:`UnicodeError` exceptions and guarantees that an archive can always be -read. In write mode the default value for *errors* is ``'strict'``. This -ensures that name information is not altered unnoticed. +converted. Possible values are listed in section :ref:`codec-base-classes`. The +default scheme is ``'surrogateescape'``: undecodable bytes are stored as +unicode surrogates, use the same scheme to encode the value to bytes. In case of writing :const:`PAX_FORMAT` archives, *encoding* is ignored because non-ASCII metadata is stored using *UTF-8*. Index: Lib/unittest/runner.py =================================================================== --- Lib/unittest/runner.py (révision 80231) +++ Lib/unittest/runner.py (copie de travail) @@ -113,7 +113,9 @@ self.stream.writeln(self.separator1) self.stream.writeln("%s: %s" % (flavour,self.getDescription(test))) self.stream.writeln(self.separator2) - self.stream.writeln("%s" % err) + err_str = str(err) + err_str = err.encode("ascii", "backslashreplace").decode("ascii") + self.stream.writeln(err_str) class TextTestRunner(object): Index: Lib/traceback.py =================================================================== --- Lib/traceback.py (révision 80231) +++ Lib/traceback.py (copie de travail) @@ -11,7 +11,6 @@ def _print(file, str='', terminator='\n'): file.write(str+terminator) - def print_list(extracted_list, file=None): """Print the list of tuples as returned by extract_tb() or extract_stack() as a formatted stack trace to the given file.""" @@ -19,7 +18,7 @@ file = sys.stderr for filename, lineno, name, line in extracted_list: _print(file, - ' File "%s", line %d, in %s' % (filename,lineno,name)) + ' File "%s", line %d, in %s' % (repr(filename),lineno,name)) if line: _print(file, ' %s' % line.strip()) @@ -35,7 +34,7 @@ """ list = [] for filename, lineno, name, line in extracted_list: - item = ' File "%s", line %d, in %s\n' % (filename,lineno,name) + item = ' File "%s", line %d, in %s\n' % (repr(filename),lineno,name) if line: item = item + ' %s\n' % line.strip() list.append(item) @@ -61,9 +60,10 @@ lineno = tb.tb_lineno co = f.f_code filename = co.co_filename + filename = filename.encode("ascii", "backslashreplace").decode("ascii") name = co.co_name _print(file, - ' File "%s", line %d, in %s' % (filename, lineno, name)) + ' File "%s", line %d, in %s' % (repr(filename), lineno, name)) linecache.checkcache(filename) line = linecache.getline(filename, lineno, f.f_globals) if line: _print(file, ' ' + line.strip()) @@ -220,7 +220,7 @@ lines = [] filename = value.filename or "" lineno = str(value.lineno) or '?' - lines.append(' File "%s", line %s\n' % (filename, lineno)) + lines.append(' File "%s", line %s\n' % (repr(filename), lineno)) badline = value.text offset = value.offset if badline is not None: Index: Lib/platform.py =================================================================== --- Lib/platform.py (révision 80231) +++ Lib/platform.py (copie de travail) @@ -111,7 +111,7 @@ __version__ = '1.0.7' -import sys, os, re +import sys, os, re, io, subprocess ### Globals & Constants @@ -944,11 +944,15 @@ return default target = _follow_symlinks(target).replace('"', '\\"') try: - f = os.popen('file "%s" 2> %s' % (target, DEV_NULL)) + proc = subprocess.Popen( + ['file', target], + stdout=subprocess.PIPE, + stderr=open(DEV_NULL, 'rb')) except (AttributeError,os.error): return default - output = f.read().strip() - rc = f.close() + output = io.TextIOWrapper(proc.stdout, errors="surrogateescape").read() + output = output.strip() + rc = proc.wait() if not output or rc: return default else: Index: Lib/tarfile.py =================================================================== --- Lib/tarfile.py (révision 80231) +++ Lib/tarfile.py (copie de travail) @@ -985,7 +985,7 @@ return info - def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"): + def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"): """Return a tar header as a string of 512 byte blocks. """ info = self.get_info() @@ -1497,7 +1497,7 @@ def __init__(self, name=None, mode="r", fileobj=None, format=None, tarinfo=None, dereference=None, ignore_zeros=None, encoding=None, - errors=None, pax_headers=None, debug=None, errorlevel=None): + errors="surrogateescape", pax_headers=None, debug=None, errorlevel=None): """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to read from an existing archive, 'a' to append data to an existing file or 'w' to create a new file overwriting an existing one. `mode' @@ -1538,14 +1538,8 @@ self.ignore_zeros = ignore_zeros if encoding is not None: self.encoding = encoding + self.errors = errors - if errors is not None: - self.errors = errors - elif mode == "r": - self.errors = "replace" - else: - self.errors = "strict" - if pax_headers is not None and self.format == PAX_FORMAT: self.pax_headers = pax_headers else: @@ -1997,7 +1991,7 @@ tarinfo = copy.copy(tarinfo) - buf = tarinfo.tobuf(self.format, self.encoding, self.errors) + buf = tarinfo.tobuf(self.format, self.encoding, "surrogateescape") self.fileobj.write(buf) self.offset += len(buf) Index: Lib/test/regrtest.py =================================================================== --- Lib/test/regrtest.py (révision 80231) +++ Lib/test/regrtest.py (copie de travail) @@ -161,7 +161,6 @@ import platform import sysconfig - # Some times __path__ and __file__ are not absolute (e.g. while running from # Lib/) and, if we change the CWD to run the tests in a temporary dir, some # imports might fail. This affects only the modules imported before os.chdir(). @@ -222,6 +221,8 @@ print("Use --help for usage", file=sys.stderr) sys.exit(2) +def toascii(text): + return text.encode("ascii", "backslashreplace").decode("ascii") def main(tests=None, testdir=None, verbose=0, quiet=False, exclude=False, single=False, randomize=False, fromfile=None, @@ -449,7 +450,7 @@ # Print basic platform information print("==", platform.python_implementation(), *sys.version.split()) print("== ", platform.platform(aliased=True)) - print("== ", os.getcwd()) + print("== ", toascii(os.getcwd())) alltests = findtests(testdir, stdtests, nottests) selected = tests or args or alltests Index: Lib/test/test_tarfile.py =================================================================== --- Lib/test/test_tarfile.py (révision 80231) +++ Lib/test/test_tarfile.py (copie de travail) @@ -1100,8 +1100,8 @@ if self.format != tarfile.PAX_FORMAT: tar = tarfile.open(tmpname, encoding="ascii") t = tar.getmember("foo") - self.assertEqual(t.uname, "\ufffd\ufffd\ufffd") - self.assertEqual(t.gname, "\ufffd\ufffd\ufffd") + self.assertEqual(t.uname, "\udce4\udcf6\udcfc") + self.assertEqual(t.gname, "\udce4\udcf6\udcfc") class GNUUnicodeTest(UstarUnicodeTest): Index: Lib/test/test_xml_etree.py =================================================================== --- Lib/test/test_xml_etree.py (révision 80231) +++ Lib/test/test_xml_etree.py (copie de travail) @@ -13,6 +13,7 @@ import sys import cgi +import unittest from test import support from test.support import findfile @@ -20,6 +21,11 @@ from xml.etree import ElementTree as ET SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata") +try: + SIMPLE_XMLFILE.encode("utf8") +except UnicodeEncodeError: + # ignore all tests if the current working directory is undecodable + raise unittest.SkipTest("filename is not encodable to utf8") SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata") SAMPLE_XML = """\ Index: Lib/test/test_reprlib.py =================================================================== --- Lib/test/test_reprlib.py (révision 80231) +++ Lib/test/test_reprlib.py (copie de travail) @@ -233,7 +233,9 @@ touch(os.path.join(self.subpkgname, self.pkgname + '.py')) from areallylongpackageandmodulenametotestreprtruncation.areallylongpackageandmodulenametotestreprtruncation import areallylongpackageandmodulenametotestreprtruncation eq(repr(areallylongpackageandmodulenametotestreprtruncation), - "" % (areallylongpackageandmodulenametotestreprtruncation.__name__, areallylongpackageandmodulenametotestreprtruncation.__file__)) + "" + % (areallylongpackageandmodulenametotestreprtruncation.__name__, + areallylongpackageandmodulenametotestreprtruncation.__file__)) eq(repr(sys), "") def test_type(self): Index: Lib/test/test_subprocess.py =================================================================== --- Lib/test/test_subprocess.py (révision 80231) +++ Lib/test/test_subprocess.py (copie de travail) @@ -689,7 +689,7 @@ # args is a string fd, fname = mkstemp() # reopen in text mode - with open(fd, "w") as fobj: + with open(fd, "w", encoding="utf8", errors="surrogateescape") as fobj: fobj.write("#!/bin/sh\n") fobj.write("exec '%s' -c 'import sys; sys.exit(47)'\n" % sys.executable) @@ -732,7 +732,7 @@ # call() function with string argument on UNIX fd, fname = mkstemp() # reopen in text mode - with open(fd, "w") as fobj: + with open(fd, "w", encoding="utf8", errors="surrogateescape") as fobj: fobj.write("#!/bin/sh\n") fobj.write("exec '%s' -c 'import sys; sys.exit(47)'\n" % sys.executable) Index: Lib/test/test_import.py =================================================================== --- Lib/test/test_import.py (révision 80231) +++ Lib/test/test_import.py (copie de travail) @@ -287,6 +287,11 @@ def test_import_by_filename(self): path = os.path.abspath(TESTFN) + try: + path.encode("utf8") + except UnicodeEncodeError: + # ignore the test if the current working directory is undecodable + raise unittest.SkipTest("filename is not encodable to utf8") with self.assertRaises(ImportError) as c: __import__(path) self.assertEqual("Import by filename is not supported.", Index: Lib/test/test_urllib.py =================================================================== --- Lib/test/test_urllib.py (révision 80231) +++ Lib/test/test_urllib.py (copie de travail) @@ -221,8 +221,12 @@ except: pass def constructLocalFileUrl(self, filePath): - return "file://%s" % urllib.request.pathname2url( - os.path.abspath(filePath)) + filePath = os.path.abspath(filePath) + try: + filePath.encode("utf8") + except UnicodeEncodeError: + raise unittest.SkipTest("filePath is not encodable to utf8") + return "file://%s" % urllib.request.pathname2url(filePath) def createNewTempFile(self, data=b""): """Creates a new temporary file containing the specified data, Index: Lib/test/test_urllib2.py =================================================================== --- Lib/test/test_urllib2.py (révision 80231) +++ Lib/test/test_urllib2.py (copie de travail) @@ -602,6 +602,10 @@ def sanepathname2url(path): + try: + path.encode("utf8") + except UnicodeEncodeError: + raise unittest.SkipTest("path is not encodable to utf8") urlpath = urllib.request.pathname2url(path) if os.name == "nt" and urlpath.startswith("///"): urlpath = urlpath[2:] Index: Lib/subprocess.py =================================================================== --- Lib/subprocess.py (révision 80231) +++ Lib/subprocess.py (copie de travail) @@ -1204,7 +1204,7 @@ errno = 0 message = '%s:%x:%s' % (exc_type.__name__, errno, exc_value) - os.write(errpipe_write, message.encode()) + os.write(errpipe_write, message.encode(errors="backslashreplace")) except: # We MUST not allow anything odd happening # above to prevent us from exiting below. Index: Modules/_ssl.c =================================================================== --- Modules/_ssl.c (révision 80231) +++ Modules/_ssl.c (copie de travail) @@ -942,13 +942,13 @@ PySSL_test_decode_certificate (PyObject *mod, PyObject *args) { PyObject *retval = NULL; - char *filename = NULL; + PyObject *filename = NULL; X509 *x=NULL; BIO *cert; int verbose = 1; - if (!PyArg_ParseTuple(args, "s|i:test_decode_certificate", - &filename, &verbose)) + if (!PyArg_ParseTuple(args, "O&|i:test_decode_certificate", + PyUnicode_FSConverter, &filename, &verbose)) return NULL; if ((cert=BIO_new(BIO_s_file())) == NULL) { @@ -957,7 +957,7 @@ goto fail0; } - if (BIO_read_filename(cert,filename) <= 0) { + if (BIO_read_filename(cert, PyBytes_AsString(filename)) <= 0) { PyErr_SetString(PySSLErrorObject, "Can't open file"); goto fail0; @@ -973,8 +973,8 @@ retval = _decode_certificate(x, verbose); fail0: - if (cert != NULL) BIO_free(cert); + Py_DECREF(filename); return retval; } Index: Modules/_tkinter.c =================================================================== --- Modules/_tkinter.c (révision 80231) +++ Modules/_tkinter.c (copie de travail) @@ -3147,9 +3147,9 @@ it also helps Tcl find its encodings. */ uexe = PyUnicode_FromWideChar(Py_GetProgramName(), -1); if (uexe) { - cexe = PyUnicode_AsEncodedString(uexe, - Py_FileSystemDefaultEncoding, - NULL); + cexe = PyUnicode_AsEncodedString(uexe, + Py_FileSystemDefaultEncoding, + "surrogateescape"); if (cexe) Tcl_FindExecutable(PyBytes_AsString(cexe)); Py_XDECREF(cexe); Index: Modules/_ctypes/callproc.c =================================================================== --- Modules/_ctypes/callproc.c (révision 80231) +++ Modules/_ctypes/callproc.c (copie de travail) @@ -92,7 +92,7 @@ ctypes maintains thread-local storage that has space for two error numbers: private copies of the system 'errno' value and, on Windows, the system error code accessed by the GetLastError() and SetLastError() api functions. - + Foreign functions created with CDLL(..., use_errno=True), when called, swap the system 'errno' value with the private copy just before the actual function call, and swapped again immediately afterwards. The 'use_errno' @@ -289,7 +289,7 @@ "exception: single step"); break; - case EXCEPTION_ARRAY_BOUNDS_EXCEEDED: + case EXCEPTION_ARRAY_BOUNDS_EXCEEDED: /* The thread attempted to access an array element that is out of bounds, and the underlying hardware supports bounds checking. */ @@ -462,7 +462,7 @@ sprintf(buffer, "", self->tag, self->value.l); break; - + #ifdef HAVE_LONG_LONG case 'q': case 'Q': @@ -703,7 +703,7 @@ return result; } PyErr_Format(PyExc_TypeError, - "Don't know how to convert parameter %d", + "Don't know how to convert parameter %d", Py_SAFE_DOWNCAST(index, Py_ssize_t, int)); return -1; } @@ -773,7 +773,7 @@ "No ffi_type for result"); return -1; } - + cc = FFI_DEFAULT_ABI; #if defined(MS_WIN32) && !defined(MS_WIN64) && !defined(_WIN32_WCE) if ((flags & FUNCFLAG_CDECL) == 0) @@ -1147,7 +1147,7 @@ } for (i = 0; i < argcount; ++i) { atypes[i] = args[i].ffi_type; - if (atypes[i]->type == FFI_TYPE_STRUCT + if (atypes[i]->type == FFI_TYPE_STRUCT #ifdef _WIN64 && atypes[i]->size <= sizeof(void *) #endif Index: Modules/getpath.c =================================================================== --- Modules/getpath.c (révision 80231) +++ Modules/getpath.c (copie de travail) @@ -131,6 +131,53 @@ static wchar_t *module_search_path = NULL; static wchar_t *lib_python = L"lib/python" VERSION; +char* +_Py_wchar2char(const wchar_t* arg) +{ + char* res; + size_t i, argsize; + size_t len, allocated; + int converted; + const wchar_t *in; + char* out; + + argsize = wcslen(arg); + allocated = argsize * MB_CUR_MAX; + if (allocated / MB_CUR_MAX != argsize) + return NULL; + res = PyMem_Malloc(allocated + 1); + if (res == NULL) + return NULL; + + len = allocated; + in = arg; + out = res; + for (i=0; i < argsize; i++) { + if (len < MB_CUR_MAX) + goto error; + if (*in >= 0xd800 && *in <= 0xdfff) { + *out = *in - 0xd800; + out++; + len--; + ++in; + } else { + converted = wctomb(out, *in); + if (converted == (size_t)-1) { + goto error; + } + len -= converted; + out += converted; + ++in; + } + } + *out = '\0'; + return res; + +error: + PyMem_Free(res); + return NULL; +} + /* In principle, this should use HAVE__WSTAT, and _wstat should be detected by autoconf. However, no current POSIX system provides that function, so testing for @@ -139,44 +186,64 @@ static int _wstat(const wchar_t* path, struct stat *buf) { - char fname[PATH_MAX]; - size_t res = wcstombs(fname, path, sizeof(fname)); - if (res == (size_t)-1) { + char *fname; + int res; + fname = _Py_wchar2char(path); + if (fname == NULL) { errno = EINVAL; return -1; } - return stat(fname, buf); + res = stat(fname, buf); + PyMem_Free(fname); + return res; } #endif #ifndef MS_WINDOWS +wchar_t* _Py_char2wchar(char* arg); +#endif + +#ifndef MS_WINDOWS static wchar_t* _wgetcwd(wchar_t *buf, size_t size) { char fname[PATH_MAX]; + wchar_t* wfname; if (getcwd(fname, PATH_MAX) == NULL) return NULL; - if (mbstowcs(buf, fname, size) >= size) { + wfname = _Py_char2wchar(fname); + if (wfname == NULL) + goto error; + if (size < wcslen(wfname)) + goto error; + wcscpy(buf, wfname); + PyMem_Free(wfname); + return buf; + +error: + if (wfname != NULL) + PyMem_Free(wfname); errno = ERANGE; return NULL; - } - return buf; } #endif #ifdef HAVE_READLINK -int +int _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz) { char cbuf[PATH_MAX]; - char cpath[PATH_MAX]; + char *cpath; int res; - size_t r1 = wcstombs(cpath, path, PATH_MAX); - if (r1 == (size_t)-1 || r1 >= PATH_MAX) { + size_t r1; + wchar_t *wbuf; + cpath = _Py_wchar2char(path); + if (cpath == NULL) { errno = EINVAL; return -1; } res = (int)readlink(cpath, cbuf, PATH_MAX); + PyMem_Free(cpath); if (res == -1) return -1; if (res == PATH_MAX) { @@ -184,13 +251,15 @@ return -1; } cbuf[res] = '\0'; /* buf will be null terminated */ - r1 = mbstowcs(buf, cbuf, bufsiz); - if (r1 == -1) { + + wbuf = _Py_char2wchar(cbuf); + wcsncpy(buf, wbuf, bufsiz); + r1 = wcslen(buf); + if (r1 < wcslen(wbuf)) { errno = EINVAL; return -1; } return (int)r1; - } #endif @@ -461,6 +530,7 @@ #endif if (_path) { + /* FIXME: use _Py_char2wchar */ size_t r = mbstowcs(wpath, _path, MAXPATHLEN+1); path = wpath; if (r == (size_t)-1 || r > MAXPATHLEN) { @@ -488,6 +558,7 @@ * absolutize() should help us out below */ else if(0 == _NSGetExecutablePath(execpath, &nsexeclength) && execpath[0] == SEP) { + /* FIXME: use _Py_char2wchar */ size_t r = mbstowcs(progpath, execpath, MAXPATHLEN+1); if (r == (size_t)-1 || r > MAXPATHLEN) { /* Could not convert execpath, or it's too long. */ @@ -626,6 +697,7 @@ bufsz = 0; if (_rtpypath) { + /* FIXME: use _Py_char2wchar */ size_t s = mbstowcs(rtpypath, _rtpypath, sizeof(rtpypath)/sizeof(wchar_t)); if (s == (size_t)-1 || s >=sizeof(rtpypath)) /* XXX deal with errors more gracefully */ Index: Modules/zipimport.c =================================================================== --- Modules/zipimport.c (révision 80231) +++ Modules/zipimport.c (copie de travail) @@ -60,26 +60,37 @@ static int zipimporter_init(ZipImporter *self, PyObject *args, PyObject *kwds) { + PyObject *opath; char *path, *p, *prefix, buf[MAXPATHLEN+2]; size_t len; if (!_PyArg_NoKeywords("zipimporter()", kwds)) return -1; - if (!PyArg_ParseTuple(args, "s:zipimporter", &path)) + if (!PyArg_ParseTuple(args, "O&:zipimporter", PyUnicode_FSConverter, &opath)) return -1; - len = strlen(path); + if (PyBytes_Check(opath)) { + path = PyBytes_AsString(opath); + len = PyBytes_GET_SIZE(opath); + } else { + path = PyByteArray_AsString(opath); + len = PyByteArray_GET_SIZE(opath); + } + if (len == 0) { PyErr_SetString(ZipImportError, "archive path is empty"); + Py_DECREF(opath); return -1; } if (len >= MAXPATHLEN) { PyErr_SetString(ZipImportError, "archive path too long"); + Py_DECREF(opath); return -1; } strcpy(buf, path); + Py_DECREF(opath); #ifdef ALTSEP for (p = buf; *p; p++) { @@ -113,17 +124,26 @@ } if (path != NULL) { PyObject *files; - files = PyDict_GetItemString(zip_directory_cache, path); + PyObject *cache_key; + cache_key = PyUnicode_DecodeFSDefault(path); + if (cache_key == NULL) + return -1; + files = PyDict_GetItem(zip_directory_cache, cache_key); if (files == NULL) { files = read_directory(buf); - if (files == NULL) + if (files == NULL) { + Py_DECREF(cache_key); return -1; - if (PyDict_SetItemString(zip_directory_cache, path, + } + if (PyDict_SetItem(zip_directory_cache, cache_key, files) != 0) + { + Py_DECREF(cache_key); return -1; - } - else + } + } else Py_INCREF(files); + Py_DECREF(cache_key); self->files = files; } else { @@ -142,12 +162,11 @@ prefix[len + 1] = '\0'; } } - - self->archive = PyUnicode_FromString(buf); + self->archive = PyUnicode_DecodeFSDefault(buf); if (self->archive == NULL) return -1; - self->prefix = PyUnicode_FromString(prefix); + self->prefix = PyUnicode_DecodeFSDefault(prefix); if (self->prefix == NULL) return -1; @@ -176,19 +195,12 @@ static PyObject * zipimporter_repr(ZipImporter *self) { - char *archive = "???"; - char *prefix = ""; - - if (self->archive != NULL && PyUnicode_Check(self->archive)) - archive = _PyUnicode_AsString(self->archive); - if (self->prefix != NULL && PyUnicode_Check(self->prefix)) - prefix = _PyUnicode_AsString(self->prefix); - if (prefix != NULL && *prefix) - return PyUnicode_FromFormat("", - archive, SEP, prefix); + if (self->prefix != NULL) + return PyUnicode_FromFormat("", + self->archive, SEP, self->prefix); else - return PyUnicode_FromFormat("", - archive); + return PyUnicode_FromFormat("", + self->archive); } /* return fullname.split(".")[-1] */ @@ -371,7 +383,7 @@ return NULL; Py_DECREF(code); /* Only need the path info */ - return PyUnicode_FromString(modpath); + return PyUnicode_DecodeFSDefault(modpath); } /* Return a bool signifying whether the module is a package or not. */ @@ -401,42 +413,66 @@ zipimporter_get_data(PyObject *obj, PyObject *args) { ZipImporter *self = (ZipImporter *)obj; + PyObject *path_obj = NULL, *path_obj2 = NULL; char *path; #ifdef ALTSEP char *p, buf[MAXPATHLEN + 1]; #endif PyObject *toc_entry; Py_ssize_t len; + PyObject *archive; char *archive_str; + PyObject *result; - if (!PyArg_ParseTuple(args, "s:zipimporter.get_data", &path)) + if (!PyArg_ParseTuple(args, "O&:zipimporter.get_data", PyUnicode_FSConverter, &path_obj)) return NULL; #ifdef ALTSEP - if (strlen(path) >= MAXPATHLEN) { + if (PyBytes_GET_SIZE(path_obj) >= MAXPATHLEN) { PyErr_SetString(ZipImportError, "path too long"); + Py_DECREF(path_obj); return NULL; } - strcpy(buf, path); + strcpy(buf, PyBytes_AsString(path_obj)); for (p = buf; *p; p++) { if (*p == ALTSEP) *p = SEP; } path = buf; +#else + path = PyBytes_AsString(path_obj); #endif - archive_str = _PyUnicode_AsStringAndSize(self->archive, &len); + archive = PyUnicode_AsEncodedString(self->archive, + Py_FileSystemDefaultEncoding, "surrogateescape"); + if (archive == NULL) { + Py_DECREF(path_obj); + return NULL; + } + archive_str = PyBytes_AsString(archive); + len = PyBytes_GET_SIZE(archive); if ((size_t)len < strlen(path) && strncmp(path, archive_str, len) == 0 && path[len] == SEP) { path = path + len + 1; } - toc_entry = PyDict_GetItemString(self->files, path); + path_obj2 = PyUnicode_DecodeFSDefault(path); + if (path_obj2 == NULL) { + Py_DECREF(path_obj); + Py_DECREF(archive); + return NULL; + } + toc_entry = PyDict_GetItem(self->files, path_obj2); + Py_DECREF(path_obj2); if (toc_entry == NULL) { PyErr_SetFromErrnoWithFilename(PyExc_IOError, path); + Py_DECREF(path_obj); + Py_DECREF(archive); return NULL; } - return get_data(archive_str, toc_entry); + result = get_data(archive_str, toc_entry); + Py_DECREF(archive); + return result; } static PyObject * @@ -486,8 +522,13 @@ toc_entry = PyDict_GetItemString(self->files, path); if (toc_entry != NULL) { - PyObject *bytes = get_data(_PyUnicode_AsString(self->archive), toc_entry); + PyObject *archive = PyUnicode_AsEncodedString(self->archive, + Py_FileSystemDefaultEncoding, "surrogateescape"); + if (archive == NULL) + return NULL; + PyObject *bytes = get_data(PyBytes_AsString(archive), toc_entry); PyObject *res = PyUnicode_FromString(PyBytes_AsString(bytes)); + Py_DECREF(archive); Py_XDECREF(bytes); return res; } @@ -679,10 +720,10 @@ long i, l, count; size_t length; char path[MAXPATHLEN + 5]; + PyObject *path_obj; char name[MAXPATHLEN + 5]; char *p, endof_central_dir[22]; long arc_offset; /* offset from beginning of file to start of zip-archive */ - if (strlen(archive) > MAXPATHLEN) { PyErr_SetString(PyExc_OverflowError, "Zip path name is too long"); @@ -690,10 +731,15 @@ } strcpy(path, archive); + path_obj = PyUnicode_DecodeFSDefault(path); + if (path_obj == NULL) + return NULL; + fp = fopen(archive, "rb"); if (fp == NULL) { PyErr_Format(ZipImportError, "can't open Zip file: " - "'%.200s'", archive); + "'%U'", path_obj); + Py_DECREF(path_obj); return NULL; } fseek(fp, -22, SEEK_END); @@ -701,16 +747,19 @@ if (fread(endof_central_dir, 1, 22, fp) != 22) { fclose(fp); PyErr_Format(ZipImportError, "can't read Zip file: " - "'%.200s'", archive); + "'%U'", path_obj); + Py_DECREF(path_obj); return NULL; } if (get_long((unsigned char *)endof_central_dir) != 0x06054B50) { /* Bad: End of Central Dir signature */ fclose(fp); PyErr_Format(ZipImportError, "not a Zip file: " - "'%.200s'", archive); + "'%U'", path_obj); + Py_DECREF(path_obj); return NULL; } + Py_DECREF(path_obj); header_size = get_long((unsigned char *)endof_central_dir + 12); header_offset = get_long((unsigned char *)endof_central_dir + 16); @@ -718,8 +767,9 @@ header_offset += arc_offset; files = PyDict_New(); - if (files == NULL) + if (files == NULL) { goto error; + } length = (long)strlen(path); path[length] = SEP; @@ -761,15 +811,26 @@ header_offset += header_size; strncpy(path + length + 1, name, MAXPATHLEN - length - 1); - - t = Py_BuildValue("siiiiiii", path, compress, data_size, + path_obj = PyUnicode_DecodeFSDefault(path); + if (path_obj == NULL) { + return NULL; + } + t = Py_BuildValue("Oiiiiiii", path_obj, compress, data_size, file_size, file_offset, time, date, crc); - if (t == NULL) + Py_DECREF(path_obj); + if (t == NULL) { goto error; - err = PyDict_SetItemString(files, name, t); + } + path_obj = PyUnicode_DecodeFSDefault(name); + if (path_obj == NULL) { + return NULL; + } + err = PyDict_SetItem(files, path_obj, t); + Py_DECREF(path_obj); Py_DECREF(t); - if (err != 0) + if (err != 0) { goto error; + } count++; } fclose(fp); @@ -828,11 +889,11 @@ int err; Py_ssize_t bytes_read = 0; long l; - char *datapath; + PyObject *datapath = NULL; long compress, data_size, file_size, file_offset, bytes_size; long time, date, crc; - if (!PyArg_ParseTuple(toc_entry, "slllllll", &datapath, &compress, + if (!PyArg_ParseTuple(toc_entry, "Olllllll", &datapath, &compress, &data_size, &file_size, &file_offset, &time, &date, &crc)) { return NULL; @@ -1056,10 +1117,15 @@ { PyObject *toc_entry; time_t mtime = 0; + PyObject *path_obj; Py_ssize_t lastchar = strlen(path) - 1; char savechar = path[lastchar]; path[lastchar] = '\0'; /* strip 'c' or 'o' from *.py[co] */ - toc_entry = PyDict_GetItemString(self->files, path); + path_obj = PyUnicode_DecodeFSDefault(path); + if (path_obj == NULL) + return (time_t)-1; + toc_entry = PyDict_GetItem(self->files, path_obj); + Py_DECREF(path_obj); if (toc_entry != NULL && PyTuple_Check(toc_entry) && PyTuple_Size(toc_entry) == 8) { /* fetch the time stamp of the .py file for comparison @@ -1080,24 +1146,30 @@ time_t mtime, PyObject *toc_entry) { PyObject *data, *code; - char *modpath; - char *archive = _PyUnicode_AsString(self->archive); - + PyObject *modpath; + PyObject *archive; + + archive = PyUnicode_AsEncodedString(self->archive, + Py_FileSystemDefaultEncoding, "surrogateescape"); if (archive == NULL) return NULL; - data = get_data(archive, toc_entry); + data = get_data(PyBytes_AsString(archive), toc_entry); + Py_DECREF(archive); if (data == NULL) return NULL; - modpath = _PyUnicode_AsString(PyTuple_GetItem(toc_entry, 0)); - + modpath = PyUnicode_AsEncodedString(PyTuple_GetItem(toc_entry, 0), + Py_FileSystemDefaultEncoding, "surrogateescape"); + if (modpath == NULL) + return NULL; if (isbytecode) { - code = unmarshal_code(modpath, data, mtime); + code = unmarshal_code(PyBytes_AsString(modpath), data, mtime); } else { - code = compile_source(modpath, data); + code = compile_source(PyBytes_AsString(modpath), data); } + Py_DECREF(modpath); Py_DECREF(data); return code; } @@ -1108,14 +1180,19 @@ get_module_code(ZipImporter *self, char *fullname, int *p_ispackage, char **p_modpath) { - PyObject *toc_entry; + PyObject *toc_entry, *prefix, *path_obj; char *subname, path[MAXPATHLEN + 1]; int len; struct st_zip_searchorder *zso; subname = get_subname(fullname); - len = make_filename(_PyUnicode_AsString(self->prefix), subname, path); + prefix = PyUnicode_AsEncodedString(self->prefix, + Py_FileSystemDefaultEncoding, "surrogateescape"); + if (prefix == NULL) + return NULL; + len = make_filename(PyBytes_AsString(prefix), subname, path); + Py_DECREF(prefix); if (len < 0) return NULL; @@ -1123,34 +1200,53 @@ PyObject *code = NULL; strcpy(path + len, zso->suffix); + path_obj = PyUnicode_DecodeFSDefault(path); + if (path_obj == NULL) + return NULL; if (Py_VerboseFlag > 1) + /* FIXME: _PyUnicode_AsString(self->archive) */ PySys_WriteStderr("# trying %s%c%s\n", _PyUnicode_AsString(self->archive), (int)SEP, path); - toc_entry = PyDict_GetItemString(self->files, path); + toc_entry = PyDict_GetItem(self->files, path_obj); if (toc_entry != NULL) { time_t mtime = 0; int ispackage = zso->type & IS_PACKAGE; int isbytecode = zso->type & IS_BYTECODE; - if (isbytecode) + if (isbytecode) { mtime = get_mtime_of_source(self, path); + if (mtime == (time_t)-1 && PyErr_Occurred()) + return NULL; + } if (p_ispackage != NULL) *p_ispackage = ispackage; code = get_code_from_data(self, ispackage, isbytecode, mtime, toc_entry); + if (code == NULL) + /* FIXME */ + return NULL; if (code == Py_None) { /* bad magic number or non-matching mtime in byte code, try next */ Py_DECREF(code); continue; } + Py_DECREF(path_obj); if (code != NULL && p_modpath != NULL) - *p_modpath = _PyUnicode_AsString( - PyTuple_GetItem(toc_entry, 0)); + { + PyObject *path1, *path2; + path1 = PyTuple_GetItem(toc_entry, 0); + path2 = PyUnicode_AsEncodedString(path1, Py_FileSystemDefaultEncoding, "surrogateescape"); + if (path2 != NULL) { + *p_modpath = strdup(PyBytes_AsString(path2)); + Py_DECREF(path2); + } + } return code; } + Py_DECREF(path_obj); } PyErr_Format(ZipImportError, "can't find module '%.200s'", fullname); return NULL; Index: Modules/pyexpat.c =================================================================== --- Modules/pyexpat.c (révision 80231) +++ Modules/pyexpat.c (copie de travail) @@ -911,12 +911,19 @@ xmlparse_SetBase(xmlparseobject *self, PyObject *args) { char *base; + PyObject *base_obj; - if (!PyArg_ParseTuple(args, "s:SetBase", &base)) + if (!PyArg_ParseTuple(args, "O&:SetBase", PyUnicode_FSConverter, &base_obj)) return NULL; + if (PyBytes_Check(base_obj)) + base = PyBytes_AsString(base_obj); + else + base = PyByteArray_AsString(base_obj); if (!XML_SetBase(self->itself, base)) { - return PyErr_NoMemory(); + Py_DECREF(base_obj); + return PyErr_NoMemory(); } + Py_DECREF(base_obj); Py_INCREF(Py_None); return Py_None; } Index: Modules/posixmodule.c =================================================================== --- Modules/posixmodule.c (révision 80231) +++ Modules/posixmodule.c (copie de travail) @@ -3173,6 +3173,7 @@ for (pos = 0; pos < i; pos++) { char *p, *k, *v; size_t len; + PyObject *val2; key = PyList_GetItem(keys, pos); val = PyList_GetItem(vals, pos); @@ -3182,27 +3183,30 @@ if (!PyArg_Parse( key, "s;execve() arg 3 contains a non-string key", - &k) || - !PyArg_Parse( - val, - "s;execve() arg 3 contains a non-string value", - &v)) + &k)) { goto fail_2; } + val2 = PyUnicode_AsEncodedString(val, + Py_FileSystemDefaultEncoding, "surrogateescape"); + if (val2 == NULL) + goto fail_2; + #if defined(PYOS_OS2) /* Omit Pseudo-Env Vars that Would Confuse Programs if Passed On */ if (stricmp(k, "BEGINLIBPATH") != 0 && stricmp(k, "ENDLIBPATH") != 0) { #endif - len = PyUnicode_GetSize(key) + PyUnicode_GetSize(val) + 2; + len = PyUnicode_GetSize(key) + PyBytes_GET_SIZE(val2) + 2; p = PyMem_NEW(char, len); if (p == NULL) { PyErr_NoMemory(); + Py_DECREF(val2); goto fail_2; } - PyOS_snprintf(p, len, "%s=%s", k, v); + PyOS_snprintf(p, len, "%s=%s", k, PyBytes_AsString(val2)); envlist[envc++] = p; + Py_DECREF(val2); #if defined(PYOS_OS2) } #endif Index: Modules/main.c =================================================================== --- Modules/main.c (révision 80231) +++ Modules/main.c (copie de travail) @@ -102,23 +102,27 @@ "; #ifndef MS_WINDOWS +char* _Py_wchar2char(const wchar_t* arg); + static FILE* _wfopen(const wchar_t *path, const wchar_t *mode) { - char cpath[PATH_MAX]; - char cmode[10]; - size_t r; - r = wcstombs(cpath, path, PATH_MAX); - if (r == (size_t)-1 || r >= PATH_MAX) { + char *cpath, *cmode; + FILE *fp = NULL; + + cpath = _Py_wchar2char(path); + cmode = _Py_wchar2char(mode); + if (cpath == NULL || cmode == NULL) { errno = EINVAL; - return NULL; + goto finally; } - r = wcstombs(cmode, mode, 10); - if (r == (size_t)-1 || r >= 10) { - errno = EINVAL; - return NULL; - } - return fopen(cpath, cmode); + fp = fopen(cpath, cmode); +finally: + if (cpath) + PyMem_Free(cpath); + if (cmode) + PyMem_Free(cmode); + return fp; } #endif @@ -632,15 +636,14 @@ } if (sts==-1) { - PyObject *filenameObj = NULL; - char *p_cfilename = ""; + char *cfilename = ""; + int free_cfilename = 0; if (filename) { - filenameObj = PyUnicode_FromWideChar( - filename, wcslen(filename)); - if (filenameObj != NULL) - p_cfilename = _PyUnicode_AsString(filenameObj); + cfilename = _Py_wchar2char(filename); + if (cfilename != NULL) + free_cfilename = 1; else - p_cfilename = ""; + cfilename = ""; } /* call pending calls like signal handlers (SIGINT) */ if (Py_MakePendingCalls() == -1) { @@ -649,10 +652,11 @@ } else { sts = PyRun_AnyFileExFlags( fp, - p_cfilename, + cfilename, filename != NULL, &cf) != 0; } - Py_XDECREF(filenameObj); + if (free_cfilename) + PyMem_Free(cfilename); } }