Index: Python/pythonrun.c =================================================================== --- Python/pythonrun.c (révision 84163) +++ Python/pythonrun.c (copie de travail) @@ -701,6 +701,167 @@ } } +static PyObject* +reencode_filename(PyObject *file, const char *new_encoding, + const char *errors) +{ /* Encode `file` with the current filesystem default encoding, then decode those bytes with `new_encoding` using the `errors` handler; returns the decoded object, or NULL if either step fails. */ + PyObject *file_bytes = NULL; + PyObject *new_file = NULL; + + file_bytes = PyUnicode_EncodeFSDefault(file); + if (file_bytes == NULL) + return NULL; + new_file = PyUnicode_Decode( + PyBytes_AsString(file_bytes), + PyBytes_GET_SIZE(file_bytes), + new_encoding, + errors); /* honor the caller's handler instead of a hard-coded "surrogateescape": the caller selects NULL for mbcs */ + Py_DECREF(file_bytes); + return new_file; +} + +static int +reencode_path_list(PyObject *paths, + const char *new_encoding, const char *errors) +{ + PyObject *filename; + PyObject *new_filename; + Py_ssize_t i, size; + + size = PyList_Size(paths); + for (i=0; iinterp; + PyObject *modules = interp->modules; + PyObject *values, *iter = NULL; + PyObject *module = NULL, *module_dict = NULL; + PyObject *file, *paths, *new_file, *ref; + Py_ssize_t i, len; + int ret = 1; + char *errors; + PyCodeObject *co; + + /* FIXME: Re-encode PySys_GetObject("path_importer_cache") keys? 
*/ + + /* PyUnicode_DecodeFSDefault() and PyUnicode_EncodeFSDefault() do already + use utf-8 if Py_FileSystemDefaultEncoding is NULL */ + if (strcmp(new_encoding, "utf-8") == 0) + return 0; + + if (strcmp(new_encoding, "mbcs") != 0) + errors = "surrogateescape"; + else + errors = NULL; + + values = PyObject_CallMethod(modules, "values", ""); + if (values == NULL) + return 1; + + iter = PyObject_GetIter(values); + Py_DECREF(values); + if (iter == NULL) + return 1; + + /* sys.modules: reencode __file__ and __path__ attributes */ + for (module = PyIter_Next(iter); module != NULL; module = PyIter_Next(iter)) { + file = PyModule_GetFilenameObject(module); + if (file != NULL) { + new_file = reencode_filename(file, new_encoding, errors); + Py_DECREF(file); + if (new_file == NULL) + goto error; + if (PyObject_SetAttrString(module, "__file__", new_file)) { + Py_DECREF(new_file); + goto error; + } + Py_DECREF(new_file); + } else { + PyErr_Clear(); + } + + module_dict = PyModule_GetDict(module); + if (module_dict == NULL) + goto error; + paths = PyDict_GetItemString(module_dict, "__path__"); + if (paths != NULL) { + if (reencode_path_list(paths, new_encoding, errors)) + goto error; + } else { + PyErr_Clear(); + } + + Py_CLEAR(module); + } + + /* sys.path and sys.meta_path */ + paths = PySys_GetObject("path"); + if (paths != NULL) { + if (reencode_path_list(paths, new_encoding, errors)) + goto error; + } + paths = PySys_GetObject("meta_path"); + if (paths != NULL) { + if (reencode_path_list(paths, new_encoding, errors)) + goto error; + } + + /* sys.executable */ + file = PySys_GetObject("executable"); + new_file = reencode_filename(file, new_encoding, errors); + if (new_file == NULL) + return 1; + PySys_SetObject("executable", new_file); + + /* code objects */ + len = Py_SIZE(_Py_code_object_list); + for (i=0; i < len; i++) { + ref = PyList_GET_ITEM(_Py_code_object_list, i); + co = (PyCodeObject *)PyWeakref_GetObject(ref); + if ((PyObject*)co == Py_None) + continue; + if (co 
== NULL) + goto error; + + new_file = reencode_filename(co->co_filename, new_encoding, errors); + if (new_file == NULL) + return 1; + Py_DECREF(co->co_filename); + co->co_filename = new_file; + } + Py_CLEAR(_Py_code_object_list); + + ret = 0; + goto finally; + +error: + ret = 1; +finally: + Py_XDECREF(iter); + Py_XDECREF(module); + return ret; +} + static void initfsencoding(void) { @@ -716,20 +877,27 @@ stdin and stdout if these are terminals. */ codeset = get_codeset(); if (codeset != NULL) { + if (reencode_filenames(codeset)) + Py_FatalError( + "Py_Initialize: can't reencode paths"); + Py_FileSystemDefaultEncoding = codeset; Py_HasFileSystemDefaultEncoding = 0; + Py_CLEAR(_Py_code_object_list); return; } PyErr_Clear(); fprintf(stderr, - "Unable to get the locale encoding: " + "Unable to get the filesystem encoding: " "fallback to utf-8\n"); Py_FileSystemDefaultEncoding = "utf-8"; Py_HasFileSystemDefaultEncoding = 1; } #endif + Py_CLEAR(_Py_code_object_list); + /* the encoding is mbcs, utf-8 or ascii */ codec = _PyCodec_Lookup(Py_FileSystemDefaultEncoding); if (!codec) { Index: Include/code.h =================================================================== --- Include/code.h (révision 84162) +++ Include/code.h (copie de travail) @@ -99,6 +99,13 @@ PyAPI_FUNC(PyObject*) PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, PyObject *lineno_obj); +/* List of weak references of all code objects. The list is used by + initfsencoding() to reencode code filenames at startup if the filesystem + encoding changes. At initfsencoding() exit, the list is set to NULL and it + is no more used. 
*/ + +extern PyObject *_Py_code_object_list; + #ifdef __cplusplus } #endif Index: Objects/codeobject.c =================================================================== --- Objects/codeobject.c (révision 84162) +++ Objects/codeobject.c (copie de travail) @@ -5,6 +5,8 @@ #define NAME_CHARS \ "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz" +PyObject *_Py_code_object_list = NULL; + /* all_name_chars(s): true iff all chars in s are valid NAME_CHARS */ static int @@ -109,8 +111,23 @@ co->co_lnotab = lnotab; co->co_zombieframe = NULL; co->co_weakreflist = NULL; + + if (_Py_code_object_list != NULL) { + int err; + PyObject *ref = PyWeakref_NewRef((PyObject*)co, NULL); + if (ref == NULL) + goto error; + err = PyList_Append(_Py_code_object_list, ref); + Py_DECREF(ref); + if (err) + goto error; + } } return co; + +error: + Py_DECREF(co); + return NULL; } PyCodeObject * Index: Objects/object.c =================================================================== --- Objects/object.c (révision 84162) +++ Objects/object.c (copie de travail) @@ -1587,6 +1587,10 @@ if (PyType_Ready(&PyCode_Type) < 0) Py_FatalError("Can't initialize code type"); + _Py_code_object_list = PyList_New(0); + if (_Py_code_object_list == NULL) + Py_FatalError("Can't initialize code type"); + if (PyType_Ready(&PyFrame_Type) < 0) Py_FatalError("Can't initialize frame type"); Index: Objects/unicodeobject.c =================================================================== --- Objects/unicodeobject.c (révision 84162) +++ Objects/unicodeobject.c (copie de travail) @@ -1479,9 +1479,11 @@ Py_FileSystemDefaultEncoding, "surrogateescape"); } else + /* if you change the default encoding, update also + PyUnicode_DecodeFSDefaultAndSize() and reencode_filenames() */ return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode), - PyUnicode_GET_SIZE(unicode), - "surrogateescape"); + PyUnicode_GET_SIZE(unicode), + "surrogateescape"); } PyObject *PyUnicode_AsEncodedString(PyObject *unicode, @@ -1648,6 
+1650,8 @@ "surrogateescape"); } else { + /* if you change the default encoding, update also + PyUnicode_EncodeFSDefault() and reencode_filenames() */ return PyUnicode_DecodeUTF8(s, size, "surrogateescape"); } }