From 4040928614f0382efffe88fbc997a744efa862de Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 17 Aug 2010 20:02:46 +0200 Subject: [PATCH] Reencode filenames if filesystem encoding changes * Reencode the filenames of all module and code objects * Store weak references of all code objects --- Include/code.h | 7 ++ Objects/codeobject.c | 17 ++++++ Objects/object.c | 4 + Objects/unicodeobject.c | 8 ++- Python/pythonrun.c | 146 +++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 180 insertions(+), 2 deletions(-) diff --git a/Include/code.h b/Include/code.h index f0f88cd..e003643 100644 --- a/Include/code.h +++ b/Include/code.h @@ -99,6 +99,13 @@ PyAPI_FUNC(int) _PyCode_CheckLineNumber(PyCodeObject* co, PyAPI_FUNC(PyObject*) PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, PyObject *lineno_obj); +/* List of weak references of all code objects. The list is used by + initfsencoding() to reencode code filenames at startup if the filesystem + encoding changes. At initfsencoding() exit, the list is set to NULL and it + is no longer used. 
*/ + +extern PyObject *_Py_code_object_list; + #ifdef __cplusplus } #endif diff --git a/Objects/codeobject.c b/Objects/codeobject.c index da5c09a..470bf56 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -5,6 +5,8 @@ #define NAME_CHARS \ "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz" +PyObject *_Py_code_object_list = NULL; + /* all_name_chars(s): true iff all chars in s are valid NAME_CHARS */ static int @@ -109,8 +111,23 @@ PyCode_New(int argcount, int kwonlyargcount, co->co_lnotab = lnotab; co->co_zombieframe = NULL; co->co_weakreflist = NULL; + + if (_Py_code_object_list != NULL) { + int err; + PyObject *ref = PyWeakref_NewRef((PyObject*)co, NULL); + if (ref == NULL) + goto error; + err = PyList_Append(_Py_code_object_list, ref); + Py_DECREF(ref); + if (err) + goto error; + } } return co; + +error: + Py_DECREF(co); + return NULL; } PyCodeObject * diff --git a/Objects/object.c b/Objects/object.c index ef23ac1..750776a 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1587,6 +1587,10 @@ _Py_ReadyTypes(void) if (PyType_Ready(&PyCode_Type) < 0) Py_FatalError("Can't initialize code type"); + _Py_code_object_list = PyList_New(0); + if (_Py_code_object_list == NULL) + Py_FatalError("Can't initialize code type"); + if (PyType_Ready(&PyFrame_Type) < 0) Py_FatalError("Can't initialize frame type"); diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 9fd342b..ac4b7ec 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1479,9 +1479,11 @@ PyObject *PyUnicode_EncodeFSDefault(PyObject *unicode) Py_FileSystemDefaultEncoding, "surrogateescape"); } else + /* if you change the default encoding, update also + PyUnicode_DecodeFSDefaultAndSize() and reencode_modules_path() */ return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode), - PyUnicode_GET_SIZE(unicode), - "surrogateescape"); + PyUnicode_GET_SIZE(unicode), + "surrogateescape"); } PyObject *PyUnicode_AsEncodedString(PyObject *unicode, @@ -1648,6 
+1650,8 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size) "surrogateescape"); } else { + /* if you change the default encoding, update also + PyUnicode_EncodeFSDefault() and reencode_modules_path() */ return PyUnicode_DecodeUTF8(s, size, "surrogateescape"); } } diff --git a/Python/pythonrun.c b/Python/pythonrun.c index 76a8eef..ec48dc9 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -701,6 +701,145 @@ initmain(void) } } +static PyObject* +reencode_filename(PyObject *file, const char *new_encoding, + const char *errors) +{ + PyObject *file_bytes = NULL; + PyObject *new_file = NULL; + + file_bytes = PyUnicode_EncodeFSDefault(file); /* bytes in the current filesystem encoding */ + if (file_bytes == NULL) + return NULL; + new_file = PyUnicode_Decode( + PyBytes_AsString(file_bytes), + PyBytes_GET_SIZE(file_bytes), + new_encoding, + errors); /* error handler chosen by the caller, not a fixed one */ + Py_DECREF(file_bytes); + return new_file; +} + +static int +reencode_module_path(PyObject *module, PyObject *path, + const char *new_encoding, const char *errors) +{ + PyObject *filename; + PyObject *new_filename; + Py_ssize_t i, size; + + size = PyList_Size(path); + for (i=0; iinterp; + PyObject *modules = interp->modules; + PyObject *values, *iter = NULL; + PyObject *module = NULL, *module_dict = NULL; + PyObject *file, *path, *new_file, *ref; + Py_ssize_t i, len; + int ret = 1; + char *errors; + PyCodeObject *co; + + /* FIXME: Re-encode PySys_GetObject("path_importer_cache") keys? 
*/ + + /* PyUnicode_DecodeFSDefault() and PyUnicode_EncodeFSDefault() do already + use utf-8 if Py_FileSystemDefaultEncoding is NULL */ + if (strcmp(new_encoding, "utf-8") == 0) + return 0; + + if (strcmp(new_encoding, "mbcs") != 0) + errors = "surrogateescape"; + else + errors = NULL; + + values = PyObject_CallMethod(modules, "values", ""); + if (values == NULL) + return 1; + + iter = PyObject_GetIter(values); + Py_DECREF(values); + if (iter == NULL) + return 1; + + for (module = PyIter_Next(iter); module != NULL; module = PyIter_Next(iter)) { + file = PyModule_GetFilenameObject(module); + if (file != NULL) { + new_file = reencode_filename(file, new_encoding, errors); + Py_DECREF(file); + if (new_file == NULL) + goto error; + if (PyObject_SetAttrString(module, "__file__", new_file)) { + Py_DECREF(new_file); + goto error; + } + Py_DECREF(new_file); + } else { + PyErr_Clear(); + } + + module_dict = PyModule_GetDict(module); + if (module_dict == NULL) + goto error; + path = PyDict_GetItemString(module_dict, "__path__"); + if (path != NULL) { + if (reencode_module_path(module, path, new_encoding, errors)) + goto error; + } else { + PyErr_Clear(); + } + + Py_CLEAR(module); + } + + len = Py_SIZE(_Py_code_object_list); + for (i=0; i < len; i++) { + ref = PyList_GET_ITEM(_Py_code_object_list, i); + co = (PyCodeObject *)PyWeakref_GetObject(ref); + if ((PyObject*)co == Py_None) + continue; + if (co == NULL) + goto error; + + new_file = reencode_filename(co->co_filename, new_encoding, errors); + if (new_file == NULL) + goto error; /* iter is still owned here; the error path releases it */ + Py_DECREF(co->co_filename); + co->co_filename = new_file; + } + Py_CLEAR(_Py_code_object_list); + + ret = 0; + goto finally; + +error: + ret = 1; +finally: + Py_XDECREF(iter); + Py_XDECREF(module); + return ret; +} + static void initfsencoding(void) { @@ -716,8 +855,13 @@ initfsencoding(void) stdin and stdout if these are terminals. 
*/ codeset = get_codeset(); if (codeset != NULL) { + if (reencode_modules_path(codeset)) + Py_FatalError( + "Py_Initialize: can't reencode paths"); + Py_FileSystemDefaultEncoding = codeset; Py_HasFileSystemDefaultEncoding = 0; + Py_CLEAR(_Py_code_object_list); return; } @@ -730,6 +874,8 @@ initfsencoding(void) } #endif + Py_CLEAR(_Py_code_object_list); + /* the encoding is mbcs, utf-8 or ascii */ codec = _PyCodec_Lookup(Py_FileSystemDefaultEncoding); if (!codec) { -- 1.6.2.5