Index: Python/ceval.c =================================================================== --- Python/ceval.c (revision 58422) +++ Python/ceval.c (working copy) @@ -767,7 +767,7 @@ lltrace = PyDict_GetItemString(f->f_globals, "__lltrace__") != NULL; #endif #if defined(Py_DEBUG) || defined(LLTRACE) - filename = PyString_AsString(co->co_filename); + filename = PyUnicode_AsString(co->co_filename); #endif why = WHY_NOT; Index: Python/traceback.c =================================================================== --- Python/traceback.c (revision 58422) +++ Python/traceback.c (working copy) @@ -229,10 +229,10 @@ while (tb != NULL && err == 0) { if (depth <= limit) { err = tb_displayline(f, - PyString_AsString( + PyUnicode_AsString( tb->tb_frame->f_code->co_filename), tb->tb_lineno, - PyString_AsString(tb->tb_frame->f_code->co_name)); + PyUnicode_AsString(tb->tb_frame->f_code->co_name)); } depth--; tb = tb->tb_next; Index: Python/pythonrun.c =================================================================== --- Python/pythonrun.c (revision 58422) +++ Python/pythonrun.c (working copy) @@ -867,7 +867,8 @@ return -1; d = PyModule_GetDict(m); if (PyDict_GetItemString(d, "__file__") == NULL) { - PyObject *f = PyString_FromString(filename); + PyObject *f; + f = PyUnicode_DecodeFSDefault(filename); if (f == NULL) return -1; if (PyDict_SetItemString(d, "__file__", f) < 0) { Index: Python/import.c =================================================================== --- Python/import.c (revision 58422) +++ Python/import.c (working copy) @@ -74,10 +74,11 @@ 3040 (added signature annotations) 3050 (print becomes a function) 3060 (PEP 3115 metaclass syntax) - 3070 (PEP 3109 raise changes) + 3070 (PEP 3109 raise changes) + 3080 (PEP 3137 make __file__ and __name__ unicode) . 
*/ -#define MAGIC (3070 | ((long)'\r'<<16) | ((long)'\n'<<24)) +#define MAGIC (3080 | ((long)'\r'<<16) | ((long)'\n'<<24)) /* Magic word as global; note that _PyImport_Init() can change the value of this global to accommodate for alterations of how the @@ -652,7 +653,7 @@ /* Remember the filename as the __file__ attribute */ v = NULL; if (pathname != NULL) { - v = PyString_FromString(pathname); + v = PyUnicode_DecodeFSDefault(pathname); if (v == NULL) PyErr_Clear(); } @@ -983,7 +984,7 @@ PySys_WriteStderr("import %s # directory %s\n", name, pathname); d = PyModule_GetDict(m); - file = PyString_FromString(pathname); + file = PyUnicode_DecodeFSDefault(pathname); if (file == NULL) goto error; path = Py_BuildValue("[O]", file); Index: Python/compile.c =================================================================== --- Python/compile.c (revision 58422) +++ Python/compile.c (working copy) @@ -4001,7 +4001,7 @@ freevars = dict_keys_inorder(c->u->u_freevars, PyTuple_Size(cellvars)); if (!freevars) goto error; - filename = PyString_FromString(c->c_filename); + filename = PyUnicode_DecodeFSDefault(c->c_filename); if (!filename) goto error; Index: Python/importdl.c =================================================================== --- Python/importdl.c (revision 58422) +++ Python/importdl.c (working copy) @@ -62,7 +62,9 @@ return NULL; } /* Remember the filename as the __file__ attribute */ - if (PyModule_AddStringConstant(m, "__file__", pathname) < 0) + PyObject *path; + path = PyUnicode_DecodeFSDefault(pathname); + if (PyModule_AddObject(m, "__file__", path) < 0) PyErr_Clear(); /* Not important enough to report */ if (_PyImport_FixupExtension(name, pathname) == NULL) Index: Include/unicodeobject.h =================================================================== --- Include/unicodeobject.h (revision 58422) +++ Include/unicodeobject.h (working copy) @@ -154,6 +154,7 @@ # define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII # define PyUnicode_DecodeCharmap 
PyUnicodeUCS2_DecodeCharmap # define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1 +# define PyUnicode_DecodeFSDefault PyUnicodeUCS2_DecodeFSDefault # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape # define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32 # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful @@ -245,6 +246,7 @@ # define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII # define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap # define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1 +# define PyUnicode_DecodeFSDefault PyUnicodeUCS4_DecodeFSDefault # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape # define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32 # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful @@ -641,6 +643,20 @@ PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString( PyObject *, const char *); +/* Decode a null-terminated string using Py_FileSystemDefaultEncoding. + + If the encoding names one of the built-in codecs (UTF-8, UTF-16, UTF-32, + Latin-1, ASCII, or MBCS on Windows), that codec is used; otherwise, or if + decoding fails, the string is decoded as UTF-8 with the "replace" handler. + + The function is intended to be used for paths and file names only, + during the bootstrapping process when the codecs are not yet set up. +*/ + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault( + const char *s /* encoded string */ + ); + /* Return a char* holding the UTF-8 encoded value of the Unicode object. 
Index: Objects/codeobject.c =================================================================== --- Objects/codeobject.c (revision 58422) +++ Objects/codeobject.c (working copy) @@ -50,6 +50,7 @@ { PyCodeObject *co; Py_ssize_t i; + /* Check argument types */ if (argcount < 0 || nlocals < 0 || code == NULL || @@ -58,20 +59,16 @@ varnames == NULL || !PyTuple_Check(varnames) || freevars == NULL || !PyTuple_Check(freevars) || cellvars == NULL || !PyTuple_Check(cellvars) || - name == NULL || (!PyString_Check(name) && !PyUnicode_Check(name)) || - filename == NULL || !PyString_Check(filename) || + name == NULL || !PyUnicode_Check(name) || + filename == NULL || !PyUnicode_Check(filename) || lnotab == NULL || !PyString_Check(lnotab) || !PyObject_CheckReadBuffer(code)) { PyErr_BadInternalCall(); return NULL; } - if (PyString_Check(name)) { - name = PyUnicode_FromString(PyString_AS_STRING(name)); - if (name == NULL) - return NULL; - } else { - Py_INCREF(name); - } + Py_INCREF(name); + Py_INCREF(filename); + intern_strings(names); intern_strings(varnames); intern_strings(freevars); @@ -300,7 +297,7 @@ if (co->co_firstlineno != 0) lineno = co->co_firstlineno; - if (co->co_filename && PyString_Check(co->co_filename)) - filename = PyString_AS_STRING(co->co_filename); + if (co->co_filename && PyUnicode_Check(co->co_filename)) + filename = PyUnicode_AsString(co->co_filename); return PyUnicode_FromFormat( "", co->co_name, co, filename, lineno); Index: Objects/unicodeobject.c =================================================================== --- Objects/unicodeobject.c (revision 58422) +++ Objects/unicodeobject.c (working copy) @@ -1231,6 +1231,60 @@ return v; } +PyObject* +PyUnicode_DecodeFSDefault(const char *s) +{ + PyObject *v = NULL; + Py_ssize_t size = (Py_ssize_t)strlen(s); + const char *encoding; + enum { N = 16 }; + char mangled[N]; + char *p = mangled; + + /* During the early bootstrapping process, Py_FileSystemDefaultEncoding + can be undefined. If that is the case, decode using UTF-8. 
*/ + if (Py_FileSystemDefaultEncoding) { + encoding = Py_FileSystemDefaultEncoding; + } + else { + return PyUnicode_DecodeUTF8(s, size, "replace"); + } + + /* Py_FileSystemDefaultEncoding is not guaranteed to be normalized, so + lower-case it and drop every non-alphanumeric character. */ + while (*encoding && (p - mangled) < (N - 1)) { + if (isalnum((unsigned char)*encoding)) { + *p++ = tolower((unsigned char)*encoding); + } + encoding++; + } + *p = '\0'; + + if (strcmp(mangled, "utf8") == 0) + v = PyUnicode_DecodeUTF8(s, size, NULL); + else if (strcmp(mangled, "utf16") == 0) + v = PyUnicode_DecodeUTF16(s, size, NULL, 0); + else if (strcmp(mangled, "utf32") == 0) + v = PyUnicode_DecodeUTF32(s, size, NULL, 0); + else if ((strcmp(mangled, "latin1") == 0) + || (strcmp(mangled, "iso88591") == 0)) + v = PyUnicode_DecodeLatin1(s, size, NULL); + else if (strcmp(mangled, "ascii") == 0) + v = PyUnicode_DecodeASCII(s, size, NULL); +#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) + else if (strcmp(mangled, "mbcs") == 0) + v = PyUnicode_DecodeMBCS(s, size, NULL); +#endif + + if (v == NULL) { + /* Unknown encoding, or a strict decode failed and left an error set. */ + PyErr_Clear(); + v = PyUnicode_DecodeUTF8(s, size, "replace"); + } + + return v; +} + char* PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize) { Index: Objects/moduleobject.c =================================================================== --- Objects/moduleobject.c (revision 58422) +++ Objects/moduleobject.c (working copy) @@ -86,12 +86,12 @@ d = ((PyModuleObject *)m)->md_dict; if (d == NULL || (fileobj = PyDict_GetItemString(d, "__file__")) == NULL || - !PyString_Check(fileobj)) + !PyUnicode_Check(fileobj)) { PyErr_SetString(PyExc_SystemError, "module filename missing"); return NULL; } - return PyString_AsString(fileobj); + return PyUnicode_AsString(fileobj); } void Index: Modules/_ctypes/callbacks.c =================================================================== --- Modules/_ctypes/callbacks.c (revision 58422) +++ Modules/_ctypes/callbacks.c (working copy) @@ -34,9 +34,9 @@ 
PyCodeObject *py_code = 0; PyFrameObject *py_frame = 0; - py_srcfile = PyString_FromString(filename); + py_srcfile = PyUnicode_DecodeFSDefault(filename); if (!py_srcfile) goto bad; - py_funcname = PyString_FromString(funcname); + py_funcname = PyUnicode_FromString(funcname); if (!py_funcname) goto bad; py_globals = PyDict_New(); if (!py_globals) goto bad; Index: Modules/pyexpat.c =================================================================== --- Modules/pyexpat.c (revision 58422) +++ Modules/pyexpat.c (working copy) @@ -232,13 +232,13 @@ code = PyString_FromString(""); if (code == NULL) goto failed; - name = PyString_FromString(func_name); + name = PyUnicode_FromString(func_name); if (name == NULL) goto failed; nulltuple = PyTuple_New(0); if (nulltuple == NULL) goto failed; - filename = PyString_FromString(__FILE__); + filename = PyUnicode_DecodeFSDefault(__FILE__); handler_info[slot].tb_code = PyCode_New(0, /* argcount */ 0, /* kwonlyargcount */