Index: Python/pythonrun.c =================================================================== --- Python/pythonrun.c (revision 58412) +++ Python/pythonrun.c (working copy) @@ -867,7 +867,8 @@ return -1; d = PyModule_GetDict(m); if (PyDict_GetItemString(d, "__file__") == NULL) { - PyObject *f = PyString_FromString(filename); + PyObject *f; + f = PyUnicode_DecodeFSDefault(filename, 0, NULL); if (f == NULL) return -1; if (PyDict_SetItemString(d, "__file__", f) < 0) { Index: Python/import.c =================================================================== --- Python/import.c (revision 58412) +++ Python/import.c (working copy) @@ -652,7 +652,7 @@ /* Remember the filename as the __file__ attribute */ v = NULL; if (pathname != NULL) { - v = PyString_FromString(pathname); + v = PyUnicode_DecodeFSDefault(pathname, 0, NULL); if (v == NULL) PyErr_Clear(); } @@ -983,7 +983,7 @@ PySys_WriteStderr("import %s # directory %s\n", name, pathname); d = PyModule_GetDict(m); - file = PyString_FromString(pathname); + file = PyUnicode_DecodeFSDefault(pathname, 0, NULL); if (file == NULL) goto error; path = Py_BuildValue("[O]", file); Index: Python/compile.c =================================================================== --- Python/compile.c (revision 58412) +++ Python/compile.c (working copy) @@ -4001,7 +4001,7 @@ freevars = dict_keys_inorder(c->u->u_freevars, PyTuple_Size(cellvars)); if (!freevars) goto error; - filename = PyString_FromString(c->c_filename); + filename = PyUnicode_DecodeFSDefault(c->c_filename, 0, NULL); if (!filename) goto error; Index: Python/importdl.c =================================================================== --- Python/importdl.c (revision 58412) +++ Python/importdl.c (working copy) @@ -62,7 +62,9 @@ return NULL; } /* Remember the filename as the __file__ attribute */ - if (PyModule_AddStringConstant(m, "__file__", pathname) < 0) + PyObject *path; + path = PyUnicode_DecodeFSDefault(pathname, 0, NULL); + if (PyModule_AddObject(m, "__file__", path) < 
0) PyErr_Clear(); /* Not important enough to report */ if (_PyImport_FixupExtension(name, pathname) == NULL) Index: Include/unicodeobject.h =================================================================== --- Include/unicodeobject.h (revision 58412) +++ Include/unicodeobject.h (working copy) @@ -154,6 +154,7 @@ # define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII # define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap # define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1 +# define PyUnicode_DecodeFSDefault PyUnicodeUCS2_DecodeFSDefault # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape # define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32 # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful @@ -245,6 +246,7 @@ # define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII # define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap # define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1 +# define PyUnicode_DecodeFSDefault PyUnicodeUCS4_DecodeFSDefault # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape # define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32 # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful @@ -641,6 +643,25 @@ PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString( PyObject *, const char *); +/* Decode a string to a Python unicode object using either + Py_FileSystemDefaultEncoding or UTF-8 if the default encoding isn't given. + + The function is intended to be used for paths and file names only. It + doesn't use the codecs module and PyUnicode_Decode() since it is required + during bootstrapping and before the codecs are set up. For that reason + the default fs encoding should be UTF-8, UTF-16, UTF-32, Latin-1 or MBCS. + + In case the length parameter is 0 the length of string is autodetected + with strlen(string). errors must be set to NULL. 
+*/ + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault( + const char *string, /* encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors /* error handling */ + ); + + /* Return a char* holding the UTF-8 encoded value of the Unicode object. Index: setup.py =================================================================== --- setup.py (revision 58412) +++ setup.py (working copy) @@ -414,7 +414,6 @@ # Python C API test module exts.append( Extension('_testcapi', ['_testcapimodule.c']) ) # profilers (_lsprof is for cProfile.py) - exts.append( Extension('_hotshot', ['_hotshot.c']) ) exts.append( Extension('_lsprof', ['_lsprof.c', 'rotatingtree.c']) ) # static Unicode character database exts.append( Extension('unicodedata', ['unicodedata.c']) ) Index: Objects/codeobject.c =================================================================== --- Objects/codeobject.c (revision 58412) +++ Objects/codeobject.c (working copy) @@ -59,7 +59,7 @@ freevars == NULL || !PyTuple_Check(freevars) || cellvars == NULL || !PyTuple_Check(cellvars) || name == NULL || (!PyString_Check(name) && !PyUnicode_Check(name)) || - filename == NULL || !PyString_Check(filename) || + filename == NULL || (!PyString_Check(filename) && !PyUnicode_Check(filename)) || lnotab == NULL || !PyString_Check(lnotab) || !PyObject_CheckReadBuffer(code)) { PyErr_BadInternalCall(); @@ -72,6 +72,13 @@ } else { Py_INCREF(name); } + if (PyString_Check(filename)) { + filename = PyUnicode_DecodeFSDefault(PyString_AS_STRING(filename), 0, NULL); + if (filename == NULL) + return NULL; + } else { + Py_INCREF(filename); + } intern_strings(names); intern_strings(varnames); intern_strings(freevars); @@ -260,6 +267,8 @@ ourcellvars = PyTuple_New(0); if (ourcellvars == NULL) goto cleanup; + filename = PyUnicode_DecodeFSDefault(PyString_AS_STRING(filename), + 0, NULL); co = (PyObject *)PyCode_New(argcount, kwonlyargcount, nlocals, stacksize, flags, Index: Objects/unicodeobject.c 
=================================================================== --- Objects/unicodeobject.c (revision 58412) +++ Objects/unicodeobject.c (working copy) @@ -1231,6 +1231,70 @@ return v; } +/* Decode a file-system path with Py_FileSystemDefaultEncoding (UTF-8 when + it is unset) by calling the built-in decoders directly. A length of 0 + means "use strlen(string)"; errors must be NULL. If the encoding is not + handled here or decoding fails, fall back to UTF-8 with "replace". */ +PyObject* +PyUnicode_DecodeFSDefault(const char *string, Py_ssize_t length, + const char *errors) +{ + PyObject *v = NULL; + char encoding[32], mangled[32], *encptr, *manptr; + unsigned char tmp; + + if (errors != NULL) + Py_FatalError("non-NULL errors in PyUnicode_DecodeFSDefault"); + if ((length == 0) && *string) + length = (Py_ssize_t)strlen(string); + + strncpy(encoding, + Py_FileSystemDefaultEncoding ? + Py_FileSystemDefaultEncoding : "UTF-8", + 31); + encoding[31] = '\0'; + + encptr = encoding; + manptr = mangled; + /* lower the string and remove non alpha numeric chars like '-' */ + while(*encptr) { + /* <ctype.h> functions need a value representable as unsigned char */ + tmp = (unsigned char)*encptr++; + if (isupper(tmp)) + tmp = tolower(tmp); + if (!isalnum(tmp)) + continue; + *manptr++ = (char)tmp; + } + *manptr = '\0'; + + if (strcmp(mangled, "utf8") == 0) + v = PyUnicode_DecodeUTF8(string, length, NULL); + else if (strcmp(mangled, "utf16") == 0) + v = PyUnicode_DecodeUTF16(string, length, NULL, 0); + else if (strcmp(mangled, "utf32") == 0) + v = PyUnicode_DecodeUTF32(string, length, NULL, 0); + /* NOTE(review): ISO-8859-15 is decoded as Latin-1 although a few code points differ (e.g. 0xA4 is the euro sign) */ + else if ((strcmp(mangled, "latin1") == 0) || + (strcmp(mangled, "iso88591") == 0) || + (strcmp(mangled, "iso885915") == 0)) + v = PyUnicode_DecodeLatin1(string, length, NULL); + else if (strcmp(mangled, "ascii") == 0) + v = PyUnicode_DecodeASCII(string, length, NULL); +#ifdef MS_WIN32 + else if (strcmp(mangled, "mbcs") == 0) + v = PyUnicode_DecodeMBCS(string, length, NULL); +#endif + + if (v == NULL) { + /* Drop any exception left by a failed decoder before retrying. */ + PyErr_Clear(); + v = PyUnicode_DecodeUTF8(string, length, "replace"); + } + + return v; +} + char* PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize) { Index: Objects/moduleobject.c =================================================================== --- Objects/moduleobject.c (revision 58412) +++ Objects/moduleobject.c (working copy) @@ -86,12 +86,12 @@ d = 
((PyModuleObject *)m)->md_dict; if (d == NULL || (fileobj = PyDict_GetItemString(d, "__file__")) == NULL || - !PyString_Check(fileobj)) + !PyUnicode_Check(fileobj)) { PyErr_SetString(PyExc_SystemError, "module filename missing"); return NULL; } - return PyString_AsString(fileobj); + return PyUnicode_AsString(fileobj); } void Index: Modules/pyexpat.c =================================================================== --- Modules/pyexpat.c (revision 58412) +++ Modules/pyexpat.c (working copy) @@ -238,7 +238,7 @@ nulltuple = PyTuple_New(0); if (nulltuple == NULL) goto failed; - filename = PyString_FromString(__FILE__); + filename = PyUnicode_DecodeFSDefault(__FILE__, 0, NULL); handler_info[slot].tb_code = PyCode_New(0, /* argcount */ 0, /* kwonlyargcount */