Index: Modules/zipimport.c =================================================================== --- Modules/zipimport.c (revision 54841) +++ Modules/zipimport.c (working copy) @@ -43,6 +43,16 @@ static PyObject *ZipImportError; static PyObject *zip_directory_cache = NULL; +/* externs */ +#ifdef Py_UNICODE_IMPORT +extern FILE *fopen_utf8(const char *fname, const char *mode); +extern int stat_utf8(const char *fname, struct stat *buf); +extern PyObject *Import_UtoUtf8(PyObject *u); +#else +#define fopen_utf8 fopen +#define stat_utf8 stat +#endif + /* forward decls */ static PyObject *read_directory(char *archive); static PyObject *get_data(char *archive, PyObject *toc_entry); @@ -62,25 +72,39 @@ { char *path, *p, *prefix, buf[MAXPATHLEN+2]; size_t len; + PyObject *pathO, *copy = NULL; if (!_PyArg_NoKeywords("zipimporter()", kwds)) return -1; - +#ifdef Py_UNICODE_IMPORT + if (!PyArg_ParseTuple(args, "O!:zipimporter", &PyBaseString_Type, + &pathO)) + return -1; + copy = Import_UtoUtf8(pathO); + if (copy == NULL) + return -1; + pathO = copy; + path = PyString_AS_STRING(pathO); +#else if (!PyArg_ParseTuple(args, "s:zipimporter", &path)) return -1; +#endif len = strlen(path); if (len == 0) { PyErr_SetString(ZipImportError, "archive path is empty"); + Py_XDECREF(copy); return -1; } if (len >= MAXPATHLEN) { PyErr_SetString(ZipImportError, "archive path too long"); + Py_XDECREF(copy); return -1; } strcpy(buf, path); + Py_XDECREF(copy); #ifdef ALTSEP for (p = buf; *p; p++) { @@ -96,7 +120,7 @@ struct stat statbuf; int rv; - rv = stat(buf, &statbuf); + rv = stat_utf8(buf, &statbuf); if (rv == 0) { /* it exists */ if (S_ISREG(statbuf.st_mode)) @@ -666,7 +690,7 @@ } strcpy(path, archive); - fp = fopen(archive, "rb"); + fp = fopen_utf8(archive, "rb"); if (fp == NULL) { PyErr_Format(ZipImportError, "can't open Zip file: " "'%.200s'", archive); @@ -814,7 +838,7 @@ return NULL; } - fp = fopen(archive, "rb"); + fp = fopen_utf8(archive, "rb"); if (!fp) { PyErr_Format(PyExc_IOError, 
"zipimport: can not open file %s", archive); Index: Objects/moduleobject.c =================================================================== --- Objects/moduleobject.c (revision 54841) +++ Objects/moduleobject.c (working copy) @@ -184,6 +184,7 @@ { char *name; char *filename; + PyObject *filenameO; name = PyModule_GetName((PyObject *)m); if (name == NULL) { @@ -191,11 +192,23 @@ name = "?"; } filename = PyModule_GetFilename((PyObject *)m); - if (filename == NULL) { - PyErr_Clear(); - return PyString_FromFormat("", name); + if (filename != NULL) + return PyString_FromFormat("", name, filename); + PyErr_Clear(); + filenameO = PyObject_GetAttrString((PyObject *)m, "__file__"); + if (filenameO != NULL) { + PyObject *res; + PyObject *r = PyObject_Repr(filenameO); + Py_DECREF(filenameO); + if (r != NULL && PyString_Check(r)) { + res = PyString_FromFormat("", name, PyString_AS_STRING(r)); + Py_DECREF(r); + return res; + } + Py_XDECREF(r); } - return PyString_FromFormat("", name, filename); + PyErr_Clear(); + return PyString_FromFormat("", name); } /* We only need a traverse function, no clear function: If the module Index: PC/import_nt.c =================================================================== --- PC/import_nt.c (revision 54841) +++ PC/import_nt.c (working copy) @@ -20,6 +20,95 @@ char *pathBuf, Py_ssize_t pathLen) { +#ifdef Py_UNICODE_IMPORT + wchar_t *moduleKey; + const wchar_t keyPrefix[] = L"Software\\Python\\PythonCore\\"; + const wchar_t keySuffix[] = L"\\Modules\\"; +#ifdef _DEBUG + /* In debugging builds, we _must_ have the debug version + * registered. + */ + const wchar_t debugString[] = L"\\Debug"; +#else + const wchar_t debugString[] = L""; +#endif + struct filedescr *fdp = NULL; + FILE *fp; + HKEY keyBase = HKEY_CURRENT_USER; + int modNameSize; + long regStat; + wchar_t wpathBuf[MAX_PATH]; + PyObject *umode; + + /* Calculate the size for the sprintf buffer. + * Get the size of the chars only, plus 1 NULL. 
+ */ + size_t bufSize = 1 + _scwprintf( + L"Software\\Python\\PythonCore\\%S\\Modules\\%S%s", + PyWin_DLLVersionString, moduleName, debugString); + + /* alloca == no free required, but memory only local to fn, + * also no heap fragmentation! + */ + moduleKey = alloca(bufSize * sizeof(wchar_t)); + _snwprintf(moduleKey, bufSize, + L"Software\\Python\\PythonCore\\%S\\Modules\\%S%s", + PyWin_DLLVersionString, moduleName, debugString); + + modNameSize = (int) sizeof(wpathBuf); + regStat = RegQueryValueW(keyBase, moduleKey, wpathBuf, &modNameSize); + if (regStat != ERROR_SUCCESS) { + /* No user setting - lookup in machine settings */ + keyBase = HKEY_LOCAL_MACHINE; + /* be anal - failure may have reset size param */ + modNameSize = (int)sizeof(wpathBuf); + regStat = RegQueryValueW(keyBase, moduleKey, + wpathBuf, &modNameSize); + + if (regStat != ERROR_SUCCESS) + return NULL; + } + modNameSize /= sizeof(wchar_t); + + /* use the file extension to locate the type entry. */ + for (fdp = _PyImport_Filetab; fdp->suffix != NULL; fdp++) { + PyObject *usuffix; + const wchar_t *suffix; + size_t extLen; + usuffix = PyUnicode_DecodeASCII(fdp->suffix, strlen(fdp->suffix), NULL); + assert(usuffix != NULL); + suffix = PyUnicode_AS_UNICODE(usuffix); + extLen = wcslen(suffix); + assert(modNameSize >= 0); /* else cast to size_t is wrong */ + if ((size_t)modNameSize > extLen && + wcsnicmp(wpathBuf + ((size_t)modNameSize-extLen-1), + suffix, + extLen) == 0) { + Py_DECREF(usuffix); + break; + } + Py_DECREF(usuffix); + } + if (fdp->suffix == NULL) + return NULL; + umode = PyUnicode_DecodeASCII(fdp->mode, strlen(fdp->mode), NULL); + assert(umode != NULL); + fp = _wfopen(wpathBuf, PyUnicode_AS_UNICODE(umode)); + Py_DECREF(umode); + if (fp != NULL) { + PyObject *upath; + *ppFileDesc = fdp; + /* now return the path as utf 8 */ + upath = PyUnicode_EncodeUTF8(wpathBuf, wcslen(wpathBuf), NULL); + assert(upath != NULL); + strncpy(pathBuf, PyString_AS_STRING(upath), pathLen); + pathBuf[pathLen-1] = 0; 
+ Py_DECREF(upath); + } + return fp; + +#else /* Py_UNICODE_IMPORT */ + char *moduleKey; const char keyPrefix[] = "Software\\Python\\PythonCore\\"; const char keySuffix[] = "\\Modules\\"; @@ -83,4 +172,5 @@ if (fp != NULL) *ppFileDesc = fdp; return fp; +#endif /* Py_UNICODE_IMPORT */ } Index: PC/pyconfig.h =================================================================== --- PC/pyconfig.h (revision 54841) +++ PC/pyconfig.h (working copy) @@ -493,7 +493,10 @@ /* Define to indicate that the Python Unicode representation can be passed as-is to Win32 Wide API. */ #define Py_WIN_WIDE_FILENAMES +#ifdef Py_USING_UNICODE +#define Py_UNICODE_IMPORT #endif +#endif /* Use Python's own small-block memory-allocator. */ #define WITH_PYMALLOC 1 Index: Python/dynload_win.c =================================================================== --- Python/dynload_win.c (revision 54841) +++ Python/dynload_win.c (working copy) @@ -176,13 +176,30 @@ { HINSTANCE hDLL = NULL; - char pathbuf[260]; - LPTSTR dummy; /* We use LoadLibraryEx so Windows looks for dependent DLLs in directory of pathname first. However, Windows95 can sometimes not work correctly unless the absolute path is used. 
If GetFullPathName() fails, the LoadLibrary will certainly fail too, so use its error code */ +#ifdef Py_UNICODE_IMPORT + wchar_t pathbuf[260]; + wchar_t *dummy; + PyObject *upath = PyUnicode_DecodeUTF8(pathname, strlen(pathname), NULL); + if (upath == NULL) + return NULL; + + if (GetFullPathNameW(PyUnicode_AS_UNICODE(upath), + sizeof(pathbuf) / sizeof(*pathbuf), + pathbuf, + &dummy)) + /* XXX This call doesn't exist in Windows CE */ + hDLL = LoadLibraryExW(PyUnicode_AS_UNICODE(upath), + NULL, + LOAD_WITH_ALTERED_SEARCH_PATH); + Py_DECREF(upath); +#else /* Py_UNICODE_IMPORT */ + char pathbuf[260]; + char *dummy; if (GetFullPathName(pathname, sizeof(pathbuf), pathbuf, @@ -190,6 +207,7 @@ /* XXX This call doesn't exist in Windows CE */ hDLL = LoadLibraryEx(pathname, NULL, LOAD_WITH_ALTERED_SEARCH_PATH); +#endif /* Py_UNICODE_IMPORT */ if (hDLL==NULL){ char errBuf[256]; unsigned int errorCode; Index: Python/import.c =================================================================== --- Python/import.c (revision 54841) +++ Python/import.c (working copy) @@ -232,7 +232,99 @@ _PyImport_Filetab = NULL; } +#ifdef Py_UNICODE_IMPORT +int stat_utf8(const char *path, struct stat *buffer) +{ /* stat() replacement for a UTF-8 encoded path: decodes path to unicode and calls _wstat(). Returns -1 with errno set to EINVAL when the decode fails, otherwise the result of _wstat(). */ + int r; + PyObject *upath = PyUnicode_DecodeUTF8(path, strlen(path), NULL); + if (upath == NULL) { + errno = EINVAL; + return -1; + } + assert( sizeof(struct stat) == sizeof(struct _stat64i32) ); /* the pointer cast on the next line relies on both structs having the same size */ + r = _wstat(PyUnicode_AS_UNICODE(upath), (struct _stat64i32*)buffer); + Py_DECREF(upath); + return r; +} +FILE *fopen_utf8(const char *fname, const char *mode) +{ /* fopen() replacement for a UTF-8 encoded file name: decodes fname and mode to unicode and calls _wfopen(). Returns NULL with errno set to EINVAL when either decode fails. NOTE(review): a failed decode presumably leaves a Python exception pending that callers expecting plain fopen() semantics will not clear -- confirm. */ + FILE *r; + PyObject *ufname, *umode = NULL; + ufname = PyUnicode_DecodeUTF8(fname, strlen(fname), NULL); + umode = PyUnicode_DecodeUTF8(mode, strlen(mode), NULL); + if (ufname == NULL || umode == NULL) { + errno = EINVAL; + Py_XDECREF(ufname); + Py_XDECREF(umode); + return NULL; + } + r = _wfopen(PyUnicode_AS_UNICODE(ufname), PyUnicode_AS_UNICODE(umode)); + Py_DECREF(ufname); + Py_DECREF(umode); + return r; +} + +static int 
open_utf8(const char *fname, int oflag, int pmode) +{ /* open() replacement for a UTF-8 encoded file name: decodes fname to unicode and calls _wopen(). Returns -1 with errno set to EINVAL when the decode fails. */ + int r; + PyObject *ufname = PyUnicode_DecodeUTF8(fname, strlen(fname), NULL); + if (ufname == NULL) { + errno = EINVAL; + return -1; + } + r = _wopen(PyUnicode_AS_UNICODE(ufname), oflag, pmode); + Py_DECREF(ufname); + return r; +} + + +PyObject *Import_Utf8toS(const char *utf8) +{ + /* Decode the UTF-8 string utf8 and re-encode it with Py_FileSystemDefaultEncoding; the result is used for the __file__ member. + * Returns the intermediate unicode object instead when that encoding fails, or NULL when utf8 cannot be decoded. + */ + PyObject *s; + PyObject *u = PyUnicode_DecodeUTF8(utf8, strlen(utf8), NULL); + if (!u) + return u; + s = PyUnicode_Encode(PyUnicode_AS_UNICODE(u), PyUnicode_GET_SIZE(u), + Py_FileSystemDefaultEncoding, NULL); + if (!s) { + PyErr_Clear(); + return u; /* ok, must use unicode */ + } + Py_DECREF(u); + return s; +} + + +PyObject *Import_UtoUtf8(PyObject *u) +{ + /* Return the path u as a UTF-8 encoded string object. A byte string is first decoded with Py_FileSystemDefaultEncoding. Returns NULL with an exception set when u is not a string type or a conversion fails. */ + PyObject *copy = NULL; + PyObject *result; + if (!PyObject_TypeCheck(u, &PyBaseString_Type)) + return PyErr_SetString(PyExc_TypeError, "expected string or unicode"), 0; + if (PyString_Check(u)) { + /* convert to unicode first using file system encoding */ + copy = PyUnicode_Decode(PyString_AS_STRING(u), PyString_GET_SIZE(u), + Py_FileSystemDefaultEncoding, NULL); + u = copy; /* NULL when the decode failed */ + } + result = (u != NULL) ? PyUnicode_AsUTF8String(u) : NULL; /* never pass NULL on; the decode error stays set for the caller */ + Py_XDECREF(copy); + return result; +} + + +#else +#define stat_utf8 stat +#define fopen_utf8 fopen +#define open_utf8 open +#define Import_Utf8toS PyString_FromString +#endif + /* Locking primitives to prevent parallel imports of the same module in different threads to return with a partially loaded module. These calls are serialized by the global interpreter lock. 
*/ @@ -654,7 +746,7 @@ /* Remember the filename as the __file__ attribute */ v = NULL; if (pathname != NULL) { - v = PyString_FromString(pathname); + v = Import_Utf8toS(pathname); if (v == NULL) PyErr_Clear(); } @@ -728,7 +820,7 @@ long magic; long pyc_mtime; - fp = fopen(cpathname, "rb"); + fp = fopen_utf8(cpathname, "rb"); if (fp == NULL) return NULL; magic = PyMarshal_ReadLongFromFile(fp); @@ -835,7 +927,7 @@ */ int fd; (void) unlink(filename); - fd = open(filename, O_EXCL|O_CREAT|O_WRONLY|O_TRUNC + fd = open_utf8(filename, O_EXCL|O_CREAT|O_WRONLY|O_TRUNC #ifdef O_BINARY |O_BINARY /* necessary for Windows */ #endif @@ -984,7 +1076,7 @@ PySys_WriteStderr("import %s # directory %s\n", name, pathname); d = PyModule_GetDict(m); - file = PyString_FromString(pathname); + file = Import_Utf8toS(pathname); if (file == NULL) goto error; path = Py_BuildValue("[O]", file); @@ -1182,15 +1274,29 @@ Py_DECREF(meta_path); } +#ifdef Py_UNICODE_IMPORT + if (path != NULL && PyObject_TypeCheck(path, &PyBaseString_Type)) { +#else if (path != NULL && PyString_Check(path)) { +#endif + PyObject *spath = NULL; +#ifdef Py_UNICODE_IMPORT + spath = Import_UtoUtf8(path); + if (spath == NULL) + return NULL; + path = spath; +#endif + /* The only type of submodule allowed inside a "frozen" package are other frozen modules or packages. 
*/ if (PyString_Size(path) + 1 + strlen(name) >= (size_t)buflen) { PyErr_SetString(PyExc_ImportError, "full frozen module name too long"); + Py_XDECREF(spath); return NULL; } strcpy(buf, PyString_AsString(path)); + Py_XDECREF(spath); strcat(buf, "."); strcat(buf, name); strcpy(name, buf); @@ -1246,9 +1352,18 @@ namelen = strlen(name); for (i = 0; i < npath; i++) { PyObject *copy = NULL; - PyObject *v = PyList_GetItem(path, i); + PyObject *vOrg, *v = PyList_GetItem(path, i); if (!v) return NULL; + vOrg = v; +#ifdef Py_UNICODE_IMPORT + if (!PyObject_TypeCheck(v, &PyBaseString_Type)) + continue; + copy = Import_UtoUtf8(v); + if (!copy) + return NULL; + v = copy; +#else #ifdef Py_USING_UNICODE if (PyUnicode_Check(v)) { copy = PyUnicode_Encode(PyUnicode_AS_UNICODE(v), @@ -1261,6 +1376,7 @@ #endif if (!PyString_Check(v)) continue; +#endif len = PyString_GET_SIZE(v); if (len + 2 + namelen + MAXSUFFIXSIZE >= buflen) { Py_XDECREF(copy); @@ -1277,7 +1393,7 @@ PyObject *importer; importer = get_path_importer(path_importer_cache, - path_hooks, v); + path_hooks, vOrg); if (importer == NULL) { Py_XDECREF(copy); return NULL; @@ -1314,7 +1430,7 @@ /* Check for package import (buf holds a directory name, and there's an __init__ module in that directory */ #ifdef HAVE_STAT - if (stat(buf, &statbuf) == 0 && /* it exists */ + if (stat_utf8(buf, &statbuf) == 0 && /* it exists */ S_ISDIR(statbuf.st_mode) && /* it's a directory */ case_ok(buf, len, namelen, name)) { /* case matches */ if (find_init_module(buf)) { /* and has __init__.py */ @@ -1395,7 +1511,7 @@ filemode = fdp->mode; if (filemode[0] == 'U') filemode = "r" PY_STDIOTEXTMODE; - fp = fopen(buf, filemode); + fp = fopen_utf8(buf, filemode); if (fp != NULL) { if (case_ok(buf, len, namelen, name)) break; @@ -1504,21 +1620,51 @@ /* MS_WINDOWS */ #if defined(MS_WINDOWS) +#ifdef Py_UNICODE_IMPORT + WIN32_FIND_DATAW data; +#else WIN32_FIND_DATA data; +#endif + PyObject *uname = NULL; + PyObject *ubuf = NULL; + int result; HANDLE h; if 
(Py_GETENV("PYTHONCASEOK") != NULL) return 1; +#ifdef Py_UNICODE_IMPORT + ubuf = PyUnicode_DecodeUTF8(buf, strlen(buf), NULL); + if (ubuf == NULL) + return 0; + /* module names are still ASCII only */ + uname = PyUnicode_DecodeASCII(name, strlen(name), NULL); + if (uname == NULL) { + Py_DECREF(ubuf); + return 0; + } + h = FindFirstFileW(PyUnicode_AS_UNICODE(ubuf), &data); + Py_DECREF(ubuf); + ubuf = 0; + +#else h = FindFirstFile(buf, &data); +#endif if (h == INVALID_HANDLE_VALUE) { PyErr_Format(PyExc_NameError, "Can't find file for module %.100s\n(filename %.300s)", name, buf); + Py_XDECREF(uname); return 0; } FindClose(h); - return strncmp(data.cFileName, name, namelen) == 0; +#ifdef Py_UNICODE_IMPORT + result = wcsncmp(data.cFileName, PyUnicode_AS_UNICODE(uname), namelen) == 0; +#else + result = strncmp(data.cFileName, name, namelen) == 0; +#endif + Py_XDECREF(uname); + return result; /* DJGPP */ #elif defined(DJGPP) @@ -1654,7 +1800,7 @@ buf[i++] = SEP; pname = buf + i; strcpy(pname, "__init__.py"); - if (stat(buf, &statbuf) == 0) { + if (stat_utf8(buf, &statbuf) == 0) { if (case_ok(buf, save_len + 9, /* len("/__init__") */ 8, /* len("__init__") */ @@ -1665,7 +1811,7 @@ } i += strlen(pname); strcpy(buf+i, Py_OptimizeFlag ? 
"o" : "c"); - if (stat(buf, &statbuf) == 0) { + if (stat_utf8(buf, &statbuf) == 0) { if (case_ok(buf, save_len + 9, /* len("/__init__") */ 8, /* len("__init__") */ Index: Python/importdl.c =================================================================== --- Python/importdl.c (revision 54841) +++ Python/importdl.c (working copy) @@ -18,10 +18,17 @@ +#ifdef Py_UNICODE_IMPORT +extern PyObject *Import_Utf8toS(const char *utf8); +#else /* without unicode import support the path is copied into a plain string object */ +#define Import_Utf8toS PyString_FromString +#endif + PyObject * _PyImport_LoadDynamicModule(char *name, char *pathname, FILE *fp) { PyObject *m; + PyObject *path; char *lastdot, *shortname, *packagecontext, *oldcontext; dl_funcptr p; @@ -62,7 +69,10 @@ return NULL; } /* Remember the filename as the __file__ attribute */ - if (PyModule_AddStringConstant(m, "__file__", pathname) < 0) - PyErr_Clear(); /* Not important enough to report */ + path = Import_Utf8toS(pathname); + if (path == NULL || PyModule_AddObject(m, "__file__", path) < 0) { + Py_XDECREF(path); /* PyModule_AddObject only steals the reference on success */ + PyErr_Clear(); /* Not important enough to report */ + } if (_PyImport_FixupExtension(name, pathname) == NULL)