Unicode-aware import on Windows (Py_UNICODE_IMPORT).

When Py_UNICODE_IMPORT is defined (PC/pyconfig.h defines it whenever
Py_USING_UNICODE is set), path entries are converted to UTF-8 encoded byte
strings for internal use, and file access goes through stat_utf8(),
fopen_utf8() and open_utf8(), which decode the UTF-8 path and call _wstat(),
_wfopen() and _wopen().  Without the macro the three names are plain aliases
for stat(), fopen() and open().  __file__ is set to a unicode object decoded
from the UTF-8 path.

Index: Modules/zipimport.c
===================================================================
--- Modules/zipimport.c	(revision 51747)
+++ Modules/zipimport.c	(working copy)
@@ -43,6 +43,15 @@
 static PyObject *ZipImportError;
 static PyObject *zip_directory_cache = NULL;
 
+/* externs */
+#ifdef Py_UNICODE_IMPORT
+extern FILE *fopen_utf8(const char *fname, const char *mode);
+extern int stat_utf8(const char *fname, struct stat *buf);
+#else
+#define fopen_utf8 fopen
+#define stat_utf8 stat
+#endif
+
 /* forward decls */
 static PyObject *read_directory(char *archive);
 static PyObject *get_data(char *archive, PyObject *toc_entry);
@@ -62,25 +71,52 @@
 {
 	char *path, *p, *prefix, buf[MAXPATHLEN+2];
 	size_t len;
+	PyObject *pathO, *copy = NULL;
 
 	if (!_PyArg_NoKeywords("zipimporter()", kwds))
 		return -1;
 
+#ifdef Py_UNICODE_IMPORT
+	if (!PyArg_ParseTuple(args, "O!:zipimporter", &PyBaseString_Type,
+			      &pathO))
+		return -1;
+	if (PyString_Check(pathO)) {
+		/* decode using default encoding */
+		copy = PyUnicode_FromObject(pathO);
+		if (copy == NULL)
+			return -1;
+		pathO = copy;
+	}
+	/* turn unicode into utf8 */
+	assert(PyUnicode_Check(pathO));
+	{
+		PyObject *copy2 = PyUnicode_AsUTF8String(pathO);
+		Py_XDECREF(copy);
+		copy = copy2;
+	}
+	if (copy == NULL)
+		return -1;
+	path = PyString_AS_STRING(copy);
+#else
 	if (!PyArg_ParseTuple(args, "s:zipimporter",
 			      &path))
 		return -1;
+#endif
 
 	len = strlen(path);
 	if (len == 0) {
 		PyErr_SetString(ZipImportError, "archive path is empty");
+		Py_XDECREF(copy);
 		return -1;
 	}
 	if (len >= MAXPATHLEN) {
 		PyErr_SetString(ZipImportError,
 				"archive path too long");
+		Py_XDECREF(copy);
 		return -1;
 	}
 	strcpy(buf, path);
+	Py_XDECREF(copy);
 
 #ifdef ALTSEP
 	for (p = buf; *p; p++) {
@@ -96,7 +132,7 @@
 		struct stat statbuf;
 		int rv;
 
-		rv = stat(buf, &statbuf);
+		rv = stat_utf8(buf, &statbuf);
 		if (rv == 0) {
 			/* it exists */
 			if (S_ISREG(statbuf.st_mode))
@@ -130,7 +166,7 @@
 			if (files == NULL)
 				return -1;
 			if (PyDict_SetItemString(zip_directory_cache, path,
-						 files) != 0)
+						files) != 0)
 				return -1;
 		}
 		else
@@ -157,7 +193,7 @@
 	self->archive = PyString_FromString(buf);
 	if (self->archive == NULL)
 		return -1;
-	
+
 	self->prefix = PyString_FromString(prefix);
 	if (self->prefix == NULL)
 		return -1;
@@ -666,7 +702,7 @@
 	}
 	strcpy(path, archive);
 
-	fp = fopen(archive, "rb");
+	fp = fopen_utf8(archive, "rb");
 	if (fp == NULL) {
 		PyErr_Format(ZipImportError, "can't open Zip file: "
 			     "'%.200s'", archive);
@@ -814,7 +850,7 @@
 		return NULL;
 	}
 
-	fp = fopen(archive, "rb");
+	fp = fopen_utf8(archive, "rb");
 	if (!fp) {
 		PyErr_Format(PyExc_IOError,
 		   "zipimport: can not open file %s", archive);
Index: Objects/moduleobject.c
===================================================================
--- Objects/moduleobject.c	(revision 51747)
+++ Objects/moduleobject.c	(working copy)
@@ -184,6 +184,7 @@
 {
 	char *name;
 	char *filename;
+	PyObject *filenameO;
 
 	name = PyModule_GetName((PyObject *)m);
 	if (name == NULL) {
@@ -191,11 +192,24 @@
 		name = "?";
 	}
 	filename = PyModule_GetFilename((PyObject *)m);
-	if (filename == NULL) {
-		PyErr_Clear();
-		return PyString_FromFormat("<module '%s' (built-in)>", name);
+	if (filename != NULL)
+		return PyString_FromFormat("<module '%s' from '%s'>", name, filename);
+	PyErr_Clear();
+	filenameO = PyObject_GetAttrString((PyObject *)m, "__file__");
+	if (filenameO != NULL) {
+		PyObject *res;
+		PyObject *r = PyObject_Repr(filenameO);
+		/* only the repr is used from here on */
+		Py_DECREF(filenameO);
+		if (r != NULL && PyString_Check(r)) {
+			res = PyString_FromFormat("<module '%s' from %s>", name, PyString_AS_STRING(r));
+			Py_DECREF(r);
+			return res;
+		}
+		Py_XDECREF(r);
 	}
-	return PyString_FromFormat("<module '%s' from '%s'>", name, filename);
+	PyErr_Clear();
+	return PyString_FromFormat("<module '%s' (built-in)>", name);
 }
 
 /* We only need a traverse function, no clear function: If the module
Index: PC/import_nt.c
===================================================================
--- PC/import_nt.c	(revision 51747)
+++ PC/import_nt.c	(working copy)
@@ -20,6 +20,95 @@
 			      char *pathBuf,
 			      Py_ssize_t pathLen)
 {
+#ifdef Py_UNICODE_IMPORT
+	wchar_t *moduleKey;
+	const wchar_t keyPrefix[] = L"Software\\Python\\PythonCore\\";
+	const wchar_t keySuffix[] = L"\\Modules\\";
+#ifdef _DEBUG
+	/* In debugging builds, we _must_ have the debug version
+	 * registered.
+	 */
+	const wchar_t debugString[] = L"\\Debug";
+#else
+	const wchar_t debugString[] = L"";
+#endif
+	struct filedescr *fdp = NULL;
+	FILE *fp;
+	HKEY keyBase = HKEY_CURRENT_USER;
+	int modNameSize;
+	long regStat;
+	wchar_t wpathBuf[MAX_PATH];
+	PyObject *umode;
+
+	/* Calculate the size for the sprintf buffer.
+	 * Get the size of the chars only, plus 1 NULL.
+	 */
+	size_t bufSize = 1 + _scwprintf(
+		L"Software\\Python\\PythonCore\\%S\\Modules\\%S%s",
+		PyWin_DLLVersionString, moduleName, debugString);
+
+	/* alloca == no free required, but memory only local to fn,
+	 * also no heap fragmentation!  bufSize counts wide characters.
+	 */
+	moduleKey = alloca(bufSize * sizeof(wchar_t));
+	_snwprintf(moduleKey, bufSize,
+		L"Software\\Python\\PythonCore\\%S\\Modules\\%S%s",
+		PyWin_DLLVersionString, moduleName, debugString);
+
+	modNameSize = (int) sizeof(wpathBuf);
+	regStat = RegQueryValueW(keyBase, moduleKey, wpathBuf, &modNameSize);
+	if (regStat != ERROR_SUCCESS) {
+		/* No user setting - lookup in machine settings */
+		keyBase = HKEY_LOCAL_MACHINE;
+		/* be anal - failure may have reset size param */
+		modNameSize = (int)sizeof(wpathBuf);
+		regStat = RegQueryValueW(keyBase, moduleKey,
+					 wpathBuf, &modNameSize);
+
+		if (regStat != ERROR_SUCCESS)
+			return NULL;
+	}
+	modNameSize /= sizeof(wchar_t);
+
+	/* use the file extension to locate the type entry. */
+	for (fdp = _PyImport_Filetab; fdp->suffix != NULL; fdp++) {
+		PyObject *usuffix;
+		const wchar_t *suffix;
+		size_t extLen;
+		usuffix = PyUnicode_DecodeASCII(fdp->suffix, strlen(fdp->suffix), NULL);
+		assert(usuffix != NULL);
+		suffix = PyUnicode_AS_UNICODE(usuffix);
+		extLen = wcslen(suffix);
+		assert(modNameSize >= 0); /* else cast to size_t is wrong */
+		if ((size_t)modNameSize > extLen &&
+		    wcsnicmp(wpathBuf + ((size_t)modNameSize-extLen-1),
+			     suffix,
+			     extLen) == 0) {
+			Py_DECREF(usuffix);
+			break;
+		}
+		Py_DECREF(usuffix);
+	}
+	if (fdp->suffix == NULL)
+		return NULL;
+	umode = PyUnicode_DecodeASCII(fdp->mode, strlen(fdp->mode), NULL);
+	assert(umode != NULL);
+	fp = _wfopen(wpathBuf, PyUnicode_AS_UNICODE(umode));
+	Py_DECREF(umode);
+	if (fp != NULL) {
+		PyObject *upath;
+		*ppFileDesc = fdp;
+		/* now return the path as utf 8 */
+		upath = PyUnicode_EncodeUTF8(wpathBuf, wcslen(wpathBuf), NULL);
+		assert(upath != NULL);
+		strncpy(pathBuf, PyString_AS_STRING(upath), pathLen);
+		pathBuf[pathLen-1] = 0;
+		Py_DECREF(upath);
+	}
+	return fp;
+
+#else /* Py_UNICODE_IMPORT */
+
 	char *moduleKey;
 	const char keyPrefix[] = "Software\\Python\\PythonCore\\";
 	const char keySuffix[] = "\\Modules\\";
@@ -83,4 +172,5 @@
 	if (fp != NULL)
 		*ppFileDesc = fdp;
 	return fp;
+#endif /* Py_UNICODE_IMPORT */
 }
Index: PC/pyconfig.h
===================================================================
--- PC/pyconfig.h	(revision 51747)
+++ PC/pyconfig.h	(working copy)
@@ -489,7 +489,10 @@
 /* Define to indicate that the Python Unicode representation can be passed
    as-is to Win32 Wide API.  */
 #define Py_WIN_WIDE_FILENAMES
+#ifdef Py_USING_UNICODE
+#define Py_UNICODE_IMPORT
 #endif
+#endif
 
 /* Use Python's own small-block memory-allocator. */
 #define WITH_PYMALLOC 1
Index: Python/dynload_win.c
===================================================================
--- Python/dynload_win.c	(revision 51747)
+++ Python/dynload_win.c	(working copy)
@@ -176,13 +176,31 @@
 
 	{
 		HINSTANCE hDLL = NULL;
-		char pathbuf[260];
-		LPTSTR dummy;
+
 		/* We use LoadLibraryEx so Windows looks for dependent DLLs
 		    in directory of pathname first.  However, Windows95
 		    can sometimes not work correctly unless the absolute
 		    path is used.  If GetFullPathName() fails, the LoadLibrary
 		    will certainly fail too, so use its error code */
+#ifdef Py_UNICODE_IMPORT
+		wchar_t pathbuf[260];
+		wchar_t *dummy;
+		PyObject *upath = PyUnicode_DecodeUTF8(pathname, strlen(pathname), NULL);
+		if (upath == NULL)
+			return NULL;
+
+		if (GetFullPathNameW(PyUnicode_AS_UNICODE(upath),
+				     sizeof(pathbuf) / sizeof(*pathbuf),
+				     pathbuf,
+				     &dummy))
+			/* XXX This call doesn't exist in Windows CE */
+			hDLL = LoadLibraryExW(PyUnicode_AS_UNICODE(upath),
+					      NULL,
+					      LOAD_WITH_ALTERED_SEARCH_PATH);
+		Py_DECREF(upath);
+#else /* Py_UNICODE_IMPORT */
+		char pathbuf[260];
+		char *dummy;
 		if (GetFullPathName(pathname,
 				    sizeof(pathbuf),
 				    pathbuf,
@@ -190,6 +208,7 @@
 			/* XXX This call doesn't exist in Windows CE */
 			hDLL = LoadLibraryEx(pathname, NULL,
 					     LOAD_WITH_ALTERED_SEARCH_PATH);
+#endif /* Py_UNICODE_IMPORT */
 		if (hDLL==NULL){
 			char errBuf[256];
 			unsigned int errorCode;
Index: Python/import.c
===================================================================
--- Python/import.c	(revision 51747)
+++ Python/import.c	(working copy)
@@ -232,7 +232,61 @@
 	_PyImport_Filetab = NULL;
 }
 
+#ifdef Py_UNICODE_IMPORT
+int stat_utf8(const char *path, struct stat *buffer)
+{
+	int r;
+	PyObject *upath = PyUnicode_DecodeUTF8(path, strlen(path), NULL);
+	if (upath == NULL) {
+		errno = EINVAL;
+		return -1;
+	}
+	assert( sizeof(struct stat) == sizeof(struct _stat64i32) );
+	r = _wstat(PyUnicode_AS_UNICODE(upath), (struct _stat64i32*)buffer);
+	Py_DECREF(upath);
+	return r;
+}
+FILE *fopen_utf8(const char *fname, const char *mode)
+{
+	FILE *r;
+	PyObject *ufname, *umode = NULL;
+	ufname = PyUnicode_DecodeUTF8(fname, strlen(fname), NULL);
+	if (ufname != NULL)
+		umode = PyUnicode_DecodeUTF8(mode, strlen(mode), NULL);
+	if (ufname == NULL || umode == NULL) {
+		errno = EINVAL;
+		Py_XDECREF(ufname);
+		Py_XDECREF(umode);
+		return NULL;
+	}
+	r = _wfopen(PyUnicode_AS_UNICODE(ufname), PyUnicode_AS_UNICODE(umode));
+	Py_DECREF(ufname);
+	Py_DECREF(umode);
+	return r;
+}
+
+static int open_utf8(const char *fname, int oflag, int pmode)
+{
+	int r;
+	PyObject *ufname = PyUnicode_DecodeUTF8(fname, strlen(fname), NULL);
+	if (ufname == NULL) {
+		errno = EINVAL;
+		return -1;
+	}
+	r = _wopen(PyUnicode_AS_UNICODE(ufname), oflag, pmode);
+	Py_DECREF(ufname);
+	return r;
+}
+
+
+#else
+#define stat_utf8 stat
+#define fopen_utf8 fopen
+#define open_utf8 open
+#endif
+
+
 
 /* Locking primitives to prevent parallel imports of the same module
    in different threads to return with a partially loaded module.
    These calls are serialized by the global interpreter lock. */
@@ -643,7 +697,11 @@
 	/* Remember the filename as the __file__ attribute */
 	v = NULL;
 	if (pathname != NULL) {
+#ifdef Py_UNICODE_IMPORT
+		v = PyUnicode_DecodeUTF8(pathname, strlen(pathname), NULL);
+#else
 		v = PyString_FromString(pathname);
+#endif
 		if (v == NULL)
 			PyErr_Clear();
 	}
@@ -717,7 +775,7 @@
 	long magic;
 	long pyc_mtime;
 
-	fp = fopen(cpathname, "rb");
+	fp = fopen_utf8(cpathname, "rb");
 	if (fp == NULL)
 		return NULL;
 	magic = PyMarshal_ReadLongFromFile(fp);
@@ -822,7 +880,7 @@
 	*/
 	int fd;
 	(void) unlink(filename);
-	fd = open(filename, O_EXCL|O_CREAT|O_WRONLY|O_TRUNC
+	fd = open_utf8(filename, O_EXCL|O_CREAT|O_WRONLY|O_TRUNC
 #ifdef O_BINARY
 				|O_BINARY   /* necessary for Windows */
 #endif
@@ -837,7 +895,7 @@
 	return fdopen(fd, "wb");
 #else
 	/* Best we can do -- on Windows this can't happen anyway */
-	return fopen(filename, "wb");
+	return fopen_utf8(filename, "wb");
 #endif
 }
 
@@ -971,7 +1029,11 @@
 		PySys_WriteStderr("import %s # directory %s\n",
 			name, pathname);
 	d = PyModule_GetDict(m);
+#ifdef Py_UNICODE_IMPORT
+	file = PyUnicode_DecodeUTF8(pathname, strlen(pathname), NULL);
+#else
 	file = PyString_FromString(pathname);
+#endif
 	if (file == NULL)
 		goto error;
 	path = Py_BuildValue("[O]", file);
@@ -1169,15 +1231,31 @@
 		Py_DECREF(meta_path);
 	}
 
+#ifdef Py_UNICODE_IMPORT
+	if (path && PyObject_TypeCheck(path, &PyBaseString_Type)) {
+#else
 	if (path != NULL && PyString_Check(path)) {
+#endif
+		PyObject *spath = NULL;
+#ifdef Py_UNICODE_IMPORT
+		if (PyUnicode_Check(path)) {
+			spath = PyUnicode_AsUTF8String(path);
+			if (!spath)
+				return NULL;
+			path = spath;
+		}
+#endif
+
 		/* The only type of submodule allowed inside a "frozen"
 		   package are other frozen modules or packages. */
 		if (PyString_Size(path) + 1 + strlen(name) >= (size_t)buflen) {
 			PyErr_SetString(PyExc_ImportError,
 					"full frozen module name too long");
+			Py_XDECREF(spath);
 			return NULL;
 		}
 		strcpy(buf, PyString_AsString(path));
+		Py_XDECREF(spath);
 		strcat(buf, ".");
 		strcat(buf, name);
 		strcpy(name, buf);
@@ -1233,9 +1311,30 @@
 	namelen = strlen(name);
 	for (i = 0; i < npath; i++) {
 		PyObject *copy = NULL;
-		PyObject *v = PyList_GetItem(path, i);
+		PyObject *vOrg, *v = PyList_GetItem(path, i);
 		if (!v)
 			return NULL;
+		vOrg = v;
+#ifdef Py_UNICODE_IMPORT
+		if (PyString_Check(v)) {
+			/* convert to unicode first using default encoding */
+			copy = PyUnicode_FromObject(v);
+			if (copy == NULL)
+				return NULL;
+			v = copy;
+		}
+		if (PyUnicode_Check(v)) {
+			/* now convert unicode to utf8, used henceforth internally */
+			PyObject *copy2;
+			copy2 = PyUnicode_AsUTF8String(v);
+			Py_XDECREF(copy);
+			copy = copy2;
+			if (copy == NULL)
+				return NULL;
+			v = copy;
+		}
+		else
+#else
 #ifdef Py_USING_UNICODE
 		if (PyUnicode_Check(v)) {
 			copy = PyUnicode_Encode(PyUnicode_AS_UNICODE(v),
@@ -1246,6 +1345,7 @@
 		}
 		else
 #endif
+#endif
 		if (!PyString_Check(v))
 			continue;
 		len = PyString_GET_SIZE(v);
@@ -1264,7 +1364,7 @@
 			PyObject *importer;
 
 			importer = get_path_importer(path_importer_cache,
-						     path_hooks, v);
+						     path_hooks, vOrg);
 			if (importer == NULL) {
 				Py_XDECREF(copy);
 				return NULL;
@@ -1301,7 +1401,7 @@
 		/* Check for package import (buf holds a directory name,
 		   and there's an __init__ module in that directory */
 #ifdef HAVE_STAT
-		if (stat(buf, &statbuf) == 0 &&         /* it exists */
+		if (stat_utf8(buf, &statbuf) == 0 &&    /* it exists */
 		    S_ISDIR(statbuf.st_mode) &&         /* it's a directory */
 		    case_ok(buf, len, namelen, name)) { /* case matches */
 			if (find_init_module(buf)) { /* and has __init__.py */
@@ -1382,7 +1482,7 @@
 			filemode = fdp->mode;
 			if (filemode[0] == 'U')
 				filemode = "r" PY_STDIOTEXTMODE;
-			fp = fopen(buf, filemode);
+			fp = fopen_utf8(buf, filemode);
 			if (fp != NULL) {
 				if (case_ok(buf, len, namelen, name))
 					break;
@@ -1491,21 +1591,53 @@
 
 /* MS_WINDOWS */
 #if defined(MS_WINDOWS)
+#ifdef Py_UNICODE_IMPORT
+	WIN32_FIND_DATAW data;
+#else
 	WIN32_FIND_DATA data;
+#endif
+	PyObject *uname = NULL;
+	PyObject *ubuf = NULL;
 	HANDLE h;
 
 	if (Py_GETENV("PYTHONCASEOK") != NULL)
 		return 1;
 
+#ifdef Py_UNICODE_IMPORT
+	ubuf = PyUnicode_DecodeUTF8(buf, strlen(buf), NULL);
+	if (ubuf == NULL)
+		return 0;
+	uname = PyUnicode_DecodeASCII(name, strlen(name), NULL);
+	if (uname == NULL) {
+		Py_DECREF(ubuf);
+		return 0;
+	}
+	h = FindFirstFileW(PyUnicode_AS_UNICODE(ubuf), &data);
+	Py_DECREF(ubuf);
+
+#else
 	h = FindFirstFile(buf, &data);
+#endif
 	if (h == INVALID_HANDLE_VALUE) {
 		PyErr_Format(PyExc_NameError,
 		  "Can't find file for module %.100s\n(filename %.300s)",
 		  name, buf);
+		Py_XDECREF(uname);
 		return 0;
 	}
 	FindClose(h);
+#ifdef Py_UNICODE_IMPORT
+	{
+		/* uname is only needed for this comparison; release it
+		   before returning so it is not leaked on success */
+		int same = wcsncmp(data.cFileName,
+				   PyUnicode_AS_UNICODE(uname), namelen) == 0;
+		Py_DECREF(uname);
+		return same;
+	}
+#else
 	return strncmp(data.cFileName, name, namelen) == 0;
+#endif
 
 /* DJGPP */
 #elif defined(DJGPP)
@@ -1641,7 +1773,7 @@
 		buf[i++] = SEP;
 	pname = buf + i;
 	strcpy(pname, "__init__.py");
-	if (stat(buf, &statbuf) == 0) {
+	if (stat_utf8(buf, &statbuf) == 0) {
 		if (case_ok(buf,
 			    save_len + 9,	/* len("/__init__") */
 			    8,   		/* len("__init__") */
@@ -1652,7 +1784,7 @@
 	}
 	i += strlen(pname);
 	strcpy(buf+i, Py_OptimizeFlag ? "o" : "c");
-	if (stat(buf, &statbuf) == 0) {
+	if (stat_utf8(buf, &statbuf) == 0) {
 		if (case_ok(buf,
 			    save_len + 9,	/* len("/__init__") */
 			    8,   		/* len("__init__") */
Index: Python/importdl.c
===================================================================
--- Python/importdl.c	(revision 51747)
+++ Python/importdl.c	(working copy)
@@ -22,6 +22,7 @@
 _PyImport_LoadDynamicModule(char *name, char *pathname, FILE *fp)
 {
 	PyObject *m;
+	PyObject *path;
 	char *lastdot, *shortname, *packagecontext, *oldcontext;
 	dl_funcptr p;
 
@@ -62,7 +63,15 @@
 		return NULL;
 	}
 	/* Remember the filename as the __file__ attribute */
-	if (PyModule_AddStringConstant(m, "__file__", pathname) < 0)
+#ifdef Py_UNICODE_IMPORT
+	path = PyUnicode_DecodeUTF8(pathname, strlen(pathname), NULL);
+#else
+	path = PyString_FromString(pathname);
+#endif
+	if (PyModule_AddObject(m, "__file__", path) < 0) {
+		/* the reference is only stolen on success */
+		Py_XDECREF(path);
 		PyErr_Clear(); /* Not important enough to report */
+	}
 
 	if (_PyImport_FixupExtension(name, pathname) == NULL)