Index: Python/pythonrun.c
===================================================================
--- Python/pythonrun.c	(revision 58477)
+++ Python/pythonrun.c	(working copy)
@@ -51,6 +51,7 @@
 /* Forward */
 static void initmain(void);
 static void initsite(void);
+static int initstdio(void);
 static PyObject *run_mod(mod_ty, const char *, PyObject *, PyObject *,
 			  PyCompilerFlags *, PyArena *);
 static PyObject *run_pyc_file(FILE *, const char *, PyObject *, PyObject *,
@@ -241,6 +242,9 @@
 		initsigs(); /* Signal handling stuff, including initintr() */
 
 	initmain(); /* Module __main__ */
+	if (initstdio() < 0)
+		Py_FatalError(
+		    "Py_Initialize: can't initialize sys standard streams");
 	if (!Py_NoSiteFlag)
 		initsite(); /* Module site */
 
@@ -676,6 +680,84 @@
 	}
 }
 
+/* Initialize sys.stdin, stdout, stderr and __builtin__.open.
+   Returns 0 on success and -1 on failure. */
+static int
+initstdio(void)
+{
+	PyObject *iomod=NULL, *wrapper=NULL;
+	PyObject *bimod=NULL;
+	PyObject *m;
+	PyObject *std=NULL;
+	int status = 0;
+
+	/* Hack to avoid a nasty recursion issue when Python is invoked
+	   in verbose mode: pre-import the Latin-1 and UTF-8 codecs */
+	if ((m = PyImport_ImportModule("encodings.utf_8")) == NULL) {
+		goto error;
+	}
+	Py_DECREF(m);
+
+	if (!(m = PyImport_ImportModule("encodings.latin_1"))) {
+		goto error;
+	}
+	Py_DECREF(m);
+
+	if (!(bimod = PyImport_ImportModule("__builtin__"))) {
+		goto error;
+	}
+
+	if (!(iomod = PyImport_ImportModule("io"))) {
+		goto error;
+	}
+	if (!(wrapper = PyObject_GetAttrString(iomod, "OpenWrapper"))) {
+		goto error;
+	}
+
+	/* Set __builtin__.open */
+	if (PyObject_SetAttrString(bimod, "open", wrapper) == -1) {
+		goto error;
+	}
+
+	/* Set sys.stdin */
+	if (!(std = PyFile_FromFileEx(stdin, "", "r", fclose, -1,
+				      NULL, "\n"))) {
+		goto error;
+	}
+	PySys_SetObject("__stdin__", std);
+	PySys_SetObject("stdin", std);
+	Py_DECREF(std);
+
+	/* Set sys.stdout */
+	if (!(std = PyFile_FromFileEx(stdout, "", "w", fclose, -1,
+				      NULL, "\n"))) {
+		goto error;
+	}
+	PySys_SetObject("__stdout__", std);
+	PySys_SetObject("stdout", std);
+	Py_DECREF(std);
+
+	/* Set sys.stderr */
+	if (!(std = PyFile_FromFileEx(stderr, "", "w", fclose, -1,
+				      NULL, "\n"))) {
+		goto error;
+	}
+	PySys_SetObject("__stderr__", std);
+	PySys_SetObject("stderr", std);
+	Py_DECREF(std);
+
+	if (0) { /* only entered via "goto error" */
+  error:
+		status = -1;
+	}
+
+	/* Drop our own references; shared by the success and error paths */
+	Py_XDECREF(wrapper);
+	Py_XDECREF(bimod);
+	Py_XDECREF(iomod);
+	return status;
+}
+
 /* Parse input from a file and execute it */
 
 int
@@ -1146,10 +1228,10 @@
 	int err = 0;
 	PyObject *f = PySys_GetObject("stderr");
 	Py_INCREF(value);
-	if (f == NULL)
+	if (f == NULL) {
 		_PyObject_Dump(value);
-	if (f == NULL)
 		fprintf(stderr, "lost sys.stderr\n");
+	}
 	else {
 		fflush(stdout);
 		if (tb && tb != Py_None)
@@ -1589,6 +1671,9 @@
 Py_FatalError(const char *msg)
 {
 	fprintf(stderr, "Fatal Python error: %s\n", msg);
+	if (PyErr_Occurred()) {
+		PyErr_Print();
+	}
 #ifdef MS_WINDOWS
 	OutputDebugString("Fatal Python error: ");
 	OutputDebugString(msg);
Index: Python/import.c
===================================================================
--- Python/import.c	(revision 58477)
+++ Python/import.c	(working copy)
@@ -91,6 +91,9 @@
 /* This table is defined in config.c: */
 extern struct _inittab _PyImport_Inittab[];
 
+/* Method from Parser/tokenizer.c */
+extern const char * PyTokenizer_FindEncoding(FILE *fp);
+
 struct _inittab *PyImport_Inittab = _PyImport_Inittab;
 
 /* these tables define the module suffixes that Python recognizes */
@@ -2558,6 +2561,7 @@
 	struct filedescr *fdp;
 	char pathname[MAXPATHLEN+1];
 	FILE *fp = NULL;
+	const char *encoding = NULL;
 
 	pathname[0] = '\0';
 	if (path == Py_None)
@@ -2566,7 +2570,11 @@
 	if (fdp == NULL)
 		return NULL;
 	if (fp != NULL) {
-		fob = PyFile_FromFile(fp, pathname, fdp->mode, fclose);
+		encoding = PyTokenizer_FindEncoding(fp);
+		encoding = (encoding != NULL) ? encoding :
+				PyUnicode_GetDefaultEncoding();
+		fob = PyFile_FromFileEx(fp, pathname, fdp->mode, fclose,
+					-1, (char*)encoding, NULL);
 		if (fob == NULL) {
 			fclose(fp);
 			return NULL;
Index: Include/fileobject.h
===================================================================
--- Include/fileobject.h	(revision 58477)
+++ Include/fileobject.h	(working copy)
@@ -9,6 +9,9 @@
 #define PY_STDIOTEXTMODE "b"
 
 PyAPI_FUNC(PyObject *) PyFile_FromFile(FILE *, char *, char *, int (*)(FILE*));
+PyAPI_FUNC(PyObject *) PyFile_FromFileEx(FILE *, char *, char *,
+					 int (*)(FILE *), int, char *,
+					 char *);
 PyAPI_FUNC(PyObject *) PyFile_GetLine(PyObject *, int);
 PyAPI_FUNC(int) PyFile_WriteObject(PyObject *, PyObject *, int);
 PyAPI_FUNC(int) PyFile_WriteString(const char *, PyObject *);
Index: Objects/fileobject.c
===================================================================
--- Objects/fileobject.c	(revision 58477)
+++ Objects/fileobject.c	(working copy)
@@ -28,22 +28,32 @@
 PyObject *
 PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE *))
 {
-	PyObject *io, *stream, *nameobj;
+	return PyFile_FromFileEx(fp, name, mode, close, -1, NULL, NULL);
+}
 
+PyObject *
+PyFile_FromFileEx(FILE *fp, char *name, char *mode, int (*close)(FILE *),
+	       int buffering, char *encoding, char *newline)
+{
+	PyObject *io, *stream, *nameobj=NULL;
+
 	io = PyImport_ImportModule("io");
 	if (io == NULL)
 		return NULL;
-	stream = PyObject_CallMethod(io, "open", "is", fileno(fp), mode);
-        Py_DECREF(io);
+	stream = PyObject_CallMethod(io, "open", "isiss", fileno(fp), mode,
+				    buffering, encoding, newline);
+	Py_DECREF(io);
 	if (stream == NULL)
 		return NULL;
-	nameobj = PyUnicode_FromString(name);
-	if (nameobj == NULL)
-		PyErr_Clear();
-	else {
-		if (PyObject_SetAttrString(stream, "name", nameobj) < 0)
+	if (name != NULL) {
+		nameobj = PyUnicode_FromString(name);
+		if (nameobj == NULL)
 			PyErr_Clear();
-		Py_DECREF(nameobj);
+		else {
+			if (PyObject_SetAttrString(stream, "name", nameobj) < 0)
+				PyErr_Clear();
+			Py_DECREF(nameobj);
+		}
 	}
 	return stream;
 }
Index: Parser/tokenizer.c
===================================================================
--- Parser/tokenizer.c	(revision 58477)
+++ Parser/tokenizer.c	(working copy)
@@ -1601,8 +1601,36 @@
 }
 #endif
 
-
+/* Get -*- encoding -*- from a Python file
 
+   PyTokenizer_FindEncoding returns NULL when it can't find the encoding in
+   the first or second line of the file. In this case callers fall back to
+   PyUnicode_GetDefaultEncoding().
+
+   fp is rewound before returning, on both paths.
+
+   XXX tok is never freed here: the returned string is read straight out
+   of it.  Releasing tok would require handing back a copy instead.
+*/
+const char *
+PyTokenizer_FindEncoding(FILE *fp) {
+	struct tok_state *tok;
+	char *p_start=NULL, *p_end=NULL;
+
+	if ((tok = PyTokenizer_FromFile(fp, NULL, NULL, NULL)) == NULL) {
+		rewind(fp);
+		return NULL;
+	}
+	/* Also stop at EOF or on a tokenizer error: lineno no longer advances
+	   then, so a file shorter than three lines would loop forever. */
+	while (tok->lineno <= 2 && tok->done == E_OK) {
+		PyTokenizer_Get(tok, &p_start, &p_end);
+	}
+
+	rewind(fp);
+	return (const char *)tok->encoding;
+}
+
 #ifdef Py_DEBUG
 
 void
Index: Parser/tokenizer.h
===================================================================
--- Parser/tokenizer.h	(revision 58477)
+++ Parser/tokenizer.h	(working copy)
@@ -67,6 +67,7 @@
 extern int PyTokenizer_Get(struct tok_state *, char **, char **);
 extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok,
 					  int len, int *offset);
+extern const char * PyTokenizer_FindEncoding(FILE *fp);
 
 #ifdef __cplusplus
 }
Index: Doc/c-api/concrete.rst
===================================================================
--- Doc/c-api/concrete.rst	(revision 58477)
+++ Doc/c-api/concrete.rst	(working copy)
@@ -2425,7 +2425,13 @@
    pointer, *fp*.  The function *close* will be called when the file should be
    closed.  Return *NULL* on failure.
 
+.. cfunction:: PyObject* PyFile_FromFileEx(FILE *fp, char *name, char *mode, int (*close)(FILE *), int buffering, char *encoding, char *newline)
 
+   Create a new :ctype:`PyFileObject` from the already-open standard C file
+   pointer, *fp*.  This function works like :cfunc:`PyFile_FromFile` but also
+   takes *buffering*, *encoding* and *newline*.  Pass -1 for *buffering* or
+   *NULL* for *encoding* and *newline* to get the default values.
+
 .. cfunction:: FILE* PyFile_AsFile(PyObject *p)
 
    Return the file object associated with *p* as a :ctype:`FILE\*`.
Index: Doc/data/refcounts.dat
===================================================================
--- Doc/data/refcounts.dat	(revision 58477)
+++ Doc/data/refcounts.dat	(working copy)
@@ -338,6 +338,15 @@
 PyFile_FromFile:char*:mode::
 PyFile_FromFile:int(*:close)::
 
+PyFile_FromFileEx:PyObject*::+1:
+PyFile_FromFileEx:FILE*:fp::
+PyFile_FromFileEx:char*:name::
+PyFile_FromFileEx:char*:mode::
+PyFile_FromFileEx:int(*:close)::
+PyFile_FromFileEx:int:buffering::
+PyFile_FromFileEx:char*:encoding::
+PyFile_FromFileEx:char*:newline::
+
 PyFile_FromString:PyObject*::+1:
 PyFile_FromString:char*:name::
 PyFile_FromString:char*:mode::
Index: Lib/site.py
===================================================================
--- Lib/site.py	(revision 58477)
+++ Lib/site.py	(working copy)
@@ -402,23 +402,6 @@
               (err.__class__.__name__, err))
 
 
-def installnewio():
-    """Install new I/O library as default."""
-    import io
-    # Hack to avoid a nasty recursion issue when Python is invoked
-    # in verbose mode: pre-import the Latin-1 and UTF-8 codecs
-    from encodings import latin_1, utf_8
-    # Trick so that open won't become a bound method when stored
-    # as a class variable (as dumbdbm does)
-    class open:
-        def __new__(cls, *args, **kwds):
-            return io.open(*args, **kwds)
-    __builtin__.open = open
-    sys.__stdin__ = sys.stdin = io.open(0, "r", newline='\n')
-    sys.__stdout__ = sys.stdout = io.open(1, "w", newline='\n')
-    sys.__stderr__ = sys.stderr = io.open(2, "w", newline='\n')
-
-
 def main():
     abs__file__()
     paths_in_sys = removeduppaths()
@@ -433,7 +416,6 @@
     sethelper()
     aliasmbcs()
     setencoding()
-    installnewio()
     execsitecustomize()
     # Remove sys.setdefaultencoding() so that users cannot change the
     # encoding after initialization.  The test for presence is needed when
Index: Lib/io.py
===================================================================
--- Lib/io.py	(revision 58477)
+++ Lib/io.py	(working copy)
@@ -178,6 +178,18 @@
     return text
 
 
+class OpenWrapper:
+    """Wrapper for __builtin__.open
+
+    Trick so that open won't become a bound method when stored
+    as a class variable (as dumbdbm does).
+
+    see initstdio() in Python/pythonrun.c
+    """
+    def __new__(cls, *args, **kwargs):
+        return open(*args, **kwargs)
+
+
 class UnsupportedOperation(ValueError, IOError):
     pass
 
Index: Lib/test/test_imp.py
===================================================================
--- Lib/test/test_imp.py	(revision 58477)
+++ Lib/test/test_imp.py	(working copy)
@@ -38,9 +38,16 @@
             self.fail("release_lock() without lock should raise "
                       "RuntimeError")
 
+class ImportTests(unittest.TestCase):
+
+    def test_find_module_encoding(self):
+        fd = imp.find_module("heapq")[0]
+        self.assertEqual(fd.encoding, "iso-8859-1")
+
 def test_main():
     test_support.run_unittest(
                 LockTests,
+                ImportTests,
             )
 
 if __name__ == "__main__":