/*
 * Patch against revision 87441 of Parser/tokenizer.c, Parser/tokenizer.h,
 * Python/traceback.c and Python/import.c.
 *
 * What the hunks below do:
 *   * Parser/tokenizer.c: the function previously named
 *     PyTokenizer_FindEncoding(fd) becomes
 *     PyTokenizer_FindEncodingFilename(fd, filename); it stores `filename`
 *     (or an empty string literal when `filename` is NULL) in tok->filename
 *     before the tokenizing loop. PyTokenizer_FindEncoding(fd) is re-added as
 *     a wrapper that passes NULL. One strcpy() line changes in whitespace
 *     only.
 *   * Parser/tokenizer.h: the doc comment moves here, reworded, together
 *     with prototypes for both functions.
 *   * Python/traceback.c and Python/import.c: the local extern declarations
 *     are removed and the call sites pass a file name to the new function.
 *
 * NOTE(review): line breaks and indentation were reconstructed from a
 * whitespace-collapsed copy. Line counts agree with every hunk header, but
 * the leading whitespace of context lines should be confirmed against the
 * tree (or the patch applied with whitespace-insensitive matching).
 * NOTE(review): the fallback literal assigned to tok->filename appears as an
 * empty string here; it may be a placeholder lost in extraction. Confirm
 * against the author's original patch.
 */
Index: Parser/tokenizer.c
===================================================================
--- Parser/tokenizer.c (révision 87441)
+++ Parser/tokenizer.c (copie de travail)
@@ -1677,17 +1677,8 @@
     return result;
 }
 
-/* Get -*- encoding -*- from a Python file.
-
-   PyTokenizer_FindEncoding returns NULL when it can't find the encoding in
-   the first or second line of the file (in which case the encoding
-   should be assumed to be PyUnicode_GetDefaultEncoding()).
-
-   The char * returned is malloc'ed via PyMem_MALLOC() and thus must be freed
-   by the caller.
-*/
 char *
-PyTokenizer_FindEncoding(int fd)
+PyTokenizer_FindEncodingFilename(int fd, const char *filename)
 {
     struct tok_state *tok;
     FILE *fp;
@@ -1706,6 +1697,10 @@
         fclose(fp);
         return NULL;
     }
+    if (filename)
+        tok->filename = filename;
+    else
+        tok->filename = "";
     while (tok->lineno < 2 && tok->done == E_OK) {
         PyTokenizer_Get(tok, &p_start, &p_end);
     }
@@ -1713,12 +1708,18 @@
     if (tok->encoding) {
         encoding = (char *)PyMem_MALLOC(strlen(tok->encoding) + 1);
         if (encoding)
-        strcpy(encoding, tok->encoding);
+            strcpy(encoding, tok->encoding);
     }
     PyTokenizer_Free(tok);
     return encoding;
 }
 
+char *
+PyTokenizer_FindEncoding(int fd)
+{
+    return PyTokenizer_FindEncodingFilename(fd, NULL);
+}
+
 #ifdef Py_DEBUG
 
 void
Index: Parser/tokenizer.h
===================================================================
--- Parser/tokenizer.h (révision 87441)
+++ Parser/tokenizer.h (copie de travail)
@@ -69,8 +69,20 @@
 extern int PyTokenizer_Get(struct tok_state *, char **, char **);
 extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok,
                                           int len, int *offset);
-extern char * PyTokenizer_FindEncoding(int);
 
+/* Get the encoding of a Python file. Check for the coding cookie and check if
+   the file starts with a BOM.
+
+   PyTokenizer_FindEncodingFilename() returns NULL when it can't find the
+   encoding in the first or second line of the file (in which case the encoding
+   should be assumed to be UTF-8).
+
+   The char* returned is malloc'ed via PyMem_MALLOC() and thus must be freed
+   by the caller.
+*/
+extern char* PyTokenizer_FindEncodingFilename(int fd, const char* filename);
+extern char* PyTokenizer_FindEncoding(int fd);
+
 #ifdef __cplusplus
 }
 #endif
Index: Python/traceback.c
===================================================================
--- Python/traceback.c (révision 87441)
+++ Python/traceback.c (copie de travail)
@@ -13,9 +13,6 @@
 
 #define OFF(x) offsetof(PyTracebackObject, x)
 
-/* Method from Parser/tokenizer.c */
-extern char * PyTokenizer_FindEncoding(int);
-
 static PyObject *
 tb_dir(PyTracebackObject *self)
 {
@@ -226,6 +223,7 @@
     char buf[MAXPATHLEN+1];
     Py_UNICODE *u, *p;
     Py_ssize_t len;
+    char *filename_utf8;
 
     /* open the file */
     if (filename == NULL)
@@ -246,7 +244,10 @@
 
     /* use the right encoding to decode the file as unicode */
     fd = PyObject_AsFileDescriptor(binary);
-    found_encoding = PyTokenizer_FindEncoding(fd);
+    filename_utf8 = _PyUnicode_AsString(filename);
+    if (filename_utf8 == NULL)
+        PyErr_Clear();
+    found_encoding = PyTokenizer_FindEncodingFilename(fd, filename_utf8);
     encoding = (found_encoding != NULL) ? found_encoding : "utf-8";
     lseek(fd, 0, 0); /* Reset position */
     fob = PyObject_CallMethod(io, "TextIOWrapper", "Os", binary, encoding);
Index: Python/import.c
===================================================================
--- Python/import.c (révision 87441)
+++ Python/import.c (copie de travail)
@@ -123,9 +123,6 @@
 /* This table is defined in config.c: */
 extern struct _inittab _PyImport_Inittab[];
 
-/* Method from Parser/tokenizer.c */
-extern char * PyTokenizer_FindEncoding(int);
-
 struct _inittab *PyImport_Inittab = _PyImport_Inittab;
 
 /* these tables define the module suffixes that Python recognizes */
@@ -3174,9 +3171,9 @@
     }
     if (fd != -1) {
         if (strchr(fdp->mode, 'b') == NULL) {
-            /* PyTokenizer_FindEncoding() returns PyMem_MALLOC'ed
+            /* PyTokenizer_FindEncodingFilename() returns PyMem_MALLOC'ed
                memory. */
-            found_encoding = PyTokenizer_FindEncoding(fd);
+            found_encoding = PyTokenizer_FindEncodingFilename(fd, pathname);
             lseek(fd, 0, 0); /* Reset position */
             if (found_encoding == NULL && PyErr_Occurred())
                 return NULL;