Index: Python/import.c
===================================================================
--- Python/import.c	(revision 65833)
+++ Python/import.c	(working copy)
@@ -2758,6 +2758,34 @@
 */
 
+/* imp.get_codingspec(path): return, as a str, the encoding that
+   PyTokenizer_FindEncoding() reports for the file at path, or the default
+   encoding when it reports none.  Returns NULL with an exception set on error. */
 static PyObject *
+imp_get_codingspec(PyObject *self, PyObject *args)
+{
+	char *fname, *encoding = NULL;
+	int fd;
+	PyObject *r;
+	if (!PyArg_ParseTuple(args, "s:get_codingspec", &fname))
+		return NULL;
+	fd = open(fname, O_RDONLY);
+	/* Report the path along with errno so the IOError names the file. */
+	if (fd == -1)
+		return PyErr_SetFromErrnoWithFilename(PyExc_IOError, fname);
+	/* PyTokenizer_FindEncoding() returns PyMem_MALLOC'ed memory. */
+	encoding = PyTokenizer_FindEncoding(fd);
+	close(fd);
+	if (encoding == NULL && PyErr_Occurred())
+		return NULL;
+	r = PyUnicode_FromString((encoding != NULL) ? encoding :
+	                         PyUnicode_GetDefaultEncoding());
+	if (encoding != NULL)
+		PyMem_FREE(encoding);
+	return r;
+}
+
+
+static PyObject *
 imp_get_magic(PyObject *self, PyObject *noargs)
 {
 	char buf[4];
@@ -2826,6 +2854,10 @@
 			/* PyTokenizer_FindEncoding() returns PyMem_MALLOC'ed
 			   memory. */
 			found_encoding = PyTokenizer_FindEncoding(fd);
+			if (found_encoding == NULL && PyErr_Occurred()) {
+				close(fd);
+				return NULL;
+			}
 			lseek(fd, 0, 0); /* Reset position */
 			encoding = (found_encoding != NULL) ? found_encoding :
 				(char*)PyUnicode_GetDefaultEncoding();
@@ -3100,6 +3132,12 @@
 Load a module, given information returned by find_module().\n\
 The module name must include the full package name, if any.");
 
+PyDoc_STRVAR(doc_get_codingspec,
+"get_codingspec(path) -> string\n\
+Return the normalized coding spec of a Python file as returned by the \n\
+tokenizer. When no coding spec is set, the default encoding is returned \n\
+instead.");
+
 PyDoc_STRVAR(doc_get_magic,
 "get_magic() -> string\n\
 Return the magic number for .pyc or .pyo files.");
@@ -3133,6 +3171,7 @@
 
 static PyMethodDef imp_methods[] = {
 	{"find_module", imp_find_module, METH_VARARGS, doc_find_module},
+	{"get_codingspec", imp_get_codingspec, METH_VARARGS, doc_get_codingspec},
 	{"get_magic", imp_get_magic, METH_NOARGS, doc_get_magic},
 	{"get_suffixes", imp_get_suffixes, METH_NOARGS, doc_get_suffixes},
 	{"load_module", imp_load_module, METH_VARARGS, doc_load_module},
Index: Doc/library/imp.rst
===================================================================
--- Doc/library/imp.rst	(revision 65833)
+++ Doc/library/imp.rst	(working copy)
@@ -32,6 +32,13 @@
    below.
 
 
+.. function:: get_codingspec(path)
+
+   Return the normalized source file encoding for *path*. If no explicit
+   encoding is specified in the source file, the default encoding for source is
+   returned.
+
+
 .. function:: find_module(name[, path])
 
    Try to find the module *name* on the search path *path*. If *path* is a list
Index: Lib/test/test_imp.py
===================================================================
--- Lib/test/test_imp.py	(revision 65833)
+++ Lib/test/test_imp.py	(working copy)
@@ -1,6 +1,10 @@
+# DO NOT specify an encoding for this file: a test below expects the default.
 import imp
+import os.path
 import unittest
+import test
 from test import support
+import sys
 
 
 class LockTests(unittest.TestCase):
@@ -67,7 +71,24 @@
 ##        import sys
 ##        self.assertRaises(ImportError, reload, sys)
 
+    def test_get_codingspec(self):
+        # __file__ may name a compiled .pyc/.pyo file; get_codingspec()
+        # must be given the source file, so map back to the .py path.
+        def source_path(path):
+            if path.endswith(('.pyc', '.pyo')):
+                return path[:-1]
+            return path
+        self.assertEqual(imp.get_codingspec(source_path(__file__)), "utf-8")
+        import pydoc
+        self.assertEqual(imp.get_codingspec(source_path(pydoc.__file__)),
+                         "iso-8859-1")
+        self.assertRaises(SyntaxError, imp.get_codingspec, sys.executable)
+        test_directory = os.path.split(test.__file__)[0]
+        bad_encoding_path = os.path.join(test_directory,
+                                         'bad_source_encoding.py')
+        self.assertRaises(SyntaxError, imp.get_codingspec, bad_encoding_path)
+
 
 def test_main():
     support.run_unittest(
         LockTests,
Index: Lib/test/bad_source_encoding.py
===================================================================
--- Lib/test/bad_source_encoding.py	(revision 0)
+++ Lib/test/bad_source_encoding.py	(revision 0)
@@ -0,0 +1,2 @@
+# coding: nonsense
+u = "helper for testing non-existent encodings"