#include /* Function from Parser/tokenizer.c */ extern char * PyTokenizer_FindEncodingFilename(int, PyObject *); static PyObject * tokenizer_detect_encoding(PyObject *self, PyObject *args) { char *encoding; int fd; FILE *fp; PyObject *filename; if (!PyArg_ParseTuple(args, "U", &filename)) return NULL; fp = _Py_fopen(filename, "r"); if (fp == NULL) { if (!PyErr_Occurred()) PyErr_SetFromErrno(PyExc_IOError); return NULL; } fd = fileno(fp); if (fd != -1) fd = dup(fd); fclose(fp); fp = NULL; if (fd == -1) return PyErr_SetFromErrno(PyExc_OSError); encoding = PyTokenizer_FindEncodingFilename(fd, filename); close(fd); if (encoding == NULL && PyErr_Occurred()) return NULL; encoding = (encoding != NULL) ? encoding : (char*)PyUnicode_GetDefaultEncoding(); if (encoding == NULL) return NULL; return PyUnicode_FromString(encoding); }; static PyMethodDef TokenizerMethods[] = { {"detect_encoding", tokenizer_detect_encoding, METH_VARARGS, "detect a file's encoding."}, {NULL, NULL, 0, NULL} /* Sentinel */ }; static struct PyModuleDef tokenizermodule = { PyModuleDef_HEAD_INIT, "_tokenizer", /* name of module */ NULL, /* module documentation, may be NULL */ -1, /* size of per-interpreter state of the module, or -1 if the module keeps state in global variables. */ TokenizerMethods }; PyMODINIT_FUNC PyInit__tokenizer(void) { return PyModule_Create(&tokenizermodule); }