Index: Include/fileobject.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/fileobject.h,v retrieving revision 2.29 diff -c -r2.29 fileobject.h *** Include/fileobject.h 24 May 2002 15:24:38 -0000 2.29 --- Include/fileobject.h 6 Aug 2002 04:59:17 -0000 *************** *** 13,21 **** PyObject *f_name; PyObject *f_mode; int (*f_close)(FILE *); ! int f_softspace; /* Flag used by 'print' command */ ! int f_binary; /* Flag which indicates whether the file is open ! open in binary (1) or test (0) mode */ #ifdef WITH_UNIVERSAL_NEWLINES int f_univ_newline; /* Handle any newline convention */ int f_newlinetypes; /* Types of newlines seen */ --- 13,24 ---- PyObject *f_name; PyObject *f_mode; int (*f_close)(FILE *); ! int f_softspace; /* Flag used by 'print' command */ ! int f_binary; /* Flag which indicates whether the file is ! open in binary (1) or text (0) mode */ ! char* f_buf; /* Allocated readahead buffer */ ! char* f_bufend; /* Points after last occupied position */ ! char* f_bufptr; /* Current buffer position */ #ifdef WITH_UNIVERSAL_NEWLINES int f_univ_newline; /* Handle any newline convention */ int f_newlinetypes; /* Types of newlines seen */ Index: Objects/fileobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/fileobject.c,v retrieving revision 2.165 diff -c -r2.165 fileobject.c *** Objects/fileobject.c 14 Jul 2002 22:14:19 -0000 2.165 --- Objects/fileobject.c 6 Aug 2002 04:59:21 -0000 *************** *** 116,121 **** --- 116,122 ---- f->f_close = close; f->f_softspace = 0; f->f_binary = strchr(mode,'b') != NULL; + f->f_buf = NULL; #ifdef WITH_UNIVERSAL_NEWLINES f->f_univ_newline = (strchr(mode, 'U') != NULL); f->f_newlinetypes = NEWLINE_UNKNOWN; *************** *** 271,276 **** --- 272,279 ---- return NULL; } + void drop_readahead(PyFileObject *); + /* Methods */ static void *************** *** 283,288 **** --- 286,292 ---- } Py_XDECREF(f->f_name); Py_XDECREF(f->f_mode); + drop_readahead(f); f->ob_type->tp_free((PyObject *)f); } *************** *** 405,410 **** --- 409,415 ---- if (f->f_fp == NULL) return err_closed(); + drop_readahead(f); whence = 0; if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence)) return NULL; *************** *** 1178,1205 **** } static PyObject * - file_xreadlines(PyFileObject *f) - { - static PyObject* xreadlines_function = NULL; - - if (f->f_fp == NULL) - return err_closed(); - if (!xreadlines_function) { - PyObject *xreadlines_module = - PyImport_ImportModule("xreadlines"); - if(!xreadlines_module) - return NULL; - - xreadlines_function = PyObject_GetAttrString(xreadlines_module, - "xreadlines"); - Py_DECREF(xreadlines_module); - if(!xreadlines_function) - return NULL; - } - return PyObject_CallFunction(xreadlines_function, "(O)", f); - } - - static PyObject * file_readlines(PyFileObject *f, PyObject *args) { long sizehint = 0; --- 1183,1188 ---- *************** *** 1462,1467 **** --- 1445,1457 ---- #undef CHUNKSIZE } + static PyObject * + file_getiter(PyObject *f) + { + Py_INCREF(f); + return f; + } + PyDoc_STRVAR(readline_doc, "readline([size]) -> next line from the file, as a string.\n" "\n" *************** *** 1517,1526 **** "total number of bytes in the lines returned."); PyDoc_STRVAR(xreadlines_doc, ! "xreadlines() -> next line from the file, as a string.\n" "\n" ! "Equivalent to xreadlines.xreadlines(file). This is like readline(), but\n" ! "often quicker, due to reading ahead internally."); PyDoc_STRVAR(writelines_doc, "writelines(sequence_of_strings) -> None. Write the strings to the file.\n" --- 1507,1516 ---- "total number of bytes in the lines returned."); PyDoc_STRVAR(xreadlines_doc, ! "xreadlines() -> returns self.\n" "\n" ! "For backward compatibility. File objects now include the performance\n" ! "optimizations previously implemented in the xreadlines module."); PyDoc_STRVAR(writelines_doc, "writelines(sequence_of_strings) -> None. Write the strings to the file.\n" *************** *** 1554,1560 **** {"tell", (PyCFunction)file_tell, METH_NOARGS, tell_doc}, {"readinto", (PyCFunction)file_readinto, METH_VARARGS, readinto_doc}, {"readlines", (PyCFunction)file_readlines, METH_VARARGS, readlines_doc}, ! {"xreadlines", (PyCFunction)file_xreadlines, METH_NOARGS, xreadlines_doc}, {"writelines", (PyCFunction)file_writelines, METH_O, writelines_doc}, {"flush", (PyCFunction)file_flush, METH_NOARGS, flush_doc}, {"close", (PyCFunction)file_close, METH_NOARGS, close_doc}, --- 1544,1550 ---- {"tell", (PyCFunction)file_tell, METH_NOARGS, tell_doc}, {"readinto", (PyCFunction)file_readinto, METH_VARARGS, readinto_doc}, {"readlines", (PyCFunction)file_readlines, METH_VARARGS, readlines_doc}, ! {"xreadlines", (PyCFunction)file_getiter, METH_NOARGS, xreadlines_doc}, {"writelines", (PyCFunction)file_writelines, METH_O, writelines_doc}, {"flush", (PyCFunction)file_flush, METH_NOARGS, flush_doc}, {"close", (PyCFunction)file_close, METH_NOARGS, close_doc}, *************** *** 1617,1628 **** {0}, }; static PyObject * ! file_getiter(PyObject *f) { ! return PyObject_CallMethod(f, "xreadlines", ""); } static PyObject * file_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { --- 1607,1722 ---- {0}, }; + void + drop_readahead(PyFileObject *f) + { + if (f->f_buf != NULL) { + PyMem_Free(f->f_buf); + f->f_buf = NULL; + } + } + + /* Make sure that file has a readahead buffer with at least one byte + (unless at EOF) and no more than bufsize. Returns negative value on + error */ + int readahead(PyFileObject *f, int bufsize) { + int chunksize; + + if (f->f_buf != NULL) { + if( (f->f_bufend - f->f_bufptr) >= 1) + return 0; + else + drop_readahead(f); + } + if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) { + return -1; + } + Py_BEGIN_ALLOW_THREADS + errno = 0; + chunksize = Py_UniversalNewlineFread( + f->f_buf, bufsize, f->f_fp, (PyObject *)f); + Py_END_ALLOW_THREADS + if (chunksize == 0) { + if (ferror(f->f_fp)) { + PyErr_SetFromErrno(PyExc_IOError); + clearerr(f->f_fp); + drop_readahead(f); + return -1; + } + } + f->f_bufptr = f->f_buf; + f->f_bufend = f->f_buf + chunksize; + return 0; + } + + /* Used by file_iternext. The returned string will start with 'skip' + uninitialized bytes followed by the remainder of the line. Don't be + horrified by the recursive call - maximum recursion depth is limited by + logarithmic buffer growth to about 50 even when reading a 1gb line. */ + + PyStringObject* + readahead_get_line_skip(PyFileObject *f, int skip, int bufsize) { + PyStringObject* s; + char *bufptr; + char *buf; + int len; + + if (f->f_buf == NULL) + if (readahead(f, bufsize)<0) + return NULL; + + len = f->f_bufend - f->f_bufptr; + if (len == 0) + return (PyStringObject *) + PyString_FromStringAndSize(NULL, skip); + bufptr = memchr(f->f_bufptr, '\n', len); + if (bufptr != NULL) { + bufptr++; /* Count the '\n' */ + len = bufptr - f->f_bufptr; + s = (PyStringObject *) + PyString_FromStringAndSize(NULL, skip+len); + if (s == NULL) + return NULL; + memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len); + f->f_bufptr = bufptr; + if (bufptr == f->f_bufend) + drop_readahead(f); + } else { + bufptr = f->f_bufptr; + buf = f->f_buf; + f->f_buf = NULL; /* Force new readahead buffer */ + s = readahead_get_line_skip( + f, skip+len, bufsize + (bufsize>>2) ); + if (s == NULL) { + PyMem_Free(buf); + return NULL; + } + memcpy(PyString_AS_STRING(s)+skip, bufptr, len); + PyMem_Free(buf); + } + return s; + } + + /* A larger buffer size may actually decrease performance. */ + #define READAHEAD_BUFSIZE 8192 + static PyObject * ! file_iternext(PyObject *f) { ! PyStringObject* l; ! ! int i; ! i = ((PyFileObject *)f)->f_softspace; ! ! l = readahead_get_line_skip((PyFileObject *)f, 0, READAHEAD_BUFSIZE); ! if (l == NULL || PyString_GET_SIZE(l) == 0) { ! Py_XDECREF(l); ! return NULL; ! } ! return (PyObject *)l; } + static PyObject * file_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { *************** *** 1743,1749 **** 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ file_getiter, /* tp_iter */ ! 0, /* tp_iternext */ file_methods, /* tp_methods */ file_memberlist, /* tp_members */ file_getsetlist, /* tp_getset */ --- 1837,1843 ---- 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ file_getiter, /* tp_iter */ ! file_iternext, /* tp_iternext */ file_methods, /* tp_methods */ file_memberlist, /* tp_members */ file_getsetlist, /* tp_getset */