Index: Include/fileobject.h
===================================================================
RCS file: /cvsroot/python/python/dist/src/Include/fileobject.h,v
retrieving revision 2.29
diff -c -r2.29 fileobject.h
*** Include/fileobject.h	24 May 2002 15:24:38 -0000	2.29
--- Include/fileobject.h	5 Aug 2002 05:00:38 -0000
***************
*** 7,12 ****
--- 7,14 ----
  extern "C" {
  #endif
  
+ #define WITH_READAHEAD_BUFFER
+ 
  typedef struct {
  	PyObject_HEAD
  	FILE *f_fp;
***************
*** 15,25 ****
  	int (*f_close)(FILE *);
  	int f_softspace;	/* Flag used by 'print' command */
  	int f_binary;		/* Flag which indicates whether the file is open
! 				   open in binary (1) or test (0) mode */
  #ifdef WITH_UNIVERSAL_NEWLINES
  	int f_univ_newline;	/* Handle any newline convention */
  	int f_newlinetypes;	/* Types of newlines seen */
  	int f_skipnextlf;	/* Skip next \n */
  #endif
  } PyFileObject;
  
--- 17,32 ----
  	int (*f_close)(FILE *);
  	int f_softspace;	/* Flag used by 'print' command */
  	int f_binary;		/* Flag which indicates whether the file is open
! 				   in binary (1) or text (0) mode */
  #ifdef WITH_UNIVERSAL_NEWLINES
  	int f_univ_newline;	/* Handle any newline convention */
  	int f_newlinetypes;	/* Types of newlines seen */
  	int f_skipnextlf;	/* Skip next \n */
+ #endif
+ #ifdef WITH_READAHEAD_BUFFER
+ 	char *f_buf;		/* Allocated buffer */
+ 	char *f_bufend;		/* Points after last occupied position */
+ 	char *f_bufptr;		/* Current buffer position */
  #endif
  } PyFileObject;
  
Index: Objects/fileobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/fileobject.c,v
retrieving revision 2.165
diff -c -r2.165 fileobject.c
*** Objects/fileobject.c	14 Jul 2002 22:14:19 -0000	2.165
--- Objects/fileobject.c	5 Aug 2002 05:00:42 -0000
***************
*** 121,126 ****
--- 121,129 ----
  	f->f_newlinetypes = NEWLINE_UNKNOWN;
  	f->f_skipnextlf = 0;
  #endif
+ #ifdef WITH_READAHEAD_BUFFER
+ 	f->f_buf = NULL;
+ #endif
  
  	if (f->f_name == NULL || f->f_mode == NULL)
  		return NULL;
***************
*** 271,276 ****
--- 274,283 ----
  	return NULL;
  }
  
+ #ifdef WITH_READAHEAD_BUFFER
+ static void drop_readahead(PyFileObject *);
+ #endif
+ 
  /* Methods */
  
  static void
***************
*** 283,288 ****
--- 290,298 ----
  	}
  	Py_XDECREF(f->f_name);
  	Py_XDECREF(f->f_mode);
+ #ifdef WITH_READAHEAD_BUFFER
+ 	drop_readahead(f);
+ #endif
  	f->ob_type->tp_free((PyObject *)f);
  }
  
***************
*** 405,410 ****
--- 415,423 ----
  
  	if (f->f_fp == NULL)
  		return err_closed();
+ #ifdef WITH_READAHEAD_BUFFER
+ 	drop_readahead(f);
+ #endif
  	whence = 0;
  	if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence))
  		return NULL;
***************
*** 1620,1629 ****
--- 1633,1772 ----
  static PyObject *
  file_getiter(PyObject *f)
  {
+ #ifdef WITH_READAHEAD_BUFFER
+ 	/* The file object is its own iterator; file_iternext does the work. */
+ 	Py_INCREF(f);
+ 	return f;
+ #else
  	return PyObject_CallMethod(f, "xreadlines", "");
+ #endif
+ }
+ 
+ #ifdef WITH_READAHEAD_BUFFER
+ #define READAHEAD_BUFSIZE 8192
+ 
+ /* Free the readahead buffer, if any.  Bytes that were read ahead but not
+    yet consumed are discarded. */
+ static void
+ drop_readahead(PyFileObject *f)
+ {
+ 	if (f->f_buf != NULL) {
+ 		PyMem_Free(f->f_buf);
+ 		f->f_buf = NULL;
+ 	}
+ }
+ 
+ /* Make sure that file has a readahead buffer with at least one byte
+    (unless at EOF) and no more than bufsize.  Returns 0 on success and
+    -1, with an exception set, on error. */
+ static int
+ readahead(PyFileObject *f, int bufsize)
+ {
+ 	int chunksize;
+ 
+ 	if (f->f_buf != NULL) {
+ 		if ((f->f_bufend - f->f_bufptr) >= 1)
+ 			return 0;
+ 		drop_readahead(f);
+ 	}
+ 	if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
+ 		/* PyMem_Malloc does not set an exception itself; without
+ 		   one, the NULL from file_iternext would look like a
+ 		   normal end of iteration. */
+ 		PyErr_NoMemory();
+ 		return -1;
+ 	}
+ 	Py_BEGIN_ALLOW_THREADS
+ 	errno = 0;
+ 	chunksize = Py_UniversalNewlineFread(
+ 		f->f_buf, bufsize, f->f_fp, (PyObject *)f);
+ 	Py_END_ALLOW_THREADS
+ 	if (chunksize == 0) {
+ 		if (ferror(f->f_fp)) {
+ 			PyErr_SetFromErrno(PyExc_IOError);
+ 			clearerr(f->f_fp);
+ 			drop_readahead(f);
+ 			return -1;
+ 		}
+ 	}
+ 	f->f_bufptr = f->f_buf;
+ 	f->f_bufend = f->f_buf + chunksize;
+ 	return 0;
+ }
+ 
+ /* Used by file_iternext.  The returned string will start with 'skip'
+    uninitialized bytes followed by the remainder of the line.  Returns
+    NULL on error.  Don't be horrified by the recursive call - maximum
+    recursion depth is limited by logarithmic buffer growth to about 50
+    even when reading a 1gb line. */
+ static PyStringObject *
+ readahead_get_line_skip(PyFileObject *f, int skip, int bufsize)
+ {
+ 	PyStringObject *s;
+ 	char *bufptr;
+ 	char *buf;
+ 	int len;
+ 
+ 	if (f->f_buf == NULL && readahead(f, bufsize) < 0)
+ 		return NULL;
+ 
+ 	len = f->f_bufend - f->f_bufptr;
+ 	if (len == 0)	/* Nothing buffered: only the 'skip' prefix remains */
+ 		return (PyStringObject *)
+ 			PyString_FromStringAndSize(NULL, skip);
+ 	bufptr = memchr(f->f_bufptr, '\n', len);
+ 	if (bufptr != NULL) {
+ 		bufptr++;			/* Count the '\n' */
+ 		len = bufptr - f->f_bufptr;
+ 		s = (PyStringObject *)
+ 			PyString_FromStringAndSize(NULL, skip+len);
+ 		if (s == NULL)
+ 			return NULL;
+ 		memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
+ 		f->f_bufptr = bufptr;
+ 		if (bufptr == f->f_bufend)
+ 			drop_readahead(f);
+ 	} else {
+ 		/* No newline in the buffer: detach it, let the recursive
+ 		   call read on with a 25% larger buffer and allocate the
+ 		   result, then copy this chunk in after the 'skip' bytes. */
+ 		bufptr = f->f_bufptr;
+ 		buf = f->f_buf;
+ 		f->f_buf = NULL;	/* Force new readahead buffer */
+ 		s = readahead_get_line_skip(
+ 			f, skip+len, bufsize + (bufsize>>2) );
+ 		if (s == NULL) {
+ 			PyMem_Free(buf);
+ 			return NULL;
+ 		}
+ 		memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
+ 		PyMem_Free(buf);
+ 	}
+ 	return s;
  }
  
  static PyObject *
+ file_iternext(PyObject *f)
+ {
+ 	PyStringObject *l;
+ 
+ 	/* Same closed-file guard as file_seek; keeps a NULL f_fp out of readahead. */
+ 	if (((PyFileObject *)f)->f_fp == NULL)
+ 		return err_closed();
+ 
+ 	l = readahead_get_line_skip((PyFileObject *)f, 0, READAHEAD_BUFSIZE);
+ 	if (l == NULL || PyString_GET_SIZE(l) == 0) {
+ 		/* NULL: exception already set.  Empty string: end of file. */
+ 		Py_XDECREF(l);
+ 		return NULL;
+ 	}
+ 	return (PyObject *)l;
+ }
+ 
+ 
+ #endif
+ 
+ static PyObject *
  file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
  {
  	PyObject *self;
***************
*** 1743,1749 ****
--- 1886,1896 ----
  	0,					/* tp_richcompare */
  	0,					/* tp_weaklistoffset */
  	file_getiter,				/* tp_iter */
+ #ifdef WITH_READAHEAD_BUFFER
+ 	file_iternext,				/* tp_iternext */
+ #else
  	0,					/* tp_iternext */
+ #endif
  	file_methods,				/* tp_methods */
  	file_memberlist,			/* tp_members */
  	file_getsetlist,			/* tp_getset */