Create a read buffer (4096 bytes) in the Unpickler class. Using [0]*10**7 or [1000]*10**7 as test data, load() is 6 to 8 times faster. I removed the last_string attribute because it is not used. If unread tail bytes remain in the buffer, seek the input stream backward by that many bytes. Index: Modules/_pickle.c =================================================================== --- Modules/_pickle.c (révision 70674) +++ Modules/_pickle.c (copie de travail) @@ -318,18 +318,24 @@ PyObject *fast_memo; } PicklerObject; +typedef struct UnpicklerBuffer { + PyObject *pybytes; /* PyBytes object . */ + Py_ssize_t size; /* buffer size in bytes */ + Py_ssize_t index; /* buffer index in bytes */ + char *bytes; /* Pointer to buffer bytes. */ +} UnpicklerBuffer; + typedef struct UnpicklerObject { PyObject_HEAD Pdata *stack; /* Pickle data stack, store unpickled objects. */ - PyObject *readline; /* readline() method of the output stream */ - PyObject *read; /* read() method of the output stream */ + PyObject *readline; /* readline() method of the input stream */ + PyObject *read; /* read() method of the input stream */ + PyObject *seek; /* seek() method of the input stream */ PyObject *memo; /* Memo dictionary, provide the objects stored using the PUT opcodes. */ PyObject *arg; PyObject *pers_func; /* persistent_load() method, can be NULL. */ - PyObject *last_string; /* Reference to the last string read by the - readline() method. */ - char *buffer; /* Reading buffer. */ + UnpicklerBuffer buffer; char *encoding; /* Name of the encoding to be used for decoding strings pickled using Python 2.x. The default value is "ASCII" */ @@ -465,6 +471,8 @@ On the other hand, the added complexity might not worth it. */ +#define PREFETCH 4096 + /* Read at least n characters from the input stream and set s to the current reading position. 
*/ static Py_ssize_t @@ -472,8 +480,21 @@ { PyObject *len; PyObject *data; + Py_ssize_t prefetch; + + if (self->buffer.pybytes != NULL && self->buffer.index + n <= self->buffer.size) { + *s = self->buffer.bytes; + self->buffer.index += n; + self->buffer.bytes += n; + return n; + } + + if (n < PREFETCH) + prefetch = PREFETCH; + else + prefetch = n; - len = PyLong_FromSsize_t(n); + len = PyLong_FromSsize_t(prefetch); if (len == NULL) return -1; @@ -490,17 +511,32 @@ return -1; } - if (PyBytes_GET_SIZE(data) != n) { - PyErr_SetNone(PyExc_EOFError); + if (self->buffer.pybytes != NULL && self->buffer.index != self->buffer.size) { + PyBytes_Concat(&self->buffer.pybytes, data); Py_DECREF(data); + if (self->buffer.pybytes == NULL) return -1; + } else { + Py_XDECREF(self->buffer.pybytes); + self->buffer.pybytes = data; + self->buffer.index = 0; } - Py_XDECREF(self->last_string); - self->last_string = data; - - if (!(*s = PyBytes_AS_STRING(data))) + prefetch = PyBytes_GET_SIZE(self->buffer.pybytes); + if (prefetch < n) { + PyErr_SetNone(PyExc_EOFError); return -1; + } + self->buffer.size = prefetch; + self->buffer.bytes = PyBytes_AS_STRING(self->buffer.pybytes); + if (self->buffer.bytes == NULL) + return -1; + + self->buffer.bytes += self->buffer.index; + *s = self->buffer.bytes; + + self->buffer.index += n; + self->buffer.bytes += n; return n; } @@ -522,9 +558,6 @@ return -1; } - Py_XDECREF(self->last_string); - self->last_string = data; - if (!(*s = PyBytes_AS_STRING(data))) return -1; @@ -4229,6 +4262,18 @@ return NULL; } + if (self->buffer.pybytes != NULL) { + Py_ssize_t tail = self->buffer.size - self->buffer.index; + if (tail) { + /* file.seek(-tail, 1) */ + PyObject* result; + result = PyObject_CallFunction(self->seek, "ni", -tail, 1); + if (result == NULL) + return NULL; + Py_DECREF(result); + } + } + PDATA_POP(self->stack, value); return value; } @@ -4319,7 +4364,6 @@ Py_XDECREF(self->stack); Py_XDECREF(self->pers_func); Py_XDECREF(self->arg); - 
Py_XDECREF(self->last_string); PyMem_Free(self->marks); free(self->encoding); @@ -4337,7 +4381,7 @@ Py_VISIT(self->stack); Py_VISIT(self->pers_func); Py_VISIT(self->arg); - Py_VISIT(self->last_string); + Py_VISIT(self->buffer.pybytes); return 0; } @@ -4350,7 +4394,7 @@ Py_CLEAR(self->stack); Py_CLEAR(self->pers_func); Py_CLEAR(self->arg); - Py_CLEAR(self->last_string); + Py_CLEAR(self->buffer.pybytes); PyMem_Free(self->marks); self->marks = NULL; @@ -4412,6 +4456,7 @@ if (self->read != NULL) (void)Unpickler_clear(self); + self->seek = PyObject_GetAttrString(file, "seek"); self->read = PyObject_GetAttrString(file, "read"); self->readline = PyObject_GetAttrString(file, "readline"); if (self->readline == NULL || self->read == NULL) @@ -4447,6 +4492,8 @@ if (self->memo == NULL) return -1; + memset(&self->buffer, 0, sizeof(self->buffer)); + return 0; }