Index: Python/ceval.c
===================================================================
--- Python/ceval.c	(revision 45831)
+++ Python/ceval.c	(working copy)
@@ -92,7 +92,7 @@
 static PyObject *ext_do_call(PyObject *, PyObject ***, int, int, int);
 static PyObject *update_keyword_args(PyObject *, int, PyObject ***,PyObject *);
 static PyObject *update_star_args(int, int, PyObject *, PyObject ***);
-static PyObject *load_args(PyObject ***, int);
+static PyObject *load_args(PyObject ***, int, int*);
 #define CALL_FLAG_VAR 1
 #define CALL_FLAG_KW 2
 
@@ -3442,6 +3442,102 @@
 	}
 }
 
+/* Cache of preallocated tuples that load_args() hands out instead of
+ * allocating a new argument tuple for every call.  A slot may be NULL
+ * if refilling it failed; load_args() treats that as a miss.
+ */
+/* Maximum # of arguments to optimize for. */
+#define _MAX_ARG_COUNT 8
+/* # of tuples to hold on to (cache).  Must be a power of 2. */
+#define _NUM_ARG_HOLDERS 64
+static PyObject* _args_holder[_NUM_ARG_HOLDERS];
+/* # of cached tuples currently handed out; masked to pick the next slot. */
+static int _last_index = 0;
+#ifdef Py_DEBUG
+/* This must be a power of 2 so it's calculated properly. */
+#define _MAX_ARG_COUNT_STATS 128
+static int _Py_NumArgsCount[_MAX_ARG_COUNT_STATS];
+static int _Py_CachedArgsHits = 0;
+static int _Py_CachedArgsMissedSize = 0;
+static int _Py_CachedArgsMissedRefCount = 0;
+#endif
+
+/* Allocate the cached tuples.  Returns true unless an exception is set. */
+int
+_PyEval_Init(void)
+{
+	int i;
+	for (i = 0; i < _NUM_ARG_HOLDERS; i++)
+		_args_holder[i] = PyTuple_New(_MAX_ARG_COUNT);
+	return !PyErr_Occurred();
+}
+
+/* Drop the cache's reference to every cached tuple. */
+void
+_PyEval_Fini(void)
+{
+	int i;
+	for (i = 0; i < _NUM_ARG_HOLDERS; i++)
+		Py_CLEAR(_args_holder[i]);
+}
+
+#ifdef Py_DEBUG
+/* Print the cache hit/miss counters and the argument-count histogram. */
+void
+_PyEval_FuncStats(void)
+{
+	int i;
+	fprintf(stdout, "Hits: %d, Size misses: %d, refcount misses: %d\n",
+		_Py_CachedArgsHits, _Py_CachedArgsMissedSize,
+		_Py_CachedArgsMissedRefCount);
+
+	fprintf(stdout, "Arg counts:\n");
+	for (i = 0; i < _MAX_ARG_COUNT_STATS; i++)
+		fprintf(stdout, " [%3d] = %d\n", i, _Py_NumArgsCount[i]);
+}
+#endif
+
+/* Hand back a tuple that load_args() took from the cache.  The caller
+ * still owns the extra reference load_args() added and must drop it
+ * after this returns.
+ */
+static void
+release_args(PyObject *callargs, int na)
+{
+	int i;
+
+	if (callargs->ob_refcnt == 2) {
+		/* Only the cache and the caller hold the tuple: drop the
+		 * arguments and mark it empty so it can be reused.
+		 */
+		for (i = 0; i < na; i++) {
+			PyObject *item = PyTuple_GET_ITEM(callargs, i);
+			Py_DECREF(item);
+		}
+		((PyVarObject*)callargs)->ob_size = 0;
+	}
+	else {
+		/* Something else kept a reference, so the tuple must stay
+		 * intact and can't be reused.  Find its slot by identity
+		 * rather than trusting _last_index, give up the cache's
+		 * reference and refill the slot.  The refill may fail and
+		 * leave NULL behind; the exception state left by the call
+		 * is preserved either way.
+		 */
+		PyObject *exc_type, *exc_value, *exc_tb;
+		PyErr_Fetch(&exc_type, &exc_value, &exc_tb);
+		for (i = 0; i < _NUM_ARG_HOLDERS; i++) {
+			if (_args_holder[i] == callargs) {
+				_args_holder[i] = PyTuple_New(_MAX_ARG_COUNT);
+				Py_DECREF(callargs);
+				break;
+			}
+		}
+		PyErr_Restore(exc_type, exc_value, exc_tb);
+	}
+	_last_index--;
+}
+
 static void
 err_args(PyObject *func, int flags, int nargs)
 {
@@ -3509,11 +3605,11 @@
 	if (PyCFunction_Check(func) && nk == 0) {
 		int flags = PyCFunction_GET_FLAGS(func);
 		PyThreadState *tstate = PyThreadState_GET();
+		PyCFunction meth = PyCFunction_GET_FUNCTION(func);
+		PyObject *self = PyCFunction_GET_SELF(func);
 
 		PCALL(PCALL_CFUNCTION);
 		if (flags & (METH_NOARGS | METH_O)) {
-			PyCFunction meth = PyCFunction_GET_FUNCTION(func);
-			PyObject *self = PyCFunction_GET_SELF(func);
 			if (flags & METH_NOARGS && na == 0) {
 				C_TRACE(x, (*meth)(self,NULL));
 			}
@@ -3528,11 +3624,25 @@
 			}
 		}
 		else {
+			int needs_release;
 			PyObject *callargs;
-			callargs = load_args(pp_stack, na);
+			callargs = load_args(pp_stack, na, &needs_release);
 			READ_TIMESTAMP(*pintr0);
-			C_TRACE(x, PyCFunction_Call(func,callargs,NULL));
+			if (callargs == NULL) {
+				/* PyTuple_New() failed inside load_args(). */
+				x = NULL;
+			}
+			else if ((flags & ~(METH_CLASS | METH_STATIC | METH_COEXIST))
+				 == METH_VARARGS) {
+				/* Plain METH_VARARGS: call the C function directly. */
+				C_TRACE(x, (*meth)(self,callargs));
+			}
+			else {
+				C_TRACE(x, PyCFunction_Call(func,callargs,NULL));
+			}
 			READ_TIMESTAMP(*pintr1);
+			if (needs_release)
+				release_args(callargs, na);
 			Py_XDECREF(callargs);
 		}
 	} else {
@@ -3699,13 +3809,43 @@
 }
 
+/* Pop the top na entries of the value stack into an argument tuple.
+ * If the tuple was taken from the _args_holder cache, *needs_release is
+ * set to true and the caller must pass the tuple to release_args()
+ * before dropping its own reference.  Otherwise a fresh tuple is
+ * allocated, and NULL is returned if that allocation fails.
+ */
 static PyObject *
-load_args(PyObject ***pp_stack, int na)
+load_args(PyObject ***pp_stack, int na, int *needs_release)
 {
-	PyObject *args = PyTuple_New(na);
+	PyObject *args = _args_holder[_last_index & (_NUM_ARG_HOLDERS - 1)];
 	PyObject *w;
 
-	if (args == NULL)
-		return NULL;
+#ifdef Py_DEBUG
+	_Py_NumArgsCount[na & (_MAX_ARG_COUNT_STATS - 1)]++;
+#endif
+
+	*needs_release = args != NULL && na < _MAX_ARG_COUNT &&
+			 args->ob_refcnt == 1;
+	if (*needs_release) {
+#ifdef Py_DEBUG
+		_Py_CachedArgsHits++;
+#endif
+		/* Only a hit consumes a slot; release_args() undoes this. */
+		_last_index++;
+		((PyVarObject*)args)->ob_size = na;
+		Py_INCREF(args);
+	}
+	else {
+#ifdef Py_DEBUG
+		if (na >= _MAX_ARG_COUNT)
+			_Py_CachedArgsMissedSize++;
+		else
+			_Py_CachedArgsMissedRefCount++;
+#endif
+		args = PyTuple_New(na);
+		if (args == NULL)
+			return NULL;
+	}
 	while (--na >= 0) {
 		w = EXT_POP(*pp_stack);
 		PyTuple_SET_ITEM(args, na, w);
@@ -3719,13 +3859,14 @@
 	PyObject *callargs = NULL;
 	PyObject *kwdict = NULL;
 	PyObject *result = NULL;
+	int needs_release;
 
 	if (nk > 0) {
 		kwdict = update_keyword_args(NULL, nk, pp_stack, func);
 		if (kwdict == NULL)
 			goto call_fail;
 	}
-	callargs = load_args(pp_stack, na);
+	callargs = load_args(pp_stack, na, &needs_release);
 	if (callargs == NULL)
 		goto call_fail;
 #ifdef CALL_PROFILE
@@ -3743,6 +3884,8 @@
 		PCALL(PCALL_OTHER);
 #endif
 	result = PyObject_Call(func, callargs, kwdict);
+	if (needs_release)
+		release_args(callargs, na);
  call_fail:
 	Py_XDECREF(callargs);
 	Py_XDECREF(kwdict);
Index: Python/pythonrun.c
===================================================================
--- Python/pythonrun.c	(revision 45831)
+++ Python/pythonrun.c	(working copy)
@@ -56,6 +56,9 @@
 static void initsigs(void);
 static void call_sys_exitfunc(void);
 static void call_ll_exitfuncs(void);
+extern int _PyEval_Init(void);
+extern void _PyEval_Fini(void);
+extern void _PyEval_FuncStats(void);
 extern void _PyUnicode_Init(void);
 extern void _PyUnicode_Fini(void);
 
@@ -183,6 +186,9 @@
 	if (!_PyFrame_Init())
 		Py_FatalError("Py_Initialize: can't init frames");
 
+	if (!_PyEval_Init())
+		Py_FatalError("Py_Initialize: can't init eval");
+
 	if (!_PyInt_Init())
 		Py_FatalError("Py_Initialize: can't init ints");
 
@@ -458,6 +464,11 @@
 	/* Cleanup Unicode implementation */
 	_PyUnicode_Fini();
 #endif
+	_PyEval_Fini();
+#ifdef Py_DEBUG
+	if (Py_GETENV("PYTHONFUNCSTATS"))
+		_PyEval_FuncStats();
+#endif
 
 	/* XXX Still allocated:
 	   - various static ad-hoc pointers to interned strings
Index: Objects/frameobject.c
===================================================================
--- Objects/frameobject.c	(revision 45831)
+++ Objects/frameobject.c	(working copy)
@@ -547,11 +547,16 @@
 	if (back == NULL || back->f_globals != globals) {
 		builtins = PyDict_GetItem(globals, builtin_object);
 		if (builtins) {
-			if (PyModule_Check(builtins)) {
+			if (PyDict_Check(builtins)) {
+				Py_INCREF(builtins);
+				goto got_builtins;
+			}
+			else if (PyModule_Check(builtins)) {
 				builtins = PyModule_GetDict(builtins);
 				assert(!builtins || PyDict_Check(builtins));
+				Py_XINCREF(builtins);
 			}
-			else if (!PyDict_Check(builtins))
+			else
 				builtins = NULL;
 		}
 		if (builtins == NULL) {
@@ -563,8 +568,6 @@
 				    builtins, "None", Py_None) < 0)
 				return NULL;
 		}
-		else
-			Py_INCREF(builtins);
 
 	}
 	else {
@@ -574,6 +577,7 @@
 		assert(builtins != NULL && PyDict_Check(builtins));
 		Py_INCREF(builtins);
 	}
+got_builtins:
 	if (free_list == NULL) {
 		f = PyObject_GC_NewVar(PyFrameObject, &PyFrame_Type, extras);
 		if (f == NULL) {
@@ -621,12 +625,12 @@
 	f->f_locals = locals;
 	f->f_trace = NULL;
 	f->f_exc_type = f->f_exc_value = f->f_exc_traceback = NULL;
+	f->f_iblock = 0;
+	f->f_lasti = -1;
 	f->f_tstate = tstate;
 
-	f->f_lasti = -1;
+	f->f_restricted = (builtins != tstate->interp->builtins);
 	f->f_lineno = code->co_firstlineno;
-	f->f_restricted = (builtins != tstate->interp->builtins);
-	f->f_iblock = 0;
 	f->f_nlocals = code->co_nlocals;
 	f->f_stacksize = code->co_stacksize;
 	f->f_ncells = ncells;