Cache argument tuples in load_args() (Python/ceval.c)

load_args() takes the argument tuple for calls with fewer than
_MAX_ARG_COUNT arguments from a small static cache (_args_holder) instead
of calling PyTuple_New() each time, and the caller hands the tuple back
after the call.  Functions whose flags reduce to METH_VARARGS are called
directly rather than through PyCFunction_Call().  _PyEval_Init() and
_PyEval_Fini() are added and called from pythonrun.c; _PyEval_Fini()
releases the cache and, in Py_DEBUG or FUNC_STATS builds with
PYTHONFUNCSTATS set, prints hit/miss statistics to stderr.

Changes made while reviewing the original patch:
  * _last_index is stepped back with the same power-of-2 mask that
    advances it.  A plain "--" on the unsigned index underflows after a
    wrap and indexes outside _args_holder.
  * When the tuple is still referenced elsewhere after the call
    (refcount != 2), the cache's reference is released with Py_CLEAR().
    Overwriting the slot with NULL leaked that reference.
  * load_args() clears *needs_release when PyTuple_New() fails, so a
    caller never dereferences a NULL callargs in the release code.
  * All statistics are written to stderr ("Arg counts:" went to stdout).
  * The C++-style "//" comment is replaced by a C comment.
  * Misc/SpecialBuilds.txt names the envar the code actually reads
    (PYTHONFUNCSTATS) and fixes the wording of the description.

NOTE(review), not addressed here:
  * If more than _NUM_ARG_HOLDERS cached calls are active at once the
    index wraps and load_args() hands out a tuple that is still in use;
    an in-use check (or a depth limit) is needed before this is safe.
  * The cache assumes strictly nested acquire/release of slots.  Confirm
    that nothing can run load_args() between a call site's load_args()
    and its release other than calls nested inside it (e.g. another
    thread while the callee runs).
  * The text this patch was recovered from had lost its line breaks and
    the header name in the first hunk's "#include" context line;
    <ctype.h>, the tabs and the blank context lines are reconstructed.
    Regenerate the diff against revision 45961 before applying.

Index: Python/ceval.c
===================================================================
--- Python/ceval.c	(revision 45961)
+++ Python/ceval.c	(working copy)
@@ -16,6 +16,8 @@
 
 #include <ctype.h>
 
+/* #define FUNC_STATS */
+
 #ifndef WITH_TSC
 
 #define READ_TIMESTAMP(var)
@@ -92,7 +94,7 @@
 static PyObject *ext_do_call(PyObject *, PyObject ***, int, int, int);
 static PyObject *update_keyword_args(PyObject *, int, PyObject ***,PyObject *);
 static PyObject *update_star_args(int, int, PyObject *, PyObject ***);
-static PyObject *load_args(PyObject ***, int);
+static PyObject *load_args(PyObject ***, int, int *);
 
 #define CALL_FLAG_VAR 1
 #define CALL_FLAG_KW 2
@@ -3440,6 +3442,57 @@
 	}
 }
 
+/* Cache of argument tuples that load_args() reuses instead of calling
+   PyTuple_New() for every call with fewer than _MAX_ARG_COUNT arguments. */
+/* Calls with fewer than this many arguments use the cache. */
+#define _MAX_ARG_COUNT 8
+/* # of tuples to hold on to (cache).  Must be a power of 2. */
+#define _NUM_ARG_HOLDERS 64
+static PyObject* _args_holder[_NUM_ARG_HOLDERS];
+/* Index of the next free slot in _args_holder: advanced by load_args(),
+   stepped back by the caller once the call has returned. */
+static unsigned _last_index = 0;
+#if defined(Py_DEBUG) || defined(FUNC_STATS)
+/* Must be a power of 2: counts are indexed by na & (this - 1). */
+#define _MAX_ARG_COUNT_STATS 32
+static int _Py_NumArgsCount[_MAX_ARG_COUNT_STATS];
+static int _Py_CachedArgsHit = 0;
+static int _Py_CachedArgsMissedSize = 0;
+static int _Py_CachedArgsMissedEmpty = 0;
+#endif
+
+/* Set up the argument tuple cache.  There is nothing to do yet; returns
+   1 (success) so Py_Initialize() can test it like _PyFrame_Init(). */
+int
+_PyEval_Init(void)
+{
+	return 1;
+}
+
+/* Drop the cached argument tuples.  In Py_DEBUG or FUNC_STATS builds,
+   first print the cache statistics to stderr if PYTHONFUNCSTATS is
+   set in the environment. */
+void
+_PyEval_Fini(void)
+{
+	int i;
+
+#if defined(Py_DEBUG) || defined(FUNC_STATS)
+	if (Py_GETENV("PYTHONFUNCSTATS")) {
+		fprintf(stderr, "Hits: %d, Size misses: %d, empty misses: %d\n",
+			_Py_CachedArgsHit, _Py_CachedArgsMissedSize,
+			_Py_CachedArgsMissedEmpty);
+
+		fprintf(stderr, "Arg counts:\n");
+		for (i = 0; i < _MAX_ARG_COUNT_STATS; i++)
+			fprintf(stderr, " [%3d] = %d\n", i, _Py_NumArgsCount[i]);
+	}
+#endif
+
+	for (i = 0; i < _NUM_ARG_HOLDERS; i++)
+		Py_CLEAR(_args_holder[i]);
+}
+
 static void
 err_args(PyObject *func, int flags, int nargs)
 {
@@ -3507,11 +3560,11 @@
 	if (PyCFunction_Check(func) && nk == 0) {
 		int flags = PyCFunction_GET_FLAGS(func);
 		PyThreadState *tstate = PyThreadState_GET();
+		PyCFunction meth = PyCFunction_GET_FUNCTION(func);
+		PyObject *self = PyCFunction_GET_SELF(func);
 
 		PCALL(PCALL_CFUNCTION);
 		if (flags & (METH_NOARGS | METH_O)) {
-			PyCFunction meth = PyCFunction_GET_FUNCTION(func);
-			PyObject *self = PyCFunction_GET_SELF(func);
 			if (flags & METH_NOARGS && na == 0) {
 				C_TRACE(x, (*meth)(self,NULL));
 			}
@@ -3526,11 +3579,37 @@
 			}
 		}
 		else {
+			int needs_release;
 			PyObject *callargs;
-			callargs = load_args(pp_stack, na);
+			callargs = load_args(pp_stack, na, &needs_release);
 			READ_TIMESTAMP(*pintr0);
-			C_TRACE(x, PyCFunction_Call(func,callargs,NULL));
+			if ((flags & ~(METH_CLASS | METH_STATIC | METH_COEXIST))
+			    == METH_VARARGS) {
+				C_TRACE(x, (*meth)(self,callargs));
+			}
+			else {
+				C_TRACE(x, PyCFunction_Call(func,callargs,NULL));
+			}
 			READ_TIMESTAMP(*pintr1);
+			/* Hand a cached tuple back.  If only the cache and
+			   callargs reference it, drop the arguments and keep
+			   the emptied tuple for reuse; otherwise something
+			   else still references it, so give up the slot. */
+			if (needs_release) {
+				_last_index = (_last_index - 1) &
+					      (_NUM_ARG_HOLDERS - 1);
+				if (callargs->ob_refcnt == 2) {
+					int i;
+					for (i = 0; i < na; i++) {
+						w = PyTuple_GET_ITEM(callargs, i);
+						Py_DECREF(w);
+					}
+					((PyVarObject*)callargs)->ob_size = 0;
+				}
+				else {
+					Py_CLEAR(_args_holder[_last_index]);
+				}
+			}
 			Py_XDECREF(callargs);
 		}
 	} else {
@@ -3541,13 +3620,12 @@
 			PCALL(PCALL_BOUND_METHOD);
 			Py_INCREF(self);
 			func = PyMethod_GET_FUNCTION(func);
-			Py_INCREF(func);
 			Py_DECREF(*pfunc);
 			*pfunc = self;
 			na++;
 			n++;
-		} else
-			Py_INCREF(func);
+		}
+		Py_INCREF(func);
 		READ_TIMESTAMP(*pintr0);
 		if (PyFunction_Check(func))
 			x = fast_function(func, pp_stack, n, na, nk);
@@ -3697,13 +3775,52 @@
 }
 
+/* Build the positional-argument tuple for a call from the top na
+   entries of the value stack.  For na < _MAX_ARG_COUNT the tuple comes
+   from (or is added to) the _args_holder cache and *needs_release is
+   set to 1: the caller must hand the tuple back to the cache after the
+   call.  Otherwise *needs_release is 0 and a fresh tuple is returned.
+   Returns NULL, with *needs_release 0, if PyTuple_New() fails. */
 static PyObject *
-load_args(PyObject ***pp_stack, int na)
+load_args(PyObject ***pp_stack, int na, int *needs_release)
 {
-	PyObject *args = PyTuple_New(na);
+	PyObject *args = _args_holder[_last_index];
 	PyObject *w;
 
-	if (args == NULL)
-		return NULL;
+#if defined(Py_DEBUG) || defined(FUNC_STATS)
+	_Py_NumArgsCount[na & (_MAX_ARG_COUNT_STATS - 1)]++;
+#endif
+
+	*needs_release = na < _MAX_ARG_COUNT;
+	if (*needs_release && args) {
+#if defined(Py_DEBUG) || defined(FUNC_STATS)
+		_Py_CachedArgsHit++;
+#endif
+		((PyVarObject*)args)->ob_size = na;
+		Py_INCREF(args);
+		_last_index = (_last_index + 1) & (_NUM_ARG_HOLDERS - 1);
+	}
+	else {
+		int size = na;
+		if (size < _MAX_ARG_COUNT)
+			size = _MAX_ARG_COUNT;
+#if defined(Py_DEBUG) || defined(FUNC_STATS)
+		if (na < _MAX_ARG_COUNT)
+			_Py_CachedArgsMissedEmpty++;
+		else
+			_Py_CachedArgsMissedSize++;
+#endif
+		args = PyTuple_New(size);
+		if (args == NULL) {
+			*needs_release = 0;
+			return NULL;
+		}
+		if (na < _MAX_ARG_COUNT) {
+			_args_holder[_last_index] = args;
+			((PyVarObject*)args)->ob_size = na;
+			Py_INCREF(args);
+			_last_index = (_last_index + 1) & (_NUM_ARG_HOLDERS - 1);
+		}
+	}
 	while (--na >= 0) {
 		w = EXT_POP(*pp_stack);
 		PyTuple_SET_ITEM(args, na, w);
@@ -3717,13 +3834,14 @@
 	PyObject *callargs = NULL;
 	PyObject *kwdict = NULL;
 	PyObject *result = NULL;
+	int needs_release;
 
 	if (nk > 0) {
 		kwdict = update_keyword_args(NULL, nk, pp_stack, func);
 		if (kwdict == NULL)
 			goto call_fail;
 	}
-	callargs = load_args(pp_stack, na);
+	callargs = load_args(pp_stack, na, &needs_release);
 	if (callargs == NULL)
 		goto call_fail;
 #ifdef CALL_PROFILE
@@ -3741,6 +3859,21 @@
 		PCALL(PCALL_OTHER);
 #endif
 	result = PyObject_Call(func, callargs, kwdict);
+	/* Hand a cached tuple back to the cache (see load_args()). */
+	if (needs_release) {
+		_last_index = (_last_index - 1) & (_NUM_ARG_HOLDERS - 1);
+		if (callargs->ob_refcnt == 2) {
+			int i;
+			for (i = 0; i < na; i++) {
+				PyObject *tmp = PyTuple_GET_ITEM(callargs, i);
+				Py_DECREF(tmp);
+			}
+			((PyVarObject*)callargs)->ob_size = 0;
+		}
+		else {
+			Py_CLEAR(_args_holder[_last_index]);
+		}
+	}
  call_fail:
 	Py_XDECREF(callargs);
 	Py_XDECREF(kwdict);
Index: Python/pythonrun.c
===================================================================
--- Python/pythonrun.c	(revision 45831)
+++ Python/pythonrun.c	(working copy)
@@ -56,6 +56,8 @@
 static void initsigs(void);
 static void call_sys_exitfunc(void);
 static void call_ll_exitfuncs(void);
+extern int _PyEval_Init(void);
+extern void _PyEval_Fini(void);
 extern void _PyUnicode_Init(void);
 extern void _PyUnicode_Fini(void);
 
@@ -183,6 +185,9 @@
 	if (!_PyFrame_Init())
 		Py_FatalError("Py_Initialize: can't init frames");
 
+	if (!_PyEval_Init())
+		Py_FatalError("Py_Initialize: can't init eval");
+
 	if (!_PyInt_Init())
 		Py_FatalError("Py_Initialize: can't init ints");
 
@@ -458,6 +463,7 @@
 	/* Cleanup Unicode implementation */
 	_PyUnicode_Fini();
 #endif
+	_PyEval_Fini();
 
 	/* XXX Still allocated:
 	   - various static ad-hoc pointers to interned strings
Index: Misc/SpecialBuilds.txt
===================================================================
--- Misc/SpecialBuilds.txt	(revision 45831)
+++ Misc/SpecialBuilds.txt	(working copy)
@@ -151,7 +151,7 @@
 
 This is what is generally meant by "a debug build" of Python.
 
-Py_DEBUG implies LLTRACE, Py_REF_DEBUG, Py_TRACE_REFS, and
+Py_DEBUG implies FUNC_STATS, LLTRACE, Py_REF_DEBUG, Py_TRACE_REFS, and
 PYMALLOC_DEBUG (if WITH_PYMALLOC is enabled).  In addition, C
 assert()s are enabled (via the C way: by not defining NDEBUG), and
 some routines do additional sanity checks inside "#ifdef Py_DEBUG"
@@ -208,6 +208,18 @@
 for which the first allocation of an object of that type occurred
 most recently is at the front of the list.
 ---------------------------------------------------------------------------
+FUNC_STATS                                       introduced for Python 2.5
+
+Keep statistics about the number of times a cached argument tuple holder
+is used (hit) or missed, either because the call has too many arguments
+(currently 8 or more) or because the holder is empty, e.g. after a callee
+kept a reference to the argument tuple.  Py_DEBUG implies FUNC_STATS.
+
+Special gimmicks:
+
+envar PYTHONFUNCSTATS
+    If this envar exists, the statistics are printed to stderr in Py_Finalize().
+---------------------------------------------------------------------------
 LLTRACE                                          introduced well before 1.0
 
 Compile in support for Low Level TRACE-ing of the main interpreter loop.