diff -r 26e7d3935798 Misc/SpecialBuilds.txt --- a/Misc/SpecialBuilds.txt Sun Jan 13 16:04:05 2008 +0100 +++ b/Misc/SpecialBuilds.txt Mon Jan 14 00:19:20 2008 +0100 @@ -155,7 +155,7 @@ Py_DEBUG This is what is generally meant by "a debug build" of Python. -Py_DEBUG implies LLTRACE, Py_REF_DEBUG, Py_TRACE_REFS, and +Py_DEBUG implies FUNC_STATS, LLTRACE, Py_REF_DEBUG, Py_TRACE_REFS, and PYMALLOC_DEBUG (if WITH_PYMALLOC is enabled). In addition, C assert()s are enabled (via the C way: by not defining NDEBUG), and some routines do additional sanity checks inside "#ifdef Py_DEBUG" @@ -212,6 +212,18 @@ sys.getcounts() for which the first allocation of an object of that type occurred most recently is at the front of the list. --------------------------------------------------------------------------- +FUNC_STATS introduced for Python 2.5 + +Keep statistics about the number of times a cached tuple argument holder +is used (hit) or missed due to the argument count being too large +(currently 8 or more arguments) or due to the callee holding a reference +to the argument tuple. Py_DEBUG implies FUNC_STATS. + +Special gimmicks: + +envar PYTHONFUNCSTATS + If this envar exists, the statistics are printed to stderr in Py_Finalize(). +--------------------------------------------------------------------------- LLTRACE introduced well before 1.0 Compile in support for Low Level TRACE-ing of the main interpreter loop. 
diff -r 26e7d3935798 Python/ceval.c --- a/Python/ceval.c Sun Jan 13 16:04:05 2008 +0100 +++ b/Python/ceval.c Mon Jan 14 00:19:20 2008 +0100 @@ -97,7 +97,7 @@ static PyObject * update_keyword_args(Py static PyObject * update_keyword_args(PyObject *, int, PyObject ***, PyObject *); static PyObject * update_star_args(int, int, PyObject *, PyObject ***); -static PyObject * load_args(PyObject ***, int); +static PyObject * load_args(PyObject ***, int, int *); #define CALL_FLAG_VAR 1 #define CALL_FLAG_KW 2 @@ -715,7 +715,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int an argument which depends on the situation. The global trace function is also called whenever an exception is detected. */ - if (call_trace_protected(tstate->c_tracefunc, + if (call_trace_protected(tstate->c_tracefunc, tstate->c_traceobj, f, PyTrace_CALL, Py_None)) { /* Trace function raised an error */ @@ -747,10 +747,10 @@ PyEval_EvalFrameEx(PyFrameObject *f, int this wasn't always true before 2.3! PyFrame_New now sets f->f_lasti to -1 (i.e. the index *before* the first instruction) and YIELD_VALUE doesn't fiddle with f_lasti any more. So this - does work. Promise. + does work. Promise. When the PREDICT() macros are enabled, some opcode pairs follow in - direct succession without updating f->f_lasti. A successful + direct succession without updating f->f_lasti. A successful prediction effectively links the two codes together as if they were a single new opcode; accordingly,f->f_lasti will point to the first code in the pair (for instance, GET_ITER followed by @@ -2179,7 +2179,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int because it prevents detection of a control-break in tight loops like "while 1: pass". Compile with this option turned-on when you need the speed-up and do not need break checking inside tight loops (ones - that contain only instructions ending with goto fast_next_opcode). + that contain only instructions ending with goto fast_next_opcode). 
*/ goto fast_next_opcode; #else @@ -3510,6 +3510,48 @@ PyEval_GetFuncDesc(PyObject *func) } else { return " object"; } +} + +/* Maximum # of arguments to optimize for. */ +#define _MAX_ARG_COUNT 8 +/* # of tuples to hold on to (cache). Must be a power of 2. */ +#define _NUM_ARG_HOLDERS 64 +static PyObject* _args_holder[_NUM_ARG_HOLDERS]; +static unsigned _last_index = 0; +#if defined(Py_DEBUG) || defined(FUNC_STATS) +/* This must be a power of 2 so it's calculated properly. */ +#define _MAX_ARG_COUNT_STATS 32 +static int _Py_NumArgsCount[_MAX_ARG_COUNT_STATS]; +static int _Py_CachedArgsHit = 0; +static int _Py_CachedArgsMissedSize = 0; +static int _Py_CachedArgsMissedEmpty = 0; +#endif + +int +_PyEval_Init(void) +{ + return 1; +} + +void +_PyEval_Fini(void) +{ + int i; + +#if defined(Py_DEBUG) || defined(FUNC_STATS) + if (Py_GETENV("PYTHONFUNCSTATS")) { + fprintf(stderr, "Hits: %d, Size misses: %d, empty misses: %d\n", + _Py_CachedArgsHit, _Py_CachedArgsMissedSize, + _Py_CachedArgsMissedEmpty); + + fprintf(stdout, "Arg counts:\n"); + for (i = 0; i < _MAX_ARG_COUNT_STATS; i++) + fprintf(stderr, " [%3d] = %d\n", i, _Py_NumArgsCount[i]); + } +#endif + + for (i = 0; i < _NUM_ARG_HOLDERS; i++) + Py_CLEAR(_args_holder[i]); } static void @@ -3579,11 +3621,11 @@ call_function(PyObject ***pp_stack, int if (PyCFunction_Check(func) && nk == 0) { int flags = PyCFunction_GET_FLAGS(func); PyThreadState *tstate = PyThreadState_GET(); + PyCFunction meth = PyCFunction_GET_FUNCTION(func); + PyObject *self = PyCFunction_GET_SELF(func); PCALL(PCALL_CFUNCTION); if (flags & (METH_NOARGS | METH_O)) { - PyCFunction meth = PyCFunction_GET_FUNCTION(func); - PyObject *self = PyCFunction_GET_SELF(func); if (flags & METH_NOARGS && na == 0) { C_TRACE(x, (*meth)(self,NULL)); } @@ -3598,11 +3640,32 @@ call_function(PyObject ***pp_stack, int } } else { + int needs_release; PyObject *callargs; - callargs = load_args(pp_stack, na); + callargs = load_args(pp_stack, na, &needs_release); 
READ_TIMESTAMP(*pintr0); - C_TRACE(x, PyCFunction_Call(func,callargs,NULL)); + if ((flags & ~(METH_CLASS | METH_STATIC | METH_COEXIST)) + == METH_VARARGS) { + C_TRACE(x, (*meth)(self,callargs)); + } + else { + C_TRACE(x, PyCFunction_Call(func,callargs,NULL)); + } READ_TIMESTAMP(*pintr1); + if (needs_release) { + _last_index--; + if (callargs->ob_refcnt == 2) { + int i; + for (i = 0; i < na; i++) { + w = PyTuple_GET_ITEM(callargs, i); + Py_DECREF(w); + } + ((PyVarObject*)callargs)->ob_size = 0; + } + else { + _args_holder[_last_index] = NULL; + } + } Py_XDECREF(callargs); } } else { @@ -3613,13 +3676,12 @@ call_function(PyObject ***pp_stack, int PCALL(PCALL_BOUND_METHOD); Py_INCREF(self); func = PyMethod_GET_FUNCTION(func); - Py_INCREF(func); Py_DECREF(*pfunc); *pfunc = self; na++; n++; - } else - Py_INCREF(func); + } + Py_INCREF(func); READ_TIMESTAMP(*pintr0); if (PyFunction_Check(func)) x = fast_function(func, pp_stack, n, na, nk); @@ -3769,13 +3831,44 @@ update_star_args(int nstack, int nstar, } static PyObject * -load_args(PyObject ***pp_stack, int na) -{ - PyObject *args = PyTuple_New(na); +load_args(PyObject ***pp_stack, int na, int *needs_release) +{ + PyObject *args = _args_holder[_last_index]; PyObject *w; - if (args == NULL) - return NULL; +#if defined(Py_DEBUG) || defined(FUNC_STATS) + _Py_NumArgsCount[na & (_MAX_ARG_COUNT_STATS - 1)]++; +#endif + + *needs_release = na < _MAX_ARG_COUNT; + if (*needs_release && args) { +#if defined(Py_DEBUG) || defined(FUNC_STATS) + _Py_CachedArgsHit++; +#endif + ((PyVarObject*)args)->ob_size = na; + Py_INCREF(args); + _last_index = (_last_index + 1) & (_NUM_ARG_HOLDERS - 1); + } + else { + int size = na; + if (size < _MAX_ARG_COUNT) + size = _MAX_ARG_COUNT; +#if defined(Py_DEBUG) || defined(FUNC_STATS) + if (na < _MAX_ARG_COUNT) + _Py_CachedArgsMissedEmpty++; + else + _Py_CachedArgsMissedSize++; +#endif + args = PyTuple_New(size); + if (args == NULL) + return NULL; + if (na < _MAX_ARG_COUNT) { + _args_holder[_last_index] 
= args; + ((PyVarObject*)args)->ob_size = na; + Py_INCREF(args); + _last_index = (_last_index + 1) & (_NUM_ARG_HOLDERS - 1); + } + } while (--na >= 0) { w = EXT_POP(*pp_stack); PyTuple_SET_ITEM(args, na, w); @@ -3789,13 +3882,14 @@ do_call(PyObject *func, PyObject ***pp_s PyObject *callargs = NULL; PyObject *kwdict = NULL; PyObject *result = NULL; + int needs_release; if (nk > 0) { kwdict = update_keyword_args(NULL, nk, pp_stack, func); if (kwdict == NULL) goto call_fail; } - callargs = load_args(pp_stack, na); + callargs = load_args(pp_stack, na, &needs_release); if (callargs == NULL) goto call_fail; #ifdef CALL_PROFILE @@ -3813,6 +3907,20 @@ do_call(PyObject *func, PyObject ***pp_s PCALL(PCALL_OTHER); #endif result = PyObject_Call(func, callargs, kwdict); + if (needs_release) { + _last_index--; + if (callargs->ob_refcnt == 2) { + int i; + for (i = 0; i < na; i++) { + PyObject *tmp = PyTuple_GET_ITEM(callargs, i); + Py_DECREF(tmp); + } + ((PyVarObject*)callargs)->ob_size = 0; + } + else { + _args_holder[_last_index] = NULL; + } + } call_fail: Py_XDECREF(callargs); Py_XDECREF(kwdict); diff -r 26e7d3935798 Python/pythonrun.c --- a/Python/pythonrun.c Sun Jan 13 16:04:05 2008 +0100 +++ b/Python/pythonrun.c Mon Jan 14 00:19:20 2008 +0100 @@ -59,6 +59,8 @@ static void initsigs(void); static void initsigs(void); static void call_sys_exitfunc(void); static void call_ll_exitfuncs(void); +extern int _PyEval_Init(void); +extern void _PyEval_Fini(void); extern void _PyUnicode_Init(void); extern void _PyUnicode_Fini(void); @@ -189,6 +191,9 @@ Py_InitializeEx(int install_sigs) if (!_PyFrame_Init()) Py_FatalError("Py_Initialize: can't init frames"); + + if (!_PyEval_Init()) + Py_FatalError("Py_Initialize: can't init eval"); if (!_PyInt_Init()) Py_FatalError("Py_Initialize: can't init ints"); @@ -475,6 +480,7 @@ Py_Finalize(void) /* Cleanup Unicode implementation */ _PyUnicode_Fini(); #endif + _PyEval_Fini(); /* XXX Still allocated: - various static ad-hoc pointers to interned 
strings