Index: Include/ceval.h =================================================================== --- Include/ceval.h (revision 81561) +++ Include/ceval.h (working copy) @@ -22,6 +22,10 @@ PyAPI_FUNC(void) PyEval_SetProfile(Py_tracefunc, PyObject *); PyAPI_FUNC(void) PyEval_SetTrace(Py_tracefunc, PyObject *); +PyAPI_FUNC(void) PyEval_SetThreadProfile(PyThreadState *, + Py_tracefunc, PyObject *); +PyAPI_FUNC(void) PyEval_SetThreadTrace(PyThreadState *, + Py_tracefunc, PyObject *); struct _frame; /* Avoid including frameobject.h */ Index: Lib/test/test_cprofile.py =================================================================== --- Lib/test/test_cprofile.py (revision 81561) +++ Lib/test/test_cprofile.py (working copy) @@ -1,7 +1,11 @@ """Test suite for the cProfile module.""" import sys +import unittest from test.support import run_unittest, TESTFN, unlink +from functools import partial +import threading +import time # rip off all interesting stuff from test_profile import cProfile @@ -28,9 +32,87 @@ sys.stderr = sys.__stderr__ unlink(TESTFN) +class Barrier(object): + def __init__(self, n): + self.cond = threading.Condition() + self.n = n + self.entered = self.released = -1 + def cross(self): + with self.cond: + self.entered += 1 + me = self.entered + if me - self.released >= self.n: + self.released += self.n + self.cond.notify_all() + else: + while me > self.released: + self.cond.wait() + +class ThreadTest(unittest.TestCase): + #simple forwarding functions with different names + def FuncA(self, callee): + return callee() + def FuncB(self, callee): + return callee() + def FuncC(self, callee): + return callee() + + def Sleeper(self): + time.sleep(0) + + def ThreadMain(self, callee, t, barrier): + barrier.cross() + #all threads are awake here + barrier.cross() + #profiler is on here + + t0 = time.clock() + while True: + callee() + if time.clock() - t0 > t: + break + barrier.cross() + #profiler is turned off here + + def GetFunctions(self): + return [partial(self.FuncA, self.Sleeper), + partial(self.FuncB, self.Sleeper), + partial(self.FuncC, self.Sleeper)] + + def StartThreads(self, barrier): + funcs = self.GetFunctions() + signal = [] + threads = [] + for func in funcs: + t = threading.Thread(target=self.ThreadMain, args = (func, 1, barrier)) + t.start() + + def GetData(self): + barrier = Barrier(4) + self.StartThreads(barrier) + barrier.cross() + #all threads are started here. + #enable(allthreads) only enables profiling for already existing threads, so + #we must have all threads of interest ready when we enable profiling + p = cProfile.Profile() + p.enable(allthreads=True) + barrier.cross() + #workers run here + barrier.cross() + p.disable() + p.create_stats() + return p.stats + + def test_Threads(self): + keys = self.GetData().keys() + for f in ["FuncA", "FuncB", "FuncC"]: + found = [key for key in keys if f in key] + self.assertEqual(len(found), 1) + + def test_main(): - run_unittest(CProfileTest) + run_unittest(CProfileTest, ThreadTest) def main(): if '-r' not in sys.argv: Index: Modules/_lsprof.c =================================================================== --- Modules/_lsprof.c (revision 81561) +++ Modules/_lsprof.c (working copy) @@ -98,23 +98,39 @@ typedef struct _ProfilerContext { PY_LONG_LONG t0; PY_LONG_LONG subt; + PY_LONG_LONG paused; struct _ProfilerContext *previous; ProfilerEntry *ctxEntry; + char is_recursion; + char is_subcall_recursion; } ProfilerContext; +typedef struct _ProfilerStack { + rotating_node_t header; + ProfilerContext *currentProfilerContext; + PY_LONG_LONG t0; /* When did stack become non-current? */ +} ProfilerStack; + typedef struct { PyObject_HEAD + rotating_node_t *profilerStacks; rotating_node_t *profilerEntries; - ProfilerContext *currentProfilerContext; + ProfilerStack *currentProfilerStack; ProfilerContext *freelistProfilerContext; - int flags; PyObject *externalTimer; double externalTimerUnit; + PY_LONG_LONG currentTime; + int flags; + int nProfilerStacks; } ProfilerObject; +#define CURRENT_CONTEXT(pObj) \ + ((pObj)->currentProfilerStack->currentProfilerContext) + #define POF_ENABLED 0x001 #define POF_SUBCALLS 0x002 #define POF_BUILTINS 0x004 +#define POF_THREADS 0x008 #define POF_NOMEMORY 0x100 static PyTypeObject PyProfiler_Type; @@ -303,26 +319,62 @@ { RotatingTree_Enum(pObj->profilerEntries, freeEntry, NULL); pObj->profilerEntries = EMPTY_ROTATING_TREE; - /* release the memory hold by the ProfilerContexts */ - if (pObj->currentProfilerContext) { - free(pObj->currentProfilerContext); - pObj->currentProfilerContext = NULL; - } + /* release the memory held by the free list of ProfilerContexts */ while (pObj->freelistProfilerContext) { ProfilerContext *c = pObj->freelistProfilerContext; pObj->freelistProfilerContext = c->previous; free(c); } - pObj->freelistProfilerContext = NULL; } static void +checkRecursion(ProfilerContext *self, int nStacks, long recursionLevel, long subcallRecursionLevel) +{ + ProfilerEntry *entry = self->ctxEntry; + ProfilerContext *p; + self->is_recursion = self->is_subcall_recursion = 0; + if (subcallRecursionLevel > 1) { + ProfilerEntry *previous_entry; + if (nStacks <= 1) { + /* We can trust the sum */ + self->is_recursion = self->is_subcall_recursion = 1; + return; + } + /* possible subcall recursion (and hence recursion) on this stack. */ + previous_entry = self->previous->ctxEntry; + for(p = self->previous; p; p = p->previous) { + if (p->ctxEntry == entry) { + self->is_recursion = 1; + if (p->previous && p->previous->ctxEntry == previous_entry) { + self->is_subcall_recursion = 1; + return; + } + } + } + } + else if (recursionLevel > 1) { + if (nStacks <= 1) { + self->is_recursion = 1; + return; + } + /* possible regular recursion on this stack. Check it. */ + for(p = self->previous; p; p = p->previous) { + if (p->ctxEntry == entry) { + self->is_recursion = 1; + return; + } + } + } +} + +static void initContext(ProfilerObject *pObj, ProfilerContext *self, ProfilerEntry *entry) { + long subcallRecursionLevel = 0; self->ctxEntry = entry; - self->subt = 0; - self->previous = pObj->currentProfilerContext; - pObj->currentProfilerContext = self; + self->subt = self->paused = 0; + self->previous = CURRENT_CONTEXT(pObj); + CURRENT_CONTEXT(pObj) = self; ++entry->recursionLevel; if ((pObj->flags & POF_SUBCALLS) && self->previous) { /* find or create an entry for me in my caller's entry */ @@ -331,20 +383,29 @@ if (subentry == NULL) subentry = newSubEntry(pObj, caller, entry); if (subentry) - ++subentry->recursionLevel; + subcallRecursionLevel = ++subentry->recursionLevel; } - self->t0 = CALL_TIMER(pObj); + checkRecursion(self, pObj->nProfilerStacks, entry->recursionLevel, + subcallRecursionLevel); + self->t0 = pObj->currentTime; } static void Stop(ProfilerObject *pObj, ProfilerContext *self, ProfilerEntry *entry) { - PY_LONG_LONG tt = CALL_TIMER(pObj) - self->t0; - PY_LONG_LONG it = tt - self->subt; - if (self->previous) + PY_LONG_LONG tt, it; + + tt = pObj->currentTime - self->t0; + tt -= self->paused; + it = tt - self->subt; + + if (self->previous) { self->previous->subt += tt; - pObj->currentProfilerContext = self->previous; - if (--entry->recursionLevel == 0) + self->previous->paused += self->paused; + } + CURRENT_CONTEXT(pObj) = self->previous; + --entry->recursionLevel; + if (!self->is_recursion) entry->tt += tt; else ++entry->recursivecallcount; @@ -355,7 +416,8 @@ ProfilerEntry *caller = self->previous->ctxEntry; ProfilerSubEntry *subentry = getSubEntry(pObj, caller, entry); if (subentry) { - if (--subentry->recursionLevel == 0) + --subentry->recursionLevel; + if (!self->is_subcall_recursion) subentry->tt += tt; else ++subentry->recursivecallcount; @@ -417,7 +479,7 @@ ProfilerEntry *profEntry; ProfilerContext *pContext; - pContext = pObj->currentProfilerContext; + pContext = CURRENT_CONTEXT(pObj); if (pContext == NULL) return; profEntry = getEntry(pObj, key); @@ -425,17 +487,68 @@ Stop(pObj, pContext, profEntry); } else { - pObj->currentProfilerContext = pContext->previous; + CURRENT_CONTEXT(pObj) = pContext->previous; } /* put pContext into the free list */ pContext->previous = pObj->freelistProfilerContext; pObj->freelistProfilerContext = pContext; } +static void +SelectStackByKey(ProfilerObject *pObj, void *key) +{ + ProfilerStack *old = pObj->currentProfilerStack; + ProfilerStack *stack; + + if (old) { + if (old->header.key == key) + return; + old->t0 = pObj->currentTime; + } + stack = (ProfilerStack*)RotatingTree_Get(&pObj->profilerStacks, key); + if (stack) { + if (stack->currentProfilerContext) + stack->currentProfilerContext->paused += pObj->currentTime - stack->t0; + } + else { + stack = (ProfilerStack*) malloc(sizeof(ProfilerStack)); + if (stack != NULL) { + stack->currentProfilerContext = NULL; + stack->header.key = key; + RotatingTree_Add(&pObj->profilerStacks, (rotating_node_t*)stack); + ++pObj->nProfilerStacks; + } + } + pObj->currentProfilerStack = stack; +} + +static void +SelectStack(ProfilerObject *pObj) +{ + PyThreadState *tstate = PyThreadState_GET(); + SelectStackByKey(pObj, tstate); +} + static int profiler_callback(PyObject *self, PyFrameObject *frame, int what, PyObject *arg) { + ProfilerObject *pObj = (ProfilerObject*)self; + { + /* keep error state, see ptrace_enter_call above. + * We could keep this more focused, only really needed + * when calling a user time function, and initializing + * a user object + */ + PyObject *et, *ev, *tb; + PyErr_Fetch(&et, &ev, &tb); + pObj->currentTime = CALL_TIMER(pObj); + SelectStack(pObj); + PyErr_Restore(et, ev, tb); + } + if (pObj->currentProfilerStack == NULL) + return 0; + switch (what) { /* the 'frame' of a called function is about to start its execution */ @@ -678,14 +791,43 @@ return 0; } +/* turn tracing on or off, either for the current thread + * or for all threads in all interpreter states. + * Using all threads ensures that no time will leak. + */ +static void +set_profile(int threads, Py_tracefunc func, PyObject *arg) +{ + PyInterpreterState *istate; + if (!threads) { + PyEval_SetProfile(func, arg); + return; + } + istate = PyInterpreterState_Head(); + for(istate = PyInterpreterState_Head(); + istate; + istate = PyInterpreterState_Next(istate)) + { + PyThreadState *tstate; + for(tstate = PyInterpreterState_ThreadHead(istate); + tstate; + tstate = PyThreadState_Next(tstate)) + { + PyEval_SetThreadProfile(tstate, func, arg); + } + } +} + PyDoc_STRVAR(enable_doc, "\ -enable(subcalls=True, builtins=True)\n\ +enable(subcalls=True, builtins=True, global=False)\n\ \n\ Start collecting profiling information.\n\ If 'subcalls' is True, also records for each function\n\ statistics separated according to its current caller.\n\ If 'builtins' is True, records the time spent in\n\ built-in functions separately from their caller.\n\ +If 'global' is True, the profiling data is collected from\n\ +all threads.\n\ "); static PyObject* @@ -693,34 +835,59 @@ { int subcalls = -1; int builtins = -1; - static char *kwlist[] = {"subcalls", "builtins", 0}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|ii:enable", - kwlist, &subcalls, &builtins)) + int threads = 0; + static char *kwlist[] = {"subcalls", "builtins", "allthreads", 0}; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|iii:enable", + kwlist, &subcalls, &builtins, &threads)) return NULL; if (setSubcalls(self, subcalls) < 0 || setBuiltins(self, builtins) < 0) return NULL; - PyEval_SetProfile(profiler_callback, (PyObject*)self); + set_profile(threads, profiler_callback, (PyObject *)self); self->flags |= POF_ENABLED; + if (threads) + self->flags |= POF_THREADS; Py_INCREF(Py_None); return Py_None; } static void -flush_unmatched(ProfilerObject *pObj) +flush_unmatched_current(ProfilerObject *pObj) { - while (pObj->currentProfilerContext) { - ProfilerContext *pContext = pObj->currentProfilerContext; + if (!pObj->currentProfilerStack) + return; + while (CURRENT_CONTEXT(pObj)) { + ProfilerContext *pContext = CURRENT_CONTEXT(pObj); ProfilerEntry *profEntry= pContext->ctxEntry; if (profEntry) Stop(pObj, pContext, profEntry); else - pObj->currentProfilerContext = pContext->previous; + CURRENT_CONTEXT(pObj) = pContext->previous; if (pContext) free(pContext); } } +static int +flush_unmatched_enum(rotating_node_t *n, void *arg) +{ + ProfilerStack *t = (ProfilerStack*)n; + ProfilerObject *pObj = (ProfilerObject*)arg; + pObj->currentProfilerStack = t; + flush_unmatched_current(pObj); + free(t); + return 0; +} + +static void +flush_unmatched(ProfilerObject *pObj) +{ + RotatingTree_Enum(pObj->profilerStacks, &flush_unmatched_enum, pObj); + pObj->profilerStacks = EMPTY_ROTATING_TREE; + pObj->currentProfilerStack = NULL; + pObj->nProfilerStacks = 0; +} + PyDoc_STRVAR(disable_doc, "\ disable()\n\ \n\ @@ -730,8 +897,9 @@ static PyObject* profiler_disable(ProfilerObject *self, PyObject* noarg) { - self->flags &= ~POF_ENABLED; - PyEval_SetProfile(NULL, NULL); + int threads = self->flags & POF_THREADS; + self->flags &= ~(POF_ENABLED | POF_THREADS); + set_profile(threads, NULL, NULL); flush_unmatched(self); if (pending_exception(self)) return NULL; @@ -748,6 +916,7 @@ static PyObject* profiler_clear(ProfilerObject *pObj, PyObject* noarg) { + flush_unmatched(pObj); clearEntries(pObj); Py_INCREF(Py_None); return Py_None; @@ -757,7 +926,7 @@ profiler_dealloc(ProfilerObject *op) { if (op->flags & POF_ENABLED) - PyEval_SetProfile(NULL, NULL); + set_profile(op->flags & POF_THREADS, NULL, NULL); flush_unmatched(op); clearEntries(op); Py_XDECREF(op->externalTimer); @@ -902,4 +1071,4 @@ empty_tuple = PyTuple_New(0); initialized = 1; return module; -} +} \ No newline at end of file Index: Python/ceval.c =================================================================== --- Python/ceval.c (revision 81561) +++ Python/ceval.c (working copy) @@ -3626,6 +3626,20 @@ PyEval_SetProfile(Py_tracefunc func, PyObject *arg) { PyThreadState *tstate = PyThreadState_GET(); + PyEval_SetThreadProfile(tstate, func, arg); +} + +void +PyEval_SetTrace(Py_tracefunc func, PyObject *arg) +{ + PyThreadState *tstate = PyThreadState_GET(); + PyEval_SetThreadTrace(tstate, func, arg); +} + +void +PyEval_SetThreadProfile(PyThreadState *tstate, + Py_tracefunc func, PyObject *arg) +{ PyObject *temp = tstate->c_profileobj; Py_XINCREF(arg); tstate->c_profilefunc = NULL; @@ -3640,9 +3654,9 @@ } void -PyEval_SetTrace(Py_tracefunc func, PyObject *arg) +PyEval_SetThreadTrace(PyThreadState *tstate, + Py_tracefunc func, PyObject *arg) { - PyThreadState *tstate = PyThreadState_GET(); PyObject *temp = tstate->c_traceobj; _Py_TracingPossible += (func != NULL) - (tstate->c_tracefunc != NULL); Py_XINCREF(arg);