diff -r e0531ff61e8f .hgtags
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.hgtags	Sun Mar 28 08:32:51 2010 +0300
@@ -0,0 +1,1 @@
+a0fa2e6ded5738b0e11eca623f7f15e778e2dd19 fast-reschedule
diff -r e0531ff61e8f Include/mutexed.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Include/mutexed.h	Sun Mar 28 08:32:51 2010 +0300
@@ -0,0 +1,150 @@
+/*
+ * Mutex related macros for the BFS scheduler - taken from ceval_gil.h.
+ */
+
+#include <unistd.h>
+
+#ifndef _POSIX_THREADS
+/* This means pthreads are not implemented in libc headers, hence the macro
+   not present in unistd.h. But they can still be implemented as an external
+   library (e.g. gnu pth in pthread emulation). */
+#  ifdef HAVE_PTHREAD_H
+#    include <pthread.h> /* _POSIX_THREADS */
+#  endif
+#endif
+
+#ifdef _POSIX_THREADS
+
+/*
+ * POSIX support
+ */
+
+#include <pthread.h>
+#include <errno.h>   /* ETIMEDOUT */
+
+#define ADD_MICROSECONDS(tv, interval) \
+do { \
+    tv.tv_usec += (long) interval; \
+    tv.tv_sec += tv.tv_usec / 1000000; \
+    tv.tv_usec %= 1000000; \
+} while (0)
+
+/* We assume all modern POSIX systems have gettimeofday() */
+#ifdef GETTIMEOFDAY_NO_TZ
+#define GETTIMEOFDAY(ptv) gettimeofday(ptv)
+#else
+#define GETTIMEOFDAY(ptv) gettimeofday(ptv, (struct timezone *)NULL)
+#endif
+
+#define MUTEX_T pthread_mutex_t
+#define MUTEX_INIT(mut) \
+    if (pthread_mutex_init(&mut, NULL)) { \
+        Py_FatalError("pthread_mutex_init(" #mut ") failed"); };
+#define MUTEX_LOCK(mut) \
+    if (pthread_mutex_lock(&mut)) { \
+        Py_FatalError("pthread_mutex_lock(" #mut ") failed"); };
+#define MUTEX_UNLOCK(mut) \
+    if (pthread_mutex_unlock(&mut)) { \
+        Py_FatalError("pthread_mutex_unlock(" #mut ") failed"); };
+
+#define COND_T pthread_cond_t
+#define COND_INIT(cond) \
+    if (pthread_cond_init(&cond, NULL)) { \
+        Py_FatalError("pthread_cond_init(" #cond ") failed"); };
+#define COND_DESTROY(cond) \
+    if (pthread_cond_destroy(&cond)) { \
+        Py_FatalError("pthread_cond_destroy(" #cond ") failed"); };
+#define COND_RESET(cond)
+#define COND_SIGNAL(cond) \
+    if (pthread_cond_signal(&cond)) { \
+        Py_FatalError("pthread_cond_signal(" #cond ") failed"); };
+#define COND_WAIT(cond, mut) \
+    if (pthread_cond_wait(&cond, &mut)) { \
+        Py_FatalError("pthread_cond_wait(" #cond ") failed"); };
+#define COND_TIMED_WAIT(cond, mut, microseconds, timeout_result) \
+    { \
+        int r; \
+        struct timespec ts; \
+        struct timeval deadline; \
+        \
+        GETTIMEOFDAY(&deadline); \
+        ADD_MICROSECONDS(deadline, microseconds); \
+        ts.tv_sec = deadline.tv_sec; \
+        ts.tv_nsec = deadline.tv_usec * 1000; \
+        \
+        r = pthread_cond_timedwait(&cond, &mut, &ts); \
+        if (r == ETIMEDOUT) \
+            timeout_result = 1; \
+        else if (r) \
+            Py_FatalError("pthread_cond_timedwait(" #cond ") failed"); \
+        else \
+            timeout_result = 0; \
+    } \
+
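An aside on the trickiest macro above: COND_TIMED_WAIT turns a relative timeout in microseconds into the absolute timespec that pthread_cond_timedwait() expects, via GETTIMEOFDAY() plus ADD_MICROSECONDS(). A minimal standalone sketch of that conversion (not part of the patch; plain pthreads, no Py_FatalError handling):

    /* Sketch only: the deadline conversion performed by COND_TIMED_WAIT.
     * Nobody signals the condition, so the wait must time out. */
    #include <errno.h>
    #include <pthread.h>
    #include <stdio.h>
    #include <sys/time.h>

    static pthread_mutex_t mut = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;

    int main(void)
    {
        struct timeval deadline;
        struct timespec ts;
        long microseconds = 50000;                  /* 50 ms */
        int r;

        gettimeofday(&deadline, NULL);              /* GETTIMEOFDAY() */
        deadline.tv_usec += microseconds;           /* ADD_MICROSECONDS() */
        deadline.tv_sec += deadline.tv_usec / 1000000;
        deadline.tv_usec %= 1000000;
        ts.tv_sec = deadline.tv_sec;
        ts.tv_nsec = deadline.tv_usec * 1000;       /* usec -> nsec */

        pthread_mutex_lock(&mut);
        r = pthread_cond_timedwait(&cond, &mut, &ts);
        pthread_mutex_unlock(&mut);
        printf("timed out: %d\n", r == ETIMEDOUT);  /* prints 1 after ~50 ms */
        return 0;
    }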
+#elif defined(NT_THREADS)
+
+/*
+ * Windows (2000 and later, as well as (hopefully) CE) support
+ */
+
+#include <windows.h>
+
+/* Use a critical section, since it is significantly faster than a mutex
+ * object. It should be enough for the needs of the BFS scheduler, since on
+ * any signaled event there is exactly one waiting thread. However, the
+ * macros are not suited for the general case, so the code should probably
+ * move somewhere else. */
+
+#define MUTEX_T CRITICAL_SECTION
+#define MUTEX_INIT(mut) \
+    InitializeCriticalSection(&mut);
+#define MUTEX_LOCK(mut) \
+    EnterCriticalSection(&mut);
+#define MUTEX_UNLOCK(mut) \
+    LeaveCriticalSection(&mut);
+
+#undef COND_T
+#define COND_T HANDLE
+#define COND_INIT(cond) \
+    /* auto-reset, non-signalled */ \
+    if (!(cond = CreateEvent(NULL, FALSE, FALSE, NULL))) { \
+        Py_FatalError("CreateEvent(" #cond ") failed"); };
+#define COND_DESTROY(cond) \
+    if (!CloseHandle(cond)) { \
+        Py_FatalError("CloseHandle(" #cond ") failed"); };
+#define COND_RESET(cond) \
+    if (!ResetEvent(cond)) { \
+        Py_FatalError("ResetEvent(" #cond ") failed"); };
+#define COND_SIGNAL(cond) \
+    if (!SetEvent(cond)) { \
+        Py_FatalError("SetEvent(" #cond ") failed"); };
+#define COND_WAIT(cond, mut) \
+    { \
+        MUTEX_UNLOCK(mut); \
+        if (WaitForSingleObject(cond, INFINITE) != WAIT_OBJECT_0) \
+            Py_FatalError("WaitForSingleObject(" #cond ") failed"); \
+        MUTEX_LOCK(mut); \
+    }
+#define COND_TIMED_WAIT(cond, mut, microseconds, timeout_result) \
+    { \
+        DWORD r; \
+        MUTEX_UNLOCK(mut); \
+        r = WaitForSingleObject(cond, microseconds / 1000); \
+        if (r == WAIT_TIMEOUT) { \
+            MUTEX_LOCK(mut); \
+            timeout_result = 1; \
+        } \
+        else if (r != WAIT_OBJECT_0) \
+            Py_FatalError("WaitForSingleObject(" #cond ") failed"); \
+        else { \
+            MUTEX_LOCK(mut); \
+            timeout_result = 0; \
+        } \
+    }
+
+#else
+
+#error You need either a POSIX-compatible or a Windows system!
+
+#endif /* _POSIX_THREADS, NT_THREADS */
diff -r e0531ff61e8f Include/pystate.h
--- a/Include/pystate.h	Mon Mar 22 03:53:52 2010 +0100
+++ b/Include/pystate.h	Sun Mar 28 08:32:51 2010 +0300
@@ -8,6 +8,21 @@
 extern "C" {
 #endif
 
+#include "pythread.h"
+
+/* Temporarily work around cross platform (nt/posix) compilation issues.
+ * Can't include mutexed.h directly on NT since it includes windows.h
+ * and breaks compilation solution-wide. */
+
+#undef COND_T
+#ifdef MS_WINDOWS
+typedef void *COND_HANDLE;
+#define COND_T COND_HANDLE
+#else
+#include "mutexed.h"
+#endif
+
+
 /* State shared between threads */
 
 struct _ts; /* Forward */
@@ -55,6 +70,46 @@
 #define PyTrace_C_EXCEPTION 5
 #define PyTrace_C_RETURN 6
 
+#define CONTAINER(type, member, ptr) \
+    ((type *)((char *)(ptr) - offsetof(type, member)))
+
+struct _list {
+    struct _list *next;
+    struct _list *prev;
+};
+
+/* Py_LOCAL_INLINE() does not work - see issue bugs.python.org/issue5553 */
+
+#ifdef MS_WINDOWS
+#define _LOCAL(type) static type
+#else
+#define _LOCAL(type) static inline type
+#endif
+
+_LOCAL(void) _list_init(struct _list *l) {
+    l->next = l;
+    l->prev = l;
+}
+
+_LOCAL(int) _list_empty(struct _list *l) {
+    return l->next == l;
+}
+
+_LOCAL(void) _list_append(struct _list *l, struct _list *item) {
+    struct _list *tail = l->prev;
+    tail->next = item;
+    item->prev = tail;
+    item->next = l;
+    l->prev = item;
+}
+
+_LOCAL(void) _list_pop(struct _list *item) {
+    item->next->prev = item->prev;
+    item->prev->next = item->next;
+    item->prev = NULL;
+    item->next = NULL;
+}
+
 typedef struct _ts {
     /* See Python/ceval.c for comments explaining most fields */
 
@@ -88,8 +143,6 @@
 
     PyObject *dict;  /* Stores per-thread state */
 
-    /* XXX doesn't mean anything anymore (the comment below is obsolete)
-       => deprecate or remove? */
     /* tick_counter is incremented whenever the check_interval ticker
      * reaches zero. The purpose is to give a useful measure of the number
      * of interpreted bytecode instructions in a given thread.  This
@@ -98,15 +151,21 @@
      */
     int tick_counter;
 
+    COND_T bfs_cond;
+    struct _list bfs_list;
+    long double bfs_slice;
+    long double bfs_deadline;
+    long double bfs_timestamp;
+
     int gilstate_counter;
 
     PyObject *async_exc; /* Asynchronous exception to raise */
     long thread_id; /* Thread id where this tstate was created */
 
     /* XXX signal handlers should also be here */
-
 } PyThreadState;
 
+#define THREAD_STATE(ptr) CONTAINER(PyThreadState, bfs_list, ptr)
 
 PyAPI_FUNC(PyInterpreterState *) PyInterpreterState_New(void);
 PyAPI_FUNC(void) PyInterpreterState_Clear(PyInterpreterState *);
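The CONTAINER()/THREAD_STATE() pair above is the classic intrusive linked list idiom (compare container_of() in the Linux kernel): the list node lives inside PyThreadState, and offsetof() arithmetic recovers the enclosing structure from a node pointer. A standalone sketch (not part of the patch) with a hypothetical task_t standing in for PyThreadState:

    /* Sketch of the intrusive-list pattern used above; task_t and its
     * fields are hypothetical stand-ins for PyThreadState/bfs_list. */
    #include <stddef.h>
    #include <stdio.h>

    #define CONTAINER(type, member, ptr) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct _list { struct _list *next, *prev; };

    static void _list_init(struct _list *l) { l->next = l->prev = l; }
    static void _list_append(struct _list *l, struct _list *item) {
        struct _list *tail = l->prev;
        tail->next = item; item->prev = tail;
        item->next = l; l->prev = item;
    }

    typedef struct { double deadline; struct _list node; } task_t;

    int main(void) {
        struct _list rq;                 /* run queue head, like bfs_rq */
        task_t a = { 0.25, {0} }, b = { 0.10, {0} };
        struct _list *it;

        _list_init(&rq);
        _list_append(&rq, &a.node);
        _list_append(&rq, &b.node);

        /* Walk the queue and recover each task from its embedded node,
         * exactly as THREAD_STATE() recovers a PyThreadState. */
        for (it = rq.next; it != &rq; it = it->next)
            printf("deadline=%.2f\n", CONTAINER(task_t, node, it)->deadline);
        return 0;
    }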
diff -r e0531ff61e8f Python/bfs.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Python/bfs.c	Sun Mar 28 08:32:51 2010 +0300
@@ -0,0 +1,341 @@
+/*
+ * Python/bfs.c
+ *
+ * Simplified implementation of the Brain F**k Scheduler by Con Kolivas
+ * http://ck.kolivas.org/patches/bfs/sched-BFS.txt
+ *
+ * "The goal of the Brain F**k Scheduler, referred to as BFS from here on, is
+ * to completely do away with the complex designs of the past for the cpu
+ * process scheduler and instead implement one that is very simple in basic
+ * design. The main focus of BFS is to achieve excellent desktop interactivity
+ * and responsiveness without heuristics and tuning knobs that are difficult
+ * to understand, impossible to model and predict the effect of, and when
+ * tuned to one workload cause massive detriment to another." - Con Kolivas
+ *
+ * Notes:
+ * The following is a straightforward implementation of Con's basic design
+ * as described in the link above. However, there are several gotchas
+ * specific to Python which needed working around.
+ *
+ * 1) Clock - Except for Linux, most platforms do not provide an API to query
+ * a thread's running time with high precision. Therefore timing is done with
+ * a wall clock. The scheduler seems to behave reasonably with a wall clock
+ * even under load, probably since the OS scheduler does not tend to preempt
+ * IO bound threads.
+ *
+ * 2) Precision - The code assumes good clock precision (better than 1ms).
+ * It is probably a good idea to handle the case where a thread's running
+ * time until it voluntarily yields to the scheduler is unmeasurable (0)
+ * on a low-precision clock.
+ *
+ * 3) Slow ticks - Querying the clock between ticks is too expensive, so it
+ * is done once every 1000 ticks. However, since even a single slow tick can
+ * deplete a slice (10ms), a special flag exists (bfs_check_depleted) which
+ * is set by waiting threads and which forces the running thread to check
+ * its remaining slice by the next tick.
+ *
+ * 4) Fast reschedule - On a system with a thread-running-time clock (e.g.
+ * Linux) it is possible to add a nice trick which allows a thread to yield
+ * and reschedule without switching to another thread if done quickly enough
+ * (for example, zlib.compress(some-small-value)). Currently the thread
+ * will schedule a new thread when it yields and then preempt it on
+ * rescheduling (if justified), which is reasonable but more expensive.
+ * The reason a thread-running-time clock is needed is that this mechanism
+ * can cause an IO bound thread to behave as a CPU bound thread, and then it
+ * will lose the OS 'protection' (see the discussion about the clock above).
+ * The code for this trick is left out of this version for the sake of
+ * keeping it simple initially :). This technique noticeably boosts
+ * performance.
+ */
+
+
+#include "time.h"
+#include "mutexed.h"
+
+
+/* Round robin interval - thread time slice for running (in seconds).
+ * It is set at 10ms to match the Windows tick resolution. */
+static const long double rr_interval = 0.010;
+
+/* Magic number taken from Con's implementation. There it is computed as
+ * 1.1 ^ 20 (the nice level of a regular thread). Basically it controls the
+ * load beyond which threads tend to expire their deadline. Expired threads
+ * are handled in FIFO order, which means that at such load IO bound threads
+ * will no longer preempt running threads. */
+#define DEADLINE_FACTOR 6
+
+/* Protect access to data structures. */
+static MUTEX_T bfs_mutex;
+
+/* List of all threads waiting to be scheduled. */
+static struct _list bfs_rq;
+
+/* Number of threads waiting to be scheduled. */
+static volatile int bfs_threads_waiting = 0;
+
+/* Points to the currently running thread. */
+static volatile PyThreadState *bfs_thread = NULL;
+
+/* Flag the currently running thread to yield immediately. */
+static volatile int bfs_preempt = 0;
+
+/* Flag the currently running thread to check its remaining slice
+ * immediately. This is required since even a single slow Python tick can
+ * deplete a 10ms slice, let alone 1000 ticks. */
+static volatile int bfs_check_depleted = 0;
+
+static int bfs_initialized = 0;
+
+
+#undef VERBOSE
+#ifdef VERBOSE
+#define TRACE(v) printf v
+#else
+#define TRACE(v)
+#endif
+
+
+#ifdef MS_WINDOWS
+
+/* Return a system wide timestamp in seconds (with usec precision). */
+static long double get_timestamp(void) {
+    static LARGE_INTEGER ctrStart;
+    static long double divisor = 0.0;
+    LARGE_INTEGER now;
+
+    if (divisor == 0.0) {
+        LARGE_INTEGER freq;
+        QueryPerformanceCounter(&ctrStart);
+        QueryPerformanceFrequency(&freq);
+        divisor = (long double) freq.QuadPart;
+    }
+
+    QueryPerformanceCounter(&now);
+    return (now.QuadPart - ctrStart.QuadPart) / divisor;
+}
+
+#elif defined(HAVE_GETTIMEOFDAY)
+
+/* Return a system wide timestamp in seconds (with usec precision). */
+static inline long double get_timestamp(void) {
+    struct timeval tv;
+    GETTIMEOFDAY(&tv);
+    return (long double) tv.tv_sec + tv.tv_usec * 0.000001;
+}
+
+#else
+
+#error You need either a POSIX-compatible or a Windows system!
+
+#endif /* MS_WINDOWS, HAVE_GETTIMEOFDAY */
+
+/* Reuse the caller's already-queried timestamp if it has one (non-zero). */
+#define GET_TIMESTAMP() (timestamp ? timestamp : get_timestamp())
+
+
+static void bfs_init(void) {
+    MUTEX_INIT(bfs_mutex);
+    _list_init(&bfs_rq);
+    bfs_initialized = 1;
+}
+
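Notes (1) and (4) in the header above refer to a per-thread running-time clock, which Linux exposes through clock_gettime(). A sketch of what such a clock could look like (assumes _POSIX_THREAD_CPUTIME support; not part of the patch, which deliberately sticks to the wall clock):

    /* Sketch only: per-thread CPU time in seconds, the clock that would
     * enable the "fast reschedule" trick described in note (4). */
    #include <time.h>

    static long double get_thread_timestamp(void)
    {
        struct timespec ts;
        if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) != 0)
            return 0.0;  /* caller should fall back to the wall clock */
        return (long double) ts.tv_sec + ts.tv_nsec * 0.000000001;
    }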
+
+/* Pick next thread to schedule based on earliest deadline. */
+static PyThreadState *bfs_find_task(long double timestamp) {
+    struct _list *item;
+    PyThreadState *task;
+
+    if (_list_empty(&bfs_rq))
+        return NULL;
+
+    timestamp = GET_TIMESTAMP();
+    item = bfs_rq.next;
+    task = THREAD_STATE(item);
+
+    /* Search for thread with earliest deadline or first expired deadline.
+     * IO bound threads typically have expired deadlines since they naturally
+     * take longer than their original deadline to deplete their slice. */
+    for (item = item->next; item != &bfs_rq && task->bfs_deadline > timestamp; item = item->next) {
+        PyThreadState *tstate = THREAD_STATE(item);
+        if (tstate->bfs_deadline < task->bfs_deadline) {
+            task = tstate;
+        }
+    }
+
+    return task;
+}
+
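The FIFO fallback in bfs_find_task() kicks in exactly when load makes deadlines unreachable. A back-of-the-envelope model (hypothetical numbers, not part of the patch): with N CPU-bound threads round-robining on 10ms slices, a re-queued thread waits roughly (N - 1) * 10ms before running again, while its deadline sits only rr_interval * DEADLINE_FACTOR = 60ms ahead, so beyond about 7 threads most deadlines have already expired when examined:

    /* Sketch of the deadline-expiry arithmetic behind DEADLINE_FACTOR. */
    #include <stdio.h>

    #define DEADLINE_FACTOR 6

    int main(void) {
        const double rr_interval = 0.010;               /* 10 ms slice */
        int nthreads;

        for (nthreads = 2; nthreads <= 10; nthreads++) {
            double wait = (nthreads - 1) * rr_interval; /* queue latency */
            double deadline = rr_interval * DEADLINE_FACTOR;
            printf("%2d threads: wait=%3.0fms deadline=%2.0fms %s\n",
                   nthreads, wait * 1000, deadline * 1000,
                   wait > deadline ? "EXPIRED (FIFO)" : "ok (EDF)");
        }
        return 0;
    }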
+
+/* Wait for the currently running thread to signal this thread (tstate)
+ * to take over the schedule. Call with the mutex locked. */
+static void _bfs_timed_wait(PyThreadState *tstate) {
+    /* Use a timeout to guard against slow ticks of the running thread, but
+     * multiply by the number of waiting threads to prevent a growing number
+     * of context switches. */
+    int timeout = (int) (bfs_threads_waiting * rr_interval * 1000000 / 2);
+    int timed_out = 0;
+
+    COND_TIMED_WAIT(tstate->bfs_cond, bfs_mutex, timeout, timed_out);
+
+    /* Flag the running thread to check slice depletion immediately. It has
+     * possibly been doing slow ticks (e.g. regular expression searches) and
+     * depleted its slice. */
+    if (timed_out) {
+        bfs_check_depleted = 1;
+        COMPUTE_EVAL_BREAKER();
+    }
+}
+
+
+/* Schedule the (tstate) thread for running. */
+static void bfs_schedule(PyThreadState *tstate) {
+#ifdef VERBOSE
+    long double _ts = get_timestamp();
+    long double _slice = tstate->bfs_slice;
+    long double _deadline = tstate->bfs_deadline;
+#endif
+
+    /* Minimize the number of timestamp queries. */
+    long double timestamp = 0;
+
+    /* Refill a depleted slice and reset the scheduling deadline. */
+    if (tstate->bfs_slice <= 0) {
+        tstate->bfs_slice = rr_interval;
+        timestamp = GET_TIMESTAMP();
+        tstate->bfs_deadline = timestamp + rr_interval * DEADLINE_FACTOR;
+    }
+
+    MUTEX_LOCK(bfs_mutex);
+
+    TRACE(("SCHD %p - %Lf, slice=%Lf, deadline-in=%Lf, deadline-on=%Lf\n", tstate, _ts, _slice, _deadline - _ts, tstate->bfs_deadline));
+
+    /* Schedule immediately if no thread is currently running. */
+    if (bfs_thread == NULL) {
+        bfs_thread = tstate;
+    }
+    /* Else enqueue for future scheduling. */
+    else {
+        _list_append(&bfs_rq, &tstate->bfs_list);
+        bfs_threads_waiting++;
+
+        timestamp = GET_TIMESTAMP();
+
+        /* Flag the running thread to check depletion of its slice. */
+        if (bfs_thread->bfs_slice <= timestamp - bfs_thread->bfs_timestamp) {
+            bfs_check_depleted = 1;
+            COMPUTE_EVAL_BREAKER();
+        }
+
+        /* Preempt the running thread if its deadline is less urgent, unless
+         * its deadline has already expired, in which case handle the
+         * request as FIFO. */
+        if (tstate->bfs_deadline < bfs_thread->bfs_deadline &&
+            bfs_thread->bfs_deadline >= timestamp) {
+            bfs_preempt = 1;
+            COMPUTE_EVAL_BREAKER();
+        }
+
+        /* Wait until scheduled. */
+        while (bfs_thread != tstate) {
+            _bfs_timed_wait(tstate);
+        }
+
+        /* Reset the timestamp so it is queried again below. */
+        timestamp = 0;
+    }
+
+    TRACE(("TAKE %p - %Lf\n", tstate, get_timestamp()));
+
+    MUTEX_UNLOCK(bfs_mutex);
+
+    timestamp = GET_TIMESTAMP();
+    tstate->bfs_timestamp = timestamp;
+}
+
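bfs_schedule() above has each thread wait on its *own* condition variable until the yielding thread hands it the schedule (bfs_thread == tstate), which is why "exactly one thread is waiting" per condition. A standalone sketch of this one-condvar-per-thread handoff, reduced to round-robin token passing instead of earliest-deadline selection (hypothetical, plain pthreads, not part of the patch):

    /* Sketch of the handoff used by bfs_schedule()/bfs_yield(). */
    #include <pthread.h>
    #include <stdio.h>

    #define N 3

    static pthread_mutex_t mut = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond[N];     /* one condition per thread */
    static int current = 0;            /* like bfs_thread: whose turn it is */

    static void *worker(void *arg) {
        int me = (int)(long)arg;
        int turn, next;

        for (turn = 0; turn < 3; turn++) {
            pthread_mutex_lock(&mut);
            while (current != me)              /* wait until scheduled */
                pthread_cond_wait(&cond[me], &mut);
            printf("thread %d runs (turn %d)\n", me, turn);
            next = (me + 1) % N;               /* pick successor (EDF in bfs.c) */
            current = next;
            pthread_mutex_unlock(&mut);
            pthread_cond_signal(&cond[next]);  /* signal outside the mutex */
        }
        return NULL;
    }

    int main(void) {
        pthread_t t[N];
        int i;
        for (i = 0; i < N; i++)
            pthread_cond_init(&cond[i], NULL);
        for (i = 0; i < N; i++)
            pthread_create(&t[i], NULL, worker, (void *)(long)i);
        for (i = 0; i < N; i++)
            pthread_join(t[i], NULL);
        return 0;
    }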
+
+/* Yield the schedule to another thread. */
+static void bfs_yield(PyThreadState *tstate) {
+    /* Minimize the number of timestamp queries. */
+    long double timestamp = 0;
+
+    PyThreadState *task;
+
+    /* Update the running time slice (bookkeeping). */
+    if (tstate != NULL && tstate->bfs_slice > 0) {
+        timestamp = GET_TIMESTAMP();
+        tstate->bfs_slice -= timestamp - tstate->bfs_timestamp;
+    }
+
+    MUTEX_LOCK(bfs_mutex);
+
+    TRACE(("DROP %p - %Lf\n", tstate, get_timestamp()));
+
+    /* Reset the preemption flags - we heard the shout. */
+    bfs_preempt = 0;
+    bfs_check_depleted = 0;
+    COMPUTE_EVAL_BREAKER();
+
+    /* Find the earliest deadline thread to run. */
+    task = bfs_find_task(timestamp);
+    bfs_thread = task;
+
+    /* Schedule the found task to run. */
+    if (task != NULL) {
+        _list_pop(&task->bfs_list);
+        bfs_threads_waiting--;
+        if (!bfs_threads_waiting) {
+            RESET_TICKS();
+        }
+    }
+
+    MUTEX_UNLOCK(bfs_mutex);
+
+    /* Signal outside the mutex - it may reduce contention (at least on
+     * Windows) and is OK since exactly one thread is waiting on the
+     * condition. */
+    if (task != NULL) {
+        COND_SIGNAL(task->bfs_cond);
+    }
+}
+
+
+/* Check if a thread depleted its running time slice.
+ * A clock reading is about 1 usec on a modern CPU. */
+_LOCAL(int) bfs_depleted_slice(PyThreadState *tstate) {
+    return tstate->bfs_slice <= get_timestamp() - tstate->bfs_timestamp;
+}
+
+
+/* Propose to the scheduler to switch to a different thread - a la
+ * cooperative multitasking. */
+_LOCAL(void) bfs_checkpoint(PyThreadState *tstate) {
+    TRACE(("CHCK %p - %Lf, preempt=%d, check_depleted=%d\n", tstate, get_timestamp(), bfs_preempt, bfs_check_depleted));
+
+    bfs_check_depleted = 0;
+    COMPUTE_EVAL_BREAKER();
+
+    if (bfs_preempt || bfs_depleted_slice(tstate)) {
+        bfs_yield(tstate);
+        bfs_schedule(tstate);
+    }
+}
+
+
+/* Cancel a thread's request to be scheduled.
+ * Assumes the canceled thread is not running or being scheduled
+ * concurrently. */
+void bfs_cancel(PyThreadState *tstate) {
+    if (tstate->bfs_list.next == NULL)
+        return;
+
+    MUTEX_LOCK(bfs_mutex);
+
+    if (bfs_thread == tstate)
+        Py_FatalError("Attempt to cancel a scheduled thread.");
+
+    if (tstate->bfs_list.next != NULL) {
+        _list_pop(&tstate->bfs_list);
+    }
+
+    MUTEX_UNLOCK(bfs_mutex);
+}
+
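The slice accounting split between bfs_yield() (charge elapsed wall time against the slice) and bfs_schedule() (refill and push the deadline out when the slice is depleted) is plain stopwatch arithmetic. A compressed single-threaded sketch of that bookkeeping (hypothetical values, not part of the patch):

    /* Sketch of the slice/deadline bookkeeping done by bfs_yield() (charge)
     * and bfs_schedule() (refill). */
    #include <stdio.h>

    #define DEADLINE_FACTOR 6

    typedef struct { long double slice, deadline, timestamp; } task_t;

    static void charge(task_t *t, long double now) {       /* bfs_yield() side */
        if (t->slice > 0)
            t->slice -= now - t->timestamp;
    }

    static void refill(task_t *t, long double now, long double rr) {
        if (t->slice <= 0) {                               /* bfs_schedule() side */
            t->slice = rr;
            t->deadline = now + rr * DEADLINE_FACTOR;
        }
        t->timestamp = now;
    }

    int main(void) {
        const long double rr = 0.010;
        task_t t = { 0, 0, 0 };
        long double now = 100.0;

        refill(&t, now, rr);          /* first schedule: fresh 10ms slice */
        charge(&t, now + 0.004);      /* ran 4ms, 6ms left */
        printf("slice left: %.0Lf ms\n", t.slice * 1000);
        refill(&t, now + 0.004, rr);  /* slice not depleted: no refill */
        charge(&t, now + 0.012);      /* ran 8ms more: slice goes negative */
        printf("slice left: %.0Lf ms\n", t.slice * 1000);
        return 0;
    }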
diff -r e0531ff61e8f Python/ceval.c
--- a/Python/ceval.c	Mon Mar 22 03:53:52 2010 +0100
+++ b/Python/ceval.c	Sun Mar 28 08:32:51 2010 +0300
@@ -217,13 +217,7 @@
 
 #define COMPUTE_EVAL_BREAKER() \
-    (eval_breaker = gil_drop_request | pendingcalls_to_do | pending_async_exc)
-
-#define SET_GIL_DROP_REQUEST() \
-    do { gil_drop_request = 1; eval_breaker = 1; } while (0)
-
-#define RESET_GIL_DROP_REQUEST() \
-    do { gil_drop_request = 0; COMPUTE_EVAL_BREAKER(); } while (0)
+    (eval_breaker = bfs_preempt | bfs_check_depleted | pendingcalls_to_do | pending_async_exc)
 
 #define SIGNAL_PENDING_CALLS() \
     do { pendingcalls_to_do = 1; eval_breaker = 1; } while (0)
@@ -247,34 +241,44 @@
 static PyThread_type_lock pending_lock = 0; /* for pending calls */
 static long main_thread = 0;
+static int threads_init = 0;
 
 /* This single variable consolidates all requests to break out of the fast path
    in the eval loop. */
 static volatile int eval_breaker = 0;
-/* Request for dropping the GIL */
-static volatile int gil_drop_request = 0;
 /* Request for running pending calls */
 static volatile int pendingcalls_to_do = 0;
 /* Request for looking at the `async_exc` field of the current thread state */
 static volatile int pending_async_exc = 0;
 
-#include "ceval_gil.h"
+static volatile int ticks = 2;
+
+#define NTH_TICK(i) (ticks % 1000 == i)
+#define RESET_TICKS() ticks = 2
+
+#include "bfs.c"
+
+#define EVAL_BREAKER() (eval_breaker)
 
 int
 PyEval_ThreadsInitialized(void)
 {
-    return gil_created();
+    return threads_init == 1;
 }
 
 void
 PyEval_InitThreads(void)
 {
-    if (gil_created())
-        return;
-    create_gil();
-    take_gil(PyThreadState_GET());
+    if (PyEval_ThreadsInitialized())
+        return;
+
+    bfs_init();
+    bfs_schedule(PyThreadState_GET());
+
     main_thread = PyThread_get_thread_ident();
     if (!pending_lock)
         pending_lock = PyThread_allocate_lock();
+
+    threads_init = 1;
 }
 
 void
@@ -283,7 +287,8 @@
     PyThreadState *tstate = PyThreadState_GET();
     if (tstate == NULL)
         Py_FatalError("PyEval_AcquireLock: current thread state is NULL");
-    take_gil(tstate);
+
+    bfs_schedule(tstate);
 }
 
 void
@@ -293,7 +298,7 @@
        We therefore avoid PyThreadState_GET() which dumps a fatal error
       in debug mode. */
-    drop_gil(_PyThreadState_Current);
+    bfs_yield(_PyThreadState_Current);
 }
 
 void
@@ -301,9 +306,11 @@
 {
     if (tstate == NULL)
         Py_FatalError("PyEval_AcquireThread: NULL new thread state");
-    /* Check someone has called PyEval_InitThreads() to create the lock */
-    assert(gil_created());
-    take_gil(tstate);
+
+    /* Check someone has called PyEval_InitThreads() to create the lock */
+    assert(PyEval_ThreadsInitialized());
+
+    bfs_schedule(tstate);
     if (PyThreadState_Swap(tstate) != NULL)
         Py_FatalError(
             "PyEval_AcquireThread: non-NULL old thread state");
@@ -316,7 +323,8 @@
         Py_FatalError("PyEval_ReleaseThread: NULL thread state");
     if (PyThreadState_Swap(NULL) != tstate)
         Py_FatalError("PyEval_ReleaseThread: wrong thread state");
-    drop_gil(tstate);
+
+    bfs_yield(tstate);
 }
 
 /* This function is called from PyOS_AfterFork to ensure that newly
@@ -330,15 +338,17 @@
     PyObject *threading, *result;
     PyThreadState *tstate = PyThreadState_GET();
 
-    if (!gil_created())
-        return;
+    if (!PyEval_ThreadsInitialized())
+        return;
+
     /*XXX Can't use PyThread_free_lock here because it does too much
       error-checking.  Doing this cleanly would require adding a new
      function to each thread_*.h.  Instead, just create a new lock and
      waste a little bit of memory */
-    recreate_gil();
+    bfs_init();
+
     pending_lock = PyThread_allocate_lock();
-    take_gil(tstate);
+    bfs_schedule(tstate);
     main_thread = PyThread_get_thread_ident();
 
     /* Update the threading module with the new state.
@@ -361,8 +371,10 @@
 #else
 static int eval_breaker = 0;
-static int gil_drop_request = 0;
 static int pending_async_exc = 0;
+
+#define EVAL_BREAKER() (eval_breaker)
+
 #endif /* WITH_THREAD */
 
 /* This function is used to signal that async exceptions are waiting to be
@@ -384,11 +396,14 @@
     PyThreadState *tstate = PyThreadState_Swap(NULL);
     if (tstate == NULL)
         Py_FatalError("PyEval_SaveThread: NULL tstate");
-#ifdef WITH_THREAD
-    if (gil_created())
-        drop_gil(tstate);
-#endif
-    return tstate;
+
+#ifdef WITH_THREAD
+    if (PyEval_ThreadsInitialized()) {
+        bfs_yield(tstate);
+    }
+#endif
+
+    return tstate;
 }
 
 void
@@ -396,14 +411,16 @@
 {
     if (tstate == NULL)
         Py_FatalError("PyEval_RestoreThread: NULL tstate");
-#ifdef WITH_THREAD
-    if (gil_created()) {
+
+#ifdef WITH_THREAD
+    if (PyEval_ThreadsInitialized()) {
         int err = errno;
-        take_gil(tstate);
+        bfs_schedule(tstate);
        errno = err;
     }
 #endif
-    PyThreadState_Swap(tstate);
+
+    PyThreadState_Swap(tstate);
 }
 
@@ -843,7 +860,7 @@
 #define DISPATCH() \
     { \
-        if (!eval_breaker) { \
+        if (!eval_breaker && !NTH_TICK(0)) { \
             FAST_DISPATCH(); \
         } \
         continue; \
     }
@@ -1217,7 +1234,12 @@
            async I/O handler); see Py_AddPendingCall() and
            Py_MakePendingCalls() above. */
-        if (eval_breaker) {
+        /* Code generated by gcc segfaults on an attempt to increment
+         * ticks without the if clause. */
+        if (bfs_threads_waiting)
+            ticks++;
+
+        if (EVAL_BREAKER() || NTH_TICK(1)) {
             if (*next_instr == SETUP_FINALLY) {
                 /* Make the last opcode before
                    a try: finally: block uninterruptible. */
@@ -1233,20 +1255,18 @@
                 goto on_error;
             }
         }
-        if (gil_drop_request) {
-#ifdef WITH_THREAD
-            /* Give another thread a chance */
-            if (PyThreadState_Swap(NULL) != tstate)
-                Py_FatalError("ceval: tstate mix-up");
-            drop_gil(tstate);
-
-            /* Other threads may run now */
-
-            take_gil(tstate);
-            if (PyThreadState_Swap(tstate) != NULL)
-                Py_FatalError("ceval: orphan tstate");
-#endif
-        }
+
+#ifdef WITH_THREAD
+        /* Give another thread a chance */
+        if (PyThreadState_Swap(NULL) != tstate)
+            Py_FatalError("ceval: tstate mix-up");
+
+        bfs_checkpoint(tstate);
+
+        if (PyThreadState_Swap(tstate) != NULL)
+            Py_FatalError("ceval: orphan tstate");
+#endif
+
        /* Check for asynchronous exceptions. */
        if (tstate->async_exc != NULL) {
            x = tstate->async_exc;
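The reworked COMPUTE_EVAL_BREAKER() above keeps the eval loop's fast path down to a single volatile load by folding every outstanding request into one flag; each setter raises its own flag and recomputes the consolidated one. A minimal standalone sketch of the idiom (hypothetical flag names, not the patch's actual set):

    /* Sketch of the consolidated eval-breaker idiom used in ceval.c. */
    #include <stdio.h>

    static volatile int eval_breaker = 0;
    static volatile int preempt = 0;
    static volatile int pendingcalls_to_do = 0;

    #define COMPUTE_EVAL_BREAKER() \
        (eval_breaker = preempt | pendingcalls_to_do)

    static void signal_pending_calls(void) {
        pendingcalls_to_do = 1;
        COMPUTE_EVAL_BREAKER();
    }

    static void clear_pending_calls(void) {
        pendingcalls_to_do = 0;
        COMPUTE_EVAL_BREAKER();
    }

    int main(void) {
        /* The hot loop pays for one volatile read per iteration; the slow
         * path runs only while some request is outstanding. */
        signal_pending_calls();
        if (eval_breaker)
            clear_pending_calls();            /* slow path handles requests */
        printf("breaker=%d\n", eval_breaker); /* 0 again */
        return 0;
    }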
diff -r e0531ff61e8f Python/pystate.c
--- a/Python/pystate.c	Mon Mar 22 03:53:52 2010 +0100
+++ b/Python/pystate.c	Sun Mar 28 08:32:51 2010 +0300
@@ -2,6 +2,7 @@
 /* Thread and interpreter state structures and their interfaces */
 
 #include "Python.h"
+#include "mutexed.h"
 
 /* --------------------------------------------------------------------------
 CAUTION
@@ -198,6 +199,13 @@
         tstate->c_profileobj = NULL;
         tstate->c_traceobj = NULL;
 
+        COND_INIT(tstate->bfs_cond);
+        tstate->bfs_list.next = NULL;
+        tstate->bfs_list.prev = NULL;
+        tstate->bfs_slice = 0;
+        tstate->bfs_deadline = 0;
+        tstate->bfs_timestamp = 0;
+
         if (init)
             _PyThreadState_Init(tstate);
 
@@ -271,8 +279,8 @@
     if (Py_VerboseFlag && tstate->frame != NULL)
         fprintf(stderr,
           "PyThreadState_Clear: warning: thread still has a frame\n");
-
-    Py_CLEAR(tstate->frame);
+
+    Py_CLEAR(tstate->frame);
 
     Py_CLEAR(tstate->dict);
     Py_CLEAR(tstate->async_exc);
@@ -304,7 +312,8 @@
     interp = tstate->interp;
     if (interp == NULL)
         Py_FatalError("PyThreadState_Delete: NULL interp");
-    HEAD_LOCK();
+
+    HEAD_LOCK();
     for (p = &interp->tstate_head; ; p = &(*p)->next) {
         if (*p == NULL)
             Py_FatalError(
@@ -345,6 +354,10 @@
 
 #ifdef WITH_THREAD
 
+
+/* Defined in Python/bfs.c */
+PyAPI_FUNC(void) bfs_cancel(PyThreadState *tstate);
+
 void
 PyThreadState_DeleteCurrent()
 {
@@ -354,6 +367,11 @@
             "PyThreadState_DeleteCurrent: no current tstate");
     _PyThreadState_Current = NULL;
     tstate_delete_common(tstate);
+
+    bfs_cancel(tstate);
+    COND_SIGNAL(tstate->bfs_cond);
+    COND_DESTROY(tstate->bfs_cond);
+
     if (autoTLSkey && PyThread_get_key_value(autoTLSkey) == tstate)
         PyThread_delete_key_value(autoTLSkey);
     PyEval_ReleaseLock();
diff -r e0531ff61e8f Python/sysmodule.c
--- a/Python/sysmodule.c	Mon Mar 22 03:53:52 2010 +0100
+++ b/Python/sysmodule.c	Sun Mar 28 08:32:51 2010 +0300
@@ -490,7 +490,7 @@
 static PyObject *
 sys_setswitchinterval(PyObject *self, PyObject *args)
 {
-    double d;
+    /*double d;
     if (!PyArg_ParseTuple(args, "d:setswitchinterval", &d))
         return NULL;
     if (d <= 0.0) {
@@ -498,7 +498,7 @@
                         "switch interval must be strictly positive");
         return NULL;
     }
-    _PyEval_SetSwitchInterval((unsigned long) (1e6 * d));
+    _PyEval_SetSwitchInterval((unsigned long) (1e6 * d));*/
     Py_INCREF(Py_None);
     return Py_None;
 }
@@ -518,7 +518,7 @@
 static PyObject *
 sys_getswitchinterval(PyObject *self, PyObject *args)
 {
-    return PyFloat_FromDouble(1e-6 * _PyEval_GetSwitchInterval());
+    return PyFloat_FromDouble(1000.0);//1e-6 * _PyEval_GetSwitchInterval());
 }
 
 PyDoc_STRVAR(getswitchinterval_doc,
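The pystate.c changes above pair every COND_INIT in new_threadstate() with a bfs_cancel() + COND_SIGNAL + COND_DESTROY sequence in PyThreadState_DeleteCurrent(): the dying thread first leaves the run queue, so no yielding thread can later pick it and signal a destroyed handle. A sketch of that teardown ordering (hypothetical minimal stand-ins, plain pthreads, not the patch's actual types):

    /* Sketch of the teardown contract in PyThreadState_DeleteCurrent(). */
    #include <pthread.h>

    typedef struct task {
        pthread_cond_t cond;
        struct task *next;   /* non-NULL while queued, like bfs_list.next */
    } task_t;

    static void cancel(task_t *t) {
        /* In bfs_cancel() this unlinks t from the run queue under the
         * scheduler mutex; here we just mark it unqueued. */
        t->next = NULL;
    }

    static void delete_task(task_t *t) {
        cancel(t);                        /* 1: make t unreachable */
        pthread_cond_signal(&t->cond);    /* 2: release any stale waiter */
        pthread_cond_destroy(&t->cond);   /* 3: now safe to destroy */
    }

    int main(void) {
        task_t t;
        pthread_cond_init(&t.cond, NULL);
        t.next = &t;                      /* pretend it is queued */
        delete_task(&t);
        return 0;
    }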