diff -r e0531ff61e8f .hgtags
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.hgtags	Sun Mar 28 08:32:51 2010 +0300
@@ -0,0 +1,1 @@
+a0fa2e6ded5738b0e11eca623f7f15e778e2dd19 fast-reschedule
diff -r e0531ff61e8f Include/mutexed.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Include/mutexed.h	Sun Mar 28 08:32:51 2010 +0300
@@ -0,0 +1,150 @@
+/*
+ * Mutex related macros for the BFS scheduler - taken from ceval_gil.h.
+ */
+
+#include <unistd.h>
+
+#ifndef _POSIX_THREADS
+/* This means pthreads are not implemented in libc headers, hence the macro
+   not present in unistd.h. But they can still be implemented as an external
+   library (e.g. gnu pth in pthread emulation). */
+#  ifdef HAVE_PTHREAD_H
+#    include <pthread.h> /* _POSIX_THREADS */
+#  endif
+#endif
+
+#ifdef _POSIX_THREADS
+
+/*
+ * POSIX support
+ */
+
+#include <pthread.h>
+#include <errno.h>   /* ETIMEDOUT */
+
+#define ADD_MICROSECONDS(tv, interval) \
+do { \
+    tv.tv_usec += (long) interval; \
+    tv.tv_sec += tv.tv_usec / 1000000; \
+    tv.tv_usec %= 1000000; \
+} while (0)
+
+/* We assume all modern POSIX systems have gettimeofday() */
+#ifdef GETTIMEOFDAY_NO_TZ
+#define GETTIMEOFDAY(ptv) gettimeofday(ptv)
+#else
+#define GETTIMEOFDAY(ptv) gettimeofday(ptv, (struct timezone *)NULL)
+#endif
+
+#define MUTEX_T pthread_mutex_t
+#define MUTEX_INIT(mut) \
+    if (pthread_mutex_init(&mut, NULL)) { \
+        Py_FatalError("pthread_mutex_init(" #mut ") failed"); };
+#define MUTEX_LOCK(mut) \
+    if (pthread_mutex_lock(&mut)) { \
+        Py_FatalError("pthread_mutex_lock(" #mut ") failed"); };
+#define MUTEX_UNLOCK(mut) \
+    if (pthread_mutex_unlock(&mut)) { \
+        Py_FatalError("pthread_mutex_unlock(" #mut ") failed"); };
+
+#define COND_T pthread_cond_t
+#define COND_INIT(cond) \
+    if (pthread_cond_init(&cond, NULL)) { \
+        Py_FatalError("pthread_cond_init(" #cond ") failed"); };
+#define COND_DESTROY(cond) \
+    if (pthread_cond_destroy(&cond)) { \
+        Py_FatalError("pthread_cond_destroy(" #cond ") failed"); };
+#define COND_RESET(cond)
+#define COND_SIGNAL(cond) \
+    if (pthread_cond_signal(&cond)) { \
+        Py_FatalError("pthread_cond_signal(" #cond ") failed"); };
+#define COND_WAIT(cond, mut) \
+    if (pthread_cond_wait(&cond, &mut)) { \
+        Py_FatalError("pthread_cond_wait(" #cond ") failed"); };
+#define COND_TIMED_WAIT(cond, mut, microseconds, timeout_result) \
+    { \
+        int r; \
+        struct timespec ts; \
+        struct timeval deadline; \
+        \
+        GETTIMEOFDAY(&deadline); \
+        ADD_MICROSECONDS(deadline, microseconds); \
+        ts.tv_sec = deadline.tv_sec; \
+        ts.tv_nsec = deadline.tv_usec * 1000; \
+        \
+        r = pthread_cond_timedwait(&cond, &mut, &ts); \
+        if (r == ETIMEDOUT) \
+            timeout_result = 1; \
+        else if (r) \
+            Py_FatalError("pthread_cond_timedwait(" #cond ") failed"); \
+        else \
+            timeout_result = 0; \
+    } \
+
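An aside on the trickiest macro above: COND_TIMED_WAIT turns a relative timeout in microseconds into the absolute timespec that pthread_cond_timedwait() expects, via GETTIMEOFDAY() plus ADD_MICROSECONDS(). A minimal standalone sketch of that conversion (not part of the patch; plain pthreads, no Py_FatalError handling):

    /* Sketch only: the deadline conversion performed by COND_TIMED_WAIT.
     * Nobody signals the condition, so the wait must time out. */
    #include <errno.h>
    #include <pthread.h>
    #include <stdio.h>
    #include <sys/time.h>

    static pthread_mutex_t mut = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;

    int main(void)
    {
        struct timeval deadline;
        struct timespec ts;
        long microseconds = 50000;                  /* 50 ms */
        int r;

        gettimeofday(&deadline, NULL);              /* GETTIMEOFDAY() */
        deadline.tv_usec += microseconds;           /* ADD_MICROSECONDS() */
        deadline.tv_sec += deadline.tv_usec / 1000000;
        deadline.tv_usec %= 1000000;
        ts.tv_sec = deadline.tv_sec;
        ts.tv_nsec = deadline.tv_usec * 1000;       /* usec -> nsec */

        pthread_mutex_lock(&mut);
        r = pthread_cond_timedwait(&cond, &mut, &ts);
        pthread_mutex_unlock(&mut);
        printf("timed out: %d\n", r == ETIMEDOUT);  /* prints 1 after ~50 ms */
        return 0;
    }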
+#elif defined(NT_THREADS)
+
+/*
+ * Windows (2000 and later, as well as (hopefully) CE) support
+ */
+
+#include <windows.h>
+
+/* Use a critical section, since it is significantly faster than a mutex
+ * object. It should be enough for the needs of the BFS scheduler, since on
+ * any signaled event there is exactly one waiting thread. However, the
+ * macros are not suited for the general case, so the code should probably
+ * move somewhere else. */
+
+#define MUTEX_T CRITICAL_SECTION
+#define MUTEX_INIT(mut) \
+    InitializeCriticalSection(&mut);
+#define MUTEX_LOCK(mut) \
+    EnterCriticalSection(&mut);
+#define MUTEX_UNLOCK(mut) \
+    LeaveCriticalSection(&mut);
+
+#undef COND_T
+#define COND_T HANDLE
+#define COND_INIT(cond) \
+    /* auto-reset, non-signalled */ \
+    if (!(cond = CreateEvent(NULL, FALSE, FALSE, NULL))) { \
+        Py_FatalError("CreateEvent(" #cond ") failed"); };
+#define COND_DESTROY(cond) \
+    if (!CloseHandle(cond)) { \
+        Py_FatalError("CloseHandle(" #cond ") failed"); };
+#define COND_RESET(cond) \
+    if (!ResetEvent(cond)) { \
+        Py_FatalError("ResetEvent(" #cond ") failed"); };
+#define COND_SIGNAL(cond) \
+    if (!SetEvent(cond)) { \
+        Py_FatalError("SetEvent(" #cond ") failed"); };
+#define COND_WAIT(cond, mut) \
+    { \
+        MUTEX_UNLOCK(mut); \
+        if (WaitForSingleObject(cond, INFINITE) != WAIT_OBJECT_0) \
+            Py_FatalError("WaitForSingleObject(" #cond ") failed"); \
+        MUTEX_LOCK(mut); \
+    }
+#define COND_TIMED_WAIT(cond, mut, microseconds, timeout_result) \
+    { \
+        DWORD r; \
+        MUTEX_UNLOCK(mut); \
+        r = WaitForSingleObject(cond, microseconds / 1000); \
+        if (r == WAIT_TIMEOUT) { \
+            MUTEX_LOCK(mut); \
+            timeout_result = 1; \
+        } \
+        else if (r != WAIT_OBJECT_0) \
+            Py_FatalError("WaitForSingleObject(" #cond ") failed"); \
+        else { \
+            MUTEX_LOCK(mut); \
+            timeout_result = 0; \
+        } \
+    }
+
+#else
+
+#error You need either a POSIX-compatible or a Windows system!
+
+#endif /* _POSIX_THREADS, NT_THREADS */
diff -r e0531ff61e8f Include/pystate.h
--- a/Include/pystate.h	Mon Mar 22 03:53:52 2010 +0100
+++ b/Include/pystate.h	Sun Mar 28 08:32:51 2010 +0300
@@ -8,6 +8,21 @@
 extern "C" {
 #endif
 
+#include "pythread.h"
+
+/* Temporarily work around cross platform (nt/posix) compilation issues.
+ * Can't include mutexed.h directly on NT since it includes windows.h
+ * and breaks compilation solution-wide. */
+
+#undef COND_T
+#ifdef MS_WINDOWS
+typedef void *COND_HANDLE;
+#define COND_T COND_HANDLE
+#else
+#include "mutexed.h"
+#endif
+
+
 /* State shared between threads */
 
 struct _ts; /* Forward */
@@ -55,6 +70,46 @@
 #define PyTrace_C_EXCEPTION 5
 #define PyTrace_C_RETURN 6
 
+#define CONTAINER(type, member, ptr) \
+    ((type *)((char *)(ptr) - offsetof(type, member)))
+
+struct _list {
+    struct _list *next;
+    struct _list *prev;
+};
+
+/* Py_LOCAL_INLINE() does not work - see issue bugs.python.org/issue5553 */
+
+#ifdef MS_WINDOWS
+#define _LOCAL(type) static type
+#else
+#define _LOCAL(type) static inline type
+#endif
+
+_LOCAL(void) _list_init(struct _list *l) {
+    l->next = l;
+    l->prev = l;
+}
+
+_LOCAL(int) _list_empty(struct _list *l) {
+    return l->next == l;
+}
+
+_LOCAL(void) _list_append(struct _list *l, struct _list *item) {
+    struct _list *tail = l->prev;
+    tail->next = item;
+    item->prev = tail;
+    item->next = l;
+    l->prev = item;
+}
+
+_LOCAL(void) _list_pop(struct _list *item) {
+    item->next->prev = item->prev;
+    item->prev->next = item->next;
+    item->prev = NULL;
+    item->next = NULL;
+}
+
 typedef struct _ts {
     /* See Python/ceval.c for comments explaining most fields */
 
@@ -88,8 +143,6 @@
 
     PyObject *dict;  /* Stores per-thread state */
 
-    /* XXX doesn't mean anything anymore (the comment below is obsolete)
-       => deprecate or remove? */
     /* tick_counter is incremented whenever the check_interval ticker
      * reaches zero. The purpose is to give a useful measure of the number
      * of interpreted bytecode instructions in a given thread.  This
@@ -98,15 +151,21 @@
      */
     int tick_counter;
 
+    COND_T bfs_cond;
+    struct _list bfs_list;
+    long double bfs_slice;
+    long double bfs_deadline;
+    long double bfs_timestamp;
+
     int gilstate_counter;
 
     PyObject *async_exc; /* Asynchronous exception to raise */
     long thread_id; /* Thread id where this tstate was created */
 
     /* XXX signal handlers should also be here */
-
 } PyThreadState;
 
+#define THREAD_STATE(ptr) CONTAINER(PyThreadState, bfs_list, ptr)
 
 PyAPI_FUNC(PyInterpreterState *) PyInterpreterState_New(void);
 PyAPI_FUNC(void) PyInterpreterState_Clear(PyInterpreterState *);
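The CONTAINER()/THREAD_STATE() pair above is the classic intrusive linked list idiom (compare container_of() in the Linux kernel): the list node lives inside PyThreadState, and offsetof() arithmetic recovers the enclosing structure from a node pointer. A standalone sketch (not part of the patch) with a hypothetical task_t standing in for PyThreadState:

    /* Sketch of the intrusive-list pattern used above; task_t and its
     * fields are hypothetical stand-ins for PyThreadState/bfs_list. */
    #include <stddef.h>
    #include <stdio.h>

    #define CONTAINER(type, member, ptr) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct _list { struct _list *next, *prev; };

    static void _list_init(struct _list *l) { l->next = l->prev = l; }
    static void _list_append(struct _list *l, struct _list *item) {
        struct _list *tail = l->prev;
        tail->next = item; item->prev = tail;
        item->next = l; l->prev = item;
    }

    typedef struct { double deadline; struct _list node; } task_t;

    int main(void) {
        struct _list rq;                 /* run queue head, like bfs_rq */
        task_t a = { 0.25, {0} }, b = { 0.10, {0} };
        struct _list *it;

        _list_init(&rq);
        _list_append(&rq, &a.node);
        _list_append(&rq, &b.node);

        /* Walk the queue and recover each task from its embedded node,
         * exactly as THREAD_STATE() recovers a PyThreadState. */
        for (it = rq.next; it != &rq; it = it->next)
            printf("deadline=%.2f\n", CONTAINER(task_t, node, it)->deadline);
        return 0;
    }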
diff -r e0531ff61e8f Python/bfs.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Python/bfs.c	Sun Mar 28 08:32:51 2010 +0300
@@ -0,0 +1,341 @@
+/*
+ * Python/bfs.c
+ *
+ * Simplified implementation of the Brain F**k Scheduler by Con Kolivas
+ * http://ck.kolivas.org/patches/bfs/sched-BFS.txt
+ *
+ * "The goal of the Brain F**k Scheduler, referred to as BFS from here on, is
+ * to completely do away with the complex designs of the past for the cpu
+ * process scheduler and instead implement one that is very simple in basic
+ * design. The main focus of BFS is to achieve excellent desktop interactivity
+ * and responsiveness without heuristics and tuning knobs that are difficult
+ * to understand, impossible to model and predict the effect of, and when
+ * tuned to one workload cause massive detriment to another." - Con Kolivas
+ *
+ * Notes:
+ * The following is a straightforward implementation of Con's basic design
+ * as described in the link above. However, there are several gotchas
+ * specific to Python which needed working around.
+ *
+ * 1) Clock - Except for Linux, most platforms do not provide an API to query
+ * a thread's running time with high precision. Therefore timing is done with
+ * a wall clock. The scheduler seems to behave reasonably with a wall clock
+ * even under load, probably since the OS scheduler does not tend to preempt
+ * IO bound threads.
+ *
+ * 2) Precision - The code assumes good clock precision (better than 1ms).
+ * It is probably a good idea to handle the case where a thread's running
+ * time until it voluntarily yields to the scheduler is unmeasurable (0)
+ * on a low-precision clock.
+ *
+ * 3) Slow ticks - Querying the clock between ticks is too expensive, so it
+ * is done once every 1000 ticks. However, since even a single slow tick can
+ * deplete a slice (10ms), a special flag exists (bfs_check_depleted) which
+ * is set by waiting threads and which forces the running thread to check
+ * its remaining slice by the next tick.
+ *
+ * 4) Fast reschedule - On a system with a thread-running-time clock (e.g.
+ * Linux) it is possible to add a nice trick which allows a thread to yield
+ * and reschedule without switching to another thread if done quickly enough
+ * (for example, zlib.compress(some-small-value)). Currently the thread
+ * will schedule a new thread when it yields and then preempt it on
+ * rescheduling (if justified), which is reasonable but more expensive.
+ * The reason a thread-running-time clock is needed is that this mechanism
+ * can cause an IO bound thread to behave as a CPU bound thread, and then it
+ * will lose the OS 'protection' (see the discussion about the clock above).
+ * The code for this trick is left out of this version for the sake of
+ * keeping it simple initially :). This technique noticeably boosts
+ * performance.
+ */
+
+
+#include "time.h"
+#include "mutexed.h"
+
+
+/* Round robin interval - thread time slice for running (in seconds).
+ * It is set at 10ms to match the Windows tick resolution. */
+static const long double rr_interval = 0.010;
+
+/* Magic number taken from Con's implementation. There it is computed as
+ * 1.1 ^ 20 (the nice level of a regular thread). Basically it controls the
+ * load beyond which threads tend to expire their deadline. Expired threads
+ * are handled in FIFO order, which means that at such load IO bound threads
+ * will no longer preempt running threads. */
+#define DEADLINE_FACTOR 6
+
+/* Protect access to data structures. */
+static MUTEX_T bfs_mutex;
+
+/* List of all threads waiting to be scheduled. */
+static struct _list bfs_rq;
+
+/* Number of threads waiting to be scheduled. */
+static volatile int bfs_threads_waiting = 0;
+
+/* Points to the currently running thread. */
+static volatile PyThreadState *bfs_thread = NULL;
+
+/* Flag the currently running thread to yield immediately. */
+static volatile int bfs_preempt = 0;
+
+/* Flag the currently running thread to check its remaining slice
+ * immediately. This is required since even a single slow Python tick can
+ * deplete a 10ms slice, let alone 1000 ticks. */
+static volatile int bfs_check_depleted = 0;
+
+static int bfs_initialized = 0;
+
+
+#undef VERBOSE
+#ifdef VERBOSE
+#define TRACE(v) printf v
+#else
+#define TRACE(v)
+#endif
+
+
+#ifdef MS_WINDOWS
+
+/* Return a system wide timestamp in seconds (with usec precision). */
+static long double get_timestamp(void) {
+    static LARGE_INTEGER ctrStart;
+    static long double divisor = 0.0;
+    LARGE_INTEGER now;
+
+    if (divisor == 0.0) {
+        LARGE_INTEGER freq;
+        QueryPerformanceCounter(&ctrStart);
+        QueryPerformanceFrequency(&freq);
+        divisor = (long double) freq.QuadPart;
+    }
+
+    QueryPerformanceCounter(&now);
+    return (now.QuadPart - ctrStart.QuadPart) / divisor;
+}
+
+#elif defined(HAVE_GETTIMEOFDAY)
+
+/* Return a system wide timestamp in seconds (with usec precision). */
+static inline long double get_timestamp(void) {
+    struct timeval tv;
+    GETTIMEOFDAY(&tv);
+    return (long double) tv.tv_sec + tv.tv_usec * 0.000001;
+}
+
+#else
+
+#error You need either a POSIX-compatible or a Windows system!
+
+#endif /* MS_WINDOWS, HAVE_GETTIMEOFDAY */
+
+/* Reuse the caller's already-queried timestamp if it has one (non-zero). */
+#define GET_TIMESTAMP() (timestamp ? timestamp : get_timestamp())
+
+
+static void bfs_init(void) {
+    MUTEX_INIT(bfs_mutex);
+    _list_init(&bfs_rq);
+    bfs_initialized = 1;
+}
+
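Notes (1) and (4) in the header above refer to a per-thread running-time clock, which Linux exposes through clock_gettime(). A sketch of what such a clock could look like (assumes _POSIX_THREAD_CPUTIME support; not part of the patch, which deliberately sticks to the wall clock):

    /* Sketch only: per-thread CPU time in seconds, the clock that would
     * enable the "fast reschedule" trick described in note (4). */
    #include <time.h>

    static long double get_thread_timestamp(void)
    {
        struct timespec ts;
        if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) != 0)
            return 0.0;  /* caller should fall back to the wall clock */
        return (long double) ts.tv_sec + ts.tv_nsec * 0.000000001;
    }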
+
+/* Pick next thread to schedule based on earliest deadline. */
+static PyThreadState *bfs_find_task(long double timestamp) {
+    struct _list *item;
+    PyThreadState *task;
+
+    if (_list_empty(&bfs_rq))
+        return NULL;
+
+    timestamp = GET_TIMESTAMP();
+    item = bfs_rq.next;
+    task = THREAD_STATE(item);
+
+    /* Search for thread with earliest deadline or first expired deadline.
+     * IO bound threads typically have expired deadlines since they naturally
+     * take longer than their original deadline to deplete their slice. */
+    for (item = item->next; item != &bfs_rq && task->bfs_deadline > timestamp; item = item->next) {
+        PyThreadState *tstate = THREAD_STATE(item);
+        if (tstate->bfs_deadline < task->bfs_deadline) {
+            task = tstate;
+        }
+    }
+
+    return task;
+}
+
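The FIFO fallback in bfs_find_task() kicks in exactly when load makes deadlines unreachable. A back-of-the-envelope model (hypothetical numbers, not part of the patch): with N CPU-bound threads round-robining on 10ms slices, a re-queued thread waits roughly (N - 1) * 10ms before running again, while its deadline sits only rr_interval * DEADLINE_FACTOR = 60ms ahead, so beyond about 7 threads most deadlines have already expired when examined:

    /* Sketch of the deadline-expiry arithmetic behind DEADLINE_FACTOR. */
    #include <stdio.h>

    #define DEADLINE_FACTOR 6

    int main(void) {
        const double rr_interval = 0.010;               /* 10 ms slice */
        int nthreads;

        for (nthreads = 2; nthreads <= 10; nthreads++) {
            double wait = (nthreads - 1) * rr_interval; /* queue latency */
            double deadline = rr_interval * DEADLINE_FACTOR;
            printf("%2d threads: wait=%3.0fms deadline=%2.0fms %s\n",
                   nthreads, wait * 1000, deadline * 1000,
                   wait > deadline ? "EXPIRED (FIFO)" : "ok (EDF)");
        }
        return 0;
    }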
+
+/* Wait for the currently running thread to signal this thread (tstate)
+ * to take over the schedule. Call with the mutex locked. */
+static void _bfs_timed_wait(PyThreadState *tstate) {
+    /* Use a timeout to guard against slow ticks of the running thread, but
+     * multiply by the number of waiting threads to prevent a growing number
+     * of context switches. */
+    int timeout = (int) (bfs_threads_waiting * rr_interval * 1000000 / 2);
+    int timed_out = 0;
+
+    COND_TIMED_WAIT(tstate->bfs_cond, bfs_mutex, timeout, timed_out);
+
+    /* Flag the running thread to check slice depletion immediately. It has
+     * possibly been doing slow ticks (e.g. regular expression searches) and
+     * depleted its slice. */
+    if (timed_out) {
+        bfs_check_depleted = 1;
+        COMPUTE_EVAL_BREAKER();
+    }
+}
+
+
+/* Schedule the (tstate) thread for running. */
+static void bfs_schedule(PyThreadState *tstate) {
+#ifdef VERBOSE
+    long double _ts = get_timestamp();
+    long double _slice = tstate->bfs_slice;
+    long double _deadline = tstate->bfs_deadline;
+#endif
+
+    /* Minimize the number of timestamp queries. */
+    long double timestamp = 0;
+
+    /* Refill a depleted slice and reset the scheduling deadline. */
+    if (tstate->bfs_slice <= 0) {
+        tstate->bfs_slice = rr_interval;
+        timestamp = GET_TIMESTAMP();
+        tstate->bfs_deadline = timestamp + rr_interval * DEADLINE_FACTOR;
+    }
+
+    MUTEX_LOCK(bfs_mutex);
+
+    TRACE(("SCHD %p - %Lf, slice=%Lf, deadline-in=%Lf, deadline-on=%Lf\n", tstate, _ts, _slice, _deadline - _ts, tstate->bfs_deadline));
+
+    /* Schedule immediately if no thread is currently running. */
+    if (bfs_thread == NULL) {
+        bfs_thread = tstate;
+    }
+    /* Else enqueue for future scheduling. */
+    else {
+        _list_append(&bfs_rq, &tstate->bfs_list);
+        bfs_threads_waiting++;
+
+        timestamp = GET_TIMESTAMP();
+
+        /* Flag the running thread to check depletion of its slice. */
+        if (bfs_thread->bfs_slice <= timestamp - bfs_thread->bfs_timestamp) {
+            bfs_check_depleted = 1;
+            COMPUTE_EVAL_BREAKER();
+        }
+
+        /* Preempt the running thread if its deadline is less urgent, unless
+         * its deadline has already expired, in which case handle the
+         * request as FIFO. */
+        if (tstate->bfs_deadline < bfs_thread->bfs_deadline &&
+            bfs_thread->bfs_deadline >= timestamp) {
+            bfs_preempt = 1;
+            COMPUTE_EVAL_BREAKER();
+        }
+
+        /* Wait until scheduled. */
+        while (bfs_thread != tstate) {
+            _bfs_timed_wait(tstate);
+        }
+
+        /* Reset the timestamp so it is queried again below. */
+        timestamp = 0;
+    }
+
+    TRACE(("TAKE %p - %Lf\n", tstate, get_timestamp()));
+
+    MUTEX_UNLOCK(bfs_mutex);
+
+    timestamp = GET_TIMESTAMP();
+    tstate->bfs_timestamp = timestamp;
+}
+
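bfs_schedule() above has each thread wait on its *own* condition variable until the yielding thread hands it the schedule (bfs_thread == tstate), which is why "exactly one thread is waiting" per condition. A standalone sketch of this one-condvar-per-thread handoff, reduced to round-robin token passing instead of earliest-deadline selection (hypothetical, plain pthreads, not part of the patch):

    /* Sketch of the handoff used by bfs_schedule()/bfs_yield(). */
    #include <pthread.h>
    #include <stdio.h>

    #define N 3

    static pthread_mutex_t mut = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond[N];     /* one condition per thread */
    static int current = 0;            /* like bfs_thread: whose turn it is */

    static void *worker(void *arg) {
        int me = (int)(long)arg;
        int turn, next;

        for (turn = 0; turn < 3; turn++) {
            pthread_mutex_lock(&mut);
            while (current != me)              /* wait until scheduled */
                pthread_cond_wait(&cond[me], &mut);
            printf("thread %d runs (turn %d)\n", me, turn);
            next = (me + 1) % N;               /* pick successor (EDF in bfs.c) */
            current = next;
            pthread_mutex_unlock(&mut);
            pthread_cond_signal(&cond[next]);  /* signal outside the mutex */
        }
        return NULL;
    }

    int main(void) {
        pthread_t t[N];
        int i;
        for (i = 0; i < N; i++)
            pthread_cond_init(&cond[i], NULL);
        for (i = 0; i < N; i++)
            pthread_create(&t[i], NULL, worker, (void *)(long)i);
        for (i = 0; i < N; i++)
            pthread_join(t[i], NULL);
        return 0;
    }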
+
+/* Yield the schedule to another thread. */
+static void bfs_yield(PyThreadState *tstate) {
+    /* Minimize the number of timestamp queries. */
+    long double timestamp = 0;
+
+    PyThreadState *task;
+
+    /* Update the running time slice (bookkeeping). */
+    if (tstate != NULL && tstate->bfs_slice > 0) {
+        timestamp = GET_TIMESTAMP();
+        tstate->bfs_slice -= timestamp - tstate->bfs_timestamp;
+    }
+
+    MUTEX_LOCK(bfs_mutex);
+
+    TRACE(("DROP %p - %Lf\n", tstate, get_timestamp()));
+
+    /* Reset the preemption flags - we heard the shout. */
+    bfs_preempt = 0;
+    bfs_check_depleted = 0;
+    COMPUTE_EVAL_BREAKER();
+
+    /* Find the earliest deadline thread to run. */
+    task = bfs_find_task(timestamp);
+    bfs_thread = task;
+
+    /* Schedule the found task to run. */
+    if (task != NULL) {
+        _list_pop(&task->bfs_list);
+        bfs_threads_waiting--;
+        if (!bfs_threads_waiting) {
+            RESET_TICKS();
+        }
+    }
+
+    MUTEX_UNLOCK(bfs_mutex);
+
+    /* Signal outside the mutex - it may reduce contention (at least on
+     * Windows) and is OK since exactly one thread is waiting on the
+     * condition. */
+    if (task != NULL) {
+        COND_SIGNAL(task->bfs_cond);
+    }
+}
+
+
+/* Check if a thread depleted its running time slice.
+ * A clock reading is about 1 usec on a modern CPU. */
+_LOCAL(int) bfs_depleted_slice(PyThreadState *tstate) {
+    return tstate->bfs_slice <= get_timestamp() - tstate->bfs_timestamp;
+}
+
+
+/* Propose to the scheduler to switch to a different thread - a la
+ * cooperative multitasking. */
+_LOCAL(void) bfs_checkpoint(PyThreadState *tstate) {
+    TRACE(("CHCK %p - %Lf, preempt=%d, check_depleted=%d\n", tstate, get_timestamp(), bfs_preempt, bfs_check_depleted));
+
+    bfs_check_depleted = 0;
+    COMPUTE_EVAL_BREAKER();
+
+    if (bfs_preempt || bfs_depleted_slice(tstate)) {
+        bfs_yield(tstate);
+        bfs_schedule(tstate);
+    }
+}
+
+
+/* Cancel a thread's request to be scheduled.
+ * Assumes the canceled thread is not running or being scheduled
+ * concurrently. */
+void bfs_cancel(PyThreadState *tstate) {
+    if (tstate->bfs_list.next == NULL)
+        return;
+
+    MUTEX_LOCK(bfs_mutex);
+
+    if (bfs_thread == tstate)
+        Py_FatalError("Attempt to cancel a scheduled thread.");
+
+    if (tstate->bfs_list.next != NULL) {
+        _list_pop(&tstate->bfs_list);
+    }
+
+    MUTEX_UNLOCK(bfs_mutex);
+}
+
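The slice accounting split between bfs_yield() (charge elapsed wall time against the slice) and bfs_schedule() (refill and push the deadline out when the slice is depleted) is plain stopwatch arithmetic. A compressed single-threaded sketch of that bookkeeping (hypothetical values, not part of the patch):

    /* Sketch of the slice/deadline bookkeeping done by bfs_yield() (charge)
     * and bfs_schedule() (refill). */
    #include <stdio.h>

    #define DEADLINE_FACTOR 6

    typedef struct { long double slice, deadline, timestamp; } task_t;

    static void charge(task_t *t, long double now) {       /* bfs_yield() side */
        if (t->slice > 0)
            t->slice -= now - t->timestamp;
    }

    static void refill(task_t *t, long double now, long double rr) {
        if (t->slice <= 0) {                               /* bfs_schedule() side */
            t->slice = rr;
            t->deadline = now + rr * DEADLINE_FACTOR;
        }
        t->timestamp = now;
    }

    int main(void) {
        const long double rr = 0.010;
        task_t t = { 0, 0, 0 };
        long double now = 100.0;

        refill(&t, now, rr);          /* first schedule: fresh 10ms slice */
        charge(&t, now + 0.004);      /* ran 4ms, 6ms left */
        printf("slice left: %.0Lf ms\n", t.slice * 1000);
        refill(&t, now + 0.004, rr);  /* slice not depleted: no refill */
        charge(&t, now + 0.012);      /* ran 8ms more: slice goes negative */
        printf("slice left: %.0Lf ms\n", t.slice * 1000);
        return 0;
    }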
diff -r e0531ff61e8f Python/ceval.c
--- a/Python/ceval.c	Mon Mar 22 03:53:52 2010 +0100
+++ b/Python/ceval.c	Sun Mar 28 08:32:51 2010 +0300
@@ -217,13 +217,7 @@
 
 #define COMPUTE_EVAL_BREAKER() \
-    (eval_breaker = gil_drop_request | pendingcalls_to_do | pending_async_exc)
-
-#define SET_GIL_DROP_REQUEST() \
-    do { gil_drop_request = 1; eval_breaker = 1; } while (0)
-
-#define RESET_GIL_DROP_REQUEST() \
-    do { gil_drop_request = 0; COMPUTE_EVAL_BREAKER(); } while (0)
+    (eval_breaker = bfs_preempt | bfs_check_depleted | pendingcalls_to_do | pending_async_exc)
 
 #define SIGNAL_PENDING_CALLS() \
     do { pendingcalls_to_do = 1; eval_breaker = 1; } while (0)
@@ -247,34 +241,44 @@
 static PyThread_type_lock pending_lock = 0; /* for pending calls */
 static long main_thread = 0;
+static int threads_init = 0;
 
 /* This single variable consolidates all requests to break out of the fast path
    in the eval loop. */
 static volatile int eval_breaker = 0;
-/* Request for dropping the GIL */
-static volatile int gil_drop_request = 0;
 /* Request for running pending calls */
 static volatile int pendingcalls_to_do = 0;
 /* Request for looking at the `async_exc` field of the current thread state */
 static volatile int pending_async_exc = 0;
 
-#include "ceval_gil.h"
+static volatile int ticks = 2;
+
+#define NTH_TICK(i) (ticks % 1000 == i)
+#define RESET_TICKS() ticks = 2
+
+#include "bfs.c"
+
+#define EVAL_BREAKER() (eval_breaker)
 
 int
 PyEval_ThreadsInitialized(void)
 {
-    return gil_created();
+    return threads_init == 1;
 }
 
 void
 PyEval_InitThreads(void)
 {
-    if (gil_created())
-        return;
-    create_gil();
-    take_gil(PyThreadState_GET());
+    if (PyEval_ThreadsInitialized())
+        return;
+
+    bfs_init();
+    bfs_schedule(PyThreadState_GET());
+
     main_thread = PyThread_get_thread_ident();
     if (!pending_lock)
         pending_lock = PyThread_allocate_lock();
+
+    threads_init = 1;
 }
 
 void
@@ -283,7 +287,8 @@
     PyThreadState *tstate = PyThreadState_GET();
     if (tstate == NULL)
         Py_FatalError("PyEval_AcquireLock: current thread state is NULL");
-    take_gil(tstate);
+
+    bfs_schedule(tstate);
 }
 
 void
@@ -293,7 +298,7 @@
        We therefore avoid PyThreadState_GET() which dumps a fatal error
       in debug mode. */
-    drop_gil(_PyThreadState_Current);
+    bfs_yield(_PyThreadState_Current);
 }
 
 void
@@ -301,9 +306,11 @@
 {
     if (tstate == NULL)
         Py_FatalError("PyEval_AcquireThread: NULL new thread state");
-    /* Check someone has called PyEval_InitThreads() to create the lock */
-    assert(gil_created());
-    take_gil(tstate);
+
+    /* Check someone has called PyEval_InitThreads() to create the lock */
+    assert(PyEval_ThreadsInitialized());
+
+    bfs_schedule(tstate);
     if (PyThreadState_Swap(tstate) != NULL)
         Py_FatalError(
             "PyEval_AcquireThread: non-NULL old thread state");
@@ -316,7 +323,8 @@
         Py_FatalError("PyEval_ReleaseThread: NULL thread state");
     if (PyThreadState_Swap(NULL) != tstate)
         Py_FatalError("PyEval_ReleaseThread: wrong thread state");
-    drop_gil(tstate);
+
+    bfs_yield(tstate);
 }
 
 /* This function is called from PyOS_AfterFork to ensure that newly
@@ -330,15 +338,17 @@
     PyObject *threading, *result;
     PyThreadState *tstate = PyThreadState_GET();
 
-    if (!gil_created())
-        return;
+    if (!PyEval_ThreadsInitialized())
+        return;
+
     /*XXX Can't use PyThread_free_lock here because it does too much
       error-checking.  Doing this cleanly would require adding a new
      function to each thread_*.h.  Instead, just create a new lock and
      waste a little bit of memory */
-    recreate_gil();
+    bfs_init();
+
     pending_lock = PyThread_allocate_lock();
-    take_gil(tstate);
+    bfs_schedule(tstate);
     main_thread = PyThread_get_thread_ident();
 
     /* Update the threading module with the new state.
@@ -361,8 +371,10 @@
 #else
 static int eval_breaker = 0;
-static int gil_drop_request = 0;
 static int pending_async_exc = 0;
+
+#define EVAL_BREAKER() (eval_breaker)
+
 #endif /* WITH_THREAD */
 
 /* This function is used to signal that async exceptions are waiting to be
@@ -384,11 +396,14 @@
     PyThreadState *tstate = PyThreadState_Swap(NULL);
     if (tstate == NULL)
         Py_FatalError("PyEval_SaveThread: NULL tstate");
-#ifdef WITH_THREAD
-    if (gil_created())
-        drop_gil(tstate);
-#endif
-    return tstate;
+
+#ifdef WITH_THREAD
+    if (PyEval_ThreadsInitialized()) {
+        bfs_yield(tstate);
+    }
+#endif
+
+    return tstate;
 }
 
 void
@@ -396,14 +411,16 @@
 {
     if (tstate == NULL)
         Py_FatalError("PyEval_RestoreThread: NULL tstate");
-#ifdef WITH_THREAD
-    if (gil_created()) {
+
+#ifdef WITH_THREAD
+    if (PyEval_ThreadsInitialized()) {
         int err = errno;
-        take_gil(tstate);
+        bfs_schedule(tstate);
        errno = err;
     }
 #endif
-    PyThreadState_Swap(tstate);
+
+    PyThreadState_Swap(tstate);
 }
 
@@ -843,7 +860,7 @@
 #define DISPATCH() \
     { \
-        if (!eval_breaker) { \
+        if (!eval_breaker && !NTH_TICK(0)) { \
             FAST_DISPATCH(); \
         } \
         continue; \
     }
@@ -1217,7 +1234,12 @@
            async I/O handler); see Py_AddPendingCall() and
            Py_MakePendingCalls() above. */
-        if (eval_breaker) {
+        /* Code generated by gcc segfaults on an attempt to increment
+         * ticks without the if clause. */
+        if (bfs_threads_waiting)
+            ticks++;
+
+        if (EVAL_BREAKER() || NTH_TICK(1)) {
             if (*next_instr == SETUP_FINALLY) {
                 /* Make the last opcode before
                    a try: finally: block uninterruptible. */
@@ -1233,20 +1255,18 @@
                 goto on_error;
             }
         }
-        if (gil_drop_request) {
-#ifdef WITH_THREAD
-            /* Give another thread a chance */
-            if (PyThreadState_Swap(NULL) != tstate)
-                Py_FatalError("ceval: tstate mix-up");
-            drop_gil(tstate);
-
-            /* Other threads may run now */
-
-            take_gil(tstate);
-            if (PyThreadState_Swap(tstate) != NULL)
-                Py_FatalError("ceval: orphan tstate");
-#endif
-        }
+
+#ifdef WITH_THREAD
+        /* Give another thread a chance */
+        if (PyThreadState_Swap(NULL) != tstate)
+            Py_FatalError("ceval: tstate mix-up");
+
+        bfs_checkpoint(tstate);
+
+        if (PyThreadState_Swap(tstate) != NULL)
+            Py_FatalError("ceval: orphan tstate");
+#endif
+
        /* Check for asynchronous exceptions. */
        if (tstate->async_exc != NULL) {
            x = tstate->async_exc;
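The reworked COMPUTE_EVAL_BREAKER() above keeps the eval loop's fast path down to a single volatile load by folding every outstanding request into one flag; each setter raises its own flag and recomputes the consolidated one. A minimal standalone sketch of the idiom (hypothetical flag names, not the patch's actual set):

    /* Sketch of the consolidated eval-breaker idiom used in ceval.c. */
    #include <stdio.h>

    static volatile int eval_breaker = 0;
    static volatile int preempt = 0;
    static volatile int pendingcalls_to_do = 0;

    #define COMPUTE_EVAL_BREAKER() \
        (eval_breaker = preempt | pendingcalls_to_do)

    static void signal_pending_calls(void) {
        pendingcalls_to_do = 1;
        COMPUTE_EVAL_BREAKER();
    }

    static void clear_pending_calls(void) {
        pendingcalls_to_do = 0;
        COMPUTE_EVAL_BREAKER();
    }

    int main(void) {
        /* The hot loop pays for one volatile read per iteration; the slow
         * path runs only while some request is outstanding. */
        signal_pending_calls();
        if (eval_breaker)
            clear_pending_calls();            /* slow path handles requests */
        printf("breaker=%d\n", eval_breaker); /* 0 again */
        return 0;
    }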
diff -r e0531ff61e8f Python/pystate.c
--- a/Python/pystate.c	Mon Mar 22 03:53:52 2010 +0100
+++ b/Python/pystate.c	Sun Mar 28 08:32:51 2010 +0300
@@ -2,6 +2,7 @@
 /* Thread and interpreter state structures and their interfaces */
 
 #include "Python.h"
+#include "mutexed.h"
 
 /* --------------------------------------------------------------------------
 CAUTION
@@ -198,6 +199,13 @@
         tstate->c_profileobj = NULL;
         tstate->c_traceobj = NULL;
 
+        COND_INIT(tstate->bfs_cond);
+        tstate->bfs_list.next = NULL;
+        tstate->bfs_list.prev = NULL;
+        tstate->bfs_slice = 0;
+        tstate->bfs_deadline = 0;
+        tstate->bfs_timestamp = 0;
+
         if (init)
             _PyThreadState_Init(tstate);
 
@@ -271,8 +279,8 @@
     if (Py_VerboseFlag && tstate->frame != NULL)
         fprintf(stderr,
           "PyThreadState_Clear: warning: thread still has a frame\n");
-
-    Py_CLEAR(tstate->frame);
+
+    Py_CLEAR(tstate->frame);
 
     Py_CLEAR(tstate->dict);
     Py_CLEAR(tstate->async_exc);
@@ -304,7 +312,8 @@
     interp = tstate->interp;
     if (interp == NULL)
         Py_FatalError("PyThreadState_Delete: NULL interp");
-    HEAD_LOCK();
+
+    HEAD_LOCK();
     for (p = &interp->tstate_head; ; p = &(*p)->next) {
         if (*p == NULL)
             Py_FatalError(
@@ -345,6 +354,10 @@
 
 #ifdef WITH_THREAD
 
+
+/* Defined in Python/bfs.c */
+PyAPI_FUNC(void) bfs_cancel(PyThreadState *tstate);
+
 void
 PyThreadState_DeleteCurrent()
 {
@@ -354,6 +367,11 @@
             "PyThreadState_DeleteCurrent: no current tstate");
     _PyThreadState_Current = NULL;
     tstate_delete_common(tstate);
+
+    bfs_cancel(tstate);
+    COND_SIGNAL(tstate->bfs_cond);
+    COND_DESTROY(tstate->bfs_cond);
+
     if (autoTLSkey && PyThread_get_key_value(autoTLSkey) == tstate)
         PyThread_delete_key_value(autoTLSkey);
     PyEval_ReleaseLock();
diff -r e0531ff61e8f Python/sysmodule.c
--- a/Python/sysmodule.c	Mon Mar 22 03:53:52 2010 +0100
+++ b/Python/sysmodule.c	Sun Mar 28 08:32:51 2010 +0300
@@ -490,7 +490,7 @@
 static PyObject *
 sys_setswitchinterval(PyObject *self, PyObject *args)
 {
-    double d;
+    /*double d;
     if (!PyArg_ParseTuple(args, "d:setswitchinterval", &d))
         return NULL;
     if (d <= 0.0) {
@@ -498,7 +498,7 @@
                         "switch interval must be strictly positive");
         return NULL;
     }
-    _PyEval_SetSwitchInterval((unsigned long) (1e6 * d));
+    _PyEval_SetSwitchInterval((unsigned long) (1e6 * d));*/
     Py_INCREF(Py_None);
     return Py_None;
 }
@@ -518,7 +518,7 @@
 static PyObject *
 sys_getswitchinterval(PyObject *self, PyObject *args)
 {
-    return PyFloat_FromDouble(1e-6 * _PyEval_GetSwitchInterval());
+    return PyFloat_FromDouble(1000.0);//1e-6 * _PyEval_GetSwitchInterval());
 }
 
 PyDoc_STRVAR(getswitchinterval_doc,
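The pystate.c changes above pair every COND_INIT in new_threadstate() with a bfs_cancel() + COND_SIGNAL + COND_DESTROY sequence in PyThreadState_DeleteCurrent(): the dying thread first leaves the run queue, so no yielding thread can later pick it and signal a destroyed handle. A sketch of that teardown ordering (hypothetical minimal stand-ins, plain pthreads, not the patch's actual types):

    /* Sketch of the teardown contract in PyThreadState_DeleteCurrent(). */
    #include <pthread.h>

    typedef struct task {
        pthread_cond_t cond;
        struct task *next;   /* non-NULL while queued, like bfs_list.next */
    } task_t;

    static void cancel(task_t *t) {
        /* In bfs_cancel() this unlinks t from the run queue under the
         * scheduler mutex; here we just mark it unqueued. */
        t->next = NULL;
    }

    static void delete_task(task_t *t) {
        cancel(t);                        /* 1: make t unreachable */
        pthread_cond_signal(&t->cond);    /* 2: release any stale waiter */
        pthread_cond_destroy(&t->cond);   /* 3: now safe to destroy */
    }

    int main(void) {
        task_t t;
        pthread_cond_init(&t.cond, NULL);
        t.next = &t;                      /* pretend it is queued */
        delete_task(&t);
        return 0;
    }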