Index: Include/ceval.h
===================================================================
--- Include/ceval.h	(revision 79534)
+++ Include/ceval.h	(working copy)
@@ -126,6 +126,7 @@
 PyAPI_FUNC(int) PyEval_ThreadsInitialized(void);
 PyAPI_FUNC(void) PyEval_InitThreads(void);
 PyAPI_FUNC(void) PyEval_AcquireLock(void);
+PyAPI_FUNC(void) PyEval_AcquireLockPri(int pri);
 PyAPI_FUNC(void) PyEval_ReleaseLock(void);
 PyAPI_FUNC(void) PyEval_AcquireThread(PyThreadState *tstate);
 PyAPI_FUNC(void) PyEval_ReleaseThread(PyThreadState *tstate);
Index: Include/pythread.h
===================================================================
--- Include/pythread.h	(revision 79534)
+++ Include/pythread.h	(working copy)
@@ -4,6 +4,7 @@
 
 typedef void *PyThread_type_lock;
 typedef void *PyThread_type_sema;
+typedef struct _gil_type *PyThread_type_gil;
 
 #ifdef __cplusplus
 extern "C" {
@@ -21,6 +22,12 @@
 #define NOWAIT_LOCK	0
 PyAPI_FUNC(void) PyThread_release_lock(PyThread_type_lock);
 
+/* special GIL functions */
+PyAPI_FUNC(PyThread_type_gil) PyThread_allocate_gil(void);
+PyAPI_FUNC(void) PyThread_free_gil(PyThread_type_gil gil);
+PyAPI_FUNC(void) PyThread_acquire_gil(PyThread_type_gil gil, int pri);
+PyAPI_FUNC(void) PyThread_release_gil(PyThread_type_gil gil);
+
 PyAPI_FUNC(size_t) PyThread_get_stacksize(void);
 PyAPI_FUNC(int) PyThread_set_stacksize(size_t);
Index: PCbuild/pythoncore.vcproj
===================================================================
--- PCbuild/pythoncore.vcproj	(revision 79534)
+++ PCbuild/pythoncore.vcproj	(working copy)
@@ -1851,6 +1851,10 @@
 				>
 			</File>
+			<File
+				RelativePath="..\Python\thread_gil.h"
+				>
+			</File>
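
For orientation, here is a minimal sketch (not part of the patch) of how the four PyThread_*_gil functions declared in Include/pythread.h above fit together. It assumes it is compiled inside a CPython source tree so that "pythread.h" is available; the helper name demo_gil_lifecycle is invented for the example.

#include "pythread.h"

/* Sketch only: allocate a GIL object, take and release it once, free it. */
static void
demo_gil_lifecycle(void)
{
    PyThread_type_gil gil = PyThread_allocate_gil();
    if (gil == NULL)
        return;                        /* allocation failed */
    PyThread_acquire_gil(gil, 0);      /* 0 = high priority request */
    /* ... code that must hold the lock runs here ... */
    PyThread_release_gil(gil);         /* wake or hand off to a waiter */
    PyThread_free_gil(gil);            /* destroy the underlying mutex/condition */
}
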
Index: Python/ceval.c
===================================================================
--- Python/ceval.c	(revision 79534)
+++ Python/ceval.c	(working copy)
@@ -232,7 +232,7 @@
 #endif
 #include "pythread.h"
 
-static PyThread_type_lock interpreter_lock = 0; /* This is the GIL */
+static PyThread_type_gil interpreter_lock = 0; /* This is the GIL */
 static PyThread_type_lock pending_lock = 0; /* for pending calls */
 static long main_thread = 0;
 
@@ -247,21 +247,27 @@
 {
     if (interpreter_lock)
         return;
-    interpreter_lock = PyThread_allocate_lock();
-    PyThread_acquire_lock(interpreter_lock, 1);
+    interpreter_lock = PyThread_allocate_gil();
+    PyThread_acquire_gil(interpreter_lock, 1);
     main_thread = PyThread_get_thread_ident();
 }
 
 void
 PyEval_AcquireLock(void)
 {
-    PyThread_acquire_lock(interpreter_lock, 1);
+    PyThread_acquire_gil(interpreter_lock, 1);
 }
 
 void
+PyEval_AcquireLockPri(int pri)
+{
+    PyThread_acquire_gil(interpreter_lock, pri);
+}
+
+void
 PyEval_ReleaseLock(void)
 {
-    PyThread_release_lock(interpreter_lock);
+    PyThread_release_gil(interpreter_lock);
 }
 
 void
@@ -271,7 +277,7 @@
         Py_FatalError("PyEval_AcquireThread: NULL new thread state");
     /* Check someone has called PyEval_InitThreads() to create the lock */
     assert(interpreter_lock);
-    PyThread_acquire_lock(interpreter_lock, 1);
+    PyThread_acquire_gil(interpreter_lock, 1);
     if (PyThreadState_Swap(tstate) != NULL)
         Py_FatalError(
             "PyEval_AcquireThread: non-NULL old thread state");
@@ -284,7 +290,7 @@
         Py_FatalError("PyEval_ReleaseThread: NULL thread state");
     if (PyThreadState_Swap(NULL) != tstate)
         Py_FatalError("PyEval_ReleaseThread: wrong thread state");
-    PyThread_release_lock(interpreter_lock);
+    PyThread_release_gil(interpreter_lock);
 }
 
 /* This function is called from PyOS_AfterFork to ensure that newly
@@ -304,9 +310,9 @@
        much error-checking.  Doing this cleanly would require adding a
       new function to each thread_*.h.  Instead, just create a new lock
       and waste a little bit of memory */
-    interpreter_lock = PyThread_allocate_lock();
+    interpreter_lock = PyThread_allocate_gil();
     pending_lock = PyThread_allocate_lock();
-    PyThread_acquire_lock(interpreter_lock, 1);
+    PyThread_acquire_gil(interpreter_lock, 1);
     main_thread = PyThread_get_thread_ident();
 
     /* Update the threading module with the new state.
@@ -340,7 +346,7 @@
         Py_FatalError("PyEval_SaveThread: NULL tstate");
 #ifdef WITH_THREAD
     if (interpreter_lock)
-        PyThread_release_lock(interpreter_lock);
+        PyThread_release_gil(interpreter_lock);
 #endif
     return tstate;
 }
@@ -353,14 +359,20 @@
 #ifdef WITH_THREAD
     if (interpreter_lock) {
         int err = errno;
-        PyThread_acquire_lock(interpreter_lock, 1);
+        /* high priority gil request */
+        PyThread_acquire_gil(interpreter_lock, 0);
         errno = err;
     }
 #endif
     PyThreadState_Swap(tstate);
 }
 
+void _PyEval_RelinquishGil(void)
+{
+    _Py_Ticker = 0;
+}
+
 /* Mechanism whereby asynchronously executing callbacks (e.g. UNIX
    signal handlers or Mac I/O completion routines) can schedule calls
    to a function to be called synchronously.
@@ -1012,11 +1024,11 @@
             if (PyThreadState_Swap(NULL) != tstate)
                 Py_FatalError("ceval: tstate mix-up");
-            PyThread_release_lock(interpreter_lock);
+            PyThread_release_gil(interpreter_lock);
 
             /* Other threads may run now */
 
-            PyThread_acquire_lock(interpreter_lock, 1);
+            PyThread_acquire_gil(interpreter_lock, 1);
             if (PyThreadState_Swap(tstate) != NULL)
                 Py_FatalError("ceval: orphan tstate");
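
To make the intent of the new priority argument concrete, here is a hedged sketch (again not part of the patch) of how code that manages the interpreter lock by hand, in the style of the PyEval_AcquireLock()/PyEval_ReleaseLock() pair, could reacquire at high priority after blocking work. The function call_blocking_with_priority and its do_blocking_work callback are invented for the example.

#include "Python.h"

/* Sketch only: release the GIL around blocking work, then reacquire it
 * at priority 0 so this thread is preferred over a CPU-bound thread
 * that merely yielded in the eval loop (mirroring what the patched
 * PyEval_RestoreThread() does). */
static void
call_blocking_with_priority(void (*do_blocking_work)(void))
{
    PyThreadState *tstate = PyThreadState_Swap(NULL);
    PyEval_ReleaseLock();              /* let other threads run */

    do_blocking_work();                /* no Python API calls in here */

    PyEval_AcquireLockPri(0);          /* 0 = high priority request */
    PyThreadState_Swap(tstate);
}
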
Index: Python/thread.c
===================================================================
--- Python/thread.c	(revision 79534)
+++ Python/thread.c	(working copy)
@@ -419,3 +419,32 @@
 }
 
 #endif /* Py_HAVE_NATIVE_TLS */
+
+#ifndef THREAD_GIL_FUNCTIONS
+
+/* default gil implementation: map the gil onto an ordinary lock and
+ * ignore the priority argument */
+
+PyThread_type_gil
+PyThread_allocate_gil(void)
+{
+    return (PyThread_type_gil) PyThread_allocate_lock();
+}
+
+void
+PyThread_free_gil(PyThread_type_gil gil)
+{
+    PyThread_free_lock((PyThread_type_lock)gil);
+}
+
+void
+PyThread_acquire_gil(PyThread_type_gil gil, int pri)
+{
+    PyThread_acquire_lock((PyThread_type_lock)gil, 1);
+}
+
+void
+PyThread_release_gil(PyThread_type_gil gil)
+{
+    PyThread_release_lock((PyThread_type_lock)gil);
+}
+#endif
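
The fallback above is compiled only when a platform's thread_xxx.h has not defined THREAD_GIL_FUNCTIONS by including the new thread_gil.h (next in this patch). To illustrate the porting contract that header expects, here is a speculative sketch of what a hypothetical backend built on C11 <threads.h> would have to provide before the #include; CPython does not actually use C11 threads, and Py_FatalError is assumed to be available as it is in the real thread_*.h files.

#include <threads.h>            /* hypothetical C11 backend, illustration only */

#define MUTEX_T mtx_t
#define MUTEX_INIT(mut) \
    do { if (mtx_init(&(mut), mtx_plain) != thrd_success) \
        Py_FatalError("mtx_init(" #mut ") failed"); } while (0)
#define MUTEX_FINI(mut)         mtx_destroy(&(mut))
#define MUTEX_LOCK(mut)         mtx_lock(&(mut))
#define MUTEX_UNLOCK(mut)       mtx_unlock(&(mut))

#define COND_T cnd_t
#define COND_INIT(cond) \
    do { if (cnd_init(&(cond)) != thrd_success) \
        Py_FatalError("cnd_init(" #cond ") failed"); } while (0)
#define COND_FINI(cond)         cnd_destroy(&(cond))
#define COND_SIGNAL(cond)       cnd_signal(&(cond))
#define COND_WAIT(cond, mut)    cnd_wait(&(cond), &(mut))

#define ROUNDROBIN_GIL          /* pick one of the three implementations */
#include "thread_gil.h"

COND_TIMED_WAIT is omitted from the sketch because thread_gil.h does not use it yet.
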
Index: Python/thread_gil.h
===================================================================
--- Python/thread_gil.h	(revision 0)
+++ Python/thread_gil.h	(revision 0)
@@ -0,0 +1,264 @@
+/* Definitions of the PyThread_*_gil functions, using portable
+ * macros for mutexes and condition variables
+ */
+
+#define THREAD_GIL_FUNCTIONS
+
+#if !defined LEGACY_GIL && !defined ROUNDROBIN_GIL && !defined PRIORITY_GIL
+#define LEGACY_GIL
+#endif
+
+#ifdef LEGACY_GIL
+
+/* This GIL implementation is the one traditionally in use with
+ * pthreads.  It has problems because nothing prevents a thread from
+ * reacquiring the gil immediately after releasing it to someone else.
+ * This breaks the Release/Reacquire pattern in ceval.c.
+ * The reason is that modern mutex implementations are typically
+ * greedy, allowing an interloping thread to acquire the mutex as soon
+ * as it is free rather than adhering to a strict FIFO order.  This is
+ * done to reduce lock-convoying problems.
+ * The traditional Windows implementation has not had this issue
+ * because it builds its locks on Event objects, which respect the
+ * FIFO order of waiting threads.
+ */
+
+typedef struct GIL_T
+{
+    MUTEX_T mutex;
+    COND_T cond;
+    char locked;
+} GIL_T;
+
+PyThread_type_gil
+PyThread_allocate_gil(void)
+{
+    GIL_T *gil = PyMem_MALLOC(sizeof(GIL_T));
+    if (!gil)
+        return NULL;
+    MUTEX_INIT(gil->mutex);
+    COND_INIT(gil->cond);
+    gil->locked = 0;
+    return (PyThread_type_gil) gil;
+}
+
+void
+PyThread_free_gil(PyThread_type_gil _gil)
+{
+    GIL_T *gil = (GIL_T*)_gil;
+    MUTEX_FINI(gil->mutex);
+    COND_FINI(gil->cond);
+    PyMem_FREE(gil);
+}
+
+void
+PyThread_acquire_gil(PyThread_type_gil _gil, int pri)
+{
+    GIL_T *gil = (GIL_T*)_gil;
+    MUTEX_LOCK(gil->mutex);
+    while (gil->locked)
+        COND_WAIT(gil->cond, gil->mutex);
+    gil->locked = 1;
+    MUTEX_UNLOCK(gil->mutex);
+}
+
+void
+PyThread_release_gil(PyThread_type_gil _gil)
+{
+    GIL_T *gil = (GIL_T*)_gil;
+    MUTEX_LOCK(gil->mutex);
+    gil->locked = 0;
+    COND_SIGNAL(gil->cond);
+    MUTEX_UNLOCK(gil->mutex);
+}
+#endif
+
+#ifdef ROUNDROBIN_GIL
+
+/* A simple improvement on the legacy GIL.  A thread goes into the
+ * condition wait if another thread is already waiting and has not yet
+ * woken up.  This prevents a thread from jumping the queue ahead of a
+ * waiting thread and allows the Release/Reacquire pattern in ceval.c
+ * to work properly.
+ */
+
+typedef struct GIL_T
+{
+    MUTEX_T mutex;
+    COND_T cond;
+    int n_waiting;
+    char locked;
+} GIL_T;
+
+PyThread_type_gil
+PyThread_allocate_gil(void)
+{
+    GIL_T *gil = PyMem_MALLOC(sizeof(GIL_T));
+    if (!gil)
+        return NULL;
+    MUTEX_INIT(gil->mutex);
+    COND_INIT(gil->cond);
+    gil->locked = 0;
+    gil->n_waiting = 0;
+    return (PyThread_type_gil) gil;
+}
+
+void
+PyThread_free_gil(PyThread_type_gil _gil)
+{
+    GIL_T *gil = (GIL_T*)_gil;
+    MUTEX_FINI(gil->mutex);
+    COND_FINI(gil->cond);
+    PyMem_FREE(gil);
+}
+
+void
+PyThread_acquire_gil(PyThread_type_gil _gil, int pri)
+{
+    GIL_T *gil = (GIL_T*)_gil;
+    MUTEX_LOCK(gil->mutex);
+    /* go to the back of the line if someone else is already waiting
+     * for the GIL
+     */
+    if (gil->locked || gil->n_waiting) {
+        ++gil->n_waiting;
+        do {
+            COND_WAIT(gil->cond, gil->mutex);
+        } while (gil->locked);
+        --gil->n_waiting;
+    }
+    gil->locked = 1;
+    MUTEX_UNLOCK(gil->mutex);
+}
+
+void
+PyThread_release_gil(PyThread_type_gil _gil)
+{
+    GIL_T *gil = (GIL_T*)_gil;
+    MUTEX_LOCK(gil->mutex);
+    gil->locked = 0;
+    COND_SIGNAL(gil->cond);
+    MUTEX_UNLOCK(gil->mutex);
+}
+#endif
+
+#ifdef PRIORITY_GIL
+
+/* Finally, a priority based GIL.  Each priority level has its own
+ * condition variable.  When the lock is released, it is handed off
+ * to the highest level that is currently waiting for the lock.
+ * No one gets the lock until it is handed to them.
+ * Priority is given as a value when the lock is acquired.  The idea
+ * is that the ceval Release/Reacquire step uses low priority (1)
+ * because it is voluntarily giving up the GIL.  Other functions use
+ * high priority (0) because they have just finished I/O or other such
+ * business and are eager to continue.  A CPU-bound thread that
+ * relinquishes the GIL in ceval.c will therefore yield to an I/O-type
+ * gil request that is waiting.
+ * In addition, we can ask ceval.c to give up the GIL immediately if
+ * we see that it was last acquired with a lower priority than ours.
+ */
+
+#define PRIORITY_LEVELS 2
+#define INTERRUPT_GIL
+
+#ifdef INTERRUPT_GIL
+void _PyEval_RelinquishGil(void);
+#endif
+
+typedef struct GIL_LEVEL_T
+{
+    COND_T cond;
+    int n_queue;            /* number of threads waiting at this level */
+} GIL_LEVEL_T;
+
+typedef struct GIL_T
+{
+    MUTEX_T mutex;
+    int state;              /* -1 = free, 0 and positive: locked at priority 'state' */
+    int handoff;            /* -1 = no handoff */
+    GIL_LEVEL_T levels[PRIORITY_LEVELS];
+} GIL_T;
+
+PyThread_type_gil
+PyThread_allocate_gil(void)
+{
+    int i;
+    GIL_T *gil = PyMem_MALLOC(sizeof(GIL_T));
+    if (!gil)
+        return NULL;
+    MUTEX_INIT(gil->mutex);
+    gil->state = -1;        /* free */
+    gil->handoff = -1;      /* no handoff */
+    for (i = 0; i < PRIORITY_LEVELS; ++i) {
+        COND_INIT(gil->levels[i].cond);
+        gil->levels[i].n_queue = 0;
+    }
+    return (PyThread_type_gil) gil;
+}
+
+void
+PyThread_free_gil(PyThread_type_gil _gil)
+{
+    int i;
+    GIL_T *gil = (GIL_T*)_gil;
+    MUTEX_FINI(gil->mutex);
+    for (i = 0; i < PRIORITY_LEVELS; ++i)
+        COND_FINI(gil->levels[i].cond);
+    PyMem_FREE(gil);
+}
+
+void
+PyThread_acquire_gil(PyThread_type_gil _gil, int pri)
+{
+    GIL_T *gil = (GIL_T*)_gil;
+
+    if (pri < 0)
+        pri = 0;
+    else if (pri >= PRIORITY_LEVELS)
+        pri = PRIORITY_LEVELS - 1;
+
+    MUTEX_LOCK(gil->mutex);
+
+    if (gil->state != -1) {
+#ifdef INTERRUPT_GIL
+        if (pri < gil->state)
+            /* we trump whoever is running, ask for the gil */
+            _PyEval_RelinquishGil();
+#endif
+        /* wait for lock handoff */
+        ++gil->levels[pri].n_queue;
+        while (gil->handoff != pri)
+            COND_WAIT(gil->levels[pri].cond, gil->mutex);
+        /* we were handed the lock */
+        gil->handoff = -1;
+    }
+    gil->state = pri;
+    MUTEX_UNLOCK(gil->mutex);
+}
+
+void
+PyThread_release_gil(PyThread_type_gil _gil)
+{
+    GIL_T *gil = (GIL_T*)_gil;
+    int i;
+    MUTEX_LOCK(gil->mutex);
+    /* hand the lock off to the highest priority waiting thread */
+    for (i = 0; i < PRIORITY_LEVELS; ++i)
+        if (gil->levels[i].n_queue > 0) {
+            COND_SIGNAL(gil->levels[i].cond);
+            --gil->levels[i].n_queue;
+            gil->handoff = i;
+            break;
+        }
+    if (i == PRIORITY_LEVELS) {
+        /* no one is waiting */
+        gil->handoff = -1;
+        gil->state = -1;
+    }
+    MUTEX_UNLOCK(gil->mutex);
+}
+
+#endif
\ No newline at end of file
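
To see how the two priority levels are meant to interact, here is a sketch (not part of the patch) of a CPU-bound and an I/O-bound thread sharing a PRIORITY_GIL lock through the public pythread API. All demo_* names are invented and the loop bodies stand in for real work.

#include "pythread.h"

static PyThread_type_gil demo_gil;

static void
demo_cpu_thread(void *arg)
{
    for (;;) {
        PyThread_acquire_gil(demo_gil, 1);  /* 1 = low priority: a voluntary yield */
        /* ... interpret a batch of bytecodes ... */
        PyThread_release_gil(demo_gil);     /* handed straight to any priority-0 waiter */
    }
}

static void
demo_io_thread(void *arg)
{
    for (;;) {
        /* ... block on a socket or file without holding the GIL ... */
        PyThread_acquire_gil(demo_gil, 0);  /* 0 = high priority: goes ahead of the CPU thread */
        /* ... deliver the result to Python code ... */
        PyThread_release_gil(demo_gil);
    }
}

static void
demo_start(void)
{
    PyThread_init_thread();
    demo_gil = PyThread_allocate_gil();
    PyThread_start_new_thread(demo_cpu_thread, NULL);
    PyThread_start_new_thread(demo_io_thread, NULL);
}
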
Index: Python/thread_nt.h
===================================================================
--- Python/thread_nt.h	(revision 79534)
+++ Python/thread_nt.h	(working copy)
@@ -229,7 +229,7 @@
 {
     dprintf(("%ld: PyThread_free_lock(%p) called\n", PyThread_get_thread_ident(),aLock));
 
-    FreeNonRecursiveMutex(aLock) ;
+    FreeNonRecursiveMutex((PNRMUTEX)aLock) ;
 }
 
 /*
@@ -357,3 +357,123 @@
 {}
 
 #endif
+
+/* PyThread_*_gil functions */
+
+/* portable mutexes and condition variables for use in thread_gil.h */
+
+#define MUTEX_T CRITICAL_SECTION
+#define MUTEX_INIT(mut) do { \
+    if (!(InitializeCriticalSectionAndSpinCount(&(mut), 4000))) \
+        Py_FatalError("InitializeCriticalSectionAndSpinCount(" #mut ") failed"); \
+} while (0)
+#define MUTEX_FINI(mut) \
+    DeleteCriticalSection(&(mut))
+#define MUTEX_LOCK(mut) \
+    EnterCriticalSection(&(mut))
+#define MUTEX_UNLOCK(mut) \
+    LeaveCriticalSection(&(mut))
+
+/* We emulate condition variables with a semaphore.
+   We use a semaphore rather than an auto-reset event: although an
+   auto-reset event might appear to solve the lost-wakeup bug (the race
+   between releasing the outer lock and waiting) because it maintains
+   state even though a wait hasn't happened yet, there is still a lost
+   wakeup problem if more than one thread is interrupted in the critical
+   place.  A semaphore solves that.
+   Because it is ok to signal a condition variable with no one waiting,
+   we need to keep track of the number of waiting threads.  Otherwise,
+   the semaphore's count could rise without bound.
+
+   Generic emulations of the pthread_cond_* API using Win32 functions
+   can be found on the Web.  The following read can be edifying (or not):
+   http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
+*/
+typedef struct COND_T
+{
+    HANDLE sem;         /* the semaphore */
+    int n_waiting;      /* how many waiters are still unreleased */
+} COND_T;
+
+__inline void _cond_init(COND_T *cond)
+{
+    /* A semaphore with a large max value.  The positive count
+     * is only needed to catch those "lost wakeup" events and
+     * race conditions when a timed wait elapses.  It is also
+     * useful for "broadcast".
+     */
+    if (!(cond->sem = CreateSemaphore(NULL, 0, 1000, NULL)))
+        Py_FatalError("CreateSemaphore() failed");
+    cond->n_waiting = 0;
+}
+
+__inline void _cond_fini(COND_T *cond)
+{
+    BOOL ok = CloseHandle(cond->sem);
+    if (!ok)
+        Py_FatalError("CloseHandle() failed");
+}
+
+__inline void _cond_wait(COND_T *cond, MUTEX_T *mut)
+{
+    ++cond->n_waiting;
+    MUTEX_UNLOCK(*mut);
+    /* The "lost wakeup bug" would occur if the caller were interrupted
+     * here, but we are safe because we are using a semaphore which has
+     * an internal count.
+     */
+    if (WaitForSingleObject(cond->sem, INFINITE) == WAIT_FAILED)
+        Py_FatalError("WaitForSingleObject() failed");
+    MUTEX_LOCK(*mut);
+}
+
+__inline int _cond_timed_wait(COND_T *cond, MUTEX_T *mut, int us)
+{
+    DWORD r;
+    ++cond->n_waiting;
+    MUTEX_UNLOCK(*mut);
+    r = WaitForSingleObject(cond->sem, us / 1000);
+    if (r == WAIT_FAILED)
+        Py_FatalError("WaitForSingleObject() failed");
+    MUTEX_LOCK(*mut);
+    if (r == WAIT_TIMEOUT)
+        /* possible race with _cond_signal, see below */
+        --cond->n_waiting;
+    return r == WAIT_TIMEOUT;
+}
+
+__inline void _cond_signal(COND_T *cond) {
+    if (cond->n_waiting > 0) {
+        if (!ReleaseSemaphore(cond->sem, 1, NULL)) {
+            Py_FatalError("ReleaseSemaphore() failed");
+        }
+        /* Note the possible race condition here if a timeout
+         * occurs.  It is possible that this "Release" was ineffective
+         * and we decremented twice, leaving the semaphore in a positive
+         * state.  That is ok, because it will cause the next "wait"
+         * to just go through directly and the universe rights itself.
+         */
+        --cond->n_waiting;
+    }
+}
+
+#define COND_INIT(cond) \
+    _cond_init(&(cond))
+#define COND_FINI(cond) \
+    _cond_fini(&(cond))
+#define COND_SIGNAL(cond) \
+    _cond_signal(&(cond))
+#define COND_WAIT(cond, mut) \
+    _cond_wait(&(cond), &(mut))
+#define COND_TIMED_WAIT(cond, mut, us, timeout_result) do { \
+    (timeout_result) = _cond_timed_wait(&(cond), &(mut), us); \
+} while (0)
+
+/* Choose ROUNDROBIN_GIL because that is the traditional behaviour on Windows */
+#define ROUNDROBIN_GIL
+#include "thread_gil.h"     /* platform independent GIL implementation */
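
The condition-variable emulation above is only safe when used the way thread_gil.h uses it: the predicate is always re-tested in a loop while the mutex is held, because a wakeup may be stale. A sketch of that pattern, with invented demo_* names, compiled after the definitions above:

static MUTEX_T demo_mutex;
static COND_T demo_cond;
static int demo_ready;

static void
demo_setup(void)
{
    MUTEX_INIT(demo_mutex);
    COND_INIT(demo_cond);
}

static void
demo_wait_until_ready(void)
{
    MUTEX_LOCK(demo_mutex);
    while (!demo_ready)                      /* predicate loop, as in PyThread_acquire_gil() */
        COND_WAIT(demo_cond, demo_mutex);    /* drops the mutex while blocked */
    demo_ready = 0;                          /* consume the event under the mutex */
    MUTEX_UNLOCK(demo_mutex);
}

static void
demo_post_ready(void)
{
    MUTEX_LOCK(demo_mutex);
    demo_ready = 1;
    COND_SIGNAL(demo_cond);                  /* only releases the semaphore if someone is waiting */
    MUTEX_UNLOCK(demo_mutex);
}
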
Index: Python/thread_pthread.h
===================================================================
--- Python/thread_pthread.h	(revision 79534)
+++ Python/thread_pthread.h	(working copy)
@@ -491,3 +491,64 @@
 }
 
 #define THREAD_SET_STACKSIZE(x)	_pythread_pthread_set_stacksize(x)
+
+/* PyThread_*_gil functions */
+
+/* portable mutexes and condition variables for use in thread_gil.h */
+
+#define ADD_MICROSECONDS(tv, interval) \
+do { \
+    tv.tv_usec += (long) interval; \
+    tv.tv_sec += tv.tv_usec / 1000000; \
+    tv.tv_usec %= 1000000; \
+} while (0)
+
+/* We assume all modern POSIX systems have gettimeofday() */
+#ifdef GETTIMEOFDAY_NO_TZ
+#define GETTIMEOFDAY(ptv) gettimeofday(ptv)
+#else
+#define GETTIMEOFDAY(ptv) gettimeofday(ptv, (struct timezone *)NULL)
+#endif
+
+#define MUTEX_T pthread_mutex_t
+#define MUTEX_INIT(mut) \
+    if (pthread_mutex_init(&mut, NULL)) { \
+        Py_FatalError("pthread_mutex_init(" #mut ") failed"); };
+#define MUTEX_FINI(mut) \
+    if (pthread_mutex_destroy(&mut)) { \
+        Py_FatalError("pthread_mutex_destroy(" #mut ") failed"); };
+#define MUTEX_LOCK(mut) \
+    if (pthread_mutex_lock(&mut)) { \
+        Py_FatalError("pthread_mutex_lock(" #mut ") failed"); };
+#define MUTEX_UNLOCK(mut) \
+    if (pthread_mutex_unlock(&mut)) { \
+        Py_FatalError("pthread_mutex_unlock(" #mut ") failed"); };
+
+#define COND_T pthread_cond_t
+#define COND_INIT(cond) \
+    if (pthread_cond_init(&cond, NULL)) { \
+        Py_FatalError("pthread_cond_init(" #cond ") failed"); };
+#define COND_FINI(cond) \
+    if (pthread_cond_destroy(&cond)) { \
+        Py_FatalError("pthread_cond_destroy(" #cond ") failed"); };
+#define COND_SIGNAL(cond) \
+    if (pthread_cond_signal(&cond)) { \
+        Py_FatalError("pthread_cond_signal(" #cond ") failed"); };
+#define COND_WAIT(cond, mut) \
+    if (pthread_cond_wait(&cond, &mut)) { \
+        Py_FatalError("pthread_cond_wait(" #cond ") failed"); };
+#define COND_TIMED_WAIT(cond, mut, microseconds, timeout_result) \
+    { \
+        int r; \
+        struct timespec ts; \
+        struct timeval deadline; \
+        \
+        GETTIMEOFDAY(&deadline); \
+        ADD_MICROSECONDS(deadline, microseconds); \
+        ts.tv_sec = deadline.tv_sec; \
+        ts.tv_nsec = deadline.tv_usec * 1000; \
+        \
+        r = pthread_cond_timedwait(&cond, &mut, &ts); \
+        if (r == ETIMEDOUT) \
+            timeout_result = 1; \
+        else if (r) \
+            Py_FatalError("pthread_cond_timedwait(" #cond ") failed"); \
+        else \
+            timeout_result = 0; \
+    }
+
+#include "thread_gil.h"     /* platform independent GIL implementation */
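
Both backends also define COND_TIMED_WAIT, which thread_gil.h does not use yet. As a hint of what it is for, here is a speculative sketch of a timed acquire against the LEGACY_GIL struct layout (mutex, cond, locked); the function name is invented, and each retry restarts the timeout, which is good enough for an illustration.

/* Returns 1 if the lock was obtained, 0 on timeout. */
static int
demo_acquire_with_timeout(GIL_T *gil, long microseconds)
{
    int timed_out = 0;
    MUTEX_LOCK(gil->mutex);
    while (gil->locked && !timed_out)
        COND_TIMED_WAIT(gil->cond, gil->mutex, microseconds, timed_out);
    if (!timed_out)
        gil->locked = 1;                     /* we now own the lock */
    MUTEX_UNLOCK(gil->mutex);
    return !timed_out;
}
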