Index: Include/ceval.h
===================================================================
--- Include/ceval.h (revision 79534)
+++ Include/ceval.h (working copy)
@@ -126,6 +126,7 @@
PyAPI_FUNC(int) PyEval_ThreadsInitialized(void);
PyAPI_FUNC(void) PyEval_InitThreads(void);
PyAPI_FUNC(void) PyEval_AcquireLock(void);
+PyAPI_FUNC(void) PyEval_AcquireLockPri(int pri);
PyAPI_FUNC(void) PyEval_ReleaseLock(void);
PyAPI_FUNC(void) PyEval_AcquireThread(PyThreadState *tstate);
PyAPI_FUNC(void) PyEval_ReleaseThread(PyThreadState *tstate);
Index: Include/pythread.h
===================================================================
--- Include/pythread.h (revision 79534)
+++ Include/pythread.h (working copy)
@@ -4,6 +4,7 @@
typedef void *PyThread_type_lock;
typedef void *PyThread_type_sema;
+typedef struct _gil_type *PyThread_type_gil;
#ifdef __cplusplus
extern "C" {
@@ -21,6 +22,12 @@
#define NOWAIT_LOCK 0
PyAPI_FUNC(void) PyThread_release_lock(PyThread_type_lock);
+/* special GIL functions */
+PyAPI_FUNC(PyThread_type_gil) PyThread_allocate_gil(void);
+PyAPI_FUNC(void) PyThread_free_gil(PyThread_type_gil gil);
+PyAPI_FUNC(void) PyThread_acquire_gil(PyThread_type_gil gil, int pri);
+PyAPI_FUNC(void) PyThread_release_gil(PyThread_type_gil gil);
+
PyAPI_FUNC(size_t) PyThread_get_stacksize(void);
PyAPI_FUNC(int) PyThread_set_stacksize(size_t);
Index: PCbuild/pythoncore.vcproj
===================================================================
--- PCbuild/pythoncore.vcproj (revision 79534)
+++ PCbuild/pythoncore.vcproj (working copy)
@@ -1851,6 +1851,10 @@
>
+
+
Index: Python/ceval.c
===================================================================
--- Python/ceval.c (revision 79534)
+++ Python/ceval.c (working copy)
@@ -232,7 +232,7 @@
#endif
#include "pythread.h"
-static PyThread_type_lock interpreter_lock = 0; /* This is the GIL */
+static PyThread_type_gil interpreter_lock = 0; /* This is the GIL */
static PyThread_type_lock pending_lock = 0; /* for pending calls */
static long main_thread = 0;
@@ -247,21 +247,27 @@
{
if (interpreter_lock)
return;
- interpreter_lock = PyThread_allocate_lock();
- PyThread_acquire_lock(interpreter_lock, 1);
+ interpreter_lock = PyThread_allocate_gil();
+ PyThread_acquire_gil(interpreter_lock, 1);
main_thread = PyThread_get_thread_ident();
}
void
PyEval_AcquireLock(void)
{
- PyThread_acquire_lock(interpreter_lock, 1);
+ PyThread_acquire_gil(interpreter_lock, 1);
}
void
+PyEval_AcquireLockPri(int pri)
+{
+ PyThread_acquire_gil(interpreter_lock, pri);
+}
+
+void
PyEval_ReleaseLock(void)
{
- PyThread_release_lock(interpreter_lock);
+ PyThread_release_gil(interpreter_lock);
}
void
@@ -271,7 +277,7 @@
Py_FatalError("PyEval_AcquireThread: NULL new thread state");
/* Check someone has called PyEval_InitThreads() to create the lock */
assert(interpreter_lock);
- PyThread_acquire_lock(interpreter_lock, 1);
+ PyThread_acquire_gil(interpreter_lock, 1);
if (PyThreadState_Swap(tstate) != NULL)
Py_FatalError(
"PyEval_AcquireThread: non-NULL old thread state");
@@ -284,7 +290,7 @@
Py_FatalError("PyEval_ReleaseThread: NULL thread state");
if (PyThreadState_Swap(NULL) != tstate)
Py_FatalError("PyEval_ReleaseThread: wrong thread state");
- PyThread_release_lock(interpreter_lock);
+ PyThread_release_gil(interpreter_lock);
}
/* This function is called from PyOS_AfterFork to ensure that newly
@@ -304,9 +310,9 @@
much error-checking. Doing this cleanly would require
adding a new function to each thread_*.h. Instead, just
create a new lock and waste a little bit of memory */
- interpreter_lock = PyThread_allocate_lock();
+ interpreter_lock = PyThread_allocate_gil();
pending_lock = PyThread_allocate_lock();
- PyThread_acquire_lock(interpreter_lock, 1);
+ PyThread_acquire_gil(interpreter_lock, 1);
main_thread = PyThread_get_thread_ident();
/* Update the threading module with the new state.
@@ -340,7 +346,7 @@
Py_FatalError("PyEval_SaveThread: NULL tstate");
#ifdef WITH_THREAD
if (interpreter_lock)
- PyThread_release_lock(interpreter_lock);
+ PyThread_release_gil(interpreter_lock);
#endif
return tstate;
}
@@ -353,14 +359,20 @@
#ifdef WITH_THREAD
if (interpreter_lock) {
int err = errno;
- PyThread_acquire_lock(interpreter_lock, 1);
+ /* high priority gil request */
+ PyThread_acquire_gil(interpreter_lock, 0);
errno = err;
}
#endif
PyThreadState_Swap(tstate);
}
+void _PyEval_RelinquishGil(void) /* ask the ceval loop to yield the GIL soon */
+{
+	_Py_Ticker = 0;
+}
+
/* Mechanism whereby asynchronously executing callbacks (e.g. UNIX
signal handlers or Mac I/O completion routines) can schedule calls
to a function to be called synchronously.
@@ -1012,11 +1024,11 @@
if (PyThreadState_Swap(NULL) != tstate)
Py_FatalError("ceval: tstate mix-up");
- PyThread_release_lock(interpreter_lock);
+ PyThread_release_gil(interpreter_lock);
/* Other threads may run now */
- PyThread_acquire_lock(interpreter_lock, 1);
+ PyThread_acquire_gil(interpreter_lock, 1);
if (PyThreadState_Swap(tstate) != NULL)
Py_FatalError("ceval: orphan tstate");
Index: Python/thread.c
===================================================================
--- Python/thread.c (revision 79534)
+++ Python/thread.c (working copy)
@@ -419,3 +419,32 @@
}
#endif /* Py_HAVE_NATIVE_TLS */
+
+#ifndef THREAD_GIL_FUNCTIONS
+
+/* default gil implementation */
+
+PyThread_type_gil
+PyThread_allocate_gil(void)
+{
+ return (PyThread_type_gil) PyThread_allocate_lock();
+}
+
+void
+PyThread_free_gil(PyThread_type_gil gil)
+{
+ PyThread_free_lock((PyThread_type_lock)gil);
+}
+
+void
+PyThread_acquire_gil(PyThread_type_gil gil, int pri)
+{
+ PyThread_acquire_lock((PyThread_type_lock)gil, 1);
+}
+
+void
+PyThread_release_gil(PyThread_type_gil gil)
+{
+ PyThread_release_lock((PyThread_type_lock)gil);
+}
+#endif
Index: Python/thread_gil.h
===================================================================
--- Python/thread_gil.h (revision 0)
+++ Python/thread_gil.h (revision 0)
@@ -0,0 +1,264 @@
+/* definitions of the PyThread_*_gil functions, using portable
+ * macros for mutexes and condition variables
+ */
+
+#define THREAD_GIL_FUNCTIONS
+
+#if !defined LEGACY_GIL && !defined ROUNDROBIN_GIL && !defined PRIORITY_GIL
+#define LEGACY_GIL
+#endif
+
+#ifdef LEGACY_GIL
+
+/* This GIL implementation is the one in use with pthreads.
+ * It has problems because there is nothing preventing the same
+ * thread from reacquiring the gil immediately after releasing it to
+ * someone else. This causes problems in the Release/Reacquire
+ * pattern in ceval.c.
+ * This is because modern implementations of mutexes typically
+ * are greedy, allowing an interloping thread to acquire the mutex
+ * once it is free, not adhering to a strict FIFO order. This is
+ * done to reduce lock-convoying problems.
+ * The traditional Windows implementation has not had this issue because
+ * it depends on Event objects for its locks and they respect
+ * the FIFO order of waiting threads.
+ */
+
+typedef struct GIL_T
+{
+ MUTEX_T mutex;
+ COND_T cond;
+ char locked;
+} GIL_T;
+
+PyThread_type_gil
+PyThread_allocate_gil(void)
+{
+ GIL_T *gil = PyMem_MALLOC(sizeof(GIL_T));
+ if (!gil)
+ return NULL;
+ MUTEX_INIT(gil->mutex);
+ COND_INIT(gil->cond);
+ gil->locked = 0;
+ return (PyThread_type_gil) gil;
+}
+
+void
+PyThread_free_gil(PyThread_type_gil _gil)
+{
+ GIL_T *gil = (GIL_T*)_gil;
+ MUTEX_FINI(gil->mutex);
+ COND_FINI(gil->cond);
+ PyMem_FREE(gil);
+}
+
+void
+PyThread_acquire_gil(PyThread_type_gil _gil, int pri)
+{
+ GIL_T *gil = (GIL_T*)_gil;
+ MUTEX_LOCK(gil->mutex);
+ while(gil->locked)
+ COND_WAIT(gil->cond, gil->mutex);
+ gil->locked = 1;
+ MUTEX_UNLOCK(gil->mutex);
+}
+
+void
+PyThread_release_gil(PyThread_type_gil _gil)
+{
+ GIL_T *gil = (GIL_T*)_gil;
+ MUTEX_LOCK(gil->mutex);
+ gil->locked = 0;
+ COND_SIGNAL(gil->cond);
+ MUTEX_UNLOCK(gil->mutex);
+}
+#endif
+
+#ifdef ROUNDROBIN_GIL
+
+/* This is a simple improvement on the previous GIL. A thread will
+ * enter the condition wait state if there is another thread present
+ * that has not woken up. This prevents a thread from jumping the
+ * queue on another thread waiting and allows the Release/Reacquire
+ * pattern in ceval.c to work properly
+ */
+
+typedef struct GIL_T
+{
+ MUTEX_T mutex;
+ COND_T cond;
+ int n_waiting;
+ char locked;
+} GIL_T;
+
+PyThread_type_gil
+PyThread_allocate_gil(void)
+{
+ GIL_T *gil = PyMem_MALLOC(sizeof(GIL_T));
+ if (!gil)
+ return NULL;
+ MUTEX_INIT(gil->mutex);
+ COND_INIT(gil->cond);
+ gil->locked = 0;
+ gil->n_waiting = 0;
+ return (PyThread_type_gil) gil;
+}
+
+void
+PyThread_free_gil(PyThread_type_gil _gil)
+{
+ GIL_T *gil = (GIL_T*)_gil;
+ MUTEX_FINI(gil->mutex);
+ COND_FINI(gil->cond);
+ PyMem_FREE(gil);
+}
+
+void
+PyThread_acquire_gil(PyThread_type_gil _gil, int pri)
+{
+ GIL_T *gil = (GIL_T*)_gil;
+ MUTEX_LOCK(gil->mutex);
+ /* go back in the line if someone else is already waiting
+ * for the GIL
+ */
+ if (gil->locked || gil->n_waiting) {
+ ++gil->n_waiting;
+ do {
+ COND_WAIT(gil->cond, gil->mutex);
+ } while( gil->locked);
+ --gil->n_waiting;
+ }
+ gil->locked = 1;
+ MUTEX_UNLOCK(gil->mutex);
+}
+
+void
+PyThread_release_gil(PyThread_type_gil _gil)
+{
+ GIL_T *gil = (GIL_T*)_gil;
+ MUTEX_LOCK(gil->mutex);
+ gil->locked = 0;
+ COND_SIGNAL(gil->cond);
+ MUTEX_UNLOCK(gil->mutex);
+}
+#endif
+
+#ifdef PRIORITY_GIL
+
+/* Finally, a priority based GIL. Each priority level has its own
+ * condition variable. When the lock is released, it is handed off
+ * to the highest level that is currently waiting for the lock.
+ * No one gets the lock until it is handed to them.
+ * Priority is based on a priority value given when the lock is
+ * acquired. The idea is that the ceval Release/Reacquire step
+ * uses low priority (1) because it is voluntarily giving up the
+ * GIL. Other functions use high priority (0) because they have
+ * just finished IO or other such business and are eager to
+ * continue. A cpu bound thread that relinquishes the GIL in ceval.c
+ * will therefore yield to an IO type gil request that is waiting.
+ * In addition, we can request ceval.c to immediately give up the
+ * GIL if we see that the gil was last acquired with a lower
+ * priority than we have.
+ */
+
+#define PRIORITY_LEVELS 2
+#define INTERRUPT_GIL
+
+#ifdef INTERRUPT_GIL
+void _PyEval_RelinquishGil();
+#endif
+
+typedef struct GIL_LEVEL_T
+{
+ COND_T cond;
+ int n_queue; /* threads in wait */
+} GIL_LEVEL_T;
+
+typedef struct GIL_T
+{
+ MUTEX_T mutex;
+ int state; /* -1 = free, 0 and positive: locked at priority 'state' */
+ int handoff; /* -1 = no handoff */
+ GIL_LEVEL_T levels[PRIORITY_LEVELS];
+} GIL_T;
+
+PyThread_type_gil
+PyThread_allocate_gil(void)
+{
+ int i;
+ GIL_T *gil = PyMem_MALLOC(sizeof(GIL_T));
+ if (!gil)
+ return NULL;
+ MUTEX_INIT(gil->mutex);
+ gil->state = -1; /* free */
+ gil->handoff = -1; /* no handoff */
+ for(i = 0; i < PRIORITY_LEVELS; ++i) {
+ COND_INIT(gil->levels[i].cond);
+ gil->levels[i].n_queue = 0;
+ }
+ return (PyThread_type_gil) gil;
+}
+
+void
+PyThread_free_gil(PyThread_type_gil _gil)
+{
+ int i;
+ GIL_T *gil = (GIL_T*)_gil;
+ MUTEX_FINI(gil->mutex);
+ for(i = 0; i < PRIORITY_LEVELS; ++i)
+ COND_FINI(gil->levels[i].cond);
+ PyMem_FREE(gil);
+}
+
+void
+PyThread_acquire_gil(PyThread_type_gil _gil, int pri)
+{
+ GIL_T *gil = (GIL_T*)_gil;
+
+ if (pri<0)
+ pri = 0;
+ else if (pri >= PRIORITY_LEVELS)
+ pri = PRIORITY_LEVELS - 1;
+
+ MUTEX_LOCK(gil->mutex);
+
+ if (gil->state != -1) {
+#ifdef INTERRUPT_GIL
+ if (pri < gil->state)
+ /* we trump whomever is running, ask for the gil */
+ _PyEval_RelinquishGil();
+#endif
+ /* wait for lock handoff */
+ ++gil->levels[pri].n_queue;
+ while (gil->handoff != pri)
+ COND_WAIT(gil->levels[pri].cond, gil->mutex);
+ /* we were handed the lock */
+ gil->handoff = -1;
+ }
+ gil->state = pri;
+ MUTEX_UNLOCK(gil->mutex);
+}
+
+void
+PyThread_release_gil(PyThread_type_gil _gil)
+{
+ GIL_T *gil = (GIL_T*)_gil;
+ int i;
+ MUTEX_LOCK(gil->mutex);
+ /* find highest priority waiting thread to hand lock off to */
+ for (i = 0; i < PRIORITY_LEVELS; ++i)
+ if (gil->levels[i].n_queue > 0) {
+ COND_SIGNAL(gil->levels[i].cond);
+ --gil->levels[i].n_queue;
+ gil->handoff = i;
+ break;
+ }
+ if (i == PRIORITY_LEVELS) {
+ /* nothing found */
+ gil->handoff = -1;
+ gil->state = -1;
+ }
+ MUTEX_UNLOCK(gil->mutex);
+}
+
+#endif
\ No newline at end of file
Index: Python/thread_nt.h
===================================================================
--- Python/thread_nt.h (revision 79534)
+++ Python/thread_nt.h (working copy)
@@ -229,7 +229,7 @@
{
dprintf(("%ld: PyThread_free_lock(%p) called\n", PyThread_get_thread_ident(),aLock));
- FreeNonRecursiveMutex(aLock) ;
+ FreeNonRecursiveMutex((PNRMUTEX)aLock) ;
}
/*
@@ -357,3 +357,123 @@
{}
#endif
+
+/* PyThread_*_gil functions */
+
+/* portable mutexes and condition variables for use in thread_gil.h */
+
+#define MUTEX_T CRITICAL_SECTION
+#define MUTEX_INIT(mut) do { \
+	if (!(InitializeCriticalSectionAndSpinCount(&(mut), 4000))) \
+		Py_FatalError("InitializeCriticalSectionAndSpinCount(" #mut ") failed"); \
+} while (0)
+#define MUTEX_FINI(mut) \
+	DeleteCriticalSection(&(mut))
+#define MUTEX_LOCK(mut) \
+	EnterCriticalSection(&(mut))
+#define MUTEX_UNLOCK(mut) \
+	LeaveCriticalSection(&(mut))
+
+/* We emulate condition variables with a semaphore.
+   We use a Semaphore rather than an auto-reset event, because although
+   an auto-reset event might appear to solve the lost-wakeup bug (race
+   condition between releasing the outer lock and waiting) because it
+   maintains state even though a wait hasn't happened, there is still
+   a lost wakeup problem if more than one thread is interrupted in the
+   critical place. A semaphore solves that.
+   Because it is ok to signal a condition variable with no one
+   waiting, we need to keep track of the number of
+   waiting threads. Otherwise, the semaphore's state could rise
+   without bound.
+
+   Generic emulations of the pthread_cond_* API using
+   Win32 functions can be found on the Web.
+   The following read can be edifying (or not):
+   http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
+*/
+typedef struct COND_T
+{
+ HANDLE sem; /* the semaphore */
+ int n_waiting; /* how many are unreleased */
+} COND_T;
+
+__inline void _cond_init(COND_T *cond)
+{
+	/* a semaphore with a large max value. The positive value
+ * is only needed to catch those "lost wakeup" events and
+ * race conditions when a timed wait elapses. It is also
+ * useful for "broadcast".
+ */
+ if (!(cond->sem = CreateSemaphore(NULL, 0, 1000, NULL)))
+ Py_FatalError("CreateSemaphore() failed");
+ cond->n_waiting = 0;
+}
+
+__inline void _cond_fini(COND_T *cond)
+{
+ BOOL ok = CloseHandle(cond->sem);
+ if (!ok)
+ Py_FatalError("CloseHandle() failed");
+}
+
+__inline void _cond_wait(COND_T *cond, MUTEX_T *mut)
+{
+ ++cond->n_waiting;
+ MUTEX_UNLOCK(*mut);
+	/* "lost wakeup bug" would occur if the caller were interrupted here,
+	 * but we are safe because we are using a semaphore which has an internal
+	 * count
+ */
+ if (WaitForSingleObject(cond->sem, INFINITE) == WAIT_FAILED)
+ Py_FatalError("WaitForSingleObject() failed");
+ MUTEX_LOCK(*mut);
+}
+
+/* on a signaled wakeup, _cond_signal has decremented n_waiting for us */
+__inline int _cond_timed_wait(COND_T *cond, MUTEX_T *mut, int us)
+{
+	DWORD r;
+	++cond->n_waiting;
+	MUTEX_UNLOCK(*mut);
+	/* round up: a sub-millisecond timeout must still wait */
+	r = WaitForSingleObject(cond->sem, (us + 999) / 1000);
+	if (r == WAIT_FAILED)
+		Py_FatalError("WaitForSingleObject() failed");
+	MUTEX_LOCK(*mut);
+	if (r == WAIT_TIMEOUT)
+		/* possible race with _cond_signal, see below */
+		--cond->n_waiting;
+	return r == WAIT_TIMEOUT;
+}
+
+__inline void _cond_signal(COND_T *cond) {
+ if (cond->n_waiting > 0) {
+ if (!ReleaseSemaphore(cond->sem, 1, NULL)) {
+ Py_FatalError("ReleaseSemaphore() failed");
+ }
+ /* Note the possible race condition here if a timeout
+ * occurs. It is possible that this "Release" was ineffective
+ * and we decremented twice and left the semaphore in a positive
+ * state. That is ok, because
+ * it will cause the next "wait" to just go through directly and
+ * the universe rights itself
+ */
+ --cond->n_waiting;
+ }
+}
+
+#define COND_INIT(cond) \
+ _cond_init(&(cond))
+#define COND_FINI(cond) \
+ _cond_fini(&(cond))
+#define COND_SIGNAL(cond) \
+ _cond_signal(&(cond))
+#define COND_WAIT(cond, mut) \
+ _cond_wait(&(cond), &(mut))
+#define COND_TIMED_WAIT(cond, mut, us, timeout_result) do { \
+ (timeout_result) = _cond_timed_wait(&(cond), &(mut), us); \
+} while (0)
+
+/* choose roundrobin because that is the traditional behaviour on Windows */
+#define ROUNDROBIN_GIL
+#include "thread_gil.h" /* platform independent GIL implementation */
Index: Python/thread_pthread.h
===================================================================
--- Python/thread_pthread.h (revision 79534)
+++ Python/thread_pthread.h (working copy)
@@ -491,3 +491,64 @@
}
#define THREAD_SET_STACKSIZE(x) _pythread_pthread_set_stacksize(x)
+
+/* PyThread_*_gil functions */
+
+/* portable mutexes and condition variables for use in thread_gil.h */
+
+#define ADD_MICROSECONDS(tv, interval) \
+do { \
+ tv.tv_usec += (long) interval; \
+ tv.tv_sec += tv.tv_usec / 1000000; \
+ tv.tv_usec %= 1000000; \
+} while (0)
+
+/* We assume all modern POSIX systems have gettimeofday() */
+#ifdef GETTIMEOFDAY_NO_TZ
+#define GETTIMEOFDAY(ptv) gettimeofday(ptv)
+#else
+#define GETTIMEOFDAY(ptv) gettimeofday(ptv, (struct timezone *)NULL)
+#endif
+
+#define MUTEX_T pthread_mutex_t
+#define MUTEX_INIT(mut) \
+	if (pthread_mutex_init(&mut, NULL)) { \
+		Py_FatalError("pthread_mutex_init(" #mut ") failed"); };
+#define MUTEX_FINI(mut) \
+	if (pthread_mutex_destroy(&mut)) { \
+		Py_FatalError("pthread_mutex_destroy(" #mut ") failed"); };
+#define MUTEX_LOCK(mut) \
+	if (pthread_mutex_lock(&mut)) { \
+		Py_FatalError("pthread_mutex_lock(" #mut ") failed"); };
+#define MUTEX_UNLOCK(mut) \
+	if (pthread_mutex_unlock(&mut)) { \
+		Py_FatalError("pthread_mutex_unlock(" #mut ") failed"); };
+#define COND_T pthread_cond_t
+#define COND_INIT(cond) \
+	if (pthread_cond_init(&cond, NULL)) { \
+		Py_FatalError("pthread_cond_init(" #cond ") failed"); };
+#define COND_FINI(cond) \
+	if (pthread_cond_destroy(&cond)) { \
+		Py_FatalError("pthread_cond_destroy(" #cond ") failed"); };
+#define COND_SIGNAL(cond) \
+	if (pthread_cond_signal(&cond)) { \
+		Py_FatalError("pthread_cond_signal(" #cond ") failed"); };
+#define COND_WAIT(cond, mut) \
+	if (pthread_cond_wait(&cond, &mut)) { \
+		Py_FatalError("pthread_cond_wait(" #cond ") failed"); };
+#define COND_TIMED_WAIT(cond, mut, microseconds, timeout_result) { \
+		int r; struct timespec ts; struct timeval deadline; \
+		GETTIMEOFDAY(&deadline); \
+		ADD_MICROSECONDS(deadline, microseconds); \
+		ts.tv_sec = deadline.tv_sec; \
+		ts.tv_nsec = deadline.tv_usec * 1000; \
+		r = pthread_cond_timedwait(&cond, &mut, &ts); \
+		if (r == ETIMEDOUT) \
+			timeout_result = 1; \
+		else if (r) \
+			Py_FatalError("pthread_cond_timedwait(" #cond ") failed"); \
+		else \
+			timeout_result = 0; \
+	}
+
+#include "thread_gil.h" /* platform independent GIL implementation */