Index: Include/ceval.h
===================================================================
--- Include/ceval.h	(revision 79534)
+++ Include/ceval.h	(working copy)
@@ -126,6 +126,7 @@
 PyAPI_FUNC(int) PyEval_ThreadsInitialized(void);
 PyAPI_FUNC(void) PyEval_InitThreads(void);
 PyAPI_FUNC(void) PyEval_AcquireLock(void);
+PyAPI_FUNC(void) PyEval_AcquireLockPri(int pri);
 PyAPI_FUNC(void) PyEval_ReleaseLock(void);
 PyAPI_FUNC(void) PyEval_AcquireThread(PyThreadState *tstate);
 PyAPI_FUNC(void) PyEval_ReleaseThread(PyThreadState *tstate);
Index: Include/pythread.h
===================================================================
--- Include/pythread.h	(revision 79534)
+++ Include/pythread.h	(working copy)
@@ -4,6 +4,7 @@
 
 typedef void *PyThread_type_lock;
 typedef void *PyThread_type_sema;
+typedef struct _gil_type *PyThread_type_gil;
 
 #ifdef __cplusplus
 extern "C" {
@@ -21,6 +22,12 @@
 #define NOWAIT_LOCK	0
 PyAPI_FUNC(void) PyThread_release_lock(PyThread_type_lock);
 
+/* special GIL functions */
+PyAPI_FUNC(PyThread_type_gil) PyThread_allocate_gil(void);
+PyAPI_FUNC(void) PyThread_free_gil(PyThread_type_gil gil);
+PyAPI_FUNC(void) PyThread_acquire_gil(PyThread_type_gil gil, int pri);
+PyAPI_FUNC(void) PyThread_release_gil(PyThread_type_gil gil);
+
 PyAPI_FUNC(size_t) PyThread_get_stacksize(void);
 PyAPI_FUNC(int) PyThread_set_stacksize(size_t);
Index: PCbuild/pythoncore.vcproj
===================================================================
--- PCbuild/pythoncore.vcproj	(revision 79534)
+++ PCbuild/pythoncore.vcproj	(working copy)
@@ -1851,6 +1851,10 @@
 				>
 			</File>
+			<File
+				RelativePath="..\Python\thread_gil.h"
+				>
+			</File>
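
For orientation, here is a minimal sketch (not part of the patch) of how the four PyThread_*_gil functions declared in Include/pythread.h above fit together. It assumes it is compiled inside a CPython source tree so that "pythread.h" is available; the helper name demo_gil_lifecycle is invented for the example.

#include "pythread.h"

/* Sketch only: allocate a GIL object, take and release it once, free it. */
static void
demo_gil_lifecycle(void)
{
    PyThread_type_gil gil = PyThread_allocate_gil();
    if (gil == NULL)
        return;                        /* allocation failed */
    PyThread_acquire_gil(gil, 0);      /* 0 = high priority request */
    /* ... code that must hold the lock runs here ... */
    PyThread_release_gil(gil);         /* wake or hand off to a waiter */
    PyThread_free_gil(gil);            /* destroy the underlying mutex/condition */
}
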
Index: Python/ceval.c
===================================================================
--- Python/ceval.c	(revision 79534)
+++ Python/ceval.c	(working copy)
@@ -232,7 +232,7 @@
 #endif
 #include "pythread.h"
 
-static PyThread_type_lock interpreter_lock = 0; /* This is the GIL */
+static PyThread_type_gil interpreter_lock = 0; /* This is the GIL */
 static PyThread_type_lock pending_lock = 0; /* for pending calls */
 static long main_thread = 0;
 
@@ -247,21 +247,27 @@
 {
     if (interpreter_lock)
         return;
-    interpreter_lock = PyThread_allocate_lock();
-    PyThread_acquire_lock(interpreter_lock, 1);
+    interpreter_lock = PyThread_allocate_gil();
+    PyThread_acquire_gil(interpreter_lock, 1);
     main_thread = PyThread_get_thread_ident();
 }
 
 void
 PyEval_AcquireLock(void)
 {
-    PyThread_acquire_lock(interpreter_lock, 1);
+    PyThread_acquire_gil(interpreter_lock, 1);
 }
 
 void
+PyEval_AcquireLockPri(int pri)
+{
+    PyThread_acquire_gil(interpreter_lock, pri);
+}
+
+void
 PyEval_ReleaseLock(void)
 {
-    PyThread_release_lock(interpreter_lock);
+    PyThread_release_gil(interpreter_lock);
 }
 
 void
@@ -271,7 +277,7 @@
         Py_FatalError("PyEval_AcquireThread: NULL new thread state");
     /* Check someone has called PyEval_InitThreads() to create the lock */
     assert(interpreter_lock);
-    PyThread_acquire_lock(interpreter_lock, 1);
+    PyThread_acquire_gil(interpreter_lock, 1);
     if (PyThreadState_Swap(tstate) != NULL)
         Py_FatalError(
             "PyEval_AcquireThread: non-NULL old thread state");
@@ -284,7 +290,7 @@
         Py_FatalError("PyEval_ReleaseThread: NULL thread state");
     if (PyThreadState_Swap(NULL) != tstate)
         Py_FatalError("PyEval_ReleaseThread: wrong thread state");
-    PyThread_release_lock(interpreter_lock);
+    PyThread_release_gil(interpreter_lock);
 }
 
 /* This function is called from PyOS_AfterFork to ensure that newly
@@ -304,9 +310,9 @@
        much error-checking.  Doing this cleanly would require adding a
       new function to each thread_*.h.  Instead, just create a new lock
       and waste a little bit of memory */
-    interpreter_lock = PyThread_allocate_lock();
+    interpreter_lock = PyThread_allocate_gil();
     pending_lock = PyThread_allocate_lock();
-    PyThread_acquire_lock(interpreter_lock, 1);
+    PyThread_acquire_gil(interpreter_lock, 1);
     main_thread = PyThread_get_thread_ident();
 
     /* Update the threading module with the new state.
@@ -340,7 +346,7 @@
         Py_FatalError("PyEval_SaveThread: NULL tstate");
 #ifdef WITH_THREAD
     if (interpreter_lock)
-        PyThread_release_lock(interpreter_lock);
+        PyThread_release_gil(interpreter_lock);
 #endif
     return tstate;
 }
@@ -353,14 +359,20 @@
 #ifdef WITH_THREAD
     if (interpreter_lock) {
         int err = errno;
-        PyThread_acquire_lock(interpreter_lock, 1);
+        /* high priority gil request */
+        PyThread_acquire_gil(interpreter_lock, 0);
         errno = err;
     }
 #endif
     PyThreadState_Swap(tstate);
 }
 
+void _PyEval_RelinquishGil(void)
+{
+    _Py_Ticker = 0;
+}
+
 /* Mechanism whereby asynchronously executing callbacks (e.g. UNIX
    signal handlers or Mac I/O completion routines) can schedule calls
    to a function to be called synchronously.
@@ -1012,11 +1024,11 @@
             if (PyThreadState_Swap(NULL) != tstate)
                 Py_FatalError("ceval: tstate mix-up");
-            PyThread_release_lock(interpreter_lock);
+            PyThread_release_gil(interpreter_lock);
 
             /* Other threads may run now */
 
-            PyThread_acquire_lock(interpreter_lock, 1);
+            PyThread_acquire_gil(interpreter_lock, 1);
             if (PyThreadState_Swap(tstate) != NULL)
                 Py_FatalError("ceval: orphan tstate");
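
To make the intent of the new priority argument concrete, here is a hedged sketch (again not part of the patch) of how code that manages the interpreter lock by hand, in the style of the PyEval_AcquireLock()/PyEval_ReleaseLock() pair, could reacquire at high priority after blocking work. The function call_blocking_with_priority and its do_blocking_work callback are invented for the example.

#include "Python.h"

/* Sketch only: release the GIL around blocking work, then reacquire it
 * at priority 0 so this thread is preferred over a CPU-bound thread
 * that merely yielded in the eval loop (mirroring what the patched
 * PyEval_RestoreThread() does). */
static void
call_blocking_with_priority(void (*do_blocking_work)(void))
{
    PyThreadState *tstate = PyThreadState_Swap(NULL);
    PyEval_ReleaseLock();              /* let other threads run */

    do_blocking_work();                /* no Python API calls in here */

    PyEval_AcquireLockPri(0);          /* 0 = high priority request */
    PyThreadState_Swap(tstate);
}
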
Index: Python/thread.c
===================================================================
--- Python/thread.c	(revision 79534)
+++ Python/thread.c	(working copy)
@@ -419,3 +419,32 @@
 }
 
 #endif /* Py_HAVE_NATIVE_TLS */
+
+#ifndef THREAD_GIL_FUNCTIONS
+
+/* default gil implementation: map the gil onto an ordinary lock and
+ * ignore the priority argument */
+
+PyThread_type_gil
+PyThread_allocate_gil(void)
+{
+    return (PyThread_type_gil) PyThread_allocate_lock();
+}
+
+void
+PyThread_free_gil(PyThread_type_gil gil)
+{
+    PyThread_free_lock((PyThread_type_lock)gil);
+}
+
+void
+PyThread_acquire_gil(PyThread_type_gil gil, int pri)
+{
+    PyThread_acquire_lock((PyThread_type_lock)gil, 1);
+}
+
+void
+PyThread_release_gil(PyThread_type_gil gil)
+{
+    PyThread_release_lock((PyThread_type_lock)gil);
+}
+#endif
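
The fallback above is compiled only when a platform's thread_xxx.h has not defined THREAD_GIL_FUNCTIONS by including the new thread_gil.h (next in this patch). To illustrate the porting contract that header expects, here is a speculative sketch of what a hypothetical backend built on C11 <threads.h> would have to provide before the #include; CPython does not actually use C11 threads, and Py_FatalError is assumed to be available as it is in the real thread_*.h files.

#include <threads.h>            /* hypothetical C11 backend, illustration only */

#define MUTEX_T mtx_t
#define MUTEX_INIT(mut) \
    do { if (mtx_init(&(mut), mtx_plain) != thrd_success) \
        Py_FatalError("mtx_init(" #mut ") failed"); } while (0)
#define MUTEX_FINI(mut)         mtx_destroy(&(mut))
#define MUTEX_LOCK(mut)         mtx_lock(&(mut))
#define MUTEX_UNLOCK(mut)       mtx_unlock(&(mut))

#define COND_T cnd_t
#define COND_INIT(cond) \
    do { if (cnd_init(&(cond)) != thrd_success) \
        Py_FatalError("cnd_init(" #cond ") failed"); } while (0)
#define COND_FINI(cond)         cnd_destroy(&(cond))
#define COND_SIGNAL(cond)       cnd_signal(&(cond))
#define COND_WAIT(cond, mut)    cnd_wait(&(cond), &(mut))

#define ROUNDROBIN_GIL          /* pick one of the three implementations */
#include "thread_gil.h"

COND_TIMED_WAIT is omitted from the sketch because thread_gil.h does not use it yet.
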
Index: Python/thread_gil.h
===================================================================
--- Python/thread_gil.h	(revision 0)
+++ Python/thread_gil.h	(revision 0)
@@ -0,0 +1,264 @@
+/* Definitions of the PyThread_*_gil functions, using portable
+ * macros for mutexes and condition variables
+ */
+
+#define THREAD_GIL_FUNCTIONS
+
+#if !defined LEGACY_GIL && !defined ROUNDROBIN_GIL && !defined PRIORITY_GIL
+#define LEGACY_GIL
+#endif
+
+#ifdef LEGACY_GIL
+
+/* This GIL implementation is the one traditionally in use with
+ * pthreads.  It has problems because nothing prevents a thread from
+ * reacquiring the gil immediately after releasing it to someone else.
+ * This breaks the Release/Reacquire pattern in ceval.c.
+ * The reason is that modern mutex implementations are typically
+ * greedy, allowing an interloping thread to acquire the mutex as soon
+ * as it is free rather than adhering to a strict FIFO order.  This is
+ * done to reduce lock-convoying problems.
+ * The traditional Windows implementation has not had this issue
+ * because it builds its locks on Event objects, which respect the
+ * FIFO order of waiting threads.
+ */
+
+typedef struct GIL_T
+{
+    MUTEX_T mutex;
+    COND_T cond;
+    char locked;
+} GIL_T;
+
+PyThread_type_gil
+PyThread_allocate_gil(void)
+{
+    GIL_T *gil = PyMem_MALLOC(sizeof(GIL_T));
+    if (!gil)
+        return NULL;
+    MUTEX_INIT(gil->mutex);
+    COND_INIT(gil->cond);
+    gil->locked = 0;
+    return (PyThread_type_gil) gil;
+}
+
+void
+PyThread_free_gil(PyThread_type_gil _gil)
+{
+    GIL_T *gil = (GIL_T*)_gil;
+    MUTEX_FINI(gil->mutex);
+    COND_FINI(gil->cond);
+    PyMem_FREE(gil);
+}
+
+void
+PyThread_acquire_gil(PyThread_type_gil _gil, int pri)
+{
+    GIL_T *gil = (GIL_T*)_gil;
+    MUTEX_LOCK(gil->mutex);
+    while (gil->locked)
+        COND_WAIT(gil->cond, gil->mutex);
+    gil->locked = 1;
+    MUTEX_UNLOCK(gil->mutex);
+}
+
+void
+PyThread_release_gil(PyThread_type_gil _gil)
+{
+    GIL_T *gil = (GIL_T*)_gil;
+    MUTEX_LOCK(gil->mutex);
+    gil->locked = 0;
+    COND_SIGNAL(gil->cond);
+    MUTEX_UNLOCK(gil->mutex);
+}
+#endif
+
+#ifdef ROUNDROBIN_GIL
+
+/* A simple improvement on the legacy GIL.  A thread goes into the
+ * condition wait if another thread is already waiting and has not yet
+ * woken up.  This prevents a thread from jumping the queue ahead of a
+ * waiting thread and allows the Release/Reacquire pattern in ceval.c
+ * to work properly.
+ */
+
+typedef struct GIL_T
+{
+    MUTEX_T mutex;
+    COND_T cond;
+    int n_waiting;
+    char locked;
+} GIL_T;
+
+PyThread_type_gil
+PyThread_allocate_gil(void)
+{
+    GIL_T *gil = PyMem_MALLOC(sizeof(GIL_T));
+    if (!gil)
+        return NULL;
+    MUTEX_INIT(gil->mutex);
+    COND_INIT(gil->cond);
+    gil->locked = 0;
+    gil->n_waiting = 0;
+    return (PyThread_type_gil) gil;
+}
+
+void
+PyThread_free_gil(PyThread_type_gil _gil)
+{
+    GIL_T *gil = (GIL_T*)_gil;
+    MUTEX_FINI(gil->mutex);
+    COND_FINI(gil->cond);
+    PyMem_FREE(gil);
+}
+
+void
+PyThread_acquire_gil(PyThread_type_gil _gil, int pri)
+{
+    GIL_T *gil = (GIL_T*)_gil;
+    MUTEX_LOCK(gil->mutex);
+    /* go to the back of the line if someone else is already waiting
+     * for the GIL
+     */
+    if (gil->locked || gil->n_waiting) {
+        ++gil->n_waiting;
+        do {
+            COND_WAIT(gil->cond, gil->mutex);
+        } while (gil->locked);
+        --gil->n_waiting;
+    }
+    gil->locked = 1;
+    MUTEX_UNLOCK(gil->mutex);
+}
+
+void
+PyThread_release_gil(PyThread_type_gil _gil)
+{
+    GIL_T *gil = (GIL_T*)_gil;
+    MUTEX_LOCK(gil->mutex);
+    gil->locked = 0;
+    COND_SIGNAL(gil->cond);
+    MUTEX_UNLOCK(gil->mutex);
+}
+#endif
+
+#ifdef PRIORITY_GIL
+
+/* Finally, a priority based GIL.  Each priority level has its own
+ * condition variable.  When the lock is released, it is handed off
+ * to the highest level that is currently waiting for the lock.
+ * No one gets the lock until it is handed to them.
+ * Priority is given as a value when the lock is acquired.  The idea
+ * is that the ceval Release/Reacquire step uses low priority (1)
+ * because it is voluntarily giving up the GIL.  Other functions use
+ * high priority (0) because they have just finished I/O or other such
+ * business and are eager to continue.  A CPU-bound thread that
+ * relinquishes the GIL in ceval.c will therefore yield to an I/O-type
+ * gil request that is waiting.
+ * In addition, we can ask ceval.c to give up the GIL immediately if
+ * we see that it was last acquired with a lower priority than ours.
+ */
+
+#define PRIORITY_LEVELS 2
+#define INTERRUPT_GIL
+
+#ifdef INTERRUPT_GIL
+void _PyEval_RelinquishGil(void);
+#endif
+
+typedef struct GIL_LEVEL_T
+{
+    COND_T cond;
+    int n_queue;            /* number of threads waiting at this level */
+} GIL_LEVEL_T;
+
+typedef struct GIL_T
+{
+    MUTEX_T mutex;
+    int state;              /* -1 = free, 0 and positive: locked at priority 'state' */
+    int handoff;            /* -1 = no handoff */
+    GIL_LEVEL_T levels[PRIORITY_LEVELS];
+} GIL_T;
+
+PyThread_type_gil
+PyThread_allocate_gil(void)
+{
+    int i;
+    GIL_T *gil = PyMem_MALLOC(sizeof(GIL_T));
+    if (!gil)
+        return NULL;
+    MUTEX_INIT(gil->mutex);
+    gil->state = -1;        /* free */
+    gil->handoff = -1;      /* no handoff */
+    for (i = 0; i < PRIORITY_LEVELS; ++i) {
+        COND_INIT(gil->levels[i].cond);
+        gil->levels[i].n_queue = 0;
+    }
+    return (PyThread_type_gil) gil;
+}
+
+void
+PyThread_free_gil(PyThread_type_gil _gil)
+{
+    int i;
+    GIL_T *gil = (GIL_T*)_gil;
+    MUTEX_FINI(gil->mutex);
+    for (i = 0; i < PRIORITY_LEVELS; ++i)
+        COND_FINI(gil->levels[i].cond);
+    PyMem_FREE(gil);
+}
+
+void
+PyThread_acquire_gil(PyThread_type_gil _gil, int pri)
+{
+    GIL_T *gil = (GIL_T*)_gil;
+
+    if (pri < 0)
+        pri = 0;
+    else if (pri >= PRIORITY_LEVELS)
+        pri = PRIORITY_LEVELS - 1;
+
+    MUTEX_LOCK(gil->mutex);
+
+    if (gil->state != -1) {
+#ifdef INTERRUPT_GIL
+        if (pri < gil->state)
+            /* we trump whoever is running, ask for the gil */
+            _PyEval_RelinquishGil();
+#endif
+        /* wait for lock handoff */
+        ++gil->levels[pri].n_queue;
+        while (gil->handoff != pri)
+            COND_WAIT(gil->levels[pri].cond, gil->mutex);
+        /* we were handed the lock */
+        gil->handoff = -1;
+    }
+    gil->state = pri;
+    MUTEX_UNLOCK(gil->mutex);
+}
+
+void
+PyThread_release_gil(PyThread_type_gil _gil)
+{
+    GIL_T *gil = (GIL_T*)_gil;
+    int i;
+    MUTEX_LOCK(gil->mutex);
+    /* hand the lock off to the highest priority waiting thread */
+    for (i = 0; i < PRIORITY_LEVELS; ++i)
+        if (gil->levels[i].n_queue > 0) {
+            COND_SIGNAL(gil->levels[i].cond);
+            --gil->levels[i].n_queue;
+            gil->handoff = i;
+            break;
+        }
+    if (i == PRIORITY_LEVELS) {
+        /* no one is waiting */
+        gil->handoff = -1;
+        gil->state = -1;
+    }
+    MUTEX_UNLOCK(gil->mutex);
+}
+
+#endif
\ No newline at end of file
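
To see how the two priority levels are meant to interact, here is a sketch (not part of the patch) of a CPU-bound and an I/O-bound thread sharing a PRIORITY_GIL lock through the public pythread API. All demo_* names are invented and the loop bodies stand in for real work.

#include "pythread.h"

static PyThread_type_gil demo_gil;

static void
demo_cpu_thread(void *arg)
{
    for (;;) {
        PyThread_acquire_gil(demo_gil, 1);  /* 1 = low priority: a voluntary yield */
        /* ... interpret a batch of bytecodes ... */
        PyThread_release_gil(demo_gil);     /* handed straight to any priority-0 waiter */
    }
}

static void
demo_io_thread(void *arg)
{
    for (;;) {
        /* ... block on a socket or file without holding the GIL ... */
        PyThread_acquire_gil(demo_gil, 0);  /* 0 = high priority: goes ahead of the CPU thread */
        /* ... deliver the result to Python code ... */
        PyThread_release_gil(demo_gil);
    }
}

static void
demo_start(void)
{
    PyThread_init_thread();
    demo_gil = PyThread_allocate_gil();
    PyThread_start_new_thread(demo_cpu_thread, NULL);
    PyThread_start_new_thread(demo_io_thread, NULL);
}
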
Index: Python/thread_nt.h
===================================================================
--- Python/thread_nt.h	(revision 79534)
+++ Python/thread_nt.h	(working copy)
@@ -229,7 +229,7 @@
 {
     dprintf(("%ld: PyThread_free_lock(%p) called\n", PyThread_get_thread_ident(),aLock));
 
-    FreeNonRecursiveMutex(aLock) ;
+    FreeNonRecursiveMutex((PNRMUTEX)aLock) ;
 }
 
 /*
@@ -357,3 +357,123 @@
 {}
 
 #endif
+
+/* PyThread_*_gil functions */
+
+/* portable mutexes and condition variables for use in thread_gil.h */
+
+#define MUTEX_T CRITICAL_SECTION
+#define MUTEX_INIT(mut) do { \
+    if (!(InitializeCriticalSectionAndSpinCount(&(mut), 4000))) \
+        Py_FatalError("InitializeCriticalSectionAndSpinCount(" #mut ") failed"); \
+} while (0)
+#define MUTEX_FINI(mut) \
+    DeleteCriticalSection(&(mut))
+#define MUTEX_LOCK(mut) \
+    EnterCriticalSection(&(mut))
+#define MUTEX_UNLOCK(mut) \
+    LeaveCriticalSection(&(mut))
+
+/* We emulate condition variables with a semaphore.
+   We use a semaphore rather than an auto-reset event: although an
+   auto-reset event might appear to solve the lost-wakeup bug (the race
+   between releasing the outer lock and waiting) because it maintains
+   state even though a wait hasn't happened yet, there is still a lost
+   wakeup problem if more than one thread is interrupted in the critical
+   place.  A semaphore solves that.
+   Because it is ok to signal a condition variable with no one waiting,
+   we need to keep track of the number of waiting threads.  Otherwise,
+   the semaphore's count could rise without bound.
+
+   Generic emulations of the pthread_cond_* API using Win32 functions
+   can be found on the Web.  The following read can be edifying (or not):
+   http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
+*/
+typedef struct COND_T
+{
+    HANDLE sem;         /* the semaphore */
+    int n_waiting;      /* how many waiters are still unreleased */
+} COND_T;
+
+__inline void _cond_init(COND_T *cond)
+{
+    /* A semaphore with a large max value.  The positive count
+     * is only needed to catch those "lost wakeup" events and
+     * race conditions when a timed wait elapses.  It is also
+     * useful for "broadcast".
+     */
+    if (!(cond->sem = CreateSemaphore(NULL, 0, 1000, NULL)))
+        Py_FatalError("CreateSemaphore() failed");
+    cond->n_waiting = 0;
+}
+
+__inline void _cond_fini(COND_T *cond)
+{
+    BOOL ok = CloseHandle(cond->sem);
+    if (!ok)
+        Py_FatalError("CloseHandle() failed");
+}
+
+__inline void _cond_wait(COND_T *cond, MUTEX_T *mut)
+{
+    ++cond->n_waiting;
+    MUTEX_UNLOCK(*mut);
+    /* The "lost wakeup bug" would occur if the caller were interrupted
+     * here, but we are safe because we are using a semaphore which has
+     * an internal count.
+     */
+    if (WaitForSingleObject(cond->sem, INFINITE) == WAIT_FAILED)
+        Py_FatalError("WaitForSingleObject() failed");
+    MUTEX_LOCK(*mut);
+}
+
+__inline int _cond_timed_wait(COND_T *cond, MUTEX_T *mut, int us)
+{
+    DWORD r;
+    ++cond->n_waiting;
+    MUTEX_UNLOCK(*mut);
+    r = WaitForSingleObject(cond->sem, us / 1000);
+    if (r == WAIT_FAILED)
+        Py_FatalError("WaitForSingleObject() failed");
+    MUTEX_LOCK(*mut);
+    if (r == WAIT_TIMEOUT)
+        /* possible race with _cond_signal, see below */
+        --cond->n_waiting;
+    return r == WAIT_TIMEOUT;
+}
+
+__inline void _cond_signal(COND_T *cond) {
+    if (cond->n_waiting > 0) {
+        if (!ReleaseSemaphore(cond->sem, 1, NULL)) {
+            Py_FatalError("ReleaseSemaphore() failed");
+        }
+        /* Note the possible race condition here if a timeout
+         * occurs.  It is possible that this "Release" was ineffective
+         * and we decremented twice, leaving the semaphore in a positive
+         * state.  That is ok, because it will cause the next "wait"
+         * to just go through directly and the universe rights itself.
+         */
+        --cond->n_waiting;
+    }
+}
+
+#define COND_INIT(cond) \
+    _cond_init(&(cond))
+#define COND_FINI(cond) \
+    _cond_fini(&(cond))
+#define COND_SIGNAL(cond) \
+    _cond_signal(&(cond))
+#define COND_WAIT(cond, mut) \
+    _cond_wait(&(cond), &(mut))
+#define COND_TIMED_WAIT(cond, mut, us, timeout_result) do { \
+    (timeout_result) = _cond_timed_wait(&(cond), &(mut), us); \
+} while (0)
+
+/* Choose ROUNDROBIN_GIL because that is the traditional behaviour on Windows */
+#define ROUNDROBIN_GIL
+#include "thread_gil.h"     /* platform independent GIL implementation */
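
The condition-variable emulation above is only safe when used the way thread_gil.h uses it: the predicate is always re-tested in a loop while the mutex is held, because a wakeup may be stale. A sketch of that pattern, with invented demo_* names, compiled after the definitions above:

static MUTEX_T demo_mutex;
static COND_T demo_cond;
static int demo_ready;

static void
demo_setup(void)
{
    MUTEX_INIT(demo_mutex);
    COND_INIT(demo_cond);
}

static void
demo_wait_until_ready(void)
{
    MUTEX_LOCK(demo_mutex);
    while (!demo_ready)                      /* predicate loop, as in PyThread_acquire_gil() */
        COND_WAIT(demo_cond, demo_mutex);    /* drops the mutex while blocked */
    demo_ready = 0;                          /* consume the event under the mutex */
    MUTEX_UNLOCK(demo_mutex);
}

static void
demo_post_ready(void)
{
    MUTEX_LOCK(demo_mutex);
    demo_ready = 1;
    COND_SIGNAL(demo_cond);                  /* only releases the semaphore if someone is waiting */
    MUTEX_UNLOCK(demo_mutex);
}
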
Index: Python/thread_pthread.h
===================================================================
--- Python/thread_pthread.h	(revision 79534)
+++ Python/thread_pthread.h	(working copy)
@@ -491,3 +491,64 @@
 }
 
 #define THREAD_SET_STACKSIZE(x)	_pythread_pthread_set_stacksize(x)
+
+/* PyThread_*_gil functions */
+
+/* portable mutexes and condition variables for use in thread_gil.h */
+
+#define ADD_MICROSECONDS(tv, interval) \
+do { \
+    tv.tv_usec += (long) interval; \
+    tv.tv_sec += tv.tv_usec / 1000000; \
+    tv.tv_usec %= 1000000; \
+} while (0)
+
+/* We assume all modern POSIX systems have gettimeofday() */
+#ifdef GETTIMEOFDAY_NO_TZ
+#define GETTIMEOFDAY(ptv) gettimeofday(ptv)
+#else
+#define GETTIMEOFDAY(ptv) gettimeofday(ptv, (struct timezone *)NULL)
+#endif
+
+#define MUTEX_T pthread_mutex_t
+#define MUTEX_INIT(mut) \
+    if (pthread_mutex_init(&mut, NULL)) { \
+        Py_FatalError("pthread_mutex_init(" #mut ") failed"); };
+#define MUTEX_FINI(mut) \
+    if (pthread_mutex_destroy(&mut)) { \
+        Py_FatalError("pthread_mutex_destroy(" #mut ") failed"); };
+#define MUTEX_LOCK(mut) \
+    if (pthread_mutex_lock(&mut)) { \
+        Py_FatalError("pthread_mutex_lock(" #mut ") failed"); };
+#define MUTEX_UNLOCK(mut) \
+    if (pthread_mutex_unlock(&mut)) { \
+        Py_FatalError("pthread_mutex_unlock(" #mut ") failed"); };
+
+#define COND_T pthread_cond_t
+#define COND_INIT(cond) \
+    if (pthread_cond_init(&cond, NULL)) { \
+        Py_FatalError("pthread_cond_init(" #cond ") failed"); };
+#define COND_FINI(cond) \
+    if (pthread_cond_destroy(&cond)) { \
+        Py_FatalError("pthread_cond_destroy(" #cond ") failed"); };
+#define COND_SIGNAL(cond) \
+    if (pthread_cond_signal(&cond)) { \
+        Py_FatalError("pthread_cond_signal(" #cond ") failed"); };
+#define COND_WAIT(cond, mut) \
+    if (pthread_cond_wait(&cond, &mut)) { \
+        Py_FatalError("pthread_cond_wait(" #cond ") failed"); };
+#define COND_TIMED_WAIT(cond, mut, microseconds, timeout_result) \
+    { \
+        int r; \
+        struct timespec ts; \
+        struct timeval deadline; \
+        \
+        GETTIMEOFDAY(&deadline); \
+        ADD_MICROSECONDS(deadline, microseconds); \
+        ts.tv_sec = deadline.tv_sec; \
+        ts.tv_nsec = deadline.tv_usec * 1000; \
+        \
+        r = pthread_cond_timedwait(&cond, &mut, &ts); \
+        if (r == ETIMEDOUT) \
+            timeout_result = 1; \
+        else if (r) \
+            Py_FatalError("pthread_cond_timedwait(" #cond ") failed"); \
+        else \
+            timeout_result = 0; \
+    }
+
+#include "thread_gil.h"     /* platform independent GIL implementation */
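
Both backends also define COND_TIMED_WAIT, which thread_gil.h does not use yet. As a hint of what it is for, here is a speculative sketch of a timed acquire against the LEGACY_GIL struct layout (mutex, cond, locked); the function name is invented, and each retry restarts the timeout, which is good enough for an illustration.

/* Returns 1 if the lock was obtained, 0 on timeout. */
static int
demo_acquire_with_timeout(GIL_T *gil, long microseconds)
{
    int timed_out = 0;
    MUTEX_LOCK(gil->mutex);
    while (gil->locked && !timed_out)
        COND_TIMED_WAIT(gil->cond, gil->mutex, microseconds, timed_out);
    if (!timed_out)
        gil->locked = 1;                     /* we now own the lock */
    MUTEX_UNLOCK(gil->mutex);
    return !timed_out;
}
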