Index: Python/ceval_gil.h =================================================================== --- Python/ceval_gil.h (revision 80497) +++ Python/ceval_gil.h (working copy) @@ -17,33 +17,30 @@ #undef FORCE_SWITCHING #define FORCE_SWITCHING - /* Notes about the implementation: - The GIL is just a boolean variable (gil_locked) whose access is protected - by a mutex (gil_mutex), and whose changes are signalled by a condition - variable (gil_cond). gil_mutex is taken for short periods of time, + by a mutex (gil_mutex), and whose changes are signalled by two condition + variables (gil_cpu_cond and gil_io_cond). gil_mutex is taken for short periods of time, and therefore mostly uncontended. - In the GIL-holding thread, the main loop (PyEval_EvalFrameEx) must be able to release the GIL on demand by another thread. A volatile boolean variable (gil_drop_request) is used for that purpose, which is checked at every turn of the eval loop. That variable is set after a wait of - `interval` microseconds on `gil_cond` has timed out. + `interval` microseconds. [Actually, another volatile boolean variable (eval_breaker) is used which ORs several conditions into one. Volatile booleans are sufficient as inter-thread signalling means since Python is run on cache-coherent architectures only.] - - A thread wanting to take the GIL will first let pass a given amount of - time (`interval` microseconds) before setting gil_drop_request. This - encourages a defined switching period, but doesn't enforce it since - opcodes can take an arbitrary time to execute. - - The `interval` value is available for the user to read and modify - using the Python API `sys.{get,set}switchinterval()`. + - A special monitoring thread is used to monitor thread switching. + It operates by recording how many thread switches have occurred, waiting + for a given amount of time ('interval' microseconds), and then + checking to see if any new thread switches have happend. If not, + it sets gil_drop_request to force a thread switch. - When a thread releases the GIL and gil_drop_request is set, that thread ensures that another GIL-awaiting thread gets scheduled. @@ -57,6 +54,20 @@ run and end up being the first to re-acquire it, making the "timeslices" much longer than expected. (Note: this mechanism is enabled with FORCE_SWITCHING above) + + - Threads are automatically separated into CPU-bound and I/O-bound + depending on whether or not they are forced to drop the GIL by time + expiration. A PyThreadState attribute cpu_bound is set accordingly. + I/O bound threads are always given priority and will immediately + preempty CPU-bound threads if necessary. A CPU-bound thread + becomes an I/O bound thread if it gives up the GIL before its + time has expired. + + - Two condition variables (gil_cpu_cond and gil_io_cond) are used + for waiting. When signalling, the I/O conditional variable is + given precedence unless there are no I/O bound threads waiting. + A thread always waits on a condition variable according to its + classification. */ #ifndef _POSIX_THREADS @@ -216,12 +227,28 @@ anyone else was scheduled after we dropped the GIL. */ static PyThreadState *gil_last_holder = NULL; +/* Scheduling behavior of last GIL holder. We need to keep it in + a separate variable because the gil_last_holder pointer is + not entirely reliable--the thread being pointed to might have + been destroyed. */ +static int gil_last_holder_cpu = 0; + +/* Number of threads waiting on the GIL */ +static unsigned long gil_num_waiting = 0; + +/* Number of threads waiting on the I/O gil */ +static unsigned long gil_num_io_waiting = 0; + /* This condition variable allows one or several threads to wait until the GIL is released. In addition, the mutex also protects the above variables. */ -static COND_T gil_cond; + +static COND_T gil_io_cond; /* Condition for I/O bound tasks */ +static COND_T gil_cpu_cond; /* Condition for CPU bound tasks */ + static MUTEX_T gil_mutex; + #ifdef FORCE_SWITCHING /* This condition variable helps the GIL-releasing thread wait for a GIL-awaiting thread to be scheduled and take the GIL. */ @@ -229,7 +256,91 @@ static MUTEX_T switch_mutex; #endif +/* Thread dedicated to thread switching. Monitors the number of thread + switches and makes the decision to have a thread forcefully drop + the GIL if necessary. This thread runs completely independently of + the interpreter, and makes no use of Python API calls. It should + be safe to leave it running and to forget about it */ +static int gil_monitor_running = 0; /* Flag indicating monitor execution */ +static int gil_monitor_suspend = 0; /* Flag indicating whether monitor should suspend */ +static MUTEX_T gil_monitor_lock; /* Mutex lock that controls execution of monitor */ +static COND_T gil_monitor_run; /* Cond for resuming the monitor if suspended */ + +#define MONITOR_TRIES_BEFORE_SUSPEND 2000 /* Number of repeated cycles the monitor will + execute before going to sleep if no threads + hold the GIL. */ + +void gil_suspend_monitor(void) { + MUTEX_LOCK(gil_monitor_lock); + gil_monitor_suspend = 1; + MUTEX_UNLOCK(gil_monitor_lock); +} + +void gil_resume_monitor(void) { + MUTEX_LOCK(gil_monitor_lock); + gil_monitor_suspend = 0; + COND_SIGNAL(gil_monitor_run); + MUTEX_UNLOCK(gil_monitor_lock); +} + +static MUTEX_T mon_mutex; +static COND_T mon_cond; + +static void gil_monitor(void *arg) { + unsigned long last_switch_no; + int timed_out; + int ntries = 0; + + MUTEX_INIT(mon_mutex); + COND_INIT(mon_cond); + gil_monitor_running = 1; + while(1) { + + /* Check if the monitor is to be suspended. If so, go into a deep sleep */ + MUTEX_LOCK(gil_monitor_lock); + while (gil_monitor_suspend) { + COND_WAIT(gil_monitor_run,gil_monitor_lock); + } + MUTEX_UNLOCK(gil_monitor_lock); + + /* Record the last switch number */ + last_switch_no = gil_switch_number; + + /* Sleep for the specified interval + * + * Discussion : This is a bit of hack. To wait, you need to have + * some kind of reliable mechanism for short-duration sleeping. A + * timed wait on a private condition variable that is never signaled works + * as well as anything else (especially since it was already implemented + * in this file). If you only wanted to sleep, you might have to adapt + * the function floatsleep() from Modules/timemodule.c. */ + + MUTEX_LOCK(mon_mutex); + COND_TIMED_WAIT(mon_cond,mon_mutex,INTERVAL,timed_out); + MUTEX_UNLOCK(mon_mutex); + + /* If the switch number is the same, force a context switch */ + if (last_switch_no == gil_switch_number) { + if (gil_num_waiting > 0) { + SET_GIL_DROP_REQUEST(); + } else if (!gil_locked && !gil_num_waiting) { + /* If there are no threads are waiting and no one is holding + the lock, increment a counter that's used to determine if we + later suspend */ + ntries++; + if (ntries == MONITOR_TRIES_BEFORE_SUSPEND) { + gil_suspend_monitor(); + ntries = 0; + } + } + } else { + /* A thread switch happened recently. Reset the number of tries */ + ntries = 0; + } + } +} + static int gil_created(void) { return gil_locked >= 0; @@ -241,12 +352,24 @@ #ifdef FORCE_SWITCHING MUTEX_INIT(switch_mutex); #endif - COND_INIT(gil_cond); + COND_INIT(gil_io_cond); + COND_INIT(gil_cpu_cond); + #ifdef FORCE_SWITCHING COND_INIT(switch_cond); #endif gil_locked = 0; gil_last_holder = NULL; + gil_last_holder_cpu = 0; + gil_num_waiting = 0; + gil_num_io_waiting = 0; + + if (!gil_monitor_running) { + /* Launch monitor thread */ + MUTEX_INIT(gil_monitor_lock); + COND_INIT(gil_monitor_run); + PyThread_start_new_thread(gil_monitor,NULL); + } } static void recreate_gil(void) @@ -264,7 +387,18 @@ MUTEX_LOCK(gil_mutex); gil_locked = 0; - COND_SIGNAL(gil_cond); + if (gil_num_io_waiting > 0) { + /* Signal I/O tasks if any */ + COND_SIGNAL(gil_io_cond); + } else { + /* If no I/O tasks, signal CPU-bound tasks */ + COND_SIGNAL(gil_cpu_cond); + } + /* Make the thread as CPU-bound or not depending on whether it was forced off */ + if (tstate) { + tstate->cpu_bound = gil_drop_request; + } + MUTEX_UNLOCK(gil_mutex); #ifdef FORCE_SWITCHING @@ -288,29 +422,55 @@ static void take_gil(PyThreadState *tstate) { int err; + int timeout = 0; + if (tstate == NULL) Py_FatalError("take_gil: NULL tstate"); err = errno; MUTEX_LOCK(gil_mutex); + gil_num_waiting += 1; + /* If I/O bound, additionally increase number of waiting I/O threads */ + if (!tstate->cpu_bound) { + gil_num_io_waiting += 1; + } + if (!gil_locked) - goto _ready; - - COND_RESET(gil_cond); - while (gil_locked) { - int timed_out = 0; - unsigned long saved_switchnum; + goto _ready; - saved_switchnum = gil_switch_number; - COND_TIMED_WAIT(gil_cond, gil_mutex, INTERVAL, timed_out); - /* If we timed out and no switch occurred in the meantime, it is time - to ask the GIL-holding thread to drop it. */ - if (timed_out && gil_locked && gil_switch_number == saved_switchnum) { - SET_GIL_DROP_REQUEST(); - } + /* Wait on the appropriate GIL depending on thread's classification */ + if (!tstate->cpu_bound) { + /* We are I/O bound. If the current thread is CPU-bound, force it off now! */ + if (gil_last_holder_cpu) { + SET_GIL_DROP_REQUEST(); + } + COND_RESET(gil_io_cond); + /* If the monitor is suspended, we must do a timed wait */ + if (gil_monitor_suspend) { + COND_TIMED_WAIT(gil_io_cond, gil_mutex, INTERVAL, timeout); + if (timeout && gil_monitor_suspend) { + gil_resume_monitor(); + } + } + while (gil_locked) { + COND_WAIT(gil_io_cond, gil_mutex); + } + } else { + COND_RESET(gil_cpu_cond); + /* If the monitor is suspended, we must do a timed wait */ + if (gil_monitor_suspend) { + COND_TIMED_WAIT(gil_cpu_cond, gil_mutex, INTERVAL, timeout); + if (timeout && gil_monitor_suspend) { + gil_resume_monitor(); + } + } + while (gil_locked) { + COND_WAIT(gil_cpu_cond, gil_mutex); + } } _ready: + #ifdef FORCE_SWITCHING /* This mutex must be taken before modifying gil_last_holder (see drop_gil()). */ MUTEX_LOCK(switch_mutex); @@ -320,6 +480,7 @@ if (tstate != gil_last_holder) { gil_last_holder = tstate; + gil_last_holder_cpu = tstate->cpu_bound; ++gil_switch_number; } #ifdef FORCE_SWITCHING @@ -332,7 +493,10 @@ if (tstate->async_exc != NULL) { _PyEval_SignalAsyncExc(); } - + gil_num_waiting -= 1; + if (!tstate->cpu_bound) { + gil_num_io_waiting -= 1; + } MUTEX_UNLOCK(gil_mutex); errno = err; } Index: Python/pystate.c =================================================================== --- Python/pystate.c (revision 80497) +++ Python/pystate.c (working copy) @@ -176,6 +176,8 @@ tstate->use_tracing = 0; tstate->tick_counter = 0; tstate->gilstate_counter = 0; + tstate->cpu_bound = 0; + tstate->async_exc = NULL; #ifdef WITH_THREAD tstate->thread_id = PyThread_get_thread_ident(); Index: Include/pystate.h =================================================================== --- Include/pystate.h (revision 80470) +++ Include/pystate.h (working copy) @@ -103,6 +103,8 @@ PyObject *async_exc; /* Asynchronous exception to raise */ long thread_id; /* Thread id where this tstate was created */ + int cpu_bound; /* Flag indicating whether thread is CPU-bound or not */ + /* XXX signal handlers should also be here */ } PyThreadState;