diff --git a/Include/bfs.h b/Include/bfs.h new file mode 100644 --- /dev/null +++ b/Include/bfs.h @@ -0,0 +1,32 @@ +/* + * Python/bfs.h + * + * Simplified implementation of the Brain F**k Scheduler by Con Kolivas + * http://ck.kolivas.org/patches/bfs/sched-BFS.txt + * + * "The goal of the Brain F**k Scheduler, referred to as BFS from here on, is to + * completely do away with the complex designs of the past for the cpu process + * scheduler and instead implement one that is very simple in basic design. + * The main focus of BFS is to achieve excellent desktop interactivity and + * responsiveness without heuristics and tuning knobs that are difficult to + * understand, impossible to model and predict the effect of, and when tuned to + * one workload cause massive detriment to another." - Con Kolivas + */ + + +#ifndef Py_BFS_H +#define Py_BFS_H +#ifdef __cplusplus +extern "C" { +#endif + + +PyAPI_FUNC(void) bfs_clear(PyThreadState *tstate); + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_BFS_H */ + + diff --git a/Include/cycle.h b/Include/cycle.h new file mode 100644 --- /dev/null +++ b/Include/cycle.h @@ -0,0 +1,515 @@ +/* + * Copyright (c) 2003, 2007-8 Matteo Frigo + * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + + +/* machine-dependent cycle counters code. Needs to be inlined. */ + +/***************************************************************************/ +/* To use the cycle counters in your code, simply #include "cycle.h" (this + file), and then use the functions/macros: + + ticks getticks(void); + + ticks is an opaque typedef defined below, representing the current time. + You extract the elapsed time between two calls to gettick() via: + + double elapsed(ticks t1, ticks t0); + + which returns a double-precision variable in arbitrary units. You + are not expected to convert this into human units like seconds; it + is intended only for *comparisons* of time intervals. + + (In order to use some of the OS-dependent timer routines like + Solaris' gethrtime, you need to paste the autoconf snippet below + into your configure.ac file and #include "config.h" before cycle.h, + or define the relevant macros manually if you are not using autoconf.) 
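   As a usage sketch (illustrative only; do_work() is a placeholder, not part
   of this header), timing a region of code looks like:

       ticks t0, t1;
       double dt;

       t0 = getticks();
       do_work();
       t1 = getticks();
       dt = elapsed(t1, t0);

   where dt is in arbitrary units and is meaningful only when compared with
   other intervals measured the same way.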
+*/ + +/***************************************************************************/ +/* This file uses macros like HAVE_GETHRTIME that are assumed to be + defined according to whether the corresponding function/type/header + is available on your system. The necessary macros are most + conveniently defined if you are using GNU autoconf, via the tests: + + dnl --------------------------------------------------------------------- + + AC_C_INLINE + AC_HEADER_TIME + AC_CHECK_HEADERS([sys/time.h c_asm.h intrinsics.h mach/mach_time.h]) + + AC_CHECK_TYPE([hrtime_t],[AC_DEFINE(HAVE_HRTIME_T, 1, [Define to 1 if hrtime_t is defined in ])],,[#if HAVE_SYS_TIME_H +#include +#endif]) + + AC_CHECK_FUNCS([gethrtime read_real_time time_base_to_time clock_gettime mach_absolute_time]) + + dnl Cray UNICOS _rtc() (real-time clock) intrinsic + AC_MSG_CHECKING([for _rtc intrinsic]) + rtc_ok=yes + AC_TRY_LINK([#ifdef HAVE_INTRINSICS_H +#include +#endif], [_rtc()], [AC_DEFINE(HAVE__RTC,1,[Define if you have the UNICOS _rtc() intrinsic.])], [rtc_ok=no]) + AC_MSG_RESULT($rtc_ok) + + dnl --------------------------------------------------------------------- +*/ + +/***************************************************************************/ + +#if TIME_WITH_SYS_TIME +# include +# include +#else +# if HAVE_SYS_TIME_H +# include +# else +# include +# endif +#endif + +#define INLINE_ELAPSED(INL) static INL double elapsed(ticks t1, ticks t0) \ +{ \ + return (double)t1 - (double)t0; \ +} + +/*----------------------------------------------------------------*/ +/* Solaris */ +#if defined(HAVE_GETHRTIME) && defined(HAVE_HRTIME_T) && !defined(HAVE_TICK_COUNTER) +typedef hrtime_t ticks; + +#define getticks gethrtime + +INLINE_ELAPSED(inline) + +#define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +/* AIX v. 4+ routines to read the real-time clock or time-base register */ +#if defined(HAVE_READ_REAL_TIME) && defined(HAVE_TIME_BASE_TO_TIME) && !defined(HAVE_TICK_COUNTER) +typedef timebasestruct_t ticks; + +static __inline ticks getticks(void) +{ + ticks t; + read_real_time(&t, TIMEBASE_SZ); + return t; +} + +static __inline double elapsed(ticks t1, ticks t0) /* time in nanoseconds */ +{ + time_base_to_time(&t1, TIMEBASE_SZ); + time_base_to_time(&t0, TIMEBASE_SZ); + return (((double)t1.tb_high - (double)t0.tb_high) * 1.0e9 + + ((double)t1.tb_low - (double)t0.tb_low)); +} + +#define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +/* + * PowerPC ``cycle'' counter using the time base register. + */ +#if ((((defined(__GNUC__) && (defined(__powerpc__) || defined(__ppc__))) || (defined(__MWERKS__) && defined(macintosh)))) || (defined(__IBM_GCC_ASM) && (defined(__powerpc__) || defined(__ppc__)))) && !defined(HAVE_TICK_COUNTER) +typedef unsigned long long ticks; + +static __inline__ ticks getticks(void) +{ + unsigned int tbl, tbu0, tbu1; + + do { + __asm__ __volatile__ ("mftbu %0" : "=r"(tbu0)); + __asm__ __volatile__ ("mftb %0" : "=r"(tbl)); + __asm__ __volatile__ ("mftbu %0" : "=r"(tbu1)); + } while (tbu0 != tbu1); + + return (((unsigned long long)tbu0) << 32) | tbl; +} + +INLINE_ELAPSED(__inline__) + +#define HAVE_TICK_COUNTER +#endif + +/* MacOS/Mach (Darwin) time-base register interface (unlike UpTime, + from Carbon, requires no additional libraries to be linked). 
*/ +#if defined(HAVE_MACH_ABSOLUTE_TIME) && defined(HAVE_MACH_MACH_TIME_H) && !defined(HAVE_TICK_COUNTER) +#include +typedef uint64_t ticks; +#define getticks mach_absolute_time +INLINE_ELAPSED(__inline__) +#define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +/* + * Pentium cycle counter + */ +#if (defined(__GNUC__) || defined(__ICC)) && defined(__i386__) && !defined(HAVE_TICK_COUNTER) +typedef unsigned long long ticks; + +static __inline__ ticks getticks(void) +{ + ticks ret; + + __asm__ __volatile__("rdtsc": "=A" (ret)); + /* no input, nothing else clobbered */ + return ret; +} + +INLINE_ELAPSED(__inline__) + +#define HAVE_TICK_COUNTER +#define TIME_MIN 5000.0 /* unreliable pentium IV cycle counter */ +#endif + +/* Visual C++ -- thanks to Morten Nissov for his help with this */ +#if _MSC_VER >= 1200 && _M_IX86 >= 500 && !defined(HAVE_TICK_COUNTER) +#include +typedef LARGE_INTEGER ticks; +#define RDTSC __asm __emit 0fh __asm __emit 031h /* hack for VC++ 5.0 */ + +static __inline ticks getticks(void) +{ + ticks retval; + + __asm { + RDTSC + mov retval.HighPart, edx + mov retval.LowPart, eax + } + return retval; +} + +static __inline double elapsed(ticks t1, ticks t0) +{ + return (double)t1.QuadPart - (double)t0.QuadPart; +} + +#define HAVE_TICK_COUNTER_LARGE_INTEGER +#define HAVE_TICK_COUNTER +#define TIME_MIN 5000.0 /* unreliable pentium IV cycle counter */ +#endif + +/*----------------------------------------------------------------*/ +/* + * X86-64 cycle counter + */ +#if (defined(__GNUC__) || defined(__ICC) || defined(__SUNPRO_C)) && defined(__x86_64__) && !defined(HAVE_TICK_COUNTER) +typedef unsigned long long ticks; + +static __inline__ ticks getticks(void) +{ + unsigned a, d; + asm volatile("rdtsc" : "=a" (a), "=d" (d)); + return ((ticks)a) | (((ticks)d) << 32); +} + +INLINE_ELAPSED(__inline__) + +#define HAVE_TICK_COUNTER +#endif + +/* PGI compiler, courtesy Cristiano Calonaci, Andrea Tarsi, & Roberto Gori. + NOTE: this code will fail to link unless you use the -Masmkeyword compiler + option (grrr). */ +#if defined(__PGI) && defined(__x86_64__) && !defined(HAVE_TICK_COUNTER) +typedef unsigned long long ticks; +static ticks getticks(void) +{ + asm(" rdtsc; shl $0x20,%rdx; mov %eax,%eax; or %rdx,%rax; "); +} +INLINE_ELAPSED(__inline__) +#define HAVE_TICK_COUNTER +#endif + +/* Visual C++, courtesy of Dirk Michaelis */ +#if _MSC_VER >= 1400 && (defined(_M_AMD64) || defined(_M_X64)) && !defined(HAVE_TICK_COUNTER) + +#include +#pragma intrinsic(__rdtsc) +typedef unsigned __int64 ticks; +#define getticks __rdtsc +INLINE_ELAPSED(__inline) + +#define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +/* + * IA64 cycle counter + */ + +/* intel's icc/ecc compiler */ +#if (defined(__EDG_VERSION) || defined(__ECC)) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER) +typedef unsigned long ticks; +#include + +static __inline__ ticks getticks(void) +{ + return __getReg(_IA64_REG_AR_ITC); +} + +INLINE_ELAPSED(__inline__) + +#define HAVE_TICK_COUNTER +#endif + +/* gcc */ +#if defined(__GNUC__) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER) +typedef unsigned long ticks; + +static __inline__ ticks getticks(void) +{ + ticks ret; + + __asm__ __volatile__ ("mov %0=ar.itc" : "=r"(ret)); + return ret; +} + +INLINE_ELAPSED(__inline__) + +#define HAVE_TICK_COUNTER +#endif + +/* HP/UX IA64 compiler, courtesy Teresa L. 
Johnson: */ +#if defined(__hpux) && defined(__ia64) && !defined(HAVE_TICK_COUNTER) +#include +typedef unsigned long ticks; + +static inline ticks getticks(void) +{ + ticks ret; + + ret = _Asm_mov_from_ar (_AREG_ITC); + return ret; +} + +INLINE_ELAPSED(inline) + +#define HAVE_TICK_COUNTER +#endif + +/* Microsoft Visual C++ */ +#if defined(_MSC_VER) && defined(_M_IA64) && !defined(HAVE_TICK_COUNTER) +typedef unsigned __int64 ticks; + +# ifdef __cplusplus +extern "C" +# endif +ticks __getReg(int whichReg); +#pragma intrinsic(__getReg) + +static __inline ticks getticks(void) +{ + volatile ticks temp; + temp = __getReg(3116); + return temp; +} + +INLINE_ELAPSED(inline) + +#define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +/* + * PA-RISC cycle counter + */ +#if defined(__hppa__) || defined(__hppa) && !defined(HAVE_TICK_COUNTER) +typedef unsigned long ticks; + +# ifdef __GNUC__ +static __inline__ ticks getticks(void) +{ + ticks ret; + + __asm__ __volatile__("mfctl 16, %0": "=r" (ret)); + /* no input, nothing else clobbered */ + return ret; +} +# else +# include +static inline unsigned long getticks(void) +{ + register ticks ret; + _MFCTL(16, ret); + return ret; +} +# endif + +INLINE_ELAPSED(inline) + +#define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +/* S390, courtesy of James Treacy */ +#if defined(__GNUC__) && defined(__s390__) && !defined(HAVE_TICK_COUNTER) +typedef unsigned long long ticks; + +static __inline__ ticks getticks(void) +{ + ticks cycles; + __asm__("stck 0(%0)" : : "a" (&(cycles)) : "memory", "cc"); + return cycles; +} + +INLINE_ELAPSED(__inline__) + +#define HAVE_TICK_COUNTER +#endif +/*----------------------------------------------------------------*/ +#if defined(__GNUC__) && defined(__alpha__) && !defined(HAVE_TICK_COUNTER) +/* + * The 32-bit cycle counter on alpha overflows pretty quickly, + * unfortunately. A 1GHz machine overflows in 4 seconds. 
+ */ +typedef unsigned int ticks; + +static __inline__ ticks getticks(void) +{ + unsigned long cc; + __asm__ __volatile__ ("rpcc %0" : "=r"(cc)); + return (cc & 0xFFFFFFFF); +} + +INLINE_ELAPSED(__inline__) + +#define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +#if defined(__GNUC__) && defined(__sparc_v9__) && !defined(HAVE_TICK_COUNTER) +typedef unsigned long ticks; + +static __inline__ ticks getticks(void) +{ + ticks ret; + __asm__ __volatile__("rd %%tick, %0" : "=r" (ret)); + return ret; +} + +INLINE_ELAPSED(__inline__) + +#define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +#if (defined(__DECC) || defined(__DECCXX)) && defined(__alpha) && defined(HAVE_C_ASM_H) && !defined(HAVE_TICK_COUNTER) +# include +typedef unsigned int ticks; + +static __inline ticks getticks(void) +{ + unsigned long cc; + cc = asm("rpcc %v0"); + return (cc & 0xFFFFFFFF); +} + +INLINE_ELAPSED(__inline) + +#define HAVE_TICK_COUNTER +#endif +/*----------------------------------------------------------------*/ +/* SGI/Irix */ +#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_SGI_CYCLE) && !defined(HAVE_TICK_COUNTER) +typedef struct timespec ticks; + +static inline ticks getticks(void) +{ + struct timespec t; + clock_gettime(CLOCK_SGI_CYCLE, &t); + return t; +} + +static inline double elapsed(ticks t1, ticks t0) +{ + return ((double)t1.tv_sec - (double)t0.tv_sec) * 1.0E9 + + ((double)t1.tv_nsec - (double)t0.tv_nsec); +} +#define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +/* Cray UNICOS _rtc() intrinsic function */ +#if defined(HAVE__RTC) && !defined(HAVE_TICK_COUNTER) +#ifdef HAVE_INTRINSICS_H +# include +#endif + +typedef long long ticks; + +#define getticks _rtc + +INLINE_ELAPSED(inline) + +#define HAVE_TICK_COUNTER +#endif + +/*----------------------------------------------------------------*/ +/* MIPS ZBus */ +#if HAVE_MIPS_ZBUS_TIMER +#if defined(__mips__) && !defined(HAVE_TICK_COUNTER) +#include +#include +#include + +typedef uint64_t ticks; + +static inline ticks getticks(void) +{ + static uint64_t* addr = 0; + + if (addr == 0) + { + uint32_t rq_addr = 0x10030000; + int fd; + int pgsize; + + pgsize = getpagesize(); + fd = open ("/dev/mem", O_RDONLY | O_SYNC, 0); + if (fd < 0) { + perror("open"); + return NULL; + } + addr = mmap(0, pgsize, PROT_READ, MAP_SHARED, fd, rq_addr); + close(fd); + if (addr == (uint64_t *)-1) { + perror("mmap"); + return NULL; + } + } + + return *addr; +} + +INLINE_ELAPSED(inline) + +#define HAVE_TICK_COUNTER +#endif +#endif /* HAVE_MIPS_ZBUS_TIMER */ + diff --git a/Include/intrusive.h b/Include/intrusive.h new file mode 100644 --- /dev/null +++ b/Include/intrusive.h @@ -0,0 +1,62 @@ +/* + * Python/intrusive.h + * + * Definitions for intrusive list. 
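   A usage sketch (illustrative; struct task, queue and t are hypothetical
   names, only the _list_*() helpers and CONTAINER() below are defined here):

       struct task {
           int id;
           struct _list node;
       };

       struct _list queue;
       struct task t;

       _list_init(&queue);
       _list_append(&queue, &t.node);
       if (!_list_empty(&queue)) {
           struct task *first = CONTAINER(struct task, node, queue.next);
           _list_pop(&first->node);
       }

   The node member is the embedded link; CONTAINER() recovers the address of
   the enclosing struct task from a pointer to that link.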
+ */ + + +#ifndef Py_INTRUSIVE_H +#define Py_INTRUSIVE_H +#ifdef __cplusplus +extern "C" { +#endif + + +#define CONTAINER(type, member, ptr) \ + ((type *)((char *)(ptr) - offsetof(type, member))) + +struct _list { + struct _list *next; + struct _list *prev; +}; + +/* Py_LOCAL_INLINE() does not work - see issue bugs.python.org/issue5553 */ + +#ifdef MS_WINDOWS +#define _LOCAL(type) static type +#else +#define _LOCAL(type) static inline type +#endif + +_LOCAL(void) _list_init(struct _list *l) { + l->next = l; + l->prev = l; +} + +_LOCAL(int) _list_empty(struct _list *l) { + return l->next == l; +} + +_LOCAL(void) _list_append(struct _list *l, struct _list *item) { + struct _list *tail = l->prev; + tail->next = item; + item->prev = tail; + item->next = l; + l->prev = item; +} + +_LOCAL(void) _list_pop(struct _list *item) { + item->next->prev = item->prev; + item->prev->next = item->next; + item->prev = NULL; + item->next = NULL; +} + + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTRUSIVE_H */ + + diff --git a/Include/mutexed.h b/Include/mutexed.h new file mode 100644 --- /dev/null +++ b/Include/mutexed.h @@ -0,0 +1,156 @@ +/* + * Mutex related macros for the BFS scheduler - taken from ceval_gil.h. + */ + +#include + +#ifndef _POSIX_THREADS +/* This means pthreads are not implemented in libc headers, hence the macro + not present in unistd.h. But they still can be implemented as an external + library (e.g. gnu pth in pthread emulation) */ +# ifdef HAVE_PTHREAD_H +# include /* _POSIX_THREADS */ +# endif +#endif + +#ifdef _POSIX_THREADS + +/* + * POSIX support + */ + +#include + +#define ADD_MICROSECONDS(tv, interval) \ +do { \ + tv.tv_usec += (long) interval; \ + tv.tv_sec += tv.tv_usec / 1000000; \ + tv.tv_usec %= 1000000; \ +} while (0) + +/* We assume all modern POSIX systems have gettimeofday() */ +#ifdef GETTIMEOFDAY_NO_TZ +#define GETTIMEOFDAY(ptv) gettimeofday(ptv) +#else +#define GETTIMEOFDAY(ptv) gettimeofday(ptv, (struct timezone *)NULL) +#endif + +#define MUTEX_T pthread_mutex_t +#define MUTEX_INIT(mut) \ + if (pthread_mutex_init(&mut, NULL)) { \ + Py_FatalError("pthread_mutex_init(" #mut ") failed"); }; +#define MUTEX_LOCK(mut) \ + if (pthread_mutex_lock(&mut)) { \ + Py_FatalError("pthread_mutex_lock(" #mut ") failed"); }; +#define MUTEX_UNLOCK(mut) \ + if (pthread_mutex_unlock(&mut)) { \ + Py_FatalError("pthread_mutex_unlock(" #mut ") failed"); }; + +#define COND_T pthread_cond_t +#define COND_INIT(cond) \ + if (pthread_cond_init(&cond, NULL)) { \ + Py_FatalError("pthread_cond_init(" #cond ") failed"); }; +#define COND_DESTROY(cond) \ + if (pthread_cond_destroy(&cond)) { \ + Py_FatalError("pthread_cond_destroy(" #cond ") failed"); }; +#define COND_RESET(cond) +#define COND_SIGNAL(cond) \ + if (pthread_cond_signal(&cond)) { \ + Py_FatalError("pthread_cond_signal(" #cond ") failed"); }; +#define COND_WAIT(cond, mut) \ + if (pthread_cond_wait(&cond, &mut)) { \ + Py_FatalError("pthread_cond_wait(" #cond ") failed"); }; +#define COND_TIMED_WAIT(cond, mut, seconds, timestamp_now, timeout_result) \ + { \ + int r; \ + struct timespec ts; \ + struct timeval deadline; \ + \ + if (timestamp_now == 0) { \ + GETTIMEOFDAY(&deadline); \ + } \ + else { \ + deadline.tv_sec = (int) timestamp_now; \ + deadline.tv_usec = (int) ((timestamp_now - deadline.tv_sec) * 1000000); \ + } \ + ADD_MICROSECONDS(deadline, ((int)(seconds * 1000000))); \ + ts.tv_sec = deadline.tv_sec; \ + ts.tv_nsec = deadline.tv_usec * 1000; \ + \ + r = pthread_cond_timedwait(&cond, &mut, &ts); \ + if (r == ETIMEDOUT) \ + 
timeout_result = 1; \ + else if (r) \ + Py_FatalError("pthread_cond_timedwait(" #cond ") failed"); \ + else \ + timeout_result = 0; \ + } \ + +#elif defined(NT_THREADS) + +/* + * Windows (2000 and later, as well as (hopefully) CE) support + */ + +#include + +/* Use critical section since it is significantly faster than a mutex object. + * It should be enough for the needs of the BFS scheduler since on any + * signaled event there is exactly one waiting thread. However the macros + * are not suited for the general case so the code should probably move + * somewhere else. */ + +#define MUTEX_T CRITICAL_SECTION +#define MUTEX_INIT(mut) \ + InitializeCriticalSectionAndSpinCount(&mut, 4000); +#define MUTEX_LOCK(mut) \ + EnterCriticalSection(&mut); +#define MUTEX_UNLOCK(mut) \ + LeaveCriticalSection(&mut); + +#undef COND_T +#define COND_T HANDLE +#define COND_INIT(cond) \ + /* auto-reset, non-signalled */ \ + if (!(cond = CreateEvent(NULL, FALSE, FALSE, NULL))) { \ + Py_FatalError("CreateMutex(" #cond ") failed"); }; +#define COND_DESTROY(mut) \ + if (!CloseHandle(mut)) { \ + Py_FatalError("CloseHandle(" #mut ") failed"); }; +#define COND_RESET(cond) \ + if (!ResetEvent(cond)) { \ + Py_FatalError("ResetEvent(" #cond ") failed"); }; +#define COND_SIGNAL(cond) \ + if (!SetEvent(cond)) { \ + Py_FatalError("SetEvent(" #cond ") failed"); }; +#define COND_WAIT(cond, mut) \ + { \ + MUTEX_UNLOCK(mut); \ + if (WaitForSingleObject(cond, INFINITE) != WAIT_OBJECT_0) \ + Py_FatalError("WaitForSingleObject(" #cond ") failed"); \ + MUTEX_LOCK(mut); \ + } +#define COND_TIMED_WAIT(cond, mut, seconds, timestamp_now, timeout_result) \ + { \ + DWORD r; \ + MUTEX_UNLOCK(mut); \ + r = WaitForSingleObject(cond, (int)(seconds * 1000)); \ + if (r == WAIT_TIMEOUT) { \ + MUTEX_LOCK(mut); \ + timeout_result = 1; \ + } \ + else if (r != WAIT_OBJECT_0) \ + Py_FatalError("WaitForSingleObject(" #cond ") failed"); \ + else { \ + MUTEX_LOCK(mut); \ + timeout_result = 0; \ + } \ + } + +#else + +#error You need either a POSIX-compatible or a Windows system! + +#endif /* _POSIX_THREADS, NT_THREADS */ + + diff --git a/Include/pystate.h b/Include/pystate.h --- a/Include/pystate.h +++ b/Include/pystate.h @@ -8,6 +8,22 @@ extern "C" { #endif +#include "pythread.h" +#include "intrusive.h" + +/* Temporarily work around cross platform (nt/posix) compilation issues. + * Can't include mutexed.h directly on NT since it includes windows.h + * and breaks compliation solution-wide. */ + +#undef COND_T +#ifdef MS_WINDOWS +typedef void *COND_HANDLE; +#define COND_T COND_HANDLE +#else +#include "mutexed.h" +#endif + + /* State shared between threads */ struct _ts; /* Forward */ @@ -88,8 +104,6 @@ PyObject *dict; /* Stores per-thread state */ - /* XXX doesn't mean anything anymore (the comment below is obsolete) - => deprecate or remove? */ /* tick_counter is incremented whenever the check_interval ticker * reaches zero. The purpose is to give a useful measure of the number * of interpreted bytecode instructions in a given thread. 
This @@ -98,15 +112,21 @@ */ int tick_counter; + COND_T bfs_cond; + struct _list bfs_list; + long double bfs_slice; + long double bfs_deadline; + long double bfs_timestamp; + int gilstate_counter; PyObject *async_exc; /* Asynchronous exception to raise */ long thread_id; /* Thread id where this tstate was created */ /* XXX signal handlers should also be here */ - } PyThreadState; +#define THREAD_STATE(ptr) CONTAINER(PyThreadState, bfs_list, ptr) PyAPI_FUNC(PyInterpreterState *) PyInterpreterState_New(void); PyAPI_FUNC(void) PyInterpreterState_Clear(PyInterpreterState *); diff --git a/Python/bfs.c b/Python/bfs.c new file mode 100644 --- /dev/null +++ b/Python/bfs.c @@ -0,0 +1,570 @@ +/* + * Python/bfs.c + * + * Simplified implementation of the Brain F**k Scheduler by Con Kolivas + * http://ck.kolivas.org/patches/bfs/sched-BFS.txt + * + * Copyright (c) 2010 Nir Aides and individual contributors. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of Nir Aides nor the names of other contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Simplified implementation of the Brain F**k Scheduler by Con Kolivas + * http://ck.kolivas.org/patches/bfs/sched-BFS.txt + * + * "The goal of the Brain F**k Scheduler, referred to as BFS from here on, is to + * completely do away with the complex designs of the past for the cpu process + * scheduler and instead implement one that is very simple in basic design. + * The main focus of BFS is to achieve excellent desktop interactivity and + * responsiveness without heuristics and tuning knobs that are difficult to + * understand, impossible to model and predict the effect of, and when tuned to + * one workload cause massive detriment to another." - Con Kolivas + * + * Notes: + * There are several mechanisms in this implementation which are particular to + * Python: + * + * 1) Clock - Except for Linux, most platforms do not provide API to query + * thread's running time with high precision. Therefore timing is done with + * wall clock. The scheduler seems to behave reasonably with wall clock even + * under load since the OS scheduler does not tend to preempt IO bound + * threads. 
+ * + * 2) Clock precision - Code works best with high precision clock 1ms--. + * With low precision clock, scheduler will become sensitive to tasks which + * are synchronized with the interval size. There is a related discussion + * about this in the BFS design doc under sub-tick accounting. + * + * 3) TSC - When available the code will try to use TSC for timing. TSC is + * high precision and fast to read and eliminates the timestamp reading + * overhead. Other time sources such as HPET typically cost around 1usec per + * call, sometimes even 3usec or more. Code is written such that it is + * generally indifferent to multi-core counter sync and CPU-cycles/sec + * ratio swings. + * + * 3) Slow Python ticks - Querying the clock between Python ticks is too + * expensive, so it is done once every 5000 ticks. However, since even a + * single slow Python tick can deplete a slice (10ms), a special flag exists + * (bfs_check_depleted) which is set by waiting threads and which forces the + * running thread to check its remaining slice by next tick. + * + * 4) Schedule snatching - Allow thread to snatch schedule if the signaled + * next-to-run thread did not take over yet and is less urgent. This + * reduces context switching, for example in case of fast yield-schedule + * cycles, such as with IO call that ends being non-blocking. + * + * 5) Force switch - A thread which yields to BFS may signal the next thread + * but continue to run (e.g. in C library code). If the OS schedules the + * next thread on the same core, it will take a while before it actually gets + * to run. Therefore to prevent this situation, when a thread yields to BFS, + * it will block if the next thread has more urgent deadline, until that + * thread starts running. This improves throughput and latency. + * + * 6) Multi-core clock - On older multi core CPU, dynamic frequency scaling + * may result in unsynchronized timetamp readings between cores. The code + * addresses this problem with by using a "Python" clock created by + * accumulating timestamp (capped) diffs. + * + * 7) Disable thread boost - On Windows code disables thread boosting + * associated with scheduler-condition for CPU bound threads. This prevents + * them from getting higher priority, messing up scheduling across the system. + */ + + +#include "time.h" +#include "mutexed.h" +#include "cycle.h" + + +/* Round robin interval - thread time slice for running (in seconds). + * It is set at 10ms to match Windows tick resolution. */ +static const long double rr_interval = 0.010; + +/* Magic number taken from Con's implementation. There it is computed as + * 1.1 ^ 20 (nice level of regular thread). Basically it controls the load + * beyond which threads tend to expire their deadline. Expired threads are + * handled in FIFO order, which means that at such load IO bound threads + * will no longer preempt running threads. */ +#define DEADLINE_FACTOR 8 + +/* Protect access to data structures. */ +static MUTEX_T bfs_mutex; + +/* List all threads waiting for schedule. */ +static struct _list bfs_rq; + +/* Number of threads waiting for schedule */ +static volatile int bfs_threads_waiting = 0; + +/* Current running thread. */ +static volatile PyThreadState *bfs_thread = NULL; + +/* Mark current thread pointer as opaque so code does not try to access it. + * This is relevant to shutdown. */ +static int bfs_thread_opaque = 0; + +/* Previous running thread. */ +static PyThreadState *bfs_thread_prev = NULL; + +/* Pointer to yielding thread waiting for switch confirmation. 
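   (Editorial aside, illustrative arithmetic only: with rr_interval = 0.010 s
   and DEADLINE_FACTOR = 8 above, a thread whose slice is refilled in
   bfs_schedule() gets

       bfs_slice    = 0.010 s                        ... 10 ms of running time
       bfs_deadline = bfs_python_time + 0.080 s      ... 10 ms * 8

   so once roughly eight CPU bound threads are runnable, deadlines expire
   before a thread gets rescheduled and expired threads are then handled in
   FIFO order, as noted in the DEADLINE_FACTOR comment above.)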
*/ +static volatile PyThreadState *bfs_thread_switch = NULL; + +/* Time to way for urgent thread to reaquire schedule in fast + * yield, schedule scenarios (non-blocking IO). */ +#define SWITCH_BACK_TIMEOUT 0.000012 + +/* Use as cross-multiple-CPU-cores "clock". */ +static volatile long double bfs_python_time = 0; + +/* Number of TSC ticks per second. */ +static long double bfs_tsc_freq = 0; +static int bfs_tsc_disabled = 0; + +/* Flag currently running thread to yield immediately. */ +static int bfs_preempt = 0; + +/* Flag currently running thread to check remaining slice immediately. */ +static int bfs_check_depleted = 0; + +static int bfs_initialized = 0; + +//#define VERBOSE +#undef VERBOSE +#ifdef VERBOSE +#define TRACE(v) printf v +#else +#define TRACE(v) +#endif + + +#ifdef MS_WINDOWS + +#define DISABLE_CPU_BOUND_THREAD_BOOST(tstate) \ + if (tstate == NULL || tstate->bfs_slice <= 0 || tstate->bfs_deadline > bfs_python_time) { \ + SetThreadPriorityBoost(GetCurrentThread(), TRUE); \ + } + +#define RESTORE_THREAD_BOOST() \ + SetThreadPriorityBoost(GetCurrentThread(), FALSE); + +/* Return system wide timestamp in seconds (with usec precision). */ +_LOCAL(long double) get_timestamp(void) { + static LARGE_INTEGER ctrStart; + static long double divisor = 0.0; + LARGE_INTEGER now; + + if (divisor == 0.0) { + LARGE_INTEGER freq; + QueryPerformanceCounter(&ctrStart); + QueryPerformanceFrequency(&freq); + divisor = (long double) freq.QuadPart; + } + + QueryPerformanceCounter(&now); + return (now.QuadPart - ctrStart.QuadPart) / divisor; +} + +#elif defined(HAVE_GETTIMEOFDAY) + +#define DISABLE_CPU_BOUND_THREAD_BOOST(tstate) +#define RESTORE_THREAD_BOOST() + +/* Return system wide timestamp in seconds (with usec precision). */ +_LOCAL(long double) get_timestamp(void) { + struct timeval tv; + GETTIMEOFDAY(&tv); + return (long double) (tv.tv_sec + tv.tv_usec * 0.000001); +} + +#else + +#error You need either a POSIX-compatible or a Windows system! + +#endif /* MS_WINDOWS, HAVE_GETTIMEOFDAY */ + + +#ifdef HAVE_TICK_COUNTER_LARGE_INTEGER +#define GETTICKS() ((long double)getticks().QuadPart) +#elif defined(HAVE_TICK_COUNTER) +#define GETTICKS() ((long double)getticks()) +#else +#define GETTICKS() 0 +#endif + + +/* Calibrate TSC counter frequency. */ +long double bfs_tsc_calibrate0(void) { + long double tk0, tk1, ts0, ts1; + int i; + + tk0 = GETTICKS(); + ts0 = get_timestamp(); + do { + for(i = 0; i < 1000; i++); + } while (get_timestamp() - ts0 < 0.000030); + tk1 = GETTICKS(); + ts1 = get_timestamp(); + return (tk1 - tk0) / (ts1 - ts0); +} + + +/* Calibrate TSC counter frequency. */ +long double bfs_tsc_calibrate1(void) { + long double f0 = bfs_tsc_calibrate0(); + long double f1 = bfs_tsc_calibrate0(); + + if (0.6 * f0 < f1 && f1 < 1.5 * f0) + return (f0 + f1) / 2; + + return 0; +} + + +/* Calibrate TSC counter frequency. */ +void bfs_tsc_calibrate(void) { + if (GETTICKS() != 0) { + bfs_tsc_freq = bfs_tsc_calibrate1(); + if (bfs_tsc_freq != 0) + return; + + bfs_tsc_freq = bfs_tsc_calibrate1(); + if (bfs_tsc_freq != 0) + return; + } + + bfs_tsc_disabled = 1; +} + + +/* Query TSC or fall back to gettimeofday() */ +_LOCAL(long double) get_cpu_timestamp(void) { + if (bfs_tsc_disabled) + return get_timestamp(); + + return GETTICKS() / bfs_tsc_freq; +} + + +/* Lock bfs mutex before fork() to make sure it is not held by an uncopied + * thread */ +void bfs_fork_prepare(void) { + MUTEX_LOCK(bfs_mutex); +} + + +/* Unlock mutex after fork() at parent to allow normal operation. 
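   (For context: bfs_init() below registers the three fork handlers with

       pthread_atfork(bfs_fork_prepare, bfs_fork_parent, bfs_fork_child);

   so the scheduler mutex is held across fork() and, in the child, where only
   the forking thread survives, the run queue is re-initialized before the
   mutex is released.)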
*/ +void bfs_fork_parent(void) { + MUTEX_UNLOCK(bfs_mutex); +} + + +/* Initialize bfs and unlock mutex after fork() at child to allow normal + * operation. */ +void bfs_fork_child(void) { + _list_init(&bfs_rq); + MUTEX_UNLOCK(bfs_mutex); +} + + +static void bfs_init(void) { +#ifdef _POSIX_THREADS + pthread_atfork(bfs_fork_prepare, bfs_fork_parent, bfs_fork_child); +#endif + MUTEX_INIT(bfs_mutex); + _list_init(&bfs_rq); + bfs_tsc_calibrate(); + bfs_initialized = 1; +} + + +/* Pick next thread to schedule based on earliest deadline. */ +static PyThreadState *bfs_find_task(long double python_time) { + struct _list *item; + PyThreadState *task; + + if (_list_empty(&bfs_rq)) + return NULL; + + item = bfs_rq.next; + task = THREAD_STATE(item); + + /* Search for thread with earliest deadline or first expired deadline. + * IO bound threads typically have expired deadlines since they naturally + * take longer than their original deadline to deplete their slice. */ + for (item = item->next; item != &bfs_rq && task->bfs_deadline > python_time; item = item->next) { + PyThreadState *tstate = THREAD_STATE(item); + if (tstate->bfs_deadline < task->bfs_deadline) { + task = tstate; + } + } + + return task; +} + + +/* Wait for currently running thread to signal this thread (tstate) + * to take over the schedule. Call with locked mutex */ +static void _bfs_timed_wait(PyThreadState *tstate, long double timestamp) { + /* Use timeout to guard against slow ticks of running thread, but + * multiply by number of waiting threads to prevent growing number of + * context switches. */ + long double timeout = bfs_threads_waiting * rr_interval * 0.55; + int timed_out = 0; + + COND_TIMED_WAIT(tstate->bfs_cond, bfs_mutex, timeout, timestamp, timed_out); + + /* Flag running thread to check slice depletion immediately. it has possibly + * been doing slow ticks (e.g. regular expression searches) and depleted its + * slice. */ + if (timed_out) { + bfs_check_depleted = 1; + COMPUTE_EVAL_BREAKER(); + } +} + + +/* Diff timestamp capping results to protect against clock differences + * between cores. */ +_LOCAL(long double) _bfs_diff_ts(long double ts1, long double ts0) { + if (ts1 < ts0) + return 0; + + if (ts1 - ts0 > rr_interval) + return rr_interval; + + return ts1 - ts0; +} + + +/* Schedule (tstate) thread for running. */ +static void bfs_schedule(PyThreadState *tstate) { +#ifdef VERBOSE + long double _slice = tstate->bfs_slice; + long double _deadline = tstate->bfs_deadline; +#endif + + int is_urgent; + volatile int spin; + long double ts0; + + MUTEX_LOCK(bfs_mutex); + + /* Disable boost inside mutex due to tstate lifetime trickiness during + * shutdown. */ + DISABLE_CPU_BOUND_THREAD_BOOST(tstate); + + /* Refill depleted slice and reset scheduling deadline. */ + if (tstate->bfs_slice <= 0) { + tstate->bfs_slice = rr_interval; + tstate->bfs_deadline = bfs_python_time + rr_interval * DEADLINE_FACTOR; + } + + TRACE(("SCHD %p - %Lf, pt=%Lf, slice=%Lf, deadline-in=%Lf, deadline-on=%Lf\n", tstate, get_timestamp(), bfs_python_time, _slice, _deadline - bfs_python_time, tstate->bfs_deadline)); + + do { + /* Schedule immediately if no thread is currently running. */ + if (bfs_thread == NULL) + break; + + /* Determine urgency based on deadline. If deadline expired handle + * in FIFO order. */ + is_urgent = !bfs_thread_opaque && + tstate->bfs_deadline <= bfs_thread->bfs_deadline && + bfs_thread->bfs_deadline >= bfs_python_time; + + /* Schedule immediately if next thread (bfs_thread) did not take + * over yet and is less urgent. 
*/ + if (is_urgent && bfs_thread_prev != NULL) + break; + + /* Preempt running thread if it is less urgent. */ + if (is_urgent) { + bfs_preempt = 1; + COMPUTE_EVAL_BREAKER(); + } + + /* Flag running thread to check depletion of its slice. */ + else { + bfs_check_depleted = 1; + COMPUTE_EVAL_BREAKER(); + } + + /* Queue and wait for schedule */ + + _list_append(&bfs_rq, &tstate->bfs_list); + bfs_threads_waiting++; + + COND_RESET(tstate->bfs_cond); + while (bfs_thread != tstate) { + _bfs_timed_wait(tstate, 0); + /* If yielding thread is more urgent give it a chance to + * reschedule by spinning a little. Reduces multi-core switching + * on fast yield-schedule cycles. */ + if (bfs_thread == tstate && bfs_thread_prev != NULL && + tstate->bfs_deadline > bfs_thread_prev->bfs_deadline && + tstate->bfs_deadline >= bfs_python_time) { + MUTEX_UNLOCK(bfs_mutex); + ts0 = get_timestamp(); + do { + for (spin = 500; spin > 0 && bfs_thread == tstate; spin--); + } while (get_timestamp() - ts0 < SWITCH_BACK_TIMEOUT); + MUTEX_LOCK(bfs_mutex); + } + } + + _list_pop(&tstate->bfs_list); + bfs_threads_waiting--; + break; + } while(0); + + bfs_thread = tstate; + bfs_thread_prev = NULL; + + /* Signal previous thread if it is waiting for the switch. */ + if (bfs_thread_switch != NULL) { + COND_SIGNAL(((PyThreadState*)bfs_thread_switch)->bfs_cond); + bfs_thread_switch = NULL; + } + + tstate->bfs_timestamp = get_cpu_timestamp(); + + TRACE(("TAKE %p - %Lf, pt=%Lf, tsc=%Lf\n", tstate, get_timestamp(), bfs_python_time, tstate->bfs_timestamp)); + MUTEX_UNLOCK(bfs_mutex); + RESTORE_THREAD_BOOST(); +} + + +/* Yield schedule to another thread. */ +static void _bfs_yield(PyThreadState *tstate, int fast_reschedule) { + long double interval; + + /* Update running time slice (book keeping). */ + if (!bfs_thread_opaque && tstate != NULL) { + interval = _bfs_diff_ts(get_cpu_timestamp(), tstate->bfs_timestamp); + bfs_python_time += interval; + tstate->bfs_slice -= interval; + } + + DISABLE_CPU_BOUND_THREAD_BOOST(tstate); + MUTEX_LOCK(bfs_mutex); + TRACE(("DROP %p - %Lf, pt=%Lf, tsc=%Lf\n", tstate, get_timestamp(), bfs_python_time, get_cpu_timestamp())); + + /* Reset preemption flags - we heard the shout. */ + bfs_preempt = 0; + bfs_check_depleted = 0; + COMPUTE_EVAL_BREAKER(); + + /* Allow re-grabbing schedule back before next thread in fast + * yield-schedule cycles. */ + if (!bfs_thread_opaque && fast_reschedule) { + bfs_thread_prev = tstate; + } + + /* Find earliest deadline thread to run */ + bfs_thread = bfs_find_task(bfs_python_time); + if (bfs_thread != NULL) { + TRACE(("SGNL %p - %Lf, signal %p\n", tstate, get_timestamp(), bfs_thread)); + COND_SIGNAL(((PyThreadState*)bfs_thread)->bfs_cond); + } + + /* Force yield of OS slice if next thread is more urgent. This + * is required since OS may schedule next thread to run on same core. */ + if (!bfs_thread_opaque && bfs_thread != NULL && + bfs_thread_switch == NULL && tstate != NULL && + (tstate->bfs_slice <= 0 || bfs_thread->bfs_deadline < bfs_python_time || + tstate->bfs_deadline >= bfs_thread->bfs_deadline)) { + bfs_thread_switch = tstate; + COND_RESET(tstate->bfs_cond); + while (bfs_thread_switch == tstate) { + COND_WAIT(tstate->bfs_cond, bfs_mutex); + } + } + + bfs_thread_opaque = 0; + + MUTEX_UNLOCK(bfs_mutex); + RESTORE_THREAD_BOOST(); +} + + +/* Yield schedule to another thread. */ +static void bfs_yield(PyThreadState *tstate) { + _bfs_yield(tstate, 1); +} + + +/* Check if thread depleted its running time slice. 
*/ +_LOCAL(int) bfs_depleted_slice(PyThreadState *tstate) { + /* A reading is about 1 usec on a modern CPU. */ + return tstate->bfs_slice <= _bfs_diff_ts(get_cpu_timestamp(), tstate->bfs_timestamp); +} + + +/* Propose scheduler to switch to a different thread - a la cooperative + * multitasking. */ +_LOCAL(void) bfs_checkpoint(PyThreadState *tstate) { + TRACE(("CHCK %p - %Lf, preempt=%d, check_depleted=%d\n", tstate, get_timestamp(), bfs_preempt, bfs_check_depleted)); + + bfs_check_depleted = 0; + COMPUTE_EVAL_BREAKER(); + + if (bfs_preempt || bfs_depleted_slice(tstate)) { + _bfs_yield(tstate, 0); + bfs_schedule(tstate); + } +} + + +/* Clean up, called when clearing up a Python thread state. */ +void bfs_clear(PyThreadState *tstate) { + if (bfs_initialized == 0) + return; + + MUTEX_LOCK(bfs_mutex); + + /* If attempt to clear current thread, mark it as opaque so it is + * not accessed by other threads attempting to schedule. */ + if (bfs_thread == tstate) { + bfs_thread_opaque = 1; + } + /* If attempt to clear prev-thread, reset it to NULL so it is not accessed + * by other threads. */ + else if (bfs_thread_prev == tstate) { + bfs_thread_prev = NULL; + } + + /* Remove cleared thread from waiting list. */ + if (tstate->bfs_list.next != NULL) { + _list_pop(&tstate->bfs_list); + } + else { + COND_DESTROY(tstate->bfs_cond); + } + + MUTEX_UNLOCK(bfs_mutex); +} + + diff --git a/Python/ceval.c b/Python/ceval.c --- a/Python/ceval.c +++ b/Python/ceval.c @@ -222,22 +222,10 @@ #define COMPUTE_EVAL_BREAKER() \ _Py_atomic_store_relaxed( \ &eval_breaker, \ - _Py_atomic_load_relaxed(&gil_drop_request) | \ + bfs_preempt | bfs_check_depleted | \ _Py_atomic_load_relaxed(&pendingcalls_to_do) | \ pending_async_exc) -#define SET_GIL_DROP_REQUEST() \ - do { \ - _Py_atomic_store_relaxed(&gil_drop_request, 1); \ - _Py_atomic_store_relaxed(&eval_breaker, 1); \ - } while (0) - -#define RESET_GIL_DROP_REQUEST() \ - do { \ - _Py_atomic_store_relaxed(&gil_drop_request, 0); \ - COMPUTE_EVAL_BREAKER(); \ - } while (0) - /* Pending calls are only modified under pending_lock */ #define SIGNAL_PENDING_CALLS() \ do { \ @@ -270,35 +258,43 @@ static PyThread_type_lock pending_lock = 0; /* for pending calls */ static long main_thread = 0; +static int threads_init = 0; /* This single variable consolidates all requests to break out of the fast path in the eval loop. */ static _Py_atomic_int eval_breaker = {0}; -/* Request for dropping the GIL */ -static _Py_atomic_int gil_drop_request = {0}; /* Request for running pending calls. */ static _Py_atomic_int pendingcalls_to_do = {0}; /* Request for looking at the `async_exc` field of the current thread state. Guarded by the GIL. */ static int pending_async_exc = 0; -#include "ceval_gil.h" +#define SCHEDULER_INTERVAL_TICKS 5000 +static volatile int bfs_ticks = SCHEDULER_INTERVAL_TICKS; + +#include "bfs.c" int PyEval_ThreadsInitialized(void) { - return gil_created(); + return threads_init == 1; } void PyEval_InitThreads(void) { - if (gil_created()) - return; - create_gil(); - take_gil(PyThreadState_GET()); + if (PyEval_ThreadsInitialized()) + return; + + /* PyEval_InitThreads() either initializes the scheduler or is called + * by a scheduled thread - "has the lock". 
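   (Illustrative embedding sequence, not part of the patch; under this patch
   the second call performs the bfs_init() and bfs_schedule() seen below
   instead of creating and taking the GIL, as in the removed code above:

       Py_Initialize();
       PyEval_InitThreads();
   )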
*/ + bfs_init(); + bfs_schedule(PyThreadState_GET()); + main_thread = PyThread_get_thread_ident(); if (!pending_lock) pending_lock = PyThread_allocate_lock(); + + threads_init = 1; } void @@ -307,7 +303,7 @@ PyThreadState *tstate = PyThreadState_GET(); if (tstate == NULL) Py_FatalError("PyEval_AcquireLock: current thread state is NULL"); - take_gil(tstate); + bfs_schedule(tstate); } void @@ -317,7 +313,7 @@ We therefore avoid PyThreadState_GET() which dumps a fatal error in debug mode. */ - drop_gil((PyThreadState*)_Py_atomic_load_relaxed( + bfs_yield((PyThreadState*)_Py_atomic_load_relaxed( &_PyThreadState_Current)); } @@ -326,9 +322,11 @@ { if (tstate == NULL) Py_FatalError("PyEval_AcquireThread: NULL new thread state"); + /* Check someone has called PyEval_InitThreads() to create the lock */ - assert(gil_created()); - take_gil(tstate); + assert(PyEval_ThreadsInitialized()); + + bfs_schedule(tstate); if (PyThreadState_Swap(tstate) != NULL) Py_FatalError( "PyEval_AcquireThread: non-NULL old thread state"); @@ -341,7 +339,10 @@ Py_FatalError("PyEval_ReleaseThread: NULL thread state"); if (PyThreadState_Swap(NULL) != tstate) Py_FatalError("PyEval_ReleaseThread: wrong thread state"); - drop_gil(tstate); + + /* Call yield with NULL since tstate not guaranteed to be valid pointer: + * http://docs.python.org/py3k/c-api/init.html?highlight=gil#PyEval_ReleaseThread */ + bfs_yield(NULL); } /* This function is called from PyOS_AfterFork to ensure that newly @@ -355,15 +356,10 @@ PyObject *threading, *result; PyThreadState *tstate = PyThreadState_GET(); - if (!gil_created()) - return; - /*XXX Can't use PyThread_free_lock here because it does too - much error-checking. Doing this cleanly would require - adding a new function to each thread_*.h. Instead, just - create a new lock and waste a little bit of memory */ - recreate_gil(); + if (!PyEval_ThreadsInitialized()) + return; + pending_lock = PyThread_allocate_lock(); - take_gil(tstate); main_thread = PyThread_get_thread_ident(); /* Update the threading module with the new state. @@ -386,8 +382,8 @@ #else static _Py_atomic_int eval_breaker = {0}; -static _Py_atomic_int gil_drop_request = {0}; static int pending_async_exc = 0; + #endif /* WITH_THREAD */ /* This function is used to signal that async exceptions are waiting to be @@ -410,8 +406,9 @@ if (tstate == NULL) Py_FatalError("PyEval_SaveThread: NULL tstate"); #ifdef WITH_THREAD - if (gil_created()) - drop_gil(tstate); + if (PyEval_ThreadsInitialized()) { + bfs_yield(tstate); + } #endif return tstate; } @@ -422,9 +419,9 @@ if (tstate == NULL) Py_FatalError("PyEval_RestoreThread: NULL tstate"); #ifdef WITH_THREAD - if (gil_created()) { + if (PyEval_ThreadsInitialized()) { int err = errno; - take_gil(tstate); + bfs_schedule(tstate); errno = err; } #endif @@ -868,7 +865,10 @@ #define DISPATCH() \ { \ - if (!_Py_atomic_load_relaxed(&eval_breaker)) { \ + if (bfs_threads_waiting) { \ + bfs_ticks--; \ + } \ + if (!_Py_atomic_load_relaxed(&eval_breaker) && bfs_ticks) { \ FAST_DISPATCH(); \ } \ continue; \ @@ -1242,7 +1242,15 @@ async I/O handler); see Py_AddPendingCall() and Py_MakePendingCalls() above. */ - if (_Py_atomic_load_relaxed(&eval_breaker)) { +#ifndef USE_COMPUTED_GOTOS + /* gcc will segfault on an attempt to increment bfs_ticks without + * the if clause. 
*/ + if (bfs_threads_waiting) + bfs_ticks--; +#endif + + if (_Py_atomic_load_relaxed(&eval_breaker) || !bfs_ticks) { + bfs_ticks = SCHEDULER_INTERVAL_TICKS; if (*next_instr == SETUP_FINALLY) { /* Make the last opcode before a try: finally: block uninterruptable. */ @@ -1258,20 +1266,16 @@ goto on_error; } } - if (_Py_atomic_load_relaxed(&gil_drop_request)) { #ifdef WITH_THREAD /* Give another thread a chance */ if (PyThreadState_Swap(NULL) != tstate) Py_FatalError("ceval: tstate mix-up"); - drop_gil(tstate); - - /* Other threads may run now */ - - take_gil(tstate); + + bfs_checkpoint(tstate); + if (PyThreadState_Swap(tstate) != NULL) Py_FatalError("ceval: orphan tstate"); #endif - } /* Check for asynchronous exceptions. */ if (tstate->async_exc != NULL) { x = tstate->async_exc; diff --git a/Python/pystate.c b/Python/pystate.c --- a/Python/pystate.c +++ b/Python/pystate.c @@ -2,6 +2,7 @@ /* Thread and interpreter state structures and their interfaces */ #include "Python.h" +#include "mutexed.h" /* -------------------------------------------------------------------------- CAUTION @@ -24,6 +25,7 @@ #ifdef WITH_THREAD +#include "bfs.h" #include "pythread.h" static PyThread_type_lock head_mutex = NULL; /* Protects interp->tstate_head */ #define HEAD_INIT() (void)(head_mutex || (head_mutex = PyThread_allocate_lock())) @@ -200,6 +202,13 @@ tstate->c_profileobj = NULL; tstate->c_traceobj = NULL; + COND_INIT(tstate->bfs_cond); + tstate->bfs_list.next = NULL; + tstate->bfs_list.prev = NULL; + tstate->bfs_slice = 0; + tstate->bfs_deadline = 0; + tstate->bfs_timestamp = 0; + if (init) _PyThreadState_Init(tstate); @@ -291,6 +300,10 @@ tstate->c_tracefunc = NULL; Py_CLEAR(tstate->c_profileobj); Py_CLEAR(tstate->c_traceobj); + +#ifdef WITH_THREAD + bfs_clear(tstate); +#endif } @@ -347,6 +360,7 @@ #ifdef WITH_THREAD + void PyThreadState_DeleteCurrent() { diff --git a/Python/sysmodule.c b/Python/sysmodule.c --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -490,7 +490,7 @@ static PyObject * sys_setswitchinterval(PyObject *self, PyObject *args) { - double d; + /*double d; if (!PyArg_ParseTuple(args, "d:setswitchinterval", &d)) return NULL; if (d <= 0.0) { @@ -498,7 +498,7 @@ "switch interval must be strictly positive"); return NULL; } - _PyEval_SetSwitchInterval((unsigned long) (1e6 * d)); + _PyEval_SetSwitchInterval((unsigned long) (1e6 * d));*/ Py_INCREF(Py_None); return Py_None; } @@ -518,7 +518,7 @@ static PyObject * sys_getswitchinterval(PyObject *self, PyObject *args) { - return PyFloat_FromDouble(1e-6 * _PyEval_GetSwitchInterval()); + return PyFloat_FromDouble(1000.0);//1e-6 * _PyEval_GetSwitchInterval()); } PyDoc_STRVAR(getswitchinterval_doc,
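A closing usage note (illustrative, not part of the patch): extension code
keeps releasing and re-acquiring the interpreter exactly as it did with the
GIL; with the ceval.c changes above, PyEval_SaveThread() now calls bfs_yield()
and PyEval_RestoreThread() calls bfs_schedule(), so the usual macros route
into the scheduler. The fd and buf names below are placeholders.

    ssize_t n;
    Py_BEGIN_ALLOW_THREADS          /* calls PyEval_SaveThread()    */
    n = read(fd, buf, sizeof(buf)); /* blocking call runs unscheduled */
    Py_END_ALLOW_THREADS            /* calls PyEval_RestoreThread() */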