diff -r 6c5f9c6c25ea Include/ceval.h
--- a/Include/ceval.h Wed Sep 14 18:17:32 2016 +0300
+++ b/Include/ceval.h Wed Sep 14 16:38:08 2016 -0400
@@ -219,6 +219,11 @@
PyAPI_FUNC(void) _PyEval_SignalAsyncExc(void);
#endif
+#ifndef Py_LIMITED_API
+PyAPI_FUNC(void) _PyEval_FreeOpcodeCache(void *);  /* frees an opcode cache built by ceval; registered as a co_extra free function */
+PyAPI_FUNC(void) _PyEval_Fini(void);  /* ceval finalization hook; NOTE(review): also declared in pylifecycle.h */
+#endif
+
/* Masks and values used by FORMAT_VALUE opcode. */
#define FVC_MASK 0x3
#define FVC_NONE 0x0
diff -r 6c5f9c6c25ea Include/code.h
--- a/Include/code.h Wed Sep 14 18:17:32 2016 +0300
+++ b/Include/code.h Wed Sep 14 16:38:08 2016 -0400
@@ -26,6 +26,7 @@
int co_stacksize; /* #entries needed for evaluation stack */
int co_flags; /* CO_..., see below */
int co_firstlineno; /* first source line number */
+ int co_opt; /* Used by ceval opcode cache */
PyObject *co_code; /* instruction opcodes */
PyObject *co_consts; /* list (constants used) */
PyObject *co_names; /* list of strings (names used) */
diff -r 6c5f9c6c25ea Include/pylifecycle.h
--- a/Include/pylifecycle.h Wed Sep 14 18:17:32 2016 +0300
+++ b/Include/pylifecycle.h Wed Sep 14 16:38:08 2016 -0400
@@ -108,6 +108,7 @@
PyAPI_FUNC(void) _PyType_Fini(void);
PyAPI_FUNC(void) _PyRandom_Fini(void);
PyAPI_FUNC(void) PyAsyncGen_Fini(void);
+PyAPI_FUNC(void) _PyEval_Fini(void);
PyAPI_DATA(PyThreadState *) _Py_Finalizing;
#endif
diff -r 6c5f9c6c25ea Makefile.pre.in
--- a/Makefile.pre.in Wed Sep 14 18:17:32 2016 +0300
+++ b/Makefile.pre.in Wed Sep 14 16:38:08 2016 -0400
@@ -863,7 +863,7 @@
$(OPCODETARGETS_H): $(OPCODETARGETGEN_FILES)
$(PYTHON_FOR_GEN) $(OPCODETARGETGEN) $(OPCODETARGETS_H)
-Python/ceval.o: $(OPCODETARGETS_H) $(srcdir)/Python/ceval_gil.h
+Python/ceval.o: $(OPCODETARGETS_H) $(srcdir)/Python/ceval_gil.h $(srcdir)/Python/ceval_cache.h
Python/frozen.o: Python/importlib.h Python/importlib_external.h
diff -r 6c5f9c6c25ea Objects/codeobject.c
--- a/Objects/codeobject.c Wed Sep 14 18:17:32 2016 +0300
+++ b/Objects/codeobject.c Wed Sep 14 16:38:08 2016 -0400
@@ -161,6 +161,7 @@
co->co_zombieframe = NULL;
co->co_weakreflist = NULL;
co->co_extra = NULL;
+ co->co_opt = 0;
return co;
}
diff -r 6c5f9c6c25ea PCbuild/pythoncore.vcxproj
--- a/PCbuild/pythoncore.vcxproj Wed Sep 14 18:17:32 2016 +0300
+++ b/PCbuild/pythoncore.vcxproj Wed Sep 14 16:38:08 2016 -0400
@@ -207,6 +207,7 @@
+
diff -r 6c5f9c6c25ea Python/ceval.c
--- a/Python/ceval.c Wed Sep 14 18:17:32 2016 +0300
+++ b/Python/ceval.c Wed Sep 14 16:38:08 2016 -0400
@@ -1,4 +1,3 @@
-
/* Execute compiled code */
/* XXX TO DO:
@@ -215,7 +214,19 @@
Guarded by the GIL. */
static int pending_async_exc = 0;
+
+/* Code access macros: split a code unit `word` into its opcode and oparg parts; which part is which depends on WORDS_BIGENDIAN */
+#ifdef WORDS_BIGENDIAN
+ #define OPCODE(word) ((word) >> 8)  /* opcode: everything above the low 8 bits */
+ #define OPARG(word) ((word) & 255)  /* oparg: the low 8 bits */
+#else
+ #define OPCODE(word) ((word) & 255)  /* opcode: the low 8 bits */
+ #define OPARG(word) ((word) >> 8)  /* oparg: everything above the low 8 bits */
+#endif
+
#include "ceval_gil.h"
+#include "ceval_cache.h"
+
int
PyEval_ThreadsInitialized(void)
@@ -331,6 +342,14 @@
static int pending_async_exc = 0;
#endif /* WITH_THREAD */
+void
+_PyEval_Fini(void)
+{  /* Finalization hook for ceval; its only action is to print opcode-cache statistics. */
+#if OPCACHE_COLLECT_STATS
+ opcode_cache_print_stats();  /* compiled in only when stats collection is enabled */
+#endif
+}
+
/* This function is used to signal that async exceptions are waiting to be
raised, therefore it is also useful in non-threaded builds. */
@@ -746,6 +765,7 @@
const _Py_CODEUNIT *first_instr;
PyObject *names;
PyObject *consts;
+ _PyCodeObjectCache *cache = NULL;
#ifdef LLTRACE
_Py_IDENTIFIER(__ltrace__);
@@ -866,6 +886,8 @@
/* The integer overflow is checked by an assertion below. */
#define INSTR_OFFSET() (sizeof(_Py_CODEUNIT) * (int)(next_instr - first_instr))
+#define OPCACHE_OFFSET() ((int)(next_instr - first_instr) - 1)  /* code-unit index one before next_instr; presumably the instruction being executed, since next_instr has already advanced -- TODO confirm */
+
#define NEXTOPARG() do { \
_Py_CODEUNIT word = *next_instr; \
opcode = _Py_OPCODE(word); \
@@ -1078,6 +1100,23 @@
f->f_stacktop = NULL; /* remains NULL unless yield suspends frame */
f->f_executing = 1;
+ if (co->co_opt < OPCACHE_CALLS_THRESHOLD) {
+ co->co_opt++;
+ if (co->co_opt == OPCACHE_CALLS_THRESHOLD) {
+ if (init_opcode_cache(co)) {
+ goto exit_eval_frame;
+ }
+ if (_PyCode_GetExtra((PyObject *)co, 0, (void **)&cache)) {
+ goto exit_eval_frame;
+ }
+ }
+ }
+ else {
+ if (_PyCode_GetExtra((PyObject *)co, 0, (void **)&cache)) {
+ goto exit_eval_frame;
+ }
+ }
+
if (co->co_flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR)) {
if (!throwflag && f->f_exc_type != NULL && f->f_exc_type != Py_None) {
/* We were in an except handler when we left,
@@ -2344,6 +2383,25 @@
if (PyDict_CheckExact(f->f_globals)
&& PyDict_CheckExact(f->f_builtins))
{
+ _PyCodeObjectCache_LOAD_GLOBAL *lg_cache;
+ lg_cache = OPCACHE_GET_LOAD_GLOBAL(cache, OPCACHE_OFFSET());
+ if (lg_cache && lg_cache->optimized) {
+ if (lg_cache->globals_tag ==
+ ((PyDictObject *)f->f_globals)->ma_version_tag &&
+ cache->builtins_tag ==
+ ((PyDictObject *)f->f_builtins)->ma_version_tag)
+
+ {
+ PyObject *res = lg_cache->ptr;
+ OPCACHE_STATS_HIT(LOAD_GLOBAL);
+ Py_INCREF(res);
+ PUSH(res);
+ DISPATCH();
+ } else {
+ OPCACHE_STATS_MISS(LOAD_GLOBAL);
+ }
+ }
+
v = _PyDict_LoadGlobal((PyDictObject *)f->f_globals,
(PyDictObject *)f->f_builtins,
name);
@@ -2357,6 +2415,15 @@
goto error;
}
Py_INCREF(v);
+
+ if (!OPCACHE_UPDATE_LOAD_GLOBAL(lg_cache)) {
+ lg_cache->globals_tag =
+ ((PyDictObject *)f->f_globals)->ma_version_tag;
+ cache->builtins_tag =
+ ((PyDictObject *)f->f_builtins)->ma_version_tag;
+ lg_cache->ptr = v;
+ }
+
}
else {
/* Slow-path if globals or builtins is not a dict */
@@ -2789,7 +2856,8 @@
TARGET(LOAD_ATTR) {
PyObject *name = GETITEM(names, oparg);
PyObject *owner = TOP();
- PyObject *res = PyObject_GetAttr(owner, name);
+ PyObject *res;
+ res = PyObject_GetAttr(owner, name);
Py_DECREF(owner);
SET_TOP(res);
if (res == NULL)
diff -r 6c5f9c6c25ea Python/ceval_cache.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Python/ceval_cache.h Wed Sep 14 16:38:08 2016 -0400
@@ -0,0 +1,263 @@
+#include <stdint.h>
+#include <inttypes.h>
+
+
+/* WARNING: This file is full of magic. */
+
+
+#define OPCACHE_OPCODES(XX) \
+ XX(LOAD_GLOBAL)  /* X-macro list of the opcodes that get a cache; XX is applied once per entry */
+
+
+#define OPCACHE_COLLECT_STATS 0  /* nonzero compiles in the stats counters and opcode_cache_print_stats() */
+#define OPCACHE_CALLS_THRESHOLD 1000  /* a code object's cache is built on the call that brings co_opt up to this value */
+#define OPCACHE_MISSES_BEFORE_DEOPT 20  /* initial value of an entry's `optimized` counter when it is first filled */
+
+
+#define OPCACHE_OPCODE_HEAD \
+ int8_t optimized; /* < 0 - deoptimized; \
+ = 0 - not yet optimized; \
+ > 0 - optimized; counted down on each later cache update until deoptimized */
+
+
+typedef struct {  /* cache entry for one LOAD_GLOBAL instruction */
+ OPCACHE_OPCODE_HEAD
+ uint64_t globals_tag;  /* ma_version_tag of f_globals recorded when `ptr` was stored */
+ PyObject *ptr;  /* object found by the last slow-path lookup; appears to be stored without its own reference -- TODO confirm */
+} _PyCodeObjectCache_LOAD_GLOBAL;
+
+
+/*
+- How to implement cache for a new opcode?
+
+Let's say we want to add cache to MY_OPCODE opcode:
+
+1. Define a `_PyCodeObjectCache_MY_OPCODE` struct.
+2. Add an `XX(MY_OPCODE)` entry (no trailing semicolon) to the OPCACHE_OPCODES macro.
+3. Everything else will be handled automatically.
+*/
+
+
+#define _OPCACHE_OPCODE_FIELD(OPCODE) \
+ uint8_t OPCODE##_size; /* number of entries in OPCODE##_cache */ \
+ _PyCodeObjectCache_##OPCODE *OPCODE##_cache;  /* entry array for this opcode; NULL when the code object has none */
+
+
+typedef struct {  /* per-code-object opcode cache, stored in co_extra slot 0 */
+ uint8_t *index;  /* one byte per code unit: slot of that instruction within its opcode's entry array */
+ uint64_t builtins_tag;  /* ma_version_tag of f_builtins stored by LOAD_GLOBAL; NOTE(review): shared by every entry while globals_tag is per entry -- verify one entry's update cannot validate another entry's stale `ptr` */
+
+ OPCACHE_OPCODES(_OPCACHE_OPCODE_FIELD)
+} _PyCodeObjectCache;
+
+
+#if OPCACHE_COLLECT_STATS
+/* Counters indexed by opcode number.  An opcode is one byte (0..255), so each table needs 256 slots. */
+static uint64_t opcode_stats_opts[256];
+static uint64_t opcode_stats_deopts[256];
+static uint64_t opcode_stats_hits[256];
+static uint64_t opcode_stats_misses[256];
+static uint64_t opcode_stats_memory = 0;  /* bytes allocated for caches, accumulated by init_opcode_cache() */
+
+#endif
+
+
+/* Largest number of cache entries one opcode can have in a code object;
+   bounded by the uint8_t slot numbers stored in the index table. */
+#define OPCACHE_MAX_SLOTS 255
+
+/* Build the opcode cache for code object `co` and store it in co_extra
+   slot 0 via _PyCode_SetExtra().
+
+   The bytecode is scanned one 16-bit code unit at a time.  Every
+   occurrence of an opcode listed in OPCACHE_OPCODES is given a slot in
+   that opcode's entry array, and the slot number is recorded in the
+   per-instruction `index` table (one byte per code unit).
+
+   If an opcode occurs more than OPCACHE_MAX_SLOTS times, all occurrences
+   from the last slot onwards share that last slot, which is created
+   permanently deoptimized (optimized == -1).  The getter returns NULL for
+   such an entry, so those instructions always take the slow path instead
+   of reading a value cached for a different instruction.
+
+   Returns 0 on success.  On failure returns -1 after freeing everything
+   allocated here; allocation failures set MemoryError. */
+static int
+init_opcode_cache(PyCodeObject *co)
+{
+    const uint16_t *instr;
+    Py_ssize_t opcodes_num = PyBytes_Size(co->co_code) / 2;
+    uint8_t *index = NULL;
+    _PyCodeObjectCache *cache;
+
+    cache = (_PyCodeObjectCache*)PyMem_Malloc(sizeof(_PyCodeObjectCache));
+    if (cache == NULL) {
+        /* Nothing to release yet.  The shared error path dereferences
+           `cache`, so it must not be entered with a NULL pointer. */
+        PyErr_NoMemory();
+        return -1;
+    }
+    /* PyMem_Malloc does not zero the block; give every field a defined
+       value before the struct can be freed or published. */
+    cache->index = NULL;
+    cache->builtins_tag = 0;
+
+#   define _OPCODE_PREPARE(OPCODE) \
+        uint8_t OPCODE##_size = 0; \
+        int OPCODE##_overflow = 0; \
+        cache->OPCODE##_size = 0; \
+        cache->OPCODE##_cache = NULL;
+
+    OPCACHE_OPCODES(_OPCODE_PREPARE)
+#   undef _OPCODE_PREPARE
+
+    index = (uint8_t *)PyMem_Calloc(opcodes_num, sizeof(uint8_t));
+    if (index == NULL) {
+        PyErr_NoMemory();
+        goto error;
+    }
+
+    instr = (uint16_t*) PyBytes_AS_STRING(co->co_code);
+    for (Py_ssize_t offset = 0; offset < opcodes_num; offset++) {
+        uint16_t word = *instr;
+        uint8_t opcode = OPCODE(word);
+        instr++;
+
+        /* Assign the next free slot; once the slots are exhausted, point
+           the instruction at the last slot and remember that this slot
+           is shared and therefore must never be used as a cache. */
+#       define _OPCODE_COUNT(OPCODE) \
+            if (opcode == OPCODE) { \
+                if (OPCODE##_size < OPCACHE_MAX_SLOTS) { \
+                    index[offset] = OPCODE##_size++; \
+                } \
+                else { \
+                    index[offset] = OPCACHE_MAX_SLOTS - 1; \
+                    OPCODE##_overflow = 1; \
+                } \
+            }
+        OPCACHE_OPCODES(_OPCODE_COUNT)
+#       undef _OPCODE_COUNT
+    }
+
+    /* Allocate the zero-filled entry arrays (optimized == 0 means "not
+       yet optimized") and disable the shared overflow slot, if any. */
+#   define _OPCODE_INIT(OPCODE) \
+        if (OPCODE##_size) { \
+            cache->OPCODE##_cache = (_PyCodeObjectCache_##OPCODE*) PyMem_Calloc( \
+                OPCODE##_size, sizeof(_PyCodeObjectCache_##OPCODE)); \
+            if (cache->OPCODE##_cache == NULL) { \
+                PyErr_NoMemory(); \
+                goto error; \
+            } \
+            cache->OPCODE##_size = OPCODE##_size; \
+            if (OPCODE##_overflow) { \
+                cache->OPCODE##_cache[OPCACHE_MAX_SLOTS - 1].optimized = -1; \
+            } \
+        }
+    OPCACHE_OPCODES(_OPCODE_INIT)
+#   undef _OPCODE_INIT
+
+    cache->index = index;
+    if (_PyCode_SetExtra((PyObject *)co, 0, cache)) {
+        goto error;
+    }
+
+#   if OPCACHE_COLLECT_STATS
+    opcode_stats_memory += sizeof(_PyCodeObjectCache);
+#   define _OPCODE_SIZE(OPCODE) \
+        if (OPCODE##_size) { \
+            opcode_stats_memory += OPCODE##_size * \
+                sizeof(_PyCodeObjectCache_##OPCODE); \
+        }
+    OPCACHE_OPCODES(_OPCODE_SIZE)
+#   undef _OPCODE_SIZE
+#   endif
+
+    return 0;
+
+error:
+    /* `cache` is non-NULL here and every OPCODE##_cache pointer is either
+       NULL or a live allocation, so each can be passed to PyMem_Free(). */
+    PyMem_Free(index);
+
+    /* Cleanup opcode structs.  The define must end on its own line: a
+       trailing backslash would pull the expansion below into the macro
+       body and nothing would be freed. */
+#   define _OPCODE_CLEANUP(OPCODE) PyMem_Free(cache->OPCODE##_cache);
+    OPCACHE_OPCODES(_OPCODE_CLEANUP)
+#   undef _OPCODE_CLEANUP
+
+    PyMem_Free(cache);
+
+    return -1;
+}
+
+
+/* Free function for co_extra slot 0: release an opcode cache created by
+   init_opcode_cache().  `co_extra` is the _PyCodeObjectCache pointer that
+   was stored with _PyCode_SetExtra(); NULL is accepted and ignored. */
+void
+_PyEval_FreeOpcodeCache(void *co_extra)
+{
+    _PyCodeObjectCache *cache = (_PyCodeObjectCache *)co_extra;
+
+    /* A cache is only built once a code object has been called
+       OPCACHE_CALLS_THRESHOLD times, so the slot can still be empty.
+       NOTE(review): whether the co_extra machinery calls free functions
+       for empty slots is not visible here; the guard makes it safe
+       either way. */
+    if (cache == NULL) {
+        return;
+    }
+
+#   define _OPCODE_CLEANUP(OPCODE) PyMem_Free(cache->OPCODE##_cache);
+    OPCACHE_OPCODES(_OPCODE_CLEANUP)
+#   undef _OPCODE_CLEANUP
+
+    PyMem_Free(cache->index);
+    PyMem_Free(cache);
+}
+
+
+/* --- Stats --- */
+
+
+#if OPCACHE_COLLECT_STATS
+
+/* Counter helpers, each bumping one per-opcode table.  There is
+   deliberately no semicolon after `while (0)`: the caller's own `;`
+   completes the statement, so an invocation stays a single statement
+   even as the unbraced body of an if/else. */
+#define _OPCACHE_STATS_OPT(opcode) do { \
+        opcode_stats_opts[opcode]++; \
+    } while (0)
+
+#define _OPCACHE_STATS_DEOPT(opcode) do { \
+        opcode_stats_deopts[opcode]++; \
+    } while (0)
+
+#define OPCACHE_STATS_HIT(opcode) do { \
+        opcode_stats_hits[opcode]++; \
+    } while (0)
+
+#define OPCACHE_STATS_MISS(opcode) do { \
+        opcode_stats_misses[opcode]++; \
+    } while (0)
+
+
+/* Print the total cache memory and, for every opcode listed in
+   OPCACHE_OPCODES, its opts/deopts/hits/misses counters to stdout. */
+static void
+opcode_cache_print_stats(void)
+{
+    printf("=== OPCODE CACHE === \n");
+    printf("memory: %" PRIu64 "\n", opcode_stats_memory);
+
+#   define _OPCODE_PRINT_STAT(OPCODE) \
+        printf("--- " #OPCODE " ---\n"); \
+        printf("opts: %" PRIu64 "\n", opcode_stats_opts[OPCODE]); \
+        printf("deopts: %" PRIu64 "\n", opcode_stats_deopts[OPCODE]); \
+        printf("hits: %" PRIu64 "\n", opcode_stats_hits[OPCODE]); \
+        printf("misses: %" PRIu64 "\n\n", opcode_stats_misses[OPCODE]);
+
+    OPCACHE_OPCODES(_OPCODE_PRINT_STAT)
+#   undef _OPCODE_PRINT_STAT
+}
+
+
+#else
+
+/* Stats disabled: every helper expands to nothing, leaving only the
+   caller's `;` as an empty statement. */
+#define _OPCACHE_STATS_OPT(opcode)
+#define _OPCACHE_STATS_DEOPT(opcode)
+
+#define OPCACHE_STATS_HIT(opcode)
+#define OPCACHE_STATS_MISS(opcode)
+
+#endif
+
+
+/* Generates OPCACHE_GET_<OPCODE>(cache, offset) for every cached opcode:
+   look up the entry belonging to the instruction at code-unit index
+   `offset`.  Returns NULL when `cache` is NULL or the entry has been
+   deoptimized (optimized < 0); otherwise returns the entry.
+   NOTE(review): `offset` is not range-checked, and index[] cannot tell
+   "slot 0" apart from "no slot assigned", so callers must only pass the
+   offset of an <OPCODE> instruction. */
+#define _OPCACHE_DEFINE_GETTER(OPCODE) \
+    static inline _PyCodeObjectCache_##OPCODE * \
+    OPCACHE_GET_##OPCODE(_PyCodeObjectCache *cache, int offset) \
+    { \
+        _PyCodeObjectCache_##OPCODE *entry; \
+        Py_ssize_t slot; \
+        if (cache == NULL) { \
+            return NULL; \
+        } \
+        slot = cache->index[offset]; \
+        assert(cache->OPCODE##_size > slot); \
+        entry = &cache->OPCODE##_cache[slot]; \
+        if (entry->optimized < 0) { \
+            return NULL; \
+        } \
+        return entry; \
+    }
+OPCACHE_OPCODES(_OPCACHE_DEFINE_GETTER)
+#undef _OPCACHE_DEFINE_GETTER
+
+
+/* Generates OPCACHE_MAYBE_DEOPT_<OPCODE>(opcache): spend one unit of the
+   entry's `optimized` budget.  Entries that are already deoptimized
+   (optimized < 0) are left alone; when the decrement reaches exactly 0
+   the entry is marked deoptimized (-1) and the deopt counter is bumped. */
+#define _OPCACHE_DEFINE_MAYBE_DEOPT(OPCODE) \
+    static inline void \
+    OPCACHE_MAYBE_DEOPT_##OPCODE( \
+        _PyCodeObjectCache_##OPCODE *opcache) \
+    { \
+        if (opcache->optimized < 0) { \
+            return; \
+        } \
+        if (--opcache->optimized == 0) { \
+            opcache->optimized = -1; \
+            _OPCACHE_STATS_DEOPT(OPCODE); \
+        } \
+    }
+OPCACHE_OPCODES(_OPCACHE_DEFINE_MAYBE_DEOPT)
+#undef _OPCACHE_DEFINE_MAYBE_DEOPT
+
+
+/* Generates OPCACHE_UPDATE_<OPCODE>(opcache), called before an entry is
+   (re)filled.  Returns -1 when `opcache` is NULL, meaning there is
+   nothing to fill; otherwise returns 0.  A never-optimized entry
+   (optimized == 0) receives the full OPCACHE_MISSES_BEFORE_DEOPT budget;
+   any other entry goes through OPCACHE_MAYBE_DEOPT_<OPCODE>(). */
+#define _OPCACHE_DEFINE_UPDATER(OPCODE) \
+    static inline int \
+    OPCACHE_UPDATE_##OPCODE(_PyCodeObjectCache_##OPCODE *opcache) \
+    { \
+        if (opcache == NULL) { \
+            return -1; \
+        } \
+        if (opcache->optimized != 0) { \
+            OPCACHE_MAYBE_DEOPT_##OPCODE(opcache); \
+            return 0; \
+        } \
+        /* first time */ \
+        opcache->optimized = OPCACHE_MISSES_BEFORE_DEOPT; \
+        _OPCACHE_STATS_OPT(OPCODE); \
+        return 0; \
+    }
+OPCACHE_OPCODES(_OPCACHE_DEFINE_UPDATER)
+#undef _OPCACHE_DEFINE_UPDATER
diff -r 6c5f9c6c25ea Python/pylifecycle.c
--- a/Python/pylifecycle.c Wed Sep 14 18:17:32 2016 +0300
+++ b/Python/pylifecycle.c Wed Sep 14 16:38:08 2016 -0400
@@ -739,6 +739,8 @@
}
#endif
+ _PyEval_Fini();
+
call_ll_exitfuncs();
return status;
}
diff -r 6c5f9c6c25ea Python/pystate.c
--- a/Python/pystate.c Wed Sep 14 18:17:32 2016 +0300
+++ b/Python/pystate.c Wed Sep 14 16:38:08 2016 -0400
@@ -224,7 +224,10 @@
tstate->coroutine_wrapper = NULL;
tstate->in_coroutine_wrapper = 0;
- tstate->co_extra_user_count = 0;
+
+ /* Index 0 is reserved for the ceval opcode cache */
+ tstate->co_extra_user_count = 1;
+ tstate->co_extra_freefuncs[0] = _PyEval_FreeOpcodeCache;
tstate->async_gen_firstiter = NULL;
tstate->async_gen_finalizer = NULL;