# HG changeset patch
# Parent 1c80a7bcbd6bf84ed17a031772cb8dc5cddaae18
[mq]: functools.lru_cache-in-c

Build the functools.lru_cache wrapper in C.

Lib/functools.py: import c_lru_cache from _functools and use it inside
lru_cache's decorating_function in place of the pure-Python wrapper.  The
removed code covered three variants (maxsize == 0, maxsize is None, bounded)
plus cache_info() and cache_clear(); it also held the Lock that guarded the
linked-list updates.  update_wrapper() is still applied on the Python side.

Lib/test/test_functools.py: give the function cached in test_lru a docstring
and add test_lru_cache_decoration, which checks that every attribute named in
WRAPPER_ASSIGNMENTS is copied onto the wrapper.

Modules/_functoolsmodule.c: add the c_lru_cache type and the module-level
kwd_mark object used to separate positional from keyword arguments in keys.

NOTE(review): the import of c_lru_cache is unconditional, so functools will
fail to import wherever _functools does not provide it -- confirm that is
intended.

diff --git a/Lib/functools.py b/Lib/functools.py
--- a/Lib/functools.py
+++ b/Lib/functools.py
@@ -11,7 +11,7 @@
 __all__ = ['update_wrapper', 'wraps', 'WRAPPER_ASSIGNMENTS', 'WRAPPER_UPDATES',
            'total_ordering', 'cmp_to_key', 'lru_cache', 'reduce', 'partial']
 
-from _functools import partial, reduce
+from _functools import partial, reduce, c_lru_cache
 from collections import namedtuple
 try:
     from _thread import allocate_lock as Lock
@@ -162,113 +162,8 @@
 
     """
 
-    # Users should only access the lru_cache through its public API:
-    #       cache_info, cache_clear, and f.__wrapped__
-    # The internals of the lru_cache are encapsulated for thread safety and
-    # to allow the implementation to change (including a possible C version).
-
     def decorating_function(user_function):
-
-        cache = {}
-        hits = misses = 0
-        kwd_mark = (object(),)          # separate positional and keyword args
-        cache_get = cache.get           # bound method to lookup a key or return None
-        sentinel = object()             # unique object used with cache_get
-        _len = len                      # localize the global len() function
-        lock = Lock()                   # because linkedlist updates aren't threadsafe
-        root = []                       # root of the circular doubly linked list
-        root[:] = [root, root, None, None]      # initialize by pointing to self
-        PREV, NEXT, KEY, RESULT = 0, 1, 2, 3    # names for the link fields
-
-        def make_key(args, kwds, typed, tuple=tuple, sorted=sorted, type=type):
-            # build a cache key from positional and keyword args
-            key = args
-            if kwds:
-                sorted_items = tuple(sorted(kwds.items()))
-                key += kwd_mark + sorted_items
-            if typed:
-                key += tuple(type(v) for v in args)
-                if kwds:
-                    key += tuple(type(v) for k, v in sorted_items)
-            return key
-
-        if maxsize == 0:
-
-            def wrapper(*args, **kwds):
-                # no caching, just a statistics update after a successful call
-                nonlocal misses
-                result = user_function(*args, **kwds)
-                misses += 1
-                return result
-
-        elif maxsize is None:
-
-            def wrapper(*args, **kwds):
-                # simple caching without ordering or size limit
-                nonlocal hits, misses
-                key = make_key(args, kwds, typed) if kwds or typed else args
-                result = cache_get(key, sentinel)
-                if result is not sentinel:
-                    hits += 1
-                    return result
-                result = user_function(*args, **kwds)
-                cache[key] = result
-                misses += 1
-                return result
-
-        else:
-
-            def wrapper(*args, **kwds):
-                # size limited caching that tracks accesses by recency
-                nonlocal root, hits, misses
-                key = make_key(args, kwds, typed) if kwds or typed else args
-                with lock:
-                    link = cache_get(key)
-                    if link is not None:
-                        # move the link to the front of the circular queue
-                        link_prev, link_next, key, result = link
-                        link_prev[NEXT] = link_next
-                        link_next[PREV] = link_prev
-                        last = root[PREV]
-                        last[NEXT] = root[PREV] = link
-                        link[PREV] = last
-                        link[NEXT] = root
-                        hits += 1
-                        return result
-                result = user_function(*args, **kwds)
-                with lock:
-                    if _len(cache) < maxsize:
-                        # put result in a new link at the front of the queue
-                        last = root[PREV]
-                        link = [last, root, key, result]
-                        cache[key] = last[NEXT] = root[PREV] = link
-                    else:
-                        # use root to store the new key and result
-                        root[KEY] = key
-                        root[RESULT] = result
-                        cache[key] = root
-                        # empty the oldest link and make it the new root
-                        root = root[NEXT]
-                        del cache[root[KEY]]
-                        root[KEY] = root[RESULT] = None
-                    misses += 1
-                return result
-
-        def cache_info():
-            """Report cache statistics"""
-            with lock:
-                return _CacheInfo(hits, misses, maxsize, len(cache))
-
-        def cache_clear():
-            """Clear the cache and cache statistics"""
-            nonlocal hits, misses
-            with lock:
-                cache.clear()
-                root[:] = [root, root, None, None]
-                hits = misses = 0
-
-        wrapper.cache_info = cache_info
-        wrapper.cache_clear = cache_clear
+        wrapper = c_lru_cache(user_function, maxsize, typed, _CacheInfo)
         return update_wrapper(wrapper, user_function)
 
     return decorating_function
diff --git a/Lib/test/test_functools.py b/Lib/test/test_functools.py
--- a/Lib/test/test_functools.py
+++ b/Lib/test/test_functools.py
@@ -623,6 +623,7 @@
 
     def test_lru(self):
         def orig(x, y):
+            '''does a thingy'''
             return 3*x+y
         f = functools.lru_cache(maxsize=20)(orig)
         hits, misses, maxsize, currsize = f.cache_info()
@@ -756,6 +757,22 @@
             self.assertEqual(square.cache_info().hits, 4)
             self.assertEqual(square.cache_info().misses, 4)
 
+    def test_lru_cache_decoration(self):
+        def f(zomg: 'zomg_annotation'):
+            '''f doc string'''
+            return 42
+        g = functools.lru_cache()(f)
+        for attr in functools.WRAPPER_ASSIGNMENTS:
+            self.assertEqual(getattr(g, attr), getattr(f, attr))
+
+    def test_lru_method(self):
+        class X:
+            @functools.lru_cache()
+            def double(self, x):
+                return 2 * x
+        self.assertEqual(X().double(21), 42)
+
+
 def test_main(verbose=None):
     test_classes = (
         TestPartial,
diff --git a/Modules/_functoolsmodule.c b/Modules/_functoolsmodule.c
--- a/Modules/_functoolsmodule.c
+++ b/Modules/_functoolsmodule.c
@@ -540,6 +540,527 @@
 of the sequence in the calculation, and serves as a default when the\n\
 sequence is empty.");
 
+/* lru_cache object **********************************************************/
+
+/* this object is used to delimit args and keywords in the cache keys */
+static PyObject *kwd_mark;
+
+typedef struct lru_list_elem lru_list_elem;
+
+/* A node of the circular doubly linked recency list.  Each node owns a
+   reference to its key and to its result. */
+struct lru_list_elem {
+    lru_list_elem *prev, *next;
+    PyObject *key, *result;
+};
+
+typedef struct lru_cache_object lru_cache_object;
+
+typedef PyObject *(*lru_cache_ternaryfunc)(lru_cache_object *, PyObject *, PyObject *);
+
+struct lru_cache_object {
+    PyObject_HEAD
+    Py_ssize_t maxsize;             /* limit used by the bounded wrapper */
+    PyObject *maxsize_O;            /* maxsize as passed in, for cache_info() */
+    PyObject *func;                 /* the wrapped callable */
+    lru_cache_ternaryfunc wrapper;  /* strategy selected in lru_cache_new() */
+    PyObject *cache;                /* dict: key -> result, or key -> capsule */
+    PyObject *cache_info_type;
+    Py_ssize_t misses, hits;
+    lru_list_elem root;             /* list sentinel; root.next is the oldest */
+    int typed;
+    PyObject *dict;                 /* instance __dict__ */
+};
+
+static PyTypeObject lru_cache_type;
+
+/* Build the cache key for one call.
+ *
+ * Without keywords and with typed false the key is args itself.  Otherwise
+ * it is a new tuple holding the positional arguments, then (if keywords were
+ * passed) kwd_mark and the sorted (name, value) pairs, then (if typed) the
+ * type of every positional argument and of every keyword value.
+ *
+ * Returns a new reference, or NULL with an exception set.
+ */
+static PyObject *
+lru_cache_make_key(PyObject *args, PyObject *kwds, int typed)
+{
+    PyObject *key, *sorted_items = NULL;
+    Py_ssize_t nargs = PyTuple_GET_SIZE(args), nkwds = 0;
+    Py_ssize_t key_size, pos, key_pos = 0;
+
+    /* an empty keyword dict must produce the same key as no keywords */
+    if (kwds && PyDict_Size(kwds) == 0)
+        kwds = NULL;
+
+    /* short path, key will match args anyway, which is a tuple */
+    if (!typed && !kwds) {
+        Py_INCREF(args);
+        return args;
+    }
+
+    if (kwds) {
+        if (!(sorted_items = PyDict_Items(kwds)))
+            return NULL;
+        if (0 > PyList_Sort(sorted_items)) {
+            Py_DECREF(sorted_items);
+            return NULL;
+        }
+        nkwds = PyList_GET_SIZE(sorted_items);
+    }
+
+    key_size = nargs + nkwds;
+    if (typed)
+        key_size *= 2;
+    if (kwds)
+        key_size++;
+
+    if (!(key = PyTuple_New(key_size))) {
+        Py_XDECREF(sorted_items);
+        return NULL;
+    }
+
+    for (pos = 0; pos < nargs; ++pos) {
+        PyObject *item = PyTuple_GET_ITEM(args, pos);
+        Py_INCREF(item);
+        PyTuple_SET_ITEM(key, key_pos++, item);
+    }
+    if (kwds) {
+        Py_INCREF(kwd_mark);
+        PyTuple_SET_ITEM(key, key_pos++, kwd_mark);
+        for (pos = 0; pos < nkwds; ++pos) {
+            PyObject *item = PyList_GET_ITEM(sorted_items, pos);
+            Py_INCREF(item);
+            PyTuple_SET_ITEM(key, key_pos++, item);
+        }
+    }
+    if (typed) {
+        for (pos = 0; pos < nargs; ++pos) {
+            PyObject *item = (PyObject *)Py_TYPE(PyTuple_GET_ITEM(args, pos));
+            Py_INCREF(item);
+            PyTuple_SET_ITEM(key, key_pos++, item);
+        }
+        for (pos = 0; pos < nkwds; ++pos) {
+            PyObject *pair = PyList_GET_ITEM(sorted_items, pos);
+            PyObject *item = (PyObject *)Py_TYPE(PyTuple_GET_ITEM(pair, 1));
+            Py_INCREF(item);
+            PyTuple_SET_ITEM(key, key_pos++, item);
+        }
+    }
+    assert(key_pos == key_size);
+
+    Py_XDECREF(sorted_items);
+    return key;
+}
+
+/* maxsize <= 0: never cache, just count each successful call as a miss */
+static PyObject *
+uncached_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwds)
+{
+    PyObject *result = PyObject_Call(self->func, args, kwds);
+    if (!result)
+        return NULL;
+    self->misses++;
+    return result;
+}
+
+/* maxsize is None: plain dict cache without ordering or size limit */
+static PyObject *
+infinite_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwds)
+{
+    PyObject *result;
+    PyObject *key = lru_cache_make_key(args, kwds, self->typed);
+    if (!key)
+        return NULL;
+    result = PyDict_GetItemWithError(self->cache, key);
+    if (result) {
+        Py_INCREF(result);
+        self->hits++;
+        Py_DECREF(key);
+        return result;
+    }
+    if (PyErr_Occurred()) {
+        Py_DECREF(key);
+        return NULL;
+    }
+    result = PyObject_Call(self->func, args, kwds);
+    if (!result) {
+        Py_DECREF(key);
+        return NULL;
+    }
+    if (PyDict_SetItem(self->cache, key, result) < 0) {
+        Py_DECREF(result);
+        Py_DECREF(key);
+        return NULL;
+    }
+    Py_DECREF(key);
+    self->misses++;
+    return result;
+}
+
+/* unlink a link from the recency list */
+static void
+lru_cache_list_extricate(lru_list_elem *link)
+{
+    link->prev->next = link->next;
+    link->next->prev = link->prev;
+}
+
+/* insert a link just before root, i.e. at the most recently used end */
+static void
+lru_cache_list_append(lru_list_elem *root, lru_list_elem *link)
+{
+    lru_list_elem *last = root->prev;
+    last->next = root->prev = link;
+    link->prev = last;
+    link->next = root;
+}
+
+/* maxsize > 0: the dict maps each key to a capsule that wraps the key's
+   link in the recency list */
+static PyObject *
+bounded_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwds)
+{
+    lru_list_elem *link;
+    PyObject *key, *value, *result;
+    PyObject *old_key = NULL, *old_result = NULL;
+
+    key = lru_cache_make_key(args, kwds, self->typed);
+    if (!key)
+        return NULL;
+
+    value = PyDict_GetItemWithError(self->cache, key);
+    if (value) {
+        Py_DECREF(key);
+        link = PyCapsule_GetPointer(value, NULL);
+        if (!link)
+            return NULL;
+        /* hit: make the link the most recently used one */
+        lru_cache_list_extricate(link);
+        lru_cache_list_append(&self->root, link);
+        self->hits++;
+        Py_INCREF(link->result);
+        return link->result;
+    }
+    if (PyErr_Occurred()) {
+        Py_DECREF(key);
+        return NULL;
+    }
+
+    result = PyObject_Call(self->func, args, kwds);
+    if (!result) {
+        Py_DECREF(key);
+        return NULL;
+    }
+
+    /* The call may have re-entered this cache and stored the key already.
+       Storing it again would leave a stale link in the list, so only the
+       result is handed back. */
+    value = PyDict_GetItemWithError(self->cache, key);
+    if (value) {
+        Py_DECREF(key);
+        self->misses++;
+        return result;
+    }
+    if (PyErr_Occurred())
+        goto error;
+
+    if (self->root.next != &self->root &&
+        PyDict_Size(self->cache) >= self->maxsize) {
+        /* full: recycle the oldest link together with its capsule */
+        link = self->root.next;
+        value = PyDict_GetItemWithError(self->cache, link->key);
+        if (!value) {
+            if (!PyErr_Occurred())
+                PyErr_SetObject(PyExc_KeyError, link->key);
+            goto error;
+        }
+        Py_INCREF(value);
+        if (PyDict_DelItem(self->cache, link->key) < 0) {
+            Py_DECREF(value);
+            goto error;
+        }
+        lru_cache_list_extricate(link);
+        /* released below, once the cache is consistent again */
+        old_key = link->key;
+        old_result = link->result;
+    } else {
+        link = PyMem_New(lru_list_elem, 1);
+        if (!link) {
+            PyErr_NoMemory();
+            goto error;
+        }
+        value = PyCapsule_New(link, NULL, NULL);
+        if (!value) {
+            PyMem_Free(link);
+            goto error;
+        }
+    }
+
+    /* the link owns its own references to key and result */
+    Py_INCREF(key);
+    link->key = key;
+    Py_INCREF(result);
+    link->result = result;
+    if (PyDict_SetItem(self->cache, key, value) < 0) {
+        /* the link is in neither the dict nor the list: discard it */
+        Py_DECREF(value);
+        Py_DECREF(link->key);
+        Py_DECREF(link->result);
+        PyMem_Free(link);
+        Py_XDECREF(old_key);
+        Py_XDECREF(old_result);
+        goto error;
+    }
+    lru_cache_list_append(&self->root, link);
+    Py_DECREF(value);
+    Py_DECREF(key);
+    Py_XDECREF(old_key);
+    Py_XDECREF(old_result);
+    self->misses++;
+    return result;
+
+error:
+    Py_DECREF(key);
+    Py_DECREF(result);
+    return NULL;
+}
+
+/* tp_new: c_lru_cache(user_function, maxsize, typed, cache_info_type) */
+static PyObject *
+lru_cache_new(PyTypeObject *type, PyObject *args, PyObject *kw)
+{
+    PyObject *func, *maxsize_O, *typed_O, *cache_info_type;
+    int typed;
+    lru_cache_object *obj;
+    Py_ssize_t maxsize = -1;    /* not used when maxsize is None */
+    lru_cache_ternaryfunc wrapper;
+    static char *keywords[] = {"user_function", "maxsize", "typed",
+                               "cache_info_type", NULL};
+
+    if (!PyArg_ParseTupleAndKeywords(args, kw, "OOOO:lru_cache", keywords,
+                                     &func, &maxsize_O, &typed_O,
+                                     &cache_info_type)) {
+        return NULL;
+    }
+
+    if (!PyCallable_Check(func)) {
+        PyErr_SetString(PyExc_TypeError,
+                        "the first argument must be callable");
+        return NULL;
+    }
+
+    /* select the caching function */
+    if (maxsize_O == Py_None) {
+        wrapper = infinite_lru_cache_wrapper;
+    } else if (PyNumber_Check(maxsize_O)) {
+        maxsize = PyNumber_AsSsize_t(maxsize_O, PyExc_OverflowError);
+        if (maxsize == -1 && PyErr_Occurred())
+            return NULL;
+        /* a negative limit can never be satisfied, so nothing is cached */
+        if (maxsize <= 0)
+            wrapper = uncached_lru_cache_wrapper;
+        else
+            wrapper = bounded_lru_cache_wrapper;
+    } else {
+        PyErr_SetString(PyExc_TypeError, "maxsize should be integer or None");
+        return NULL;
+    }
+
+    typed = PyObject_IsTrue(typed_O);
+    if (typed < 0)
+        return NULL;
+
+    obj = (lru_cache_object *)type->tp_alloc(type, 0);
+    if (obj == NULL)
+        return NULL;
+
+    /* Make the list valid before anything below can fail: the traversal
+       and deallocation code walks it.  References are taken only as they
+       are stored, so Py_DECREF(obj) releases exactly what obj owns. */
+    obj->root.prev = &obj->root;
+    obj->root.next = &obj->root;
+    if (!(obj->cache = PyDict_New())) {
+        Py_DECREF(obj);
+        return NULL;
+    }
+    obj->maxsize = maxsize;
+    Py_INCREF(maxsize_O);
+    obj->maxsize_O = maxsize_O;
+    Py_INCREF(func);
+    obj->func = func;
+    obj->wrapper = wrapper;
+    obj->misses = obj->hits = 0;
+    obj->typed = typed;
+    Py_INCREF(cache_info_type);
+    obj->cache_info_type = cache_info_type;
+
+    return (PyObject *)obj;
+}
+
+/* Empty the recency list and release the references its links own.  The
+   capsules stored in the cache dict have no destructor, so links are
+   freed here only; clear the dict before calling this. */
+static void
+lru_cache_clear_list(lru_list_elem *root)
+{
+    lru_list_elem *link = root->next;
+
+    /* detach the chain first: the DECREFs below may run arbitrary code */
+    root->next = root->prev = root;
+    while (link != root) {
+        lru_list_elem *next = link->next;
+        Py_DECREF(link->key);
+        Py_DECREF(link->result);
+        PyMem_Free(link);
+        link = next;
+    }
+}
+
+static int
+lru_cache_traverse(lru_cache_object *self, visitproc visit, void *arg)
+{
+    lru_list_elem *link;
+
+    for (link = self->root.next; link != &self->root; link = link->next) {
+        Py_VISIT(link->key);
+        Py_VISIT(link->result);
+    }
+    Py_VISIT(self->maxsize_O);
+    Py_VISIT(self->func);
+    Py_VISIT(self->cache);
+    Py_VISIT(self->cache_info_type);
+    Py_VISIT(self->dict);
+    return 0;
+}
+
+static int
+lru_cache_tp_clear(lru_cache_object *self)
+{
+    Py_CLEAR(self->cache);
+    lru_cache_clear_list(&self->root);
+    Py_CLEAR(self->maxsize_O);
+    Py_CLEAR(self->func);
+    Py_CLEAR(self->cache_info_type);
+    Py_CLEAR(self->dict);
+    return 0;
+}
+
+static void
+lru_cache_dealloc(lru_cache_object *obj)
+{
+    PyObject_GC_UnTrack(obj);
+    lru_cache_tp_clear(obj);
+    Py_TYPE(obj)->tp_free(obj);
+}
+
+static PyObject *
+lru_cache_call(lru_cache_object *self, PyObject *args, PyObject *kwds)
+{
+    return self->wrapper(self, args, kwds);
+}
+
+/* bind like a plain function, so that a cached method still receives self */
+static PyObject *
+lru_cache_descr_get(PyObject *self, PyObject *obj, PyObject *type)
+{
+    if (obj == NULL || obj == Py_None) {
+        Py_INCREF(self);
+        return self;
+    }
+    return PyMethod_New(self, obj);
+}
+
+static PyObject *
+lru_cache_cache_info(lru_cache_object *self, PyObject *unused)
+{
+    return PyObject_CallFunction(self->cache_info_type, "nnOn",
+                                 self->hits, self->misses, self->maxsize_O,
+                                 PyDict_Size(self->cache));
+}
+
+static PyObject *
+lru_cache_cache_clear(lru_cache_object *self, PyObject *unused)
+{
+    /* the dict goes first: its capsules point at the links freed below */
+    do PyDict_Clear(self->cache); while (PyDict_Size(self->cache));
+    self->hits = self->misses = 0;
+    lru_cache_clear_list(&self->root);
+    Py_RETURN_NONE;
+}
+
+PyDoc_STRVAR(lru_cache_doc,
+"Create a cached callable that wraps another function.\n\
+\n\
+user_function:      the function being cached\n\
+\n\
+maxsize:  0         for no caching\n\
+          None      for unlimited cache size\n\
+          n         for a bounded cache\n\
+\n\
+typed:    False     cache f(3) and f(3.0) as identical calls\n\
+          True      cache f(3) and f(3.0) as distinct calls\n\
+\n\
+cache_info_type:    namedtuple class with the fields:\n\
+                        hits misses maxsize currsize\n"
+);
+
+static PyMethodDef lru_cache_methods[] = {
+    {"cache_info", (PyCFunction)lru_cache_cache_info, METH_NOARGS},
+    {"cache_clear", (PyCFunction)lru_cache_cache_clear, METH_NOARGS},
+    {NULL}
+};
+
+static PyGetSetDef lru_cache_getsetlist[] = {
+    {"__dict__", PyObject_GenericGetDict, PyObject_GenericSetDict},
+    {NULL}
+};
+
+static PyTypeObject lru_cache_type = {
+    PyVarObject_HEAD_INIT(NULL, 0)
+    "functools.c_lru_cache",            /* tp_name */
+    sizeof(lru_cache_object),           /* tp_basicsize */
+    0,                                  /* tp_itemsize */
+    /* methods */
+    (destructor)lru_cache_dealloc,      /* tp_dealloc */
+    0,                                  /* tp_print */
+    0,                                  /* tp_getattr */
+    0,                                  /* tp_setattr */
+    0,                                  /* tp_reserved */
+    0,                                  /* tp_repr */
+    0,                                  /* tp_as_number */
+    0,                                  /* tp_as_sequence */
+    0,                                  /* tp_as_mapping */
+    0,                                  /* tp_hash */
+    (ternaryfunc)lru_cache_call,        /* tp_call */
+    0,                                  /* tp_str */
+    0,                                  /* tp_getattro */
+    0,                                  /* tp_setattro */
+    0,                                  /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
+                                        /* tp_flags */
+    lru_cache_doc,                      /* tp_doc */
+    (traverseproc)lru_cache_traverse,   /* tp_traverse */
+    (inquiry)lru_cache_tp_clear,        /* tp_clear */
+    0,                                  /* tp_richcompare */
+    0,                                  /* tp_weaklistoffset */
+    0,                                  /* tp_iter */
+    0,                                  /* tp_iternext */
+    lru_cache_methods,                  /* tp_methods */
+    0,                                  /* tp_members */
+    lru_cache_getsetlist,               /* tp_getset */
+    0,                                  /* tp_base */
+    0,                                  /* tp_dict */
+    lru_cache_descr_get,                /* tp_descr_get */
+    0,                                  /* tp_descr_set */
+    offsetof(lru_cache_object, dict),   /* tp_dictoffset */
+    0,                                  /* tp_init */
+    0,                                  /* tp_alloc */
+    lru_cache_new,                      /* tp_new */
+    PyObject_GC_Del,                    /* tp_free */
+};
+
 /* module level code ********************************************************/
 
 PyDoc_STRVAR(module_doc,
@@ -552,6 +1073,11 @@
     {NULL,              NULL}           /* sentinel */
 };
 
+static void
+module_free(void *m)
+{
+    Py_CLEAR(kwd_mark);
+}
 
 static struct PyModuleDef _functoolsmodule = {
     PyModuleDef_HEAD_INIT,
@@ -562,7 +1088,7 @@
     NULL,
     NULL,
     NULL,
-    NULL
+    module_free,
 };
 
 PyMODINIT_FUNC
@@ -573,6 +1099,7 @@
     char *name;
     PyTypeObject *typelist[] = {
         &partial_type,
+        &lru_cache_type,
         NULL
     };
 
@@ -580,6 +1107,11 @@
     if (m == NULL)
         return NULL;
 
+    if (!(kwd_mark = PyObject_CallObject((PyObject *)&PyBaseObject_Type, NULL))) {
+        Py_DECREF(m);
+        return NULL;
+    }
+
     for (i=0 ; typelist[i] != NULL ; i++) {
         if (PyType_Ready(typelist[i]) < 0) {
             Py_DECREF(m);