Index: Include/dictobject.h =================================================================== --- Include/dictobject.h (revision 59362) +++ Include/dictobject.h (working copy) @@ -111,6 +111,8 @@ PyAPI_FUNC(int) PyDict_Contains(PyObject *mp, PyObject *key); PyAPI_FUNC(int) _PyDict_Contains(PyObject *mp, PyObject *key, long hash); +PyAPI_FUNC(int) PyDict_IsStringKeysOnly(PyObject *mp); + /* PyDict_Update(mp, other) is equivalent to PyDict_Merge(mp, other, 1). */ PyAPI_FUNC(int) PyDict_Update(PyObject *mp, PyObject *other); Index: Include/object.h =================================================================== --- Include/object.h (revision 59362) +++ Include/object.h (working copy) @@ -391,6 +391,7 @@ PyAPI_FUNC(PyObject *) PyType_GenericNew(PyTypeObject *, PyObject *, PyObject *); PyAPI_FUNC(PyObject *) _PyType_Lookup(PyTypeObject *, PyObject *); +PyAPI_FUNC(PyObject *) _PyType_CachingLookup(PyTypeObject *, PyObject *); /* Generic operations on objects */ PyAPI_FUNC(int) PyObject_Print(PyObject *, FILE *, int); Index: Objects/object.c =================================================================== --- Objects/object.c (revision 59362) +++ Objects/object.c (working copy) @@ -1287,31 +1287,7 @@ goto done; } - /* Inline _PyType_Lookup */ - { - Py_ssize_t i, n; - PyObject *mro, *base, *dict; - - /* Look in tp_dict of types in MRO */ - mro = tp->tp_mro; - assert(mro != NULL); - assert(PyTuple_Check(mro)); - n = PyTuple_GET_SIZE(mro); - for (i = 0; i < n; i++) { - base = PyTuple_GET_ITEM(mro, i); - if (PyClass_Check(base)) - dict = ((PyClassObject *)base)->cl_dict; - else { - assert(PyType_Check(base)); - dict = ((PyTypeObject *)base)->tp_dict; - } - assert(dict && PyDict_Check(dict)); - descr = PyDict_GetItem(dict, name); - if (descr != NULL) - break; - } - } - + descr = _PyType_CachingLookup(tp, name); Py_XINCREF(descr); f = NULL; @@ -1410,7 +1386,7 @@ goto done; } - descr = _PyType_Lookup(tp, name); + descr = _PyType_CachingLookup(tp, name); f = NULL; if (descr != NULL && PyType_HasFeature(descr->ob_type, Py_TPFLAGS_HAVE_CLASS)) { Index: Objects/typeobject.c =================================================================== --- Objects/typeobject.c (revision 59362) +++ Objects/typeobject.c (working copy) @@ -5,6 +5,9 @@ #include +static PyObject *PyAttrCache_New(void); +static int type_attrcache_callback(PyTypeObject *type, void *data); + static PyMemberDef type_members[] = { {"__basicsize__", T_INT, offsetof(PyTypeObject,tp_basicsize),READONLY}, {"__itemsize__", T_INT, offsetof(PyTypeObject, tp_itemsize), READONLY}, @@ -188,7 +191,7 @@ int r = 0; PyObject *ob, *temp; PyTypeObject *new_base, *old_base; - PyObject *old_bases, *old_mro; + PyObject *old_bases, *old_mro, *old_cache; if (!(type->tp_flags & Py_TPFLAGS_HEAPTYPE)) { PyErr_Format(PyExc_TypeError, @@ -245,6 +248,7 @@ old_bases = type->tp_bases; old_base = type->tp_base; old_mro = type->tp_mro; + old_cache = type->tp_cache; type->tp_bases = value; type->tp_base = new_base; @@ -252,7 +256,7 @@ if (mro_internal(type) < 0) { goto bail; } - + temp = PyList_New(0); if (!temp) goto bail; @@ -276,6 +280,11 @@ Py_DECREF(temp); + /* Set up a lookup cache */ + type->tp_cache = PyAttrCache_New(); + if (type->tp_cache == NULL) + goto bail; + /* any base that was in __bases__ but now isn't, we need to remove |type| from its tp_subclasses. conversely, any class now in __bases__ that wasn't @@ -305,6 +314,7 @@ Py_DECREF(old_bases); Py_DECREF(old_base); Py_DECREF(old_mro); + Py_DECREF(old_cache); return r; @@ -314,10 +324,14 @@ if (type->tp_mro != old_mro) { Py_DECREF(type->tp_mro); } + if (type->tp_cache != old_cache) { + Py_XDECREF(type->tp_cache); + } type->tp_bases = old_bases; type->tp_base = old_base; type->tp_mro = old_mro; + type->tp_cache = old_cache; return -1; } @@ -2190,7 +2204,7 @@ meta_get = NULL; /* Look for the attribute in the metatype */ - meta_attribute = _PyType_Lookup(metatype, name); + meta_attribute = _PyType_CachingLookup(metatype, name); if (meta_attribute != NULL) { meta_get = Py_Type(meta_attribute)->tp_descr_get; @@ -2208,7 +2222,7 @@ /* No data descriptor found on metatype. Look in tp_dict of this * type and its bases */ - attribute = _PyType_Lookup(type, name); + attribute = _PyType_CachingLookup(type, name); if (attribute != NULL) { /* Implement descriptor functionality, if any */ descrgetfunc local_get = Py_Type(attribute)->tp_descr_get; @@ -2258,10 +2272,9 @@ type->tp_name); return -1; } - /* XXX Example of how I expect this to be used... - if (update_subclasses(type, name, invalidate_cache, NULL) < 0) + /* Let subclasses know this value has changed */ + if (update_subclasses(type, name, type_attrcache_callback, name) < 0) return -1; - */ if (PyObject_GenericSetAttr((PyObject *)type, name, value) < 0) return -1; return update_slot(type, name); @@ -2370,7 +2383,8 @@ It is a dict, so the collector will call its tp_clear. tp_cache: - Not used; if it were, it would be a dict. + This is a PyAttrCacheObject (a specialized dict), so the + the collector will call its tp_clear. tp_bases, tp_base: If these are involved in a cycle, there must be at least @@ -3541,6 +3555,11 @@ inherit_slots(type, (PyTypeObject *)b); } + /* Set up a lookup cache */ + type->tp_cache = PyAttrCache_New(); + if (type->tp_cache == NULL) + goto error; + /* Sanity check for tp_free. */ if (PyType_IS_GC(type) && (type->tp_flags & Py_TPFLAGS_BASETYPE) && (type->tp_free == NULL || type->tp_free == PyObject_Del)) { @@ -6135,3 +6154,507 @@ PyType_GenericNew, /* tp_new */ PyObject_GC_Del, /* tp_free */ }; + + +/* +TODO: + - attrcache_update optimizations + - Are there other places that could use _PyType_CachingLookup? + - Why are list.__init__ and list().__init__ slower with caching? + - Find good values for PyAttrCache_MINSIZE and PERTURB_SHIFT + - Speed option: combine attrcache_lookup and PyAttrCache_GetEntry? +*/ + +/* +Attribute Caching for Fast Lookups + +From here on is most things related to tp_cache. + +Since the only way variables are ever added to a type's dict is via setattr, +we can cache lookups without having a huge footprint in the core. + +This uses a custom dict because a regular one wasn't fast enough. Further, +it caches *missing* entries as well as present ones, which would have been +difficult and possibly cost-prohibitive to support with a regular dict. + +PyAttrCacheObject is just like PyDictObject, except it can have NULL values, +which represent cached missing attributes. Upon resize, these are NOT copied +into the new dict, so it's not possible to fill up the cache using a bunch +of hasattr(cls, ) calls. + +Queries also work differently - there's only a PyAttrCache_GetEntry, which +returns a pointer into the entry table. +*/ + +typedef struct { + long me_hash; + PyObject *me_key; + PyObject *me_value; +} PyAttrCacheEntry; + +typedef struct { + PyObject_HEAD + Py_ssize_t ma_fill; /* # Active + # Missing + # Dummy */ + Py_ssize_t ma_used; /* # Active */ + /* The table contains ma_mask + 1 slots, and that's a power of 2. + We store the mask instead of the size because the mask is more + frequently needed. */ + Py_ssize_t ma_mask; + /* This is never NULL. */ + PyAttrCacheEntry *ma_table; + /* There's no special case for small tables, but since cache sizes + shouldn't normally change a lot, this might be okay */ +} PyAttrCacheObject; + +PyAPI_DATA(PyTypeObject) PyAttrCache_Type; + +#define PyAttrCache_Check(op) (Py_Type(op) == &PyAttrCache_Type) + +/* This may be too small, but the regression tests never needed +anything bigger */ +#define PyAttrCache_MINSIZE 4 +/* This may not be the right value, since it was arrived at through testing +general dicts. These are specialized to strings, specifically attribute +names, so a different value may be in order. Needs testing. */ +#define PERTURB_SHIFT 5 + +static PyObject *dummy = NULL; + +static PyObject * +PyAttrCache_New(void) +{ + PyAttrCacheObject *mp; + + if (dummy == NULL) { + dummy = PyString_FromString(""); + if (dummy == NULL) + return NULL; + } + + mp = PyObject_GC_New(PyAttrCacheObject, &PyAttrCache_Type); + if (mp == NULL) + return NULL; + + mp->ma_fill = mp->ma_used = 0; + mp->ma_mask = PyAttrCache_MINSIZE - 1; + + mp->ma_table = PyMem_NEW(PyAttrCacheEntry, PyAttrCache_MINSIZE); + if (mp->ma_table == NULL) { + _PyObject_GC_TRACK(mp); + Py_DECREF(mp); + return NULL; + } + memset(mp->ma_table, 0, sizeof(PyAttrCacheEntry) * PyAttrCache_MINSIZE); + + _PyObject_GC_TRACK(mp); + return (PyObject *)mp; +} + +/* Shamelessly copied from lookdict_string - see dictobject.c for details +on how lookups work. This cannot return NULL. */ +static PyAttrCacheEntry * +attrcache_lookup(PyAttrCacheObject *mp, PyObject *key, long hash) +{ + register size_t i; + register size_t perturb; + register PyAttrCacheEntry *freeslot; + register size_t mask = (size_t)mp->ma_mask; + PyAttrCacheEntry *ep0 = mp->ma_table; + register PyAttrCacheEntry *ep; + + assert(key != NULL && PyString_CheckExact(key)); + + i = hash & mask; + ep = &ep0[i]; + if (ep->me_key == NULL || ep->me_key == key) + return ep; + if (ep->me_key == dummy) + freeslot = ep; + else { + if (ep->me_hash == hash && _PyString_Eq(ep->me_key, key)) + return ep; + freeslot = NULL; + } + + /* In the loop, me_key == dummy is by far (factor of 100s) the + least likely outcome, so test for that last. */ + for (perturb = hash; ; perturb >>= PERTURB_SHIFT) { + i = (i << 2) + i + perturb + 1; + ep = &ep0[i & mask]; + if (ep->me_key == NULL) + return freeslot == NULL ? ep : freeslot; + if (ep->me_key == key + || (ep->me_hash == hash + && ep->me_key != dummy + && _PyString_Eq(ep->me_key, key))) + return ep; + if (ep->me_key == dummy && freeslot == NULL) + freeslot = ep; + } + assert(0); /* NOT REACHED */ + return 0; +} + +static void +attrcache_insertclean(PyAttrCacheObject *mp, PyAttrCacheEntry *oldep) +{ + register size_t i; + register size_t perturb; + register size_t mask = (size_t)mp->ma_mask; + register PyAttrCacheEntry *ep0 = mp->ma_table; + register PyAttrCacheEntry *ep; + register long hash = oldep->me_hash; + + i = hash & mask; + ep = &ep0[i]; + for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) { + i = (i << 2) + i + perturb + 1; + ep = &ep0[i & mask]; + } + + mp->ma_fill++; + *ep = *oldep; + mp->ma_used++; +} + +static int +attrcache_resize(PyAttrCacheObject *mp, Py_ssize_t minused) +{ + Py_ssize_t newsize; + PyAttrCacheEntry *oldtable, *newtable, *ep; + Py_ssize_t i; + + assert(minused >= 0); + + /* Find the smallest table size > minused. */ + for (newsize = PyAttrCache_MINSIZE; + newsize <= minused && newsize > 0; + newsize <<= 1) + ; + if (newsize <= 0) { + return -1; + } + + /* Get space for a new table. */ + oldtable = mp->ma_table; + assert(oldtable != NULL); + + newtable = PyMem_NEW(PyAttrCacheEntry, newsize); + if (newtable == NULL) { + return -1; + } + + mp->ma_table = newtable; + mp->ma_mask = newsize - 1; + memset(newtable, 0, sizeof(PyAttrCacheEntry) * newsize); + mp->ma_used = 0; + i = mp->ma_fill; + mp->ma_fill = 0; + + /* Copy the data over; this is refcount-neutral for active entries; + dummies and missing attribute entries aren't copied over */ + for (ep = oldtable; i > 0; ep++) { + if (ep->me_key == NULL) + continue; + if (ep->me_key == dummy) { + /* Dummy key - don't keep it */ + Py_DECREF(dummy); + assert(ep->me_value == NULL); + } + else if (ep->me_value == NULL) + /* Missing attribute - don't keep it */ + Py_DECREF(ep->me_key); + else + /* dummy != me_key != NULL, me_value != NULL */ + attrcache_insertclean(mp, ep); + i--; + } + + PyMem_FREE(oldtable); + return 0; +} + +/* Returns a pointer into the dict's ma_table, or NULL on error with an +exception set. That only happens when, for some reason, the key can't +hash - but they're all strings, so that really shouldn't happen. */ +static PyAttrCacheEntry * +PyAttrCache_GetEntry(PyObject *op, PyObject *key) +{ + PyAttrCacheObject *mp = (PyAttrCacheObject *)op; + long hash; + + assert(op != NULL && PyAttrCache_Check(op)); + assert(key != NULL && PyString_CheckExact(key)); + + if ((hash = ((PyStringObject *)key)->ob_shash) == -1) { + hash = PyObject_Hash(key); + if (hash == -1) + return NULL; + } + + /* Resize if fill/size >= 2/3 */ + if (mp->ma_fill * 3 >= (mp->ma_mask + 1) * 2) + if (attrcache_resize(mp, 4 * mp->ma_used) < 0) + return NULL; + + return attrcache_lookup(mp, key, hash); +} + +/* Updates an attribute cache entry given an MRO and a key. Returns 0 on +success, -1 if one of the dicts in the MRO has non-string keys. Doesn't +set an exception. */ +static int +attrcache_update(PyAttrCacheObject *mp, PyObject *mro, PyObject *key, + PyAttrCacheEntry *ep) +{ + Py_ssize_t i, n; + PyObject *base, *dict, *value = NULL; + + /* Don't update active entries */ + assert(ep->me_key == NULL || ep->me_key == dummy); + assert(ep->me_value == NULL); + + /* Look through the types in the MRO for the attribute */ + n = PyTuple_GET_SIZE(mro); + for (i = 0; i < n; i++) { + base = PyTuple_GET_ITEM(mro, i); + if (PyClass_Check(base)) + dict = ((PyClassObject *)base)->cl_dict; + else { + assert(PyType_Check(base)); + dict = ((PyTypeObject *)base)->tp_dict; + } + assert(dict != NULL && PyDict_Check(dict)); + /* It's not safe to allow non-string keys */ + if (!PyDict_IsStringKeysOnly(dict)) + return -1; + value = PyDict_GetItem(dict, key); + if (value != NULL) + break; + } + + if (ep->me_key != NULL) { + Py_DECREF(dummy); + } + else { + /* If it's NULL we're activating an entry */ + mp->ma_fill++; + } + Py_INCREF(key); + ep->me_key = key; + + if (value != NULL) { + Py_INCREF(value); + ep->me_value = value; + mp->ma_used++; + } + /* else me_value = NULL to make an entry for a missing attribute - + it won't be copied on next resize */ + return 0; +} + +/* Called when any class in the MRO has an attribute set. void *data is +a pointer to the name of the attribute. Clears the cache entry associated +with the attribute name. + +This could have looked up the new attribute value in the dicts in the MRO, +but that can take a long time if the hierarchy is deep. Also, it's likely +that only a few classes will actually care, especially if many of the +classes in the MRO are abstract and will never be instantiated, which is +very often true of deep hierarchies. + +Returns 0 on success, -1 on error with an exception set. */ +static int +type_attrcache_callback(PyTypeObject *type, void *data) +{ + PyAttrCacheEntry *ep; + PyAttrCacheObject *mp; + PyObject *old_key, *old_value; + PyObject *key = (PyObject *)data; + + mp = (PyAttrCacheObject *)type->tp_cache; + if (mp == NULL) + /* No cache - nothing to do */ + return 0; + assert(PyAttrCache_Check(mp)); + + assert(key != NULL); + /* We don't do non-string keys, like, ever */ + if (!PyString_CheckExact(key)) + goto bad; + + ep = PyAttrCache_GetEntry((PyObject *)mp, key); + if (ep == NULL) { + /* Can't trust the cache anymore */ + Py_CLEAR(type->tp_cache); + return -1; + } + + if (ep->me_key == dummy || ep->me_key == NULL) + /* It's already not there */ + return 0; + + /* Delete it */ + Py_INCREF(dummy); + old_key = ep->me_key; + old_value = ep->me_value; + ep->me_key = dummy; + ep->me_value = NULL; + mp->ma_used--; + Py_DECREF(old_key); + Py_DECREF(old_value); + return 0; +bad: + /* Bail forever - leave them to the old, slow way */ + Py_CLEAR(type->tp_cache); + /* But we don't want update_subclasses to bail */ + return 0; +} + +static void +attrcache_dealloc(PyAttrCacheObject *mp) +{ + PyAttrCacheEntry *ep = mp->ma_table; + Py_ssize_t fill = mp->ma_fill; + + _PyObject_GC_UNTRACK(mp); + + for (; fill > 0; ep++) { + if (ep->me_key != NULL) { + fill--; + Py_DECREF(ep->me_key); + Py_XDECREF(ep->me_value); + } + } + PyMem_FREE(mp->ma_table); + PyObject_GC_Del(mp); +} + +static int +attrcache_clear(PyAttrCacheObject *mp) +{ + PyAttrCacheEntry *ep = mp->ma_table; + Py_ssize_t fill = mp->ma_fill; + + for (; fill > 0; ep++) { + if (ep->me_key != NULL) { + fill--; + Py_DECREF(ep->me_key); + Py_XDECREF(ep->me_value); + } + } + memset(mp->ma_table, 0, + sizeof(PyAttrCacheEntry) * (mp->ma_mask + 1)); + mp->ma_used = mp->ma_fill = 0; + return 0; +} + +static int +attrcache_traverse(PyAttrCacheObject *mp, visitproc visit, void *arg) +{ + Py_ssize_t i; + PyAttrCacheEntry *ep; + + for (i = mp->ma_mask + 1, ep = mp->ma_table; i > 0; i--, ep++) { + if (ep->me_key != NULL && ep->me_key != dummy) { + Py_VISIT(ep->me_key); + if (ep->me_value != NULL) + Py_VISIT(ep->me_value); + } + } + return 0; +} + +PyTypeObject PyAttrCache_Type = { + PyObject_HEAD_INIT(&PyType_Type) + 0, + "attrcache", + sizeof(PyAttrCacheObject), + 0, + (destructor) + attrcache_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | + Py_TPFLAGS_HAVE_GC, /* tp_flags */ + 0, /* tp_doc */ + (traverseproc) + attrcache_traverse, /* tp_traverse */ + (inquiry)attrcache_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ + 0, /* tp_free */ +}; + + +/* This is meant as a drop-in replacement for _PyType_Lookup in spots that +could use some caching for speed. If a lookup will only be done once or +infrequently, it's probably best to use _PyType_Lookup instead. Since it's +a drop-in replacement, it never sets an exception. */ +PyObject * +_PyType_CachingLookup(PyTypeObject *type, PyObject *name) +{ + PyObject *cache, *mro; + PyAttrCacheEntry *ep; + + cache = type->tp_cache; + if (cache == NULL) + goto not_so_bad; + assert(PyAttrCache_Check(cache)); + + /* We don't do non-string keys, like, ever */ + if (!PyString_CheckExact(name)) + goto bad; + + ep = PyAttrCache_GetEntry(cache, name); + if (ep == NULL) { + PyErr_Clear(); + goto bad; + } + + if (ep->me_key != NULL && ep->me_key != dummy) + /* me_value may be NULL, meaning that the attribute is + definitely not in a tp_dict in the MRO */ + return ep->me_value; + /* else we don't know about this key yet */ + + mro = type->tp_mro; + if (mro == NULL) + /* See _PyType_Lookup */ + return NULL; + + if (attrcache_update((PyAttrCacheObject *)cache, mro, name, ep) < 0) + goto bad; + return ep->me_value; + +bad: + /* Bail forever - leave them to the old, slow way */ + Py_CLEAR(type->tp_cache); +not_so_bad: + return _PyType_Lookup(type, name); +} Index: Objects/dictobject.c =================================================================== --- Objects/dictobject.c (revision 59362) +++ Objects/dictobject.c (working copy) @@ -388,6 +388,16 @@ return 0; } +/* Returns 1 if the dict has only ever contained string keys, 0 +otherwise. This is used to avoid evil rich compares doing nasty +things to tp_cache in attrcache_update in typeobject.c. */ +int +PyDict_IsStringKeysOnly(PyObject *op) +{ + assert(op != NULL && PyDict_Check(op)); + return ((PyDictObject *)op)->ma_lookup == lookdict_string; +} + /* Internal routine to insert a new item into the table. Used both by the internal resize routine and by the public insert routine. Index: Tools/pybench/Calls.py =================================================================== --- Tools/pybench/Calls.py (revision 59362) +++ Tools/pybench/Calls.py (working copy) @@ -219,9 +219,9 @@ def calibrate(self): # localize functions - f0 = dir + f0 = globals f1 = hash - f2 = range + f2 = cmp f3 = range # do calls