Index: Include/dictobject.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/dictobject.h,v retrieving revision 2.26 diff -c -r2.26 dictobject.h *** Include/dictobject.h 11 Dec 2002 13:21:12 -0000 2.26 --- Include/dictobject.h 30 Dec 2002 22:36:39 -0000 *************** *** 84,89 **** --- 84,90 ---- PyAPI_FUNC(PyObject *) PyDict_New(void); PyAPI_FUNC(PyObject *) PyDict_GetItem(PyObject *mp, PyObject *key); + PyAPI_FUNC(PyObject *) PyDict_GetItemByName(PyObject *mp, PyObject *key); PyAPI_FUNC(int) PyDict_SetItem(PyObject *mp, PyObject *key, PyObject *item); PyAPI_FUNC(int) PyDict_DelItem(PyObject *mp, PyObject *key); PyAPI_FUNC(void) PyDict_Clear(PyObject *mp); Index: Include/stringobject.h =================================================================== RCS file: /cvsroot/python/python/dist/src/Include/stringobject.h,v retrieving revision 2.38 diff -c -r2.38 stringobject.h *** Include/stringobject.h 15 Sep 2002 14:09:46 -0000 2.38 --- Include/stringobject.h 30 Dec 2002 22:36:39 -0000 *************** *** 77,82 **** --- 77,85 ---- /* Use only if you know it's a string */ #define PyString_CHECK_INTERNED(op) (((PyStringObject *)(op))->ob_sstate) + /* Unchecked cast, use only on strings; yields -1 while the hash has not been computed yet */ + #define PyString_HASH(op) (((PyStringObject *)(op))->ob_shash) + /* Macro, trading safety for speed */ #define PyString_AS_STRING(op) (((PyStringObject *)(op))->ob_sval) #define PyString_GET_SIZE(op) (((PyStringObject *)(op))->ob_size) Index: Objects/dictobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/dictobject.c,v retrieving revision 2.138 diff -c -r2.138 dictobject.c *** Objects/dictobject.c 29 Dec 2002 16:33:11 -0000 2.138 --- Objects/dictobject.c 30 Dec 2002 22:36:40 -0000 *************** *** 6,13 **** typedef PyDictEntry dictentry; typedef PyDictObject dictobject; ! 
/* Define this out if you don't want conversion statistics on exit. */ ! #undef SHOW_CONVERSION_COUNTS /* See large comment block below. This must be >= 1. */ #define PERTURB_SHIFT 5 --- 6,13 ---- typedef PyDictEntry dictentry; typedef PyDictObject dictobject; ! /* Define this to collect statistics about dictionary operation. */ ! #undef INSTRUMENTED /* See large comment block below. This must be >= 1. */ #define PERTURB_SHIFT 5 *************** *** 106,111 **** --- 106,120 ---- equally good collision statistics, needed less code & used less memory. */ + /* Start a probe sequence: sets both i and perturb to hash */ + #define PROBE_SEQUENCE_INIT(i, perturb, hash) ((i) = (perturb) = (hash)) + + /* Advance i to the next probe (caller masks it); evaluates i and perturb more than once */ + #define PROBE_SEQUENCE_NEXT(i, perturb) ( \ + (i) = ((i) << 2) + (i) + (perturb) + 1, \ + (perturb) >>= PERTURB_SHIFT, (i) \ + ) + /* Object used as dummy key to fill deleted entries */ static PyObject *dummy; /* Initialized by first call to newdictobject() */ *************** *** 113,128 **** static dictentry * lookdict_string(dictobject *mp, PyObject *key, long hash); ! #ifdef SHOW_CONVERSION_COUNTS static long created = 0L; static long converted = 0L; static void show_counts(void) { fprintf(stderr, "created %ld string dicts\n", created); fprintf(stderr, "converted %ld to normal dicts\n", converted); fprintf(stderr, "%.2f%% conversion rate\n", (100.0*converted)/created); } #endif --- 122,153 ---- static dictentry * lookdict_string(dictobject *mp, PyObject *key, long hash); ! 
#ifdef INSTRUMENTED static long created = 0L; static long converted = 0L; + static long byname_total = 0L; + static long byname_bailout = 0L; + static long byname_fast_positive = 0L; + static long byname_fast_negative = 0L; + static long byname_slow_positive = 0L; + static long byname_slow_negative = 0L; + static void show_counts(void) { fprintf(stderr, "created %ld string dicts\n", created); fprintf(stderr, "converted %ld to normal dicts\n", converted); fprintf(stderr, "%.2f%% conversion rate\n", (100.0*converted)/created); + /* PyDict_GetItemByName() counters: fast = pointer-equal key hit in the first probe loop, slow = resolved by lookdict_string, bailout = delegated to ma_lookup */ + fprintf(stderr, "byname total %ld\n", byname_total); + fprintf(stderr, "byname bailout %ld\n", byname_bailout); + + fprintf(stderr, "byname fast_positive %ld\n", byname_fast_positive); + fprintf(stderr, "byname fast_negative %ld\n", byname_fast_negative); + + fprintf(stderr, "byname slow_positive %ld\n", byname_slow_positive); + fprintf(stderr, "byname slow_negative %ld\n", byname_slow_negative); } #endif *************** *** 154,160 **** dummy = PyString_FromString(""); if (dummy == NULL) return NULL; ! #ifdef SHOW_CONVERSION_COUNTS Py_AtExit(show_counts); #endif } --- 179,185 ---- dummy = PyString_FromString(""); if (dummy == NULL) return NULL; ! #ifdef INSTRUMENTED Py_AtExit(show_counts); #endif } *************** *** 163,169 **** return NULL; EMPTY_TO_MINSIZE(mp); mp->ma_lookup = lookdict_string; ! #ifdef SHOW_CONVERSION_COUNTS ++created; #endif _PyObject_GC_TRACK(mp); --- 188,194 ---- return NULL; EMPTY_TO_MINSIZE(mp); mp->ma_lookup = lookdict_string; ! #ifdef INSTRUMENTED ++created; #endif _PyObject_GC_TRACK(mp); *************** *** 211,217 **** return ep; restore_error = checked_error = 0; ! if (ep->me_key == dummy) freeslot = ep; else { if (ep->me_hash == hash) { --- 236,242 ---- return ep; restore_error = checked_error = 0; ! if (ep->me_value == NULL) freeslot = ep; else { if (ep->me_hash == hash) { *************** *** 242,251 **** freeslot = NULL; } ! /* In the loop, me_key == dummy is by far (factor of 100s) the ! 
least likely outcome, so test for that last. */ ! for (perturb = hash; ; perturb >>= PERTURB_SHIFT) { ! i = (i << 2) + i + perturb + 1; ep = &ep0[i & mask]; if (ep->me_key == NULL) { if (freeslot != NULL) --- 267,275 ---- freeslot = NULL; } ! PROBE_SEQUENCE_INIT(i, perturb, hash); ! for (;;) { ! PROBE_SEQUENCE_NEXT(i, perturb); ep = &ep0[i & mask]; if (ep->me_key == NULL) { if (freeslot != NULL) *************** *** 254,260 **** } if (ep->me_key == key) break; ! if (ep->me_hash == hash && ep->me_key != dummy) { if (!checked_error) { checked_error = 1; if (PyErr_Occurred()) { --- 278,284 ---- } if (ep->me_key == key) break; ! if (ep->me_hash == hash) { if (!checked_error) { checked_error = 1; if (PyErr_Occurred()) { *************** *** 281,287 **** break; } } ! else if (ep->me_key == dummy && freeslot == NULL) freeslot = ep; } --- 305,311 ---- break; } } ! else if (ep->me_value == NULL && freeslot == NULL) freeslot = ep; } *************** *** 316,322 **** strings is to override __eq__, and for speed we don't cater to that here. */ if (!PyString_CheckExact(key)) { ! #ifdef SHOW_CONVERSION_COUNTS ++converted; #endif mp->ma_lookup = lookdict; --- 340,346 ---- strings is to override __eq__, and for speed we don't cater to that here. */ if (!PyString_CheckExact(key)) { ! #ifdef INSTRUMENTED ++converted; #endif mp->ma_lookup = lookdict; *************** *** 326,332 **** ep = &ep0[i]; if (ep->me_key == NULL || ep->me_key == key) return ep; ! if (ep->me_key == dummy) freeslot = ep; else { if (ep->me_hash == hash --- 350,356 ---- ep = &ep0[i]; if (ep->me_key == NULL || ep->me_key == key) return ep; ! if (ep->me_value == NULL) freeslot = ep; else { if (ep->me_hash == hash *************** *** 336,354 **** freeslot = NULL; } ! /* In the loop, me_key == dummy is by far (factor of 100s) the ! least likely outcome, so test for that last. */ ! for (perturb = hash; ; perturb >>= PERTURB_SHIFT) { ! 
i = (i << 2) + i + perturb + 1; ep = &ep0[i & mask]; if (ep->me_key == NULL) return freeslot == NULL ? ep : freeslot; if (ep->me_key == key || (ep->me_hash == hash - && ep->me_key != dummy && _PyString_Eq(ep->me_key, key))) return ep; ! if (ep->me_key == dummy && freeslot == NULL) freeslot = ep; } } --- 360,376 ---- freeslot = NULL; } ! PROBE_SEQUENCE_INIT(i, perturb, hash); ! for (;;) { ! PROBE_SEQUENCE_NEXT(i, perturb); ep = &ep0[i & mask]; if (ep->me_key == NULL) return freeslot == NULL ? ep : freeslot; if (ep->me_key == key || (ep->me_hash == hash && _PyString_Eq(ep->me_key, key))) return ep; ! if (ep->me_value == NULL && freeslot == NULL) freeslot = ep; } } *************** *** 356,364 **** /* Internal routine to insert a new item into the table. Used both by the internal resize routine and by the public insert routine. ! Eats a reference to key and one to value. */ ! static void insertdict(register dictobject *mp, PyObject *key, long hash, PyObject *value) { PyObject *old_value; --- 378,387 ---- /* Internal routine to insert a new item into the table. Used both by the internal resize routine and by the public insert routine. ! Eats a reference to key and one to value. Returns a nonzero result if ! a new key was added. */ ! static int insertdict(register dictobject *mp, PyObject *key, long hash, PyObject *value) { PyObject *old_value; *************** *** 372,377 **** --- 395,401 ---- ep->me_value = value; Py_DECREF(old_value); /* which **CAN** re-enter */ Py_DECREF(key); + return 0; } else { if (ep->me_key == NULL) *************** *** 382,387 **** --- 406,412 ---- ep->me_hash = hash; ep->me_value = value; mp->ma_used++; + return 1; } } *************** *** 455,470 **** /* Copy the data over; this is refcount-neutral for active entries; dummy entries aren't copied over, of course */ for (ep = oldtable; i > 0; ep++) { ! if (ep->me_value != NULL) { /* active entry */ ! --i; ! insertdict(mp, ep->me_key, ep->me_hash, ep->me_value); ! } ! 
else if (ep->me_key != NULL) { /* dummy entry */ ! --i; ! assert(ep->me_key == dummy); ! Py_DECREF(ep->me_key); ! } ! /* else key == value == NULL: nothing to do */ } if (is_oldtable_malloced) --- 480,497 ---- /* Copy the data over; this is refcount-neutral for active entries; dummy entries aren't copied over, of course */ for (ep = oldtable; i > 0; ep++) { ! if (ep->me_value != NULL) { /* active entry */ ! --i; ! insertdict(mp, ep->me_key, ep->me_hash, ep->me_value); ! } ! else if (ep->me_key != NULL) { /* dummy/negative entry */ ! --i; ! assert(ep->me_key == dummy || ! /* negative entry: any exact string, interned or not */ ! PyString_CheckExact(ep->me_key)); ! Py_DECREF(ep->me_key); ! } ! /* else key == value == NULL: nothing to do */ } if (is_oldtable_malloced) *************** *** 481,487 **** return NULL; } if (!PyString_CheckExact(key) || ! (hash = ((PyStringObject *) key)->ob_shash) == -1) { hash = PyObject_Hash(key); if (hash == -1) { --- 508,514 ---- return NULL; } if (!PyString_CheckExact(key) || ! (hash = PyString_HASH(key)) == -1) { hash = PyObject_Hash(key); if (hash == -1) { *************** *** 492,497 **** --- 519,614 ---- return (mp->ma_lookup)(mp, key, hash)->me_value; } + /* Faster than GetItem when key is an interned string (typically a + variable or attribute name, hence the "by name" suffix). If key is not + interned it will be slightly slower. If key is not found a negative entry + (key present, value NULL) may be inserted to speed up the next lookup. + op must be a valid dictionary object; it is not type-checked here. + Returns a borrowed reference, or NULL with no exception set when the + key is missing or cannot be hashed (same convention as PyDict_GetItem). + The key should be a string. If it's not, the lookup falls back to the + dictionary's generic ma_lookup routine. + 
*/ + + PyObject * + PyDict_GetItemByName(PyObject *op, PyObject *key) + { + dictobject *mp; + long hash; + unsigned int mask; + int i; + unsigned int perturb; + dictentry *ep0; + dictentry *ep; + + #ifdef INSTRUMENTED + byname_total++; + #endif + mp = (dictobject *)op; + hash = PyString_HASH(key); + mask = mp->ma_mask; + ep0 = mp->ma_table; + /* Fast search for exact key match */ + PROBE_SEQUENCE_INIT(i, perturb, hash); + for (;;) { + ep = &ep0[i & mask]; + if (ep->me_key == key) { + #ifdef INSTRUMENTED + if(ep->me_value != NULL) + byname_fast_positive++; + else + byname_fast_negative++; + #endif + return ep->me_value; + } + if (ep->me_key == NULL) + break; + PROBE_SEQUENCE_NEXT(i, perturb); + } + + hash = PyObject_Hash(key); + if (hash == -1) { /* unhashable key: report "not found" without leaving an exception pending */ + PyErr_Clear(); + return NULL; + } + if (mp->ma_lookup != lookdict_string || !PyString_CheckExact(key)) { + #ifdef INSTRUMENTED + byname_bailout++; + #endif + return (mp->ma_lookup)(mp, key, hash)->me_value; + } + + ep = lookdict_string(mp, key, hash); + + #ifdef INSTRUMENTED + if(ep->me_value != NULL) + byname_slow_positive++; + else + byname_slow_negative++; + #endif + /* Insert a negative entry */ + if (ep->me_value == NULL) { + /* Temporary solution: don't insert if too full */ + if (mp->ma_fill*3 >= (mp->ma_mask+1)*2) + return NULL; + /* Another problem for now is negative entry thrashing */ + if (ep->me_key == NULL) + mp->ma_fill++; + else + Py_DECREF(ep->me_key); + Py_INCREF(key); + ep->me_key = key; + ep->me_hash = PyString_HASH(key); + return NULL; + } + /* The fast search failed because entry was indexed + by a non-interned string. Replace it. */ + Py_DECREF(ep->me_key); + Py_INCREF(key); + ep->me_key = key; + return ep->me_value; + } + /* CAUTION: PyDict_SetItem() must guarantee that it won't resize the * dictionary if it is merely replacing the value for an existing key. * This is means that it's safe to loop over a dictionary with *************** *** 510,532 **** return -1; } mp = (dictobject *)op; ! 
if (PyString_CheckExact(key)) { ! hash = ((PyStringObject *)key)->ob_shash; ! if (hash == -1) ! hash = PyObject_Hash(key); ! } ! else { ! hash = PyObject_Hash(key); ! if (hash == -1) ! return -1; ! } assert(mp->ma_fill <= mp->ma_mask); /* at least one empty slot */ n_used = mp->ma_used; Py_INCREF(value); Py_INCREF(key); ! insertdict(mp, key, hash, value); ! /* If we added a key, we can safely resize. Otherwise skip this! ! * If fill >= 2/3 size, adjust size. Normally, this doubles the * size, but it's also possible for the dict to shrink (if ma_fill is * much larger than ma_used, meaning a lot of dict keys have been * deleted). --- 627,651 ---- return -1; } mp = (dictobject *)op; ! ! if (!PyString_CheckExact(key) || ! (hash = PyString_HASH(key)) == -1) ! { ! hash = PyObject_Hash(key); ! if (hash == -1) { ! return -1; ! } ! } ! assert(mp->ma_fill <= mp->ma_mask); /* at least one empty slot */ n_used = mp->ma_used; Py_INCREF(value); Py_INCREF(key); ! ! if (insertdict(mp, key, hash, value) == 0) ! return 0; /* Skip the rest if no new key was added */ ! ! /* If fill >= 2/3 size, adjust size. Normally, this doubles the * size, but it's also possible for the dict to shrink (if ma_fill is * much larger than ma_used, meaning a lot of dict keys have been * deleted). *************** *** 551,557 **** return -1; } if (!PyString_CheckExact(key) || ! (hash = ((PyStringObject *) key)->ob_shash) == -1) { hash = PyObject_Hash(key); if (hash == -1) return -1; --- 670,676 ---- return -1; } if (!PyString_CheckExact(key) || ! (hash = PyString_HASH(key)) == -1) { hash = PyObject_Hash(key); if (hash == -1) return -1; *************** *** 565,570 **** --- 684,690 ---- old_key = ep->me_key; Py_INCREF(dummy); ep->me_key = dummy; + ep->me_hash = -1; old_value = ep->me_value; ep->me_value = NULL; mp->ma_used--; *************** *** 828,834 **** long hash; assert(mp->ma_table != NULL); if (!PyString_CheckExact(key) || ! 
(hash = ((PyStringObject *) key)->ob_shash) == -1) { hash = PyObject_Hash(key); if (hash == -1) return NULL; --- 948,954 ---- long hash; assert(mp->ma_table != NULL); if (!PyString_CheckExact(key) || ! (hash = PyString_HASH(key)) == -1) { hash = PyObject_Hash(key); if (hash == -1) return NULL; *************** *** 1462,1468 **** long hash; register long ok; if (!PyString_CheckExact(key) || ! (hash = ((PyStringObject *) key)->ob_shash) == -1) { hash = PyObject_Hash(key); if (hash == -1) return NULL; --- 1582,1588 ---- long hash; register long ok; if (!PyString_CheckExact(key) || ! (hash = PyString_HASH(key)) == -1) { hash = PyObject_Hash(key); if (hash == -1) return NULL; *************** *** 1483,1489 **** return NULL; if (!PyString_CheckExact(key) || ! (hash = ((PyStringObject *) key)->ob_shash) == -1) { hash = PyObject_Hash(key); if (hash == -1) return NULL; --- 1603,1609 ---- return NULL; if (!PyString_CheckExact(key) || ! (hash = PyString_HASH(key)) == -1) { hash = PyObject_Hash(key); if (hash == -1) return NULL; *************** *** 1509,1515 **** return NULL; if (!PyString_CheckExact(key) || ! (hash = ((PyStringObject *) key)->ob_shash) == -1) { hash = PyObject_Hash(key); if (hash == -1) return NULL; --- 1629,1635 ---- return NULL; if (!PyString_CheckExact(key) || ! (hash = PyString_HASH(key)) == -1) { hash = PyObject_Hash(key); if (hash == -1) return NULL; *************** *** 1546,1552 **** return NULL; } if (!PyString_CheckExact(key) || ! (hash = ((PyStringObject *) key)->ob_shash) == -1) { hash = PyObject_Hash(key); if (hash == -1) return NULL; --- 1666,1672 ---- return NULL; } if (!PyString_CheckExact(key) || ! 
(hash = PyString_HASH(key)) == -1) { hash = PyObject_Hash(key); if (hash == -1) return NULL; *************** *** 1559,1564 **** --- 1679,1685 ---- old_key = ep->me_key; Py_INCREF(dummy); ep->me_key = dummy; + ep->me_hash = -1; old_value = ep->me_value; ep->me_value = NULL; mp->ma_used--; *************** *** 1617,1622 **** --- 1738,1744 ---- PyTuple_SET_ITEM(res, 1, ep->me_value); Py_INCREF(dummy); ep->me_key = dummy; + ep->me_hash = -1; ep->me_value = NULL; mp->ma_used--; assert(mp->ma_table[0].me_value == NULL); *************** *** 1698,1704 **** return dictiter_new(dict, select_item); } - PyDoc_STRVAR(has_key__doc__, "D.has_key(k) -> 1 if D has a key k, else 0"); --- 1820,1825 ---- *************** *** 1780,1792 **** {NULL, NULL} /* sentinel */ }; static int dict_contains(dictobject *mp, PyObject *key) { long hash; if (!PyString_CheckExact(key) || ! (hash = ((PyStringObject *) key)->ob_shash) == -1) { hash = PyObject_Hash(key); if (hash == -1) return -1; --- 1901,1926 ---- {NULL, NULL} /* sentinel */ }; + /* Instrumented builds only: expose the table counters ma_used, ma_fill and ma_mask as read-only dict attributes */ + #ifdef INSTRUMENTED + #include "structmember.h" + #define OFF(x) offsetof(PyDictObject, x) + + static PyMemberDef dict_memberlist[] = { + {"ma_used", T_INT, OFF(ma_used), RO, "" }, + {"ma_fill", T_INT, OFF(ma_fill), RO, "" }, + {"ma_mask", T_INT, OFF(ma_mask), RO, "" }, + {NULL} /* Sentinel */ + }; + #endif + static int dict_contains(dictobject *mp, PyObject *key) { long hash; if (!PyString_CheckExact(key) || ! (hash = PyString_HASH(key)) == -1) { hash = PyObject_Hash(key); if (hash == -1) return -1; *************** *** 1821,1827 **** assert(d->ma_table == NULL && d->ma_fill == 0 && d->ma_used == 0); INIT_NONZERO_DICT_SLOTS(d); d->ma_lookup = lookdict_string; ! 
#ifdef INSTRUMENTED ++created; #endif } *************** *** 1903,1909 **** --- 2037,2047 ---- (getiterfunc)dict_iter, /* tp_iter */ 0, /* tp_iternext */ mapp_methods, /* tp_methods */ + #ifdef INSTRUMENTED + dict_memberlist, /* tp_members */ + #else 0, /* tp_members */ + #endif 0, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ Index: Python/ceval.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Python/ceval.c,v retrieving revision 2.342 diff -c -r2.342 ceval.c *** Python/ceval.c 10 Nov 2002 14:33:26 -0000 2.342 --- Python/ceval.c 30 Dec 2002 22:36:40 -0000 *************** *** 1706,1716 **** PyObject_REPR(w)); break; } ! x = PyDict_GetItem(x, w); if (x == NULL) { ! x = PyDict_GetItem(f->f_globals, w); if (x == NULL) { ! x = PyDict_GetItem(f->f_builtins, w); if (x == NULL) { format_exc_check_arg( PyExc_NameError, --- 1706,1716 ---- PyObject_REPR(w)); break; } ! x = PyDict_GetItemByName(x, w); if (x == NULL) { ! x = PyDict_GetItemByName(f->f_globals, w); if (x == NULL) { ! x = PyDict_GetItemByName(f->f_builtins, w); if (x == NULL) { format_exc_check_arg( PyExc_NameError, *************** *** 1725,1760 **** case LOAD_GLOBAL: w = GETITEM(names, oparg); ! if (PyString_CheckExact(w)) { ! /* Inline the PyDict_GetItem() calls. ! WARNING: this is an extreme speed hack. ! Do not try this at home. */ ! long hash = ((PyStringObject *)w)->ob_shash; ! if (hash != -1) { ! PyDictObject *d; ! d = (PyDictObject *)(f->f_globals); ! x = d->ma_lookup(d, w, hash)->me_value; ! if (x != NULL) { ! Py_INCREF(x); ! PUSH(x); ! continue; ! } ! d = (PyDictObject *)(f->f_builtins); ! x = d->ma_lookup(d, w, hash)->me_value; ! if (x != NULL) { ! Py_INCREF(x); ! PUSH(x); ! continue; ! } ! goto load_global_error; ! } ! } ! /* This is the un-inlined version of the code above */ ! x = PyDict_GetItem(f->f_globals, w); if (x == NULL) { ! 
x = PyDict_GetItem(f->f_builtins, w); if (x == NULL) { - load_global_error: format_exc_check_arg( PyExc_NameError, GLOBAL_NAME_ERROR_MSG, w); --- 1725,1734 ---- case LOAD_GLOBAL: w = GETITEM(names, oparg); ! x = PyDict_GetItemByName(f->f_globals, w); if (x == NULL) { ! x = PyDict_GetItemByName(f->f_builtins, w); if (x == NULL) { format_exc_check_arg( PyExc_NameError, GLOBAL_NAME_ERROR_MSG, w);