Index: Objects/longobject.c
===================================================================
--- Objects/longobject.c	(revision 65805)
+++ Objects/longobject.c	(working copy)
@@ -18,6 +18,23 @@
 	(Py_SIZE(x) == 0 ? 0 : (x)->ob_digit[0]))
 #define ABS(x) ((x) < 0 ? -(x) : (x))
 
+/* Free list for exact long objects with abs(size) == FREELIST_DIGITS.
+ * The free list (ab)uses the ob_type field to chain free long objects in a
+ * singly linked, NULL-terminated list; numfree counts the chained objects.
+ */
+static PyLongObject *free_list = NULL;
+static unsigned int numfree = 0;
+#define PyLong_MAXFREELIST 4096
+
+/* We keep a freelist for the smallest allocation size of PyLongObjects.
+ * The allocator pads allocations, so objects that are physically allocated
+ * the same could be treated alike regardless of their digit count, e.g.
+ * on 32-bit systems: (16-12) / 2 == 2 digits
+ * on 64-bit systems: (32-24) / 2 == 4 digits
+ * NOTE(review): that sizing is not implemented; the value is fixed at 1.
+ */
+#define FREELIST_DIGITS 1
+
 #if NSMALLNEGINTS + NSMALLPOSINTS > 0
 /* Small integers are preallocated in this array so that they
    can be shared.
@@ -42,6 +59,7 @@
 #endif
 	return v;
 }
+
 #define CHECK_SMALL_INT(ival) \
 	do if (-NSMALLNEGINTS <= ival && ival < NSMALLPOSINTS) { \
 		return get_small_int(ival); \
@@ -62,7 +80,7 @@
 #else
 #define CHECK_SMALL_INT(ival)
 #define maybe_small_long(val) (val)
-#endif
+#endif /* NSMALLNEGINTS + NSMALLPOSINTS > 0 */
 
 /* If a freshly-allocated long is already shared, it must
    be a small integer, so negating it must go to PyLong_FromLong */
@@ -126,6 +144,13 @@
 _PyLong_New(Py_ssize_t size)
 {
 	PyLongObject *result;
+
+	/* Pop a free-listed object when exactly FREELIST_DIGITS are wanted. */
+	if (free_list && size == FREELIST_DIGITS) {
+		result = free_list;
+		free_list = (PyLongObject *)Py_TYPE(result);
+		numfree--;
+	}
 	/* Can't use sizeof(PyLongObject) here, since the
 	   compiler takes padding at the end into account.
 	   As the consequence, this would waste 2 bytes on
@@ -133,8 +158,10 @@
 	   This computation would be incorrect on systems
 	   which have padding before the digits; with 16-bit
 	   digits this should not happen. */
-	result = PyObject_MALLOC(sizeof(PyVarObject) + 
-				 size*sizeof(digit));
+	else {
+		result = PyObject_MALLOC(sizeof(PyVarObject) + 
+					 size*sizeof(digit));
+	}
 	if (!result) {
 		PyErr_NoMemory();
 		return NULL;
@@ -2195,7 +2222,23 @@
 static void
 long_dealloc(PyObject *v)
 {
-	Py_TYPE(v)->tp_free(v);
+	register Py_ssize_t size = Py_SIZE(v);
+	/* Only exact longs of +/-FREELIST_DIGITS digits are kept for reuse.
+	 * The original author notes that testing abs(size) also works, but
+	 * that abs costs 4 instructions in all cases (x86) versus a common
+	 * 2-instruction short-circuit fall through with the two comparisons
+	 * below, and that the perf difference is measurable. */
+	if (((size == FREELIST_DIGITS) ||
+	     (size == -FREELIST_DIGITS)) &&
+	    PyLong_CheckExact(v) &&
+	    (numfree < PyLong_MAXFREELIST))
+	{
+		Py_TYPE(v) = (PyTypeObject *)free_list;
+		free_list = (PyLongObject*)v;
+		numfree++;
+		return;
+	}
+	Py_TYPE(v)->tp_free(v);
 }
 
 static PyObject *
@@ -3839,4 +3882,14 @@
 		_Py_ForgetReference((PyObject*)v);
 	}
 #endif
+	if (Py_VerboseFlag) {
+		fprintf(stderr, "# cleanup ints: %u on free list.\n", numfree);
+	}
+	while (free_list) {
+		PyObject *v = (PyObject *)free_list;
+		free_list = (PyLongObject *)Py_TYPE(v);
+		PyLong_Type.tp_free(v);
+		numfree--;
+	}
+	assert(numfree == 0);
 }