Index: Modules/mathmodule.c =================================================================== --- Modules/mathmodule.c (revision 81150) +++ Modules/mathmodule.c (working copy) @@ -1129,56 +1129,183 @@ Return an accurate floating point sum of values in the iterable.\n\ Assumes IEEE-754 floating point arithmetic."); +/* Divide-and-conquer factorial algorithm + * + * Based on the formula and psuedo-code provided at: + * http://www.luschny.de/math/factorial/binarysplitfact.html + * + * Faster algorithms exist, but they're more complicated and depend on + * a fast prime factoriazation algorithm. + */ + +static unsigned long +precomputed[] = {3, 15, 5, 35, 315, 63, 693, 9009, 1287, 19305, 328185, 36465, 692835, + 14549535, 1322685, 30421755, 760543875, 58503375, 1579591125ul}; +/* i-th odd number */ +#define ODD(i) (((i) << 1) | 1) +/* Compute product(ODD(i) for i in range(first, last+1)) */ static PyObject * +factorial_partial_product(unsigned long first, unsigned long last) +{ + PyObject *result; + PyObject *a, *x, *y; + Py_ssize_t n, i; + + if (last < sizeof(precomputed)/sizeof(*precomputed)) { + size_t index = first + last - 2; + if (index < sizeof(precomputed)/sizeof(*precomputed)) { + return PyLong_FromUnsignedLong(precomputed[index]); + } + } + + n = last - first + 1; + if (n <= 0) + return PyLong_FromLong(1L); + if (n == 1) + return PyLong_FromUnsignedLong(ODD(first)); + + /* a = [ODD(i) for i in range(first, last + 1)] */ + a = PyList_New(last - first + 1); + if (a == NULL) + return NULL; + for (i = 0; i < n; ++i) { + result = PyLong_FromUnsignedLong(ODD(first + i)); + if (result == NULL) + goto done; + PyList_SET_ITEM(a, i, result); + } + while (n > 1) { + for (i = (n >> 1) - 1; i >= 0; --i) { + x = PyList_GET_ITEM(a, i); + y = PyList_GET_ITEM(a, n - i - 1); + result = PyNumber_Multiply(x, y); + if (result == NULL) + goto done; + PyList_SET_ITEM(a, i, result); + PyList_SET_ITEM(a, n - i - 1, NULL); + Py_DECREF(x); + Py_DECREF(y); + } + n = (n >> 1) + (n & 1); + } + /* Release reference held by the list */ + PyList_SET_ITEM(a, 0, NULL); + done: + Py_DECREF(a); + return result; +} + +static unsigned long +bit_length(unsigned long n) +{ + unsigned long len = 0; + while (n != 0) { + ++len; + n >>= 1; + } + return len; +} + +static unsigned long +bit_count(unsigned long n) +{ + unsigned long count = 0; + while (n != 0) { + ++count; + n &= n - 1; /* clear least significant bit */ + } + return count; +} + +static PyObject * +factorial_loop(unsigned long n) +{ + long i, v; + PyObject *p, *r; + PyObject *part, *tmp; + + p = PyLong_FromLong(1L); + if (p == NULL) + return NULL; + Py_INCREF(p); + r = p; + + for (i = bit_length(n) - 2; i >= 0; --i) { + v = n >> i; + if (v > 2) { + part = factorial_partial_product(((v >> 1) + 1) >> 1, (v - 1) >> 1); + if (part == NULL) + goto error; + + tmp = PyNumber_Multiply(p, part); + Py_DECREF(part); + if (tmp == NULL) + goto error; + Py_DECREF(p); + p = tmp; + + tmp = PyNumber_Multiply(r, p); + if (tmp == NULL) + goto error; + Py_DECREF(r); + r = tmp; + } + } + Py_DECREF(p); + return r; + error: + Py_DECREF(p); + Py_DECREF(r); + return NULL; +} + +static PyObject * math_factorial(PyObject *self, PyObject *arg) { - long i, x; - PyObject *result, *iobj, *newresult; + PyObject *result; + PyObject *r; /* result without trailing zeros */ + PyObject *ntz; /* number of trailing zeros */ + long n; + if (PyFloat_Check(arg)) { - PyObject *lx; double dx = PyFloat_AS_DOUBLE((PyFloatObject *)arg); if (!(Py_IS_FINITE(dx) && dx == floor(dx))) { PyErr_SetString(PyExc_ValueError, "factorial() only accepts integral values"); return NULL; } - lx = PyLong_FromDouble(dx); - if (lx == NULL) + n = (long) dx; + } else { + n = PyLong_AsLong(arg); + if (n == -1 && PyErr_Occurred()) return NULL; - x = PyLong_AsLong(lx); - Py_DECREF(lx); } - else - x = PyLong_AsLong(arg); - if (x == -1 && PyErr_Occurred()) - return NULL; - if (x < 0) { + if (n < 0) { PyErr_SetString(PyExc_ValueError, - "factorial() not defined for negative values"); + "factorial() not defined for negative values"); return NULL; } - result = (PyObject *)PyLong_FromLong(1); - if (result == NULL) + if (n <= 12) { + static const long lookup[] = {1, 1, 2, 6, 24, 120, 720, 5040, 40320, + 362880, 3628800, 39916800, 479001600}; + return PyLong_FromLong(lookup[n]); + } + + r = factorial_loop(n); + if (r == NULL) return NULL; - for (i=1 ; i<=x ; i++) { - iobj = (PyObject *)PyLong_FromLong(i); - if (iobj == NULL) - goto error; - newresult = PyNumber_Multiply(result, iobj); - Py_DECREF(iobj); - if (newresult == NULL) - goto error; - Py_DECREF(result); - result = newresult; + + ntz = PyLong_FromLong(n - bit_count(n)); + if (ntz != NULL) { + result = PyNumber_Lshift(r, ntz); + Py_DECREF(ntz); } + + Py_DECREF(r); return result; - -error: - Py_DECREF(result); - return NULL; } PyDoc_STRVAR(math_factorial_doc,