--- Python-2.6a3/Modules/mathmoduleORIG.c 2008-04-20 18:55:50.000000000 -0700 +++ Python-2.6a3/Modules/mathmodule.c 2008-05-22 08:42:49.000000000 -0700 @@ -55,6 +55,8 @@ #include "Python.h" #include "longintrepr.h" /* just for SHIFT */ +#include /* for FLT_RADIX and DBL_MAX_EXP */ + #ifdef _OSF_SOURCE /* OSF1 5.1 doesn't make this available with XOPEN_SOURCE_EXTENDED defined */ extern double copysign(double, double); @@ -307,6 +309,258 @@ FUNC1(tanh, tanh, 0, "tanh(x)\n\nReturn the hyperbolic tangent of x.") + +/* Precision summation function as msum() by Raymond Hettinger in + , + enhanced with the roundoff handling from Mark Dickinson post at + . + + See both of those for more details, proofs and other references. + + Note 1: IEEE 754 floating point format and semantics are assumed, + but not explicitly maintained. The following rules may not apply: + + 1. if the summands include a NaN, return a NaN, + + 2. if the summands include infinities of both signs, raise ValueError, + + 3. if the summands include infinities of only one sign, return + infinity with that sign, + + 4. otherwise (all summands are finite) if the result is infinite, + raise OverflowError. The result can never be a NaN if all + summands are finite. + + Note 2: the implementation below not include the intermediate + overflow handling from Mark Dickinson's msum(). Therefore, + sum([1e+308, 1e-308, 1e+308]) returns result 1e+308, however + sum([1e+308, 1e+308, 1e-308]) raises an OverflowError due to + intermediate overflow of the first partial sum. + + Note 3: a MemoryError occurs if insufficient memory is available + for the partials array. + + Note 4: aggressively optimizing compilers may eliminate the + error expressions critical for accurate summation. Using + pragmas or keywords as volatile may be necessary as remedy. + + The same summation function is also in ./cmathmodule.c, except + for the 2 lines marked *** below. Make sure to update both. +*/ + +#define DO_SUM_PARTIALS 128 /* initial partials size, on stack */ + +#define DO_SUM_SIGNOF(x) ((x) < 0 ? 0 : 1) + +/* Extend the partials array p[] by doubling its size. + */ +static int /* non-zero on error */ +_do_sum_realloc(double **p_ptr, Py_ssize_t n, + double *ps, Py_ssize_t *m_ptr) +{ + void *v = NULL; + Py_ssize_t m = *m_ptr; + + m += m; /* double */ + if (n < m && m < (PY_SSIZE_T_MAX / sizeof(double))) { + double *p = *p_ptr; + if (p == ps) { + v = PyMem_Malloc(sizeof(double) * m); + if (v != NULL) + memcpy(v, ps, sizeof(double) * n); + } + else + v = PyMem_Realloc(p, sizeof(double) * m); + } + if (v == NULL) { /* size overflow or no memory */ + PyErr_SetString(PyExc_MemoryError, "math sum partials"); + return 1; + } + *p_ptr = (double*) v; + *m_ptr = m; + return 0; +} + +/* Add two floats returning the sum and optionally + the roundoff. Global errno is not updated. + */ +static double +_do_sum_add2(double x, double y, double *lo_ptr) +{ + double hi = x + y; + + if (lo_ptr != NULL) /* return roundoff */ + *lo_ptr = fabs(x) < fabs(y) + ? x - (hi - y) + : y - (hi - x); + return hi; +} + +/* Sum of array p[] of nonoverlapping floats. + + On input, p[] is an array of nonzero, non- + special, nonoverlapping floats, strictly + increasing in magnitude, but possibly not + all having the same sign. + + On output, the sum of p[] gives the error + in the returned result, which is correctly + rounded (using the round-half-to-even rule). + The elements of p[] remain nonzero, non- + special, nonoverlapping, and increasing in + magnitude. + */ +static double +_do_sum_exact(double top, double *p, Py_ssize_t *n_ptr) +{ + double pn, lo, hi = top; + Py_ssize_t n = *n_ptr; + + /* sum from the top (most significant), + stop as soon as sum becomes inexact */ + while (n > 0) { + pn = p[--n]; + hi = _do_sum_add2(hi, pn, &lo); + if (lo != 0.0) { + p[n++] = lo; + *n_ptr = n; + break; + } + } + /* round correctly if necessary */ + if (n > 1) { + pn = p[--n]; + if (DO_SUM_SIGNOF(pn) == DO_SUM_SIGNOF(p[n-1])) { + double t = pn * 2.0; + double x = _do_sum_add2(hi, t, NULL); + if (t == (x - hi)) { + hi = x; + p[n] = -pn; + } + } + } + return hi; +} + +/* Full precision summation of a sequence of floats, + returning the sum as double if no errror occurred. + */ +static int /* non-zero on error */ +_do_sum(PyObject *seq, double *sum_ptr) /* sum */ +{ + int err = 1; + Py_ssize_t i, j, n = 0, m = DO_SUM_PARTIALS; + double x, y, hi, lo, ps[DO_SUM_PARTIALS], *p = ps; + PyObject *iter, *item; + + iter = PyObject_GetIter(seq); + if (iter == NULL) + return err; + + PyFPE_START_PROTECT("sum", goto _do_sum_exit) + + /* for x in iterable */ + for(;;) { + /* some invariants */ + assert(err != 0); + assert(n >= 0); + assert(n <= m); + assert((m == DO_SUM_PARTIALS && p == ps) || + (m > DO_SUM_PARTIALS && p != NULL)); + + item = PyIter_Next(iter); + if (item == NULL) { + if (PyErr_Occurred()) + goto _do_sum_error; + else + break; + } + x = PyFloat_AsDouble(item); + Py_DECREF(item); + if (PyErr_Occurred()) + goto _do_sum_error; + + if (x != 0.0) { + /* i = 0; for y in partials */ + for (i = j = 0; j < n; j++) { + y = p[j]; + hi = _do_sum_add2(x, y, &lo); + if (lo != 0.0) + p[i++] = lo; + x = hi; + } + /* ps[i:] = [x] */ + n = i; + if (x != 0.0) { + /* if non-finite, reset partials, + effectively adding subsequent + items without roundoff (and + yielding correct results iff + IEEE 754 rules are observed + for floating point errors) */ + if (! Py_IS_FINITE(x)) + n = 0; + else if (n >= m && + _do_sum_realloc(&p, n, ps, &m)) + goto _do_sum_error; + p[n++] = x; + } + } + } + assert(err != 0); + assert(n <= m); + + if (n > 0) { + hi = p[--n]; + if (hi == 0.0 || Py_IS_FINITE(hi)) { + /* sum(ps, hi) */ + hi = _do_sum_exact(hi, p, &n); + } + else { /* handle errors */ + errno = Py_IS_NAN(hi) ? EDOM : ERANGE; + if (is_error(hi)) /* *** */ + goto _do_sum_error; + } + } + else /* default */ + hi = 0.0; + *sum_ptr = hi; + err = 0; + +_do_sum_error: + PyFPE_END_PROTECT(hi) + + Py_DECREF(iter); + if (p != ps) + PyMem_Free(p); + return err; +} + +#undef DO_SUM_PARTIALS +#undef DO_SUM_SIGNOF + +static PyObject* +math_sum(PyObject *self, PyObject *args) +{ + double s = 0.0; + PyObject *seq; + + if (! PyArg_UnpackTuple(args, "sum", 1, 1, &seq)) + return NULL; + + if (_do_sum(seq, &s)) + return NULL; + + return PyFloat_FromDouble(s); +} + +PyDoc_STRVAR(math_sum_doc, +"sum(sequence)\n\n\ +Return the full precision sum of a sequence of numbers.\n\ +When the sequence is empty, return zero.\n\n\ +For accurate results, IEEE 754 floating point format\n\ +and semantics and floating point radix 2 are required."); + static PyObject * math_trunc(PyObject *self, PyObject *number) { @@ -718,6 +972,7 @@ {"sin", math_sin, METH_O, math_sin_doc}, {"sinh", math_sinh, METH_O, math_sinh_doc}, {"sqrt", math_sqrt, METH_O, math_sqrt_doc}, + {"sum", math_sum, METH_VARARGS, math_sum_doc}, {"tan", math_tan, METH_O, math_tan_doc}, {"tanh", math_tanh, METH_O, math_tanh_doc}, {"trunc", math_trunc, METH_O, math_trunc_doc},