Index: Doc/library/math.rst =================================================================== --- Doc/library/math.rst (revision 65671) +++ Doc/library/math.rst (working copy) @@ -90,11 +90,6 @@ algorithm's accuracy depends on IEEE-754 arithmetic guarantees and the typical case where the rounding mode is half-even. - .. note:: - - The accuracy of fsum() may be impaired on builds that use - extended precision addition and then double-round the results. - .. versionadded:: 2.6 Index: Lib/test/test_math.py =================================================================== --- Lib/test/test_math.py (revision 65671) +++ Lib/test/test_math.py (working copy) @@ -365,25 +365,13 @@ self.assert_(math.isnan(math.frexp(NAN)[0])) def testFsum(self): - # math.fsum relies on exact rounding for correct operation. - # There's a known problem with IA32 floating-point that causes - # inexact rounding in some situations, and will cause the - # math.fsum tests below to fail; see issue #2937. On non IEEE - # 754 platforms, and on IEEE 754 platforms that exhibit the - # problem described in issue #2937, we simply skip the whole - # test. - + # Some of the tests rely on IEEE double; we skip the + # tests on non-IEEE platforms. if not float.__getformat__("double").startswith("IEEE"): return - # on IEEE 754 compliant machines, both of the expressions - # below should round to 10000000000000002.0. - if 1e16+2.0 != 1e16+2.9999: - return - - # Python version of math.fsum, for comparison. Uses a - # different algorithm based on frexp, ldexp and integer - # arithmetic. + # Python version of math.fsum, for comparison. Uses an + # algorithm based on frexp, ldexp and integer arithmetic. from sys import float_info mant_dig = float_info.mant_dig etiny = float_info.min_exp - mant_dig Index: Modules/mathmodule.c =================================================================== --- Modules/mathmodule.c (revision 65671) +++ Modules/mathmodule.c (working copy) @@ -54,6 +54,7 @@ #include "Python.h" #include "longintrepr.h" /* just for SHIFT */ +#include "float.h" #ifdef _OSF_SOURCE /* OSF1 5.1 doesn't make this available with XOPEN_SOURCE_EXTENDED defined */ @@ -312,226 +313,254 @@ FUNC1(tanh, tanh, 0, "tanh(x)\n\nReturn the hyperbolic tangent of x.") -/* Precision summation function as msum() by Raymond Hettinger in +/* Full precision summation of a sequence of floats. Based on the + function lsum by Raymond Hettinger at , - enhanced with the exact partials sum and roundoff from Mark - Dickinson's post at . - See those links for more details, proofs and other references. + and following implementation suggestions from Tim Peters. - Note 1: IEEE 754R floating point semantics are assumed, - but the current implementation does not re-establish special - value semantics across iterations (i.e. handling -Inf + Inf). + Method: maintain the sum so far in the form - Note 2: No provision is made for intermediate overflow handling; - therefore, sum([1e+308, 1e-308, 1e+308]) returns 1e+308 while - sum([1e+308, 1e+308, 1e-308]) raises an OverflowError due to the - overflow of the first partial sum. + huge_integer * 2**FSUM_MIN_EXP - Note 3: The intermediate values lo, yr, and hi are declared volatile so - aggressive compilers won't algebraically reduce lo to always be exactly 0.0. - Also, the volatile declaration forces the values to be stored in memory as - regular doubles instead of extended long precision (80-bit) values. This - prevents double rounding because any addition or subtraction of two doubles - can be resolved exactly into double-sized hi and lo values. As long as the - hi value gets forced into a double before yr and lo are computed, the extra - bits in downstream extended precision operations (x87 for example) will be - exactly zero and therefore can be losslessly stored back into a double, - thereby preventing double rounding. + where FSUM_MIN_EXP is such that every finite double can be + expressed as an integral multiple of 2**FSUM_MIN_EXP. huge_integer + is stored in base FSUM_BASE. Following a suggestion from Tim + Peters, we use a signed-digit representation: the digits for the + base FSUM_BASE representation all lie in the range [-FSUM_BASE/2, + FSUM_BASE/2). It's not hard to show that every integer (positive + or negative), can be expressed uniquely in the form - Note 4: A similar implementation is in Modules/cmathmodule.c. - Be sure to update both when making changes. + a0 + a1*FSUM_BASE + a2*FSUM_BASE**2 + ... - Note 5: The signature of math.fsum() differs from __builtin__.sum() + This choice of representation makes it easy to deal with both + positive and negative summands. + + Note: The signature of math.fsum() differs from __builtin__.sum() because the start argument doesn't make sense in the context of - accurate summation. Since the partials table is collapsed before - returning a result, sum(seq2, start=sum(seq1)) may not equal the + accurate summation. sum(seq2, start=sum(seq1)) may not equal the accurate result returned by sum(itertools.chain(seq1, seq2)). */ -#define NUM_PARTIALS 32 /* initial partials array size, on stack */ +#define FSUM_MIN_EXP (DBL_MIN_EXP - DBL_MANT_DIG) +#define FSUM_SIZE 30 +#define FSUM_BASE ((long)1 << FSUM_SIZE) +/* allow at least 60 extra bits at the top end, to cope with intermediate + overflow. On an IEEE 754 machine, FSUM_ACC_SIZE is 72. */ +#define FSUM_ACC_SIZE ((DBL_MAX_EXP - FSUM_MIN_EXP + 60)/FSUM_SIZE + 1) -/* Extend the partials array p[] by doubling its size. */ -static int /* non-zero on error */ -_fsum_realloc(double **p_ptr, Py_ssize_t n, - double *ps, Py_ssize_t *m_ptr) -{ - void *v = NULL; - Py_ssize_t m = *m_ptr; +/* _fsum_twopower[i] is 2.0**(i+1) */ +static const double _fsum_twopower[FSUM_SIZE] = { + 2.0, + 4.0, + 8.0, + 16.0, + 32.0, + 64.0, + 128.0, + 256.0, + 512.0, + 1024.0, + 2048.0, + 4096.0, + 8192.0, + 16384.0, + 32768.0, + 65536.0, + 131072.0, + 262144.0, + 524288.0, + 1048576.0, + 2097152.0, + 4194304.0, + 8388608.0, + 16777216.0, + 33554432.0, + 67108864.0, + 134217728.0, + 268435456.0, + 536870912.0, + 1073741824.0, +}; - m += m; /* double */ - if (n < m && m < (PY_SSIZE_T_MAX / sizeof(double))) { - double *p = *p_ptr; - if (p == ps) { - v = PyMem_Malloc(sizeof(double) * m); - if (v != NULL) - memcpy(v, ps, sizeof(double) * n); - } - else - v = PyMem_Realloc(p, sizeof(double) * m); - } - if (v == NULL) { /* size overflow or no memory */ - PyErr_SetString(PyExc_MemoryError, "math.fsum partials"); - return 1; - } - *p_ptr = (double*) v; - *m_ptr = m; - return 0; -} - -/* Full precision summation of a sequence of floats. - - def msum(iterable): - partials = [] # sorted, non-overlapping partial sums - for x in iterable: - i = 0 - for y in partials: - if abs(x) < abs(y): - x, y = y, x - hi = x + y - lo = y - (hi - x) - if lo: - partials[i] = lo - i += 1 - x = hi - partials[i:] = [x] - return sum_exact(partials) - - Rounded x+y stored in hi with the roundoff stored in lo. Together hi+lo - are exactly equal to x+y. The inner loop applies hi/lo summation to each - partial so that the list of partial sums remains exact. - - Sum_exact() adds the partial sums exactly and correctly rounds the final - result (using the round-half-to-even rule). The items in partials remain - non-zero, non-special, non-overlapping and strictly increasing in - magnitude, but possibly not all having the same sign. - - Depends on IEEE 754 arithmetic guarantees and half-even rounding. -*/ - static PyObject* math_fsum(PyObject *self, PyObject *seq) { - PyObject *item, *iter, *sum = NULL; - Py_ssize_t i, j, n = 0, m = NUM_PARTIALS; - double x, y, t, ps[NUM_PARTIALS], *p = ps; - double xsave, special_sum = 0.0, inf_sum = 0.0; - volatile double hi, yr, lo; + PyObject *item, *iter; + double x, m, inf_sum = 0.0, special_sum = 0.0; + int e, q, q_top, i, round_up, ndigits, top_exp; + long digit, half_eps, acc[FSUM_ACC_SIZE], carry, sign; iter = PyObject_GetIter(seq); if (iter == NULL) return NULL; - PyFPE_START_PROTECT("fsum", Py_DECREF(iter); return NULL) + /* initialize accumulator */ + for (i = 0; i < FSUM_ACC_SIZE; i++) + acc[i] = 0; - for(;;) { /* for x in iterable */ - assert(0 <= n && n <= m); - assert((m == NUM_PARTIALS && p == ps) || - (m > NUM_PARTIALS && p != NULL)); + /******************************************** + * Stage 1: accumulate values from iterable * + ********************************************/ + for(;;) { item = PyIter_Next(iter); if (item == NULL) { + Py_DECREF(iter); if (PyErr_Occurred()) - goto _fsum_error; + return NULL; break; } x = PyFloat_AsDouble(item); Py_DECREF(item); - if (PyErr_Occurred()) - goto _fsum_error; + if (PyErr_Occurred()){ + Py_DECREF(iter); + return NULL; + } - xsave = x; - for (i = j = 0; j < n; j++) { /* for y in partials */ - y = p[j]; - if (fabs(x) < fabs(y)) { - t = x; x = y; y = t; - } - hi = x + y; - yr = hi - x; - lo = y - yr; - if (lo != 0.0) - p[i++] = lo; - x = hi; + /* accumulate specials in special_sum, infs in inf_sum */ + if (!Py_IS_FINITE(x)) { + if (Py_IS_INFINITY(x)) + inf_sum += x; + special_sum += x; + continue; } - n = i; /* ps[i:] = [x] */ - if (x != 0.0) { - if (! Py_IS_FINITE(x)) { - /* a nonfinite x could arise either as - a result of intermediate overflow, or - as a result of a nan or inf in the - summands */ - if (Py_IS_FINITE(xsave)) { - PyErr_SetString(PyExc_OverflowError, - "intermediate overflow in fsum"); - goto _fsum_error; - } - if (Py_IS_INFINITY(xsave)) - inf_sum += xsave; - special_sum += xsave; - /* reset partials */ - n = 0; + m = frexp(x, &e); + e -= (FSUM_MIN_EXP + 1); + + /* add digits of m into accumulator */ + m *= _fsum_twopower[e % FSUM_SIZE]; + q_top = q = e / FSUM_SIZE; + for (;;) { + digit = (long)m; + m -= digit; + acc[q] += digit; + if (m == 0.0) + break; + m *= (double)FSUM_BASE; + q--; + } + + /* normalize accumulator */ + for (;;) { + if (acc[q] < -FSUM_BASE/2) { + acc[q] += FSUM_BASE; + acc[++q]--; } - else if (n >= m && _fsum_realloc(&p, n, ps, &m)) - goto _fsum_error; - else - p[n++] = x; + else if (acc[q] >= FSUM_BASE/2) { + acc[q] -= FSUM_BASE; + acc[++q]++; + } + else if (++q > q_top) + break; } } + /*************************************************** + * Stage 2: compute correctly rounded return value * + ***************************************************/ + + /* deal with any special values that occurred. See note above. */ if (special_sum != 0.0) { - if (Py_IS_NAN(inf_sum)) + if (Py_IS_NAN(inf_sum)) { PyErr_SetString(PyExc_ValueError, "-inf + inf in fsum"); - else - sum = PyFloat_FromDouble(special_sum); - goto _fsum_error; + return NULL; + } + return PyFloat_FromDouble(special_sum); } - hi = 0.0; - if (n > 0) { - hi = p[--n]; - /* sum_exact(ps, hi) from the top, stop when the sum becomes - inexact. */ - while (n > 0) { - x = hi; - y = p[--n]; - assert(fabs(y) < fabs(x)); - hi = x + y; - yr = hi - x; - lo = y - yr; - if (lo != 0.0) - break; + /* strip zero limbs from top */ + ndigits = FSUM_ACC_SIZE; + while (ndigits > 0 && acc[ndigits-1] == 0) + ndigits--; + if (ndigits == 0) + return PyFloat_FromDouble(0.0); + + /* sign of result == sign of topmost nonzero limb */ + sign = acc[ndigits-1] > 0 ? 1 : -1; + + /* take absolute value of accumulator, renormalizing digits + to lie in the range [0, FSUM_BASE) */ + carry = 0; + for (i = 0; i < ndigits; i++) { + /* FSUM_BASE/2-1 <= acc[i]*sign + carry <= FSUM_BASE/2 */ + digit = acc[i] * sign + carry; + if (digit < 0) { + acc[i] = digit + FSUM_BASE; + carry = -1; } - /* Make half-even rounding work across multiple partials. - Needed so that sum([1e-16, 1, 1e16]) will round-up the last - digit to two instead of down to zero (the 1e-16 makes the 1 - slightly closer to two). With a potential 1 ULP rounding - error fixed-up, math.fsum() can guarantee commutativity. */ - if (n > 0 && ((lo < 0.0 && p[n-1] < 0.0) || - (lo > 0.0 && p[n-1] > 0.0))) { - y = lo * 2.0; - x = hi + y; - yr = x - hi; - if (y == yr) - hi = x; + else { + acc[i] = digit; + carry = 0; } } - sum = PyFloat_FromDouble(hi); + assert(carry == 0); + /* it's possible that the normalization leads to a zero top digit */ + if (acc[ndigits-1] == 0) + ndigits--; + assert(ndigits > 0 && acc[ndigits-1] != 0); -_fsum_error: - PyFPE_END_PROTECT(hi) - Py_DECREF(iter); - if (p != ps) - PyMem_Free(p); - return sum; + /* Round acc * 2**FSUM_MIN_EXP to the nearest double. */ + + /* 2**(top_exp-1) <= |sum| < 2**top_exp */ + top_exp = FSUM_MIN_EXP + FSUM_SIZE*(ndigits-1); + for (digit = acc[ndigits-1]; digit != 0; digit /= 2) + top_exp++; + /* catch almost all overflows here */ + if (top_exp > DBL_MAX_EXP) + goto overflow_error; + + m = 0.0; + if (top_exp <= DBL_MIN_EXP) { + /* no need for rounding */ + for (i = ndigits-1; i >= 0; i--) + m = FSUM_BASE*m + acc[i]; + return PyFloat_FromDouble((double)sign * + ldexp(m, FSUM_MIN_EXP)); + } + + /* round: e is the position of the first bit to be rounded away. */ + e = top_exp - DBL_MIN_EXP - 1; + half_eps = (long)1 << (e % FSUM_SIZE); + q = e / FSUM_SIZE; + for (i = ndigits-1; i > q; i--) + m = FSUM_BASE*m + acc[i]; + digit = acc[q]; + m = FSUM_BASE*m + (digit & (FSUM_BASE-2*half_eps)); + if ((digit & half_eps) != 0) { + round_up = 0; + if ((digit & (3*half_eps-1)) != 0 || + (half_eps == FSUM_BASE/2 && (acc[q+1] & 1) != 0)) + round_up = 1; + else + for (i = q-1; i >= 0; i--) + if (acc[i] != 0) { + round_up = 1; + break; + } + if (round_up == 1) { + m += 2*half_eps; + if (top_exp == DBL_MAX_EXP && + m == ldexp((double)(2*half_eps), DBL_MANT_DIG)) + /* overflow corner case: result before + rounding is within range, but rounded + result overflows. */ + goto overflow_error; + } + } + return PyFloat_FromDouble((double)sign * + ldexp(m, FSUM_MIN_EXP+FSUM_SIZE*q)); + + overflow_error: + PyErr_SetString(PyExc_OverflowError, + "fsum result too large to represent as a float"); + return NULL; } -#undef NUM_PARTIALS - PyDoc_STRVAR(math_fsum_doc, -"sum(iterable)\n\n\ -Return an accurate floating point sum of values in the iterable.\n\ -Assumes IEEE-754 floating point arithmetic."); +"fsum(iterable)\n\n\ +Return an accurate floating point sum of values in the iterable."); static PyObject * math_factorial(PyObject *self, PyObject *arg)