diff -r 821a34a44291 Lib/statistics.py --- a/Lib/statistics.py Mon Aug 26 04:54:47 2013 +1000 +++ b/Lib/statistics.py Tue Aug 27 01:05:19 2013 +1000 @@ -81,10 +81,10 @@ If you have previously calculated the mean, you can pass it as the optional second argument to the four "spread" functions to avoid recalculating it: ->>> data = [1, 1, 1, 1] # FIXME better non-sucky example please +>>> data = [1, 2, 2, 4, 4, 4, 5, 6] >>> xbar = mean(data) ->>> variance(data, xbar) #doctest: +ELLIPSIS -0.0 +>>> pvariance(data, xbar) +2.5 Other functions and classes @@ -116,7 +116,9 @@ import math import numbers import operator -from builtins import sum as _sum + +from fractions import Fraction +from decimal import Decimal # === Exceptions === @@ -141,8 +143,7 @@ >>> sum([3, 2.25, 4.5, -0.5, 1.0], 0.75) 11.0 - Float sums are calculated using high-precision floating point arithmetic - that can avoid some sources of round-off error: + Some sources of round-off error will be avoided: >>> sum([1e50, 1, -1e50] * 1000) # Built-in sum returns zero. 1000.0 @@ -153,142 +154,133 @@ >>> sum([F(2, 3), F(7, 5), F(1, 4), F(5, 6)]) Fraction(63, 20) - Decimal sums honour the context: - - >>> import decimal - >>> D = decimal.Decimal + >>> from decimal import Decimal as D >>> data = [D("0.1375"), D("0.2108"), D("0.3061"), D("0.0419")] >>> sum(data) Decimal('0.6963') - >>> with decimal.localcontext( - ... decimal.Context(prec=2, rounding=decimal.ROUND_DOWN)): - ... sum(data) - Decimal('0.68') - - - Limitations - ----------- - - The promise of high-precision summation of floats depends crucially on - IEEE-754 correct rounding. On platforms that do not provide that, all - promises of higher precision are null and void. - - ``sum`` supports mixed arithmetic with the following limitations: - - - mixing Fractions and Decimals raises TypeError; - - mixing floats with either Fractions or Decimals coerces to float, - which may lose precision; - - complex numbers are not supported. - - These limitations may be relaxed in future versions. """ - if not isinstance(start, numbers.Number): - raise TypeError('sum only accepts numbers') - total = start - data = iter(data) - x = None - if not isinstance(total, float): - # Non-float sum. If we find a float, we exit this loop and continue - # with the float code below. Until that happens, we keep adding. - for x in data: - if isinstance(x, float): - total = float(total) - break - total += x - else: - # No break, so we're done. - return total - # High-precision float sum. - assert isinstance(total, float) - partials = [] - add_partial(total, partials) - if x is not None: - add_partial(x, partials) + n, d = _exact_ratio(start) + T = type(start) + partials = {d: n} # map {denominator: sum of numerators} + # Micro-optimizations. + coerce_types = _coerce_types + exact_ratio = _exact_ratio + partials_get = partials.get + # Add numerators for each denominator, and track the "current" type. for x in data: - try: - # Don't call float() directly, as that converts strings and we - # don't want that. Also, like all dunder methods, we should call - # __float__ on the class, not the instance. - x = type(x).__float__(x) - except OverflowError: - x = float('inf') if x > 0 else float('-inf') - add_partial(x, partials) - return _sum(partials) + T = _coerce_types(T, type(x)) + n, d = exact_ratio(x) + partials[d] = partials_get(d, 0) + n + if None in partials: + assert issubclass(T, (float, Decimal)) + assert not math.isfinite(partials[None]) + return T(partials[None]) + total = Fraction() + for d, n in sorted(partials.items()): + total += Fraction(n, d) + if issubclass(T, int): + assert total.denominator == 1 + return T(total.numerator) + if issubclass(T, Decimal): + return T(total.numerator)/total.denominator + return T(total) # === Private utilities === -# Thanks to Raymond Hettinger for his recipe: -# http://code.activestate.com/recipes/393090/ -# and Jonathan Shewchuk for the algorithm. -def add_partial(x, partials): - """Helper function for full-precision summation of binary floats. +def _exact_ratio(x): + """Convert Real number x exactly to (numerator, denominator) pair. - Add float x in place to the list partials, keeping the sum exact with no - rounding error. + >>> _exact_ratio(0.25) + (1, 4) + x is expected to be an int, Fraction, Decimal or float. + """ + try: + try: + # int, Fraction + return (x.numerator, x.denominator) + except AttributeError: + # float + try: + return x.as_integer_ratio() + except AttributeError: + # Decimal + try: + return _decimal_to_ratio(x) + except AttributeError: + msg = "can't convert type '{}' to numerator/denominator" + raise TypeError(msg.format(type(x).__name__)) from None + except (OverflowError, ValueError): + # INF or NAN + if __debug__: + # Decimal signalling NANs cannot be converted to float :-( + if isinstance(x, Decimal): + assert not x.is_finite() + else: + assert not math.isfinite(x) + return (x, None) - Arguments - --------- - x - Must be a float. +# FIXME This is faster than Fraction.from_decimal, but still too slow. +def _decimal_to_ratio(d): + """Convert Decimal d to exact integer ratio (numerator, denominator). - partials - A list containing the partial sums. - - - Description - ----------- - - Initialise partials to be an empty list. Then for each float value ``x`` - you wish to add, call ``add_partial(x, partials)``. - - When you are done, call the built-in ``sum(partials)`` to round the - result to the standard float precision. - - If any x is not a float, or partials is not initialised to an empty - list, results are undefined. - - The correctness of this algorithm depends on IEEE-754 arithmetic - guarantees, in particular, correct rounding. - - - Examples - -------- - - >>> partials = [] - >>> for x in (0.125, 1e100, 1e-50, 0.125, 1e100): - ... add_partial(x, partials) - >>> partials - [0.0, 1e-50, 0.25, 2e+100] + >>> from decimal import Decimal + >>> _decimal_to_ratio(Decimal("2.6")) + (26, 10) """ - # Keep these as assertions so they can be optimized away. - assert isinstance(x, float) and isinstance(partials, list) - if not partials: - partials.append(0.0) # Holder for NAN/INF values. - if not math.isfinite(x): - partials[0] += x - return - # Rounded x+y stored in hi with the round-off stored in lo. Together - # hi+lo are exactly equal to x+y. The loop applies hi/lo summation to - # each partial so that the list of partial sums remains exact. Depends - # on IEEE-754 arithmetic guarantees. See proof of correctness at: - # www-2.cs.cmu.edu/afs/cs/project/quake/public/papers/robust-arithmetic.ps - i = 1 - for y in partials[1:]: - if abs(x) < abs(y): - x, y = y, x - hi = x + y - lo = y - (hi - x) - if lo: - partials[i] = lo - i += 1 - x = hi - assert i > 0 - partials[i:] = [x] + sign, digits, exp = d.as_tuple() + if exp in ('F', 'n', 'N'): # INF, NAN, sNAN + assert not d.is_finite() + raise ValueError + num = 0 + for digit in digits: + num = num*10 + digit + if sign: + num = -num + den = 10**-exp + return (num, den) + + +def _coerce_types(T1, T2): + """Coerce types T1 and T2 to a common type. + + >>> _coerce_types(int, float) + + + Coercion is performed according to this table, where "N/A" means + that a TypeError exception is raised. + + +----------+-----------+-----------+-----------+----------+ + | | int | Fraction | Decimal | float | + +----------+-----------+-----------+-----------+----------+ + | int | int | Fraction | Decimal | float | + | Fraction | Fraction | Fraction | N/A | float | + | Decimal | Decimal | N/A | Decimal | float | + | float | float | float | float | float | + +----------+-----------+-----------+-----------+----------+ + + Subclasses trump their parent class; two subclasses of the same + base class will be coerced to the second of the two. + + """ + # Get the common/fast cases out of the way first. + if T1 is T2: return T1 + if T1 is int: return T2 + if T2 is int: return T1 + # Subclasses trump their parent class. + if issubclass(T2, T1): return T2 + if issubclass(T1, T2): return T1 + # Floats trump everything else. + if issubclass(T2, float): return T2 + if issubclass(T1, float): return T1 + # Subclasses of the same base class give priority to the second. + if T1.__base__ is T2.__base__: return T2 + # Otherwise, just give up. + raise TypeError('cannot coerce types %r and %r' % (T1, T2)) def _counts(data): @@ -696,7 +688,7 @@ >>> from decimal import Decimal as D >>> pvariance([D("27.5"), D("30.25"), D("30.25"), D("34.5"), D("41.75")]) - Decimal('24.8150') + Decimal('24.815') >>> from fractions import Fraction as F >>> pvariance([F(1, 4), F(5, 4), F(1, 2)]) diff -r 821a34a44291 Lib/test/test_statistics.py --- a/Lib/test/test_statistics.py Mon Aug 26 04:54:47 2013 +1000 +++ b/Lib/test/test_statistics.py Tue Aug 27 01:05:19 2013 +1000 @@ -172,8 +172,9 @@ ... self.assertApproxEqual(a, b, rel=1e-3) ... >>> import unittest + >>> from io import StringIO # Suppress test runner output. >>> suite = unittest.TestLoader().loadTestsFromTestCase(MyTest) - >>> unittest.TextTestRunner().run(suite) + >>> unittest.TextTestRunner(stream=StringIO()).run(suite) """ @@ -644,59 +645,44 @@ # === Tests for private utility functions === -class AddPartialTest(unittest.TestCase): - def test_inplace(self): - # Test that add_partial modifies list in place and returns None. - L = [] - result = statistics.add_partial(1.5, L) - self.assertEqual(L, [0.0, 1.5]) - self.assertIsNone(result) +class ExactRatioTest(unittest.TestCase): + # Test _exact_ratio utility. - def test_add(self): - # Test that add_partial actually does add. - L = [] - statistics.add_partial(1.5, L) - statistics.add_partial(2.5, L) - self.assertEqual(sum(L), 4.0) - statistics.add_partial(1e120, L) - statistics.add_partial(1e-120, L) - statistics.add_partial(0.5, L) - self.assertEqual(sum(L), 1e120) - statistics.add_partial(-1e120, L) - self.assertEqual(sum(L), 4.5) - statistics.add_partial(-4.5, L) - self.assertEqual(sum(L), 1e-120) + def test_int(self): + for i in (-20, -3, 0, 5, 99, 10**20): + self.assertEqual(statistics._exact_ratio(i), (i, 1)) - def test_nan(self): - # Test that add_partial works as expected with NANs. - L = [] - for x in (1.5, float('NAN'), 2.5): - statistics.add_partial(x, L) - self.assertTrue(math.isnan(sum(L))) + def test_fraction(self): + numerators = (-5, 1, 12, 38) + for n in numerators: + f = Fraction(n, 37) + self.assertEqual(statistics._exact_ratio(f), (n, 37)) - def do_inf_test(self, infinity): - L = [] - statistics.add_partial(1.5, L) - statistics.add_partial(infinity, L) - statistics.add_partial(2.5, L) - total = sum(L) - # Result is an infinity of the correct sign. - self.assertTrue(math.isinf(total)) - self.assertEqual((total > 0), (infinity > 0), "signs do not match") - # Adding another infinity doesn't change that. - statistics.add_partial(infinity, L) - total = sum(L) - self.assertTrue(math.isinf(total)) - self.assertEqual((total > 0), (infinity > 0), "signs do not match") - # But adding an infinity of the opposite sign changes it to a NAN. - statistics.add_partial(-infinity, L) - self.assertTrue(math.isnan(sum(L))) + def test_float(self): + self.assertEqual(statistics._exact_ratio(0.125), (1, 8)) + self.assertEqual(statistics._exact_ratio(1.125), (9, 8)) + data = [random.uniform(-100, 100) for _ in range(100)] + for x in data: + num, den = statistics._exact_ratio(x) + self.assertEqual(x, num/den) - def test_inf(self): - # Test that add_partial works as expected with INFs. - inf = float('inf') - self.do_inf_test(inf) - self.do_inf_test(-inf) + def test_decimal(self): + D = Decimal + _exact_ratio = statistics._exact_ratio + self.assertEqual(_exact_ratio(D("0.125")), (125, 1000)) + self.assertEqual(_exact_ratio(D("12.345")), (12345, 1000)) + self.assertEqual(_exact_ratio(D("-1.98")), (-198, 100)) + + +class DecimalToRatioTest(unittest.TestCase): + # Test _decimal_to_ratio private function. + + def testSpecialsRaise(self): + # Test that NANs and INFs raise ValueError. + # Non-special values are covered by _exact_ratio above. + for d in (Decimal('NAN'), Decimal('sNAN'), Decimal('INF')): + self.assertRaises(ValueError, statistics._decimal_to_ratio, d) + # === Tests for public functions === @@ -866,18 +852,6 @@ ] self.assertEqual(self.func(data), Decimal("20.686")) - def test_decimal_context(self): - # Test that sum honours the context settings. - data = list(map(Decimal, "0.033 0.133 0.233 0.333 0.433".split())) - with decimal.localcontext( - decimal.Context(prec=1, rounding=decimal.ROUND_DOWN) - ): - self.assertEqual(self.func(data), Decimal("1")) - with decimal.localcontext( - decimal.Context(prec=2, rounding=decimal.ROUND_UP) - ): - self.assertEqual(self.func(data), Decimal("1.2")) - def test_compare_with_math_fsum(self): # Compare with the math.fsum function. # Ideally we ought to get the exact same result, but sometimes @@ -991,15 +965,25 @@ result = statistics.sum([1, 2, inf, 3, -inf, 4]) self.assertTrue(math.isnan(result)) - def test_decimal_mismatched_infs(self): - # Test behaviour of Decimal INFs with opposite sign. + def test_decimal_mismatched_infs_to_nan(self): + # Test adding Decimal INFs with opposite sign returns NAN. inf = Decimal('inf') data = [1, 2, inf, 3, -inf, 4] - sum = statistics.sum with decimal.localcontext(decimal.ExtendedContext): - self.assertTrue(math.isnan(sum(data))) + self.assertTrue(math.isnan(statistics.sum(data))) + + def test_decimal_mismatched_infs_to_nan(self): + # Test adding Decimal INFs with opposite sign raises InvalidOperation. + inf = Decimal('inf') + data = [1, 2, inf, 3, -inf, 4] with decimal.localcontext(decimal.BasicContext): - self.assertRaises(decimal.InvalidOperation, sum, data) + self.assertRaises(decimal.InvalidOperation, statistics.sum, data) + + def test_decimal_snan_raises(self): + # Adding sNAN should raise InvalidOperation. + sNAN = Decimal('sNAN') + data = [1, sNAN, 2] + self.assertRaises(decimal.InvalidOperation, statistics.sum, data) # === Tests for averages ===