diff -r c45a10b93a56 Lib/statistics.py --- a/Lib/statistics.py Mon Feb 03 09:35:08 2014 -0500 +++ b/Lib/statistics.py Tue Feb 04 17:41:13 2014 +0100 @@ -101,6 +101,7 @@ import collections import math +import numbers from fractions import Fraction from decimal import Decimal @@ -114,7 +115,7 @@ # === Private utilities === -def _sum(data, start=0): +def _sum(data, start=None): """_sum(data [, start]) -> value Return a high-precision sum of the given numeric data. If optional @@ -145,18 +146,23 @@ Decimal('0.6963') """ - n, d = _exact_ratio(start) - T = type(start) + if start is None: + t = set() + n = 0 + d = 1 + else: + t = set((type(start),)) + n, d = _exact_ratio(start) partials = {d: n} # map {denominator: sum of numerators} # Micro-optimizations. - coerce_types = _coerce_types exact_ratio = _exact_ratio partials_get = partials.get - # Add numerators for each denominator, and track the "current" type. + # Add numerators for each denominator. for x in data: - T = _coerce_types(T, type(x)) + t.add(type(x)) # keep track of input types with a set n, d = exact_ratio(x) partials[d] = partials_get(d, 0) + n + T = _coerce_types(t) # determine return type based on the set of input types if None in partials: assert issubclass(T, (float, Decimal)) assert not math.isfinite(partials[None]) @@ -228,42 +234,54 @@ return (num, den) -def _coerce_types(T1, T2): - """Coerce types T1 and T2 to a common type. +def _coerce_types(types): + """Coerce a set of numeric types to a common type. - >>> _coerce_types(int, float) + >>> _coerce_types(set((int, float))) <class 'float'> - Coercion is performed according to this table, where "N/A" means - that a TypeError exception is raised. 
+ Coercion is performed according to these rules: - +----------+-----------+-----------+-----------+----------+ - | | int | Fraction | Decimal | float | - +----------+-----------+-----------+-----------+----------+ - | int | int | Fraction | Decimal | float | - | Fraction | Fraction | Fraction | N/A | float | - | Decimal | Decimal | N/A | Decimal | float | - | float | float | float | float | float | - +----------+-----------+-----------+-----------+----------+ + - with an empty set return int + - with only one type in the set return that type + - with the set consisting of int and exactly one other type return + that other type + - with Decimal in the set + -> return Decimal, if all other types are subclasses of numbers.Integral + -> return float, if all other types are subclasses of numbers.Integral + or of float + -> raise TypeError with any other combination + - in all other cases return a representative subclass of the most derived + numeric abstract base class shared by all types in the set; + if the input types do not share any numeric abstract base class, + return float - Subclasses trump their parent class; two subclasses of the same - base class will be coerced to the second of the two. + Representative classes of the numeric abstract base classes are: + + int for numbers.Integral + fractions.Fraction for numbers.Rational + float for numbers.Real """ - # Get the common/fast cases out of the way first. - if T1 is T2: return T1 - if T1 is int: return T2 - if T2 is int: return T1 - # Subclasses trump their parent class. - if issubclass(T2, T1): return T2 - if issubclass(T1, T2): return T1 - # Floats trump everything else. - if issubclass(T2, float): return T2 - if issubclass(T1, float): return T1 - # Subclasses of the same base class give priority to the second. - if T1.__base__ is T2.__base__: return T2 - # Otherwise, just give up. 
- raise TypeError('cannot coerce types %r and %r' % (T1, T2)) + + types.discard(int) # presence of int does not influence the decision tree + if len(types) == 1: + return next(iter(types)) + if all(issubclass(t, numbers.Integral) for t in types): + return int + if all(issubclass(t, numbers.Rational) for t in types): + return Fraction + if any(issubclass(t, Decimal) for t in types): + if all(issubclass(t, (numbers.Integral, Decimal)) for t in types): + return Decimal + if not all(issubclass(t, (numbers.Integral, + Decimal, float)) for t in types): + msg = "cannot coerce type Decimal and non-Integral type(s) {0}" + offending_types = ', '.join( + t.__name__ for t in types + if not issubclass(t, (numbers.Integral, Decimal, float))) + raise TypeError(msg.format(offending_types)) + return float def _counts(data): diff -r c45a10b93a56 Lib/test/test_statistics.py --- a/Lib/test/test_statistics.py Mon Feb 03 09:35:08 2014 -0500 +++ b/Lib/test/test_statistics.py Tue Feb 04 17:41:13 2014 +0100 @@ -800,9 +800,10 @@ rely on the function returning the same type as its input data. """ def test_types_conserved(self): - # Test that functions keeps the same type as their data points. + # Test that functions keep the same type as their data points. # (Excludes mixed data types.) This only tests the type of the return # result, not the value. + class MyFloat(float): def __truediv__(self, other): return type(self)(super().__truediv__(other)) @@ -821,20 +822,95 @@ d = [kind(x) for x in data] result = self.func(d) self.assertIs(type(result), kind) + +class UnivariateCoercionMixin: + """Mixin class for type-coercing functions. + This mixin class holds test(s) for functions which coerce the types of + individual data points to a common type. -class TestSum(NumericTestCase, UnivariateCommonMixin, UnivariateTypeMixin): + """ + def test_mixed_types(self): + # Test that functions handle mixed data types as promised in the + # _coerce_types docstring. 
This only tests the type of the return + # result, not the value. + class MyFloat(float): + def __truediv__(self, other): + return type(self)(super().__truediv__(other)) + def __sub__(self, other): + return type(self)(super().__sub__(other)) + def __rsub__(self, other): + return type(self)(super().__rsub__(other)) + def __pow__(self, other): + return type(self)(super().__pow__(other)) + def __add__(self, other): + return type(self)(super().__add__(other)) + __radd__ = __add__ + + class MyFloat2(float): + def __truediv__(self, other): + return type(self)(super().__truediv__(other)) + def __sub__(self, other): + return type(self)(super().__sub__(other)) + def __rsub__(self, other): + return type(self)(super().__rsub__(other)) + def __pow__(self, other): + return type(self)(super().__pow__(other)) + def __add__(self, other): + return type(self)(super().__add__(other)) + __radd__ = __add__ + + class MyFraction(Fraction): + def __truediv__(self, other): + return type(self)(super().__truediv__(other)) + def __sub__(self, other): + return type(self)(super().__sub__(other)) + def __rsub__(self, other): + return type(self)(super().__rsub__(other)) + def __pow__(self, other): + return type(self)(super().__pow__(other)) + def __add__(self, other): + return type(self)(super().__add__(other)) + __radd__ = __add__ + + real_types = (float, Fraction, MyFloat, MyFraction, + MyFloat2, int) + rational_types = (Fraction, int, MyFraction) + real_d = [kind(3) for kind in real_types] + rational_d = [kind(3) for kind in rational_types] + # Test that the return type is float with mixed Real types + # independent of the order of data elements. + result_fw = self.func(real_d) + result_rv = self.func(real_d[::-1]) + self.assertIs(type(result_fw), type(result_rv)) + self.assertIs(type(result_fw), float) + # Test that the return type is Fraction with mixed Rational types + # independent of the order of data elements. 
+ result_fw = self.func(rational_d) + result_rv = self.func(rational_d[::-1]) + self.assertIs(type(result_fw), type(result_rv)) + self.assertIs(type(result_fw), Fraction) + +class TestSum(NumericTestCase, UnivariateCommonMixin, UnivariateTypeMixin, + UnivariateCoercionMixin): # Test cases for statistics._sum() function. def setUp(self): self.func = statistics._sum def test_empty_data(self): - # Override test for empty data. + # Test that empty data with no start value returns int(0). for data in ([], (), iter([])): - self.assertEqual(self.func(data), 0) - self.assertEqual(self.func(data, 23), 23) - self.assertEqual(self.func(data, 2.3), 2.3) + result = self.func(data) + self.assertEqual(result, 0) + self.assertIs(type(result), int) + # Override test for empty data. Return value and type should be + # determined by start. + for data in ([], (), iter([])): + for start in (0, 23, Fraction(2,3), 2.3): + result = self.func(data, start) + self.assertEqual(result, start) + self.assertIs(type(result), type(start)) def test_ints(self): self.assertEqual(self.func([1, 5, 3, -4, -8, 20, 42, 1]), 60) @@ -882,12 +958,16 @@ def test_mixed_sum(self): # Mixed sums are allowed. + data = [1, 2, Fraction(1, 2), 3.0] + self.assertEqual(self.func(data), 6.5) - # Careful here: order matters. Can't mix Fraction and Decimal directly, - # only after they're converted to float. + data = [1, 2, Decimal("0.25"), 3.0] + self.assertEqual(self.func(data), 6.25) + + # But combining Fraction and Decimal should raise TypeError + # independent of the order of data. data = [1, 2, Fraction(1, 2), 3.0, Decimal("0.25")] - self.assertEqual(self.func(data), 6.75) - + self.assertRaises(TypeError, self.func, data) class SumInternalsTest(NumericTestCase): # Test internals of the sum function. 
@@ -1007,7 +1087,8 @@ self.assertEqual(self.func(data), x) -class TestMean(NumericTestCase, AverageMixin, UnivariateTypeMixin): +class TestMean(NumericTestCase, AverageMixin, UnivariateTypeMixin, + UnivariateCoercionMixin): def setUp(self): self.func = statistics.mean @@ -1424,7 +1505,8 @@ self.assertEqual(self.func(iter(data)), expected) -class TestPVariance(VarianceStdevMixin, NumericTestCase, UnivariateTypeMixin): +class TestPVariance(VarianceStdevMixin, NumericTestCase, UnivariateTypeMixin, + UnivariateCoercionMixin): # Tests for population variance. def setUp(self): self.func = statistics.pvariance @@ -1461,7 +1543,8 @@ self.assertIsInstance(result, Decimal) -class TestVariance(VarianceStdevMixin, NumericTestCase, UnivariateTypeMixin): +class TestVariance(VarianceStdevMixin, NumericTestCase, UnivariateTypeMixin, + UnivariateCoercionMixin): # Tests for sample variance. def setUp(self): self.func = statistics.variance