diff -r e0a90b1c4cdf Doc/library/statistics.rst --- a/Doc/library/statistics.rst Fri Feb 07 15:04:26 2014 -0500 +++ b/Doc/library/statistics.rst Sat Feb 08 08:01:11 2014 +1100 @@ -20,6 +20,16 @@ This module provides functions for calculating mathematical statistics of numeric (:class:`Real`-valued) data. +.. note:: + + Unless explicitly noted otherwise, these functions support :class:`int`, + :class:`float`, :class:`decimal.Decimal` and :class:`fractions.Fraction`. + Behaviour with other types (whether in the numeric tower or not) is + currently unsupported. Behaviour with mixed types is also undefined and + implementation-dependent. If your input data consists of mixed types, + you may be able to use :func:`map` to ensure a consistent result, e.g. + ``map(float, input_data)``. + Averages and measures of central location ----------------------------------------- @@ -261,13 +271,13 @@ .. function:: pvariance(data, mu=None) Return the population variance of *data*, a non-empty iterable of real-valued - numbers. Variance, or second moment about the mean, is a measure of the - variability (spread or dispersion) of data. A large variance indicates that - the data is spread out; a small variance indicates it is clustered closely - around the mean. + numbers representing the entire population. Variance, or second moment about + the mean, is a measure of the variability (spread or dispersion) of data. A + large variance indicates that the data is spread out; a small variance + indicates it is clustered closely around the mean. - If the optional second argument *mu* is given, it should be the mean of - *data*. If it is missing or ``None`` (the default), the mean is + If the optional second argument *mu* is given, it should be the population + mean μ of *data*. If it is missing or ``None`` (the default), the mean is automatically calculated. Use this function to calculate the variance from the entire population. To @@ -336,13 +346,13 @@ .. 
function:: variance(data, xbar=None) Return the sample variance of *data*, an iterable of at least two real-valued - numbers. Variance, or second moment about the mean, is a measure of the - variability (spread or dispersion) of data. A large variance indicates that - the data is spread out; a small variance indicates it is clustered closely - around the mean. + numbers representing a sample from the larger population. Variance, or + second moment about the mean, is a measure of the variability (spread or + dispersion) of data. A large variance indicates that the data is spread out; + a small variance indicates it is clustered closely around the mean. If the optional second argument *xbar* is given, it should be the mean of - *data*. If it is missing or ``None`` (the default), the mean is + the sample *data*. If it is missing or ``None`` (the default), the mean is automatically calculated. Use this function when your data is a sample from a population. To calculate @@ -404,5 +414,5 @@ Subclass of :exc:`ValueError` for statistics-related exceptions. .. - # This modelines must appear within the last ten lines of the file. + # This modeline must appear within the last ten lines of the file. kate: indent-width 3; remove-trailing-space on; replace-tabs on; encoding utf-8; diff -r e0a90b1c4cdf Lib/statistics.py --- a/Lib/statistics.py Fri Feb 07 15:04:26 2014 -0500 +++ b/Lib/statistics.py Sat Feb 08 08:01:11 2014 +1100 @@ -144,19 +144,31 @@ >>> _sum(data) Decimal('0.6963') + Mixed types are currently treated as an error, except that int is + allowed. """ + # We fail as soon as we reach a value that is not an int or the type of + # the first value which is not an int. E.g. _sum([int, int, float, int]) + # is okay, but _sum([int, int, float, Fraction]) is not. + allowed_types = set([int, type(start)]) n, d = _exact_ratio(start) - T = type(start) partials = {d: n} # map {denominator: sum of numerators} # Micro-optimizations. 
- coerce_types = _coerce_types exact_ratio = _exact_ratio partials_get = partials.get - # Add numerators for each denominator, and track the "current" type. + # Add numerators for each denominator. for x in data: - T = _coerce_types(T, type(x)) + _check_type(type(x), allowed_types) n, d = exact_ratio(x) partials[d] = partials_get(d, 0) + n + # Find the expected result type. If allowed_types has only one item, it + # will be int; if it has two, use the one which isn't int. + assert len(allowed_types) in (1, 2) + if len(allowed_types) == 1: + assert allowed_types.pop() is int + T = int + else: + T = (allowed_types - set([int])).pop() if None in partials: assert issubclass(T, (float, Decimal)) assert not math.isfinite(partials[None]) @@ -172,6 +184,15 @@ return T(total) +def _check_type(T, allowed): + if T not in allowed: + if len(allowed) == 1: + allowed.add(T) + else: + types = ', '.join([t.__name__ for t in allowed] + [T.__name__]) + raise TypeError("unsupported mixed types: %s" % types) + + def _exact_ratio(x): """Convert Real number x exactly to (numerator, denominator) pair. @@ -228,44 +249,6 @@ return (num, den) -def _coerce_types(T1, T2): - """Coerce types T1 and T2 to a common type. - - >>> _coerce_types(int, float) - - - Coercion is performed according to this table, where "N/A" means - that a TypeError exception is raised. - - +----------+-----------+-----------+-----------+----------+ - | | int | Fraction | Decimal | float | - +----------+-----------+-----------+-----------+----------+ - | int | int | Fraction | Decimal | float | - | Fraction | Fraction | Fraction | N/A | float | - | Decimal | Decimal | N/A | Decimal | float | - | float | float | float | float | float | - +----------+-----------+-----------+-----------+----------+ - - Subclasses trump their parent class; two subclasses of the same - base class will be coerced to the second of the two. - - """ - # Get the common/fast cases out of the way first. 
- if T1 is T2: return T1 - if T1 is int: return T2 - if T2 is int: return T1 - # Subclasses trump their parent class. - if issubclass(T2, T1): return T2 - if issubclass(T1, T2): return T1 - # Floats trump everything else. - if issubclass(T2, float): return T2 - if issubclass(T1, float): return T1 - # Subclasses of the same base class give priority to the second. - if T1.__base__ is T2.__base__: return T2 - # Otherwise, just give up. - raise TypeError('cannot coerce types %r and %r' % (T1, T2)) - - def _counts(data): # Generate a table of sorted (value, frequency) pairs. if data is None: diff -r e0a90b1c4cdf Lib/test/test_statistics.py --- a/Lib/test/test_statistics.py Fri Feb 07 15:04:26 2014 -0500 +++ b/Lib/test/test_statistics.py Sat Feb 08 08:01:11 2014 +1100 @@ -687,6 +687,26 @@ self.assertRaises(ValueError, statistics._decimal_to_ratio, d) +class CheckTypeTest(unittest.TestCase): + # Test _check_type private function. + + def test_allowed(self): + # Test that a type which should be allowed is allowed. + allowed = set([int, float]) + statistics._check_type(int, allowed) + statistics._check_type(float, allowed) + + def test_not_allowed(self): + # Test that a type which should not be allowed raises. + allowed = set([int, float]) + self.assertRaises(TypeError, statistics._check_type, Decimal, allowed) + + def test_add_to_allowed(self): + # Test that a second type will be added to the allowed set. + allowed = set([int]) + statistics._check_type(float, allowed) + self.assertEqual(allowed, set([int, float])) + # === Tests for public functions === @@ -881,17 +901,17 @@ self.assertRaises(TypeError, self.func, [1, 2, 3, b'999']) def test_mixed_sum(self): - # Mixed sums are allowed. - - # Careful here: order matters. Can't mix Fraction and Decimal directly, - # only after they're converted to float. - data = [1, 2, Fraction(1, 2), 3.0, Decimal("0.25")] - self.assertEqual(self.func(data), 6.75) + # Mixed input types are not (currently) allowed. 
+ # Check that mixed data types fail. + self.assertRaises(TypeError, self.func, [1, 2.0, Fraction(1, 2)]) + # And so does mixed start argument. + self.assertRaises(TypeError, self.func, [1, 2.0], Decimal(1)) class SumInternalsTest(NumericTestCase): # Test internals of the sum function. + @unittest.skip("FIXME: this test is now obsolete?") def test_ignore_instance_float_method(self): # Test that __float__ methods on data instances are ignored.