diff -r b0cc8c9ab912 Lib/statistics.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Lib/statistics.py Wed Aug 14 00:01:53 2013 -0400 @@ -0,0 +1,812 @@ +## Module statistics.py +## +## Copyright (c) 2013 Steven D'Aprano. +## +## Permission is hereby granted, free of charge, to any person obtaining +## a copy of this software and associated documentation files (the +## "Software"), to deal in the Software without restriction, including +## without limitation the rights to use, copy, modify, merge, publish, +## distribute, sublicense, and/or sell copies of the Software, and to +## permit persons to whom the Software is furnished to do so, subject to +## the following conditions: +## +## The above copyright notice and this permission notice shall be +## included in all copies or substantial portions of the Software. +## +## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +## EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +## MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +## IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +## CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +## TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +## SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +""" +Statistics module for Python 3.3 and better. + +This module provides functions for calculating statistics of data, including +averages, variance, and standard deviation. + + + +Calculating averages +-------------------- + +================== ============================================= +Function Description +================== ============================================= +mean Arithmetic mean (average) of data. +median Median (middle value) of data. +mode Mode (most common value) of data. 
+================== ============================================= + +Calculate the arithmetic mean ("the average") of data: + +>>> mean([-1.0, 2.5, 3.25, 5.75]) +2.625 + + +Calculate the standard median of discrete data: + +>>> median([2, 3, 4, 5]) +3.5 + + +Calculate the median, or 50th percentile, of data grouped into class intervals +centred on the data values provided. E.g. if your data points are rounded to +the nearest whole number: + +>>> median.grouped([2, 2, 3, 3, 3, 4]) #doctest: +ELLIPSIS +2.8333333333... + +This should be interpreted in this way: you have two data points in the class +interval 1.5-2.5, three data points in the class interval 2.5-3.5, and one in +the class interval 3.5-4.5. The median of these data points is 2.8333... + + +Calculating variability or spread +--------------------------------- + +================== ============================================= +Function Description +================== ============================================= +pvariance Population variance of data. +variance Sample variance of data. +pstdev Population standard deviation of data. +stdev Sample standard deviation of data. +================== ============================================= + +Calculate the standard deviation of sample data: + +>>> stdev([2.5, 3.25, 5.5, 11.25, 11.75]) #doctest: +ELLIPSIS +4.38961843444... + +If you have previously calculated the mean, you can pass it as the optional +second argument to the four "spread" functions to avoid recalculating it: + +>>> data = [1, 1, 1, 1] # FIXME better non-sucky example please +>>> xbar = mean(data) +>>> variance(data, xbar) #doctest: +ELLIPSIS +0.0 + + +Other functions and classes +--------------------------- + +================== ============================================= +Function Description +================== ============================================= +sum High-precision sum of numeric data. +StatisticsError Exception for statistics errors. 
+================== ============================================= + +The built-in sum function can lose precision when dealing with floats. The +``sum`` function in this module is designed to be higher-precision, while +still supporting Fractions and Decimals, but disallowing non-numeric arguments +such as lists, tuples and strings. + + +""" + +# Module metadata. +__version__ = "0.2a" +__date__ = "2013-08-13" +__author__ = "Steven D'Aprano" +__author_email__ = "steve+python@pearwood.info" + + +__all__ = [ 'sum', 'StatisticsError', + 'pstdev', 'pvariance', 'stdev', 'variance', + 'mean', 'median', 'mode', + ] + + +import collections +import math +import numbers +import operator +from builtins import sum as _sum + + +# === Exceptions === + +class StatisticsError(ValueError): + pass + + +# === Public utilities === + +def sum(data, start=0): + """sum(data [, start]) -> value + + Return a high-precision sum of the given numeric data. If optional + argument ``start`` is given, it is added to the total. If ``data`` is + empty, ``start`` (defaulting to 0) is returned. + + + Examples + -------- + + >>> sum([3, 2.25, 4.5, -0.5, 1.0], 0.75) + 11.0 + + Float sums are calculated using high-precision floating point arithmetic + that can avoid some sources of round-off error: + + >>> sum([1e50, 1, -1e50] * 1000) # Built-in sum returns zero. + 1000.0 + + Fractions and Decimals are also supported: + + >>> from fractions import Fraction as F + >>> sum([F(2, 3), F(7, 5), F(1, 4), F(5, 6)]) + Fraction(63, 20) + + Decimal sums honour the context: + + >>> import decimal + >>> D = decimal.Decimal + >>> data = [D("0.1375"), D("0.2108"), D("0.3061"), D("0.0419")] + >>> sum(data) + Decimal('0.6963') + >>> with decimal.localcontext( + ... decimal.Context(prec=2, rounding=decimal.ROUND_DOWN)): + ... sum(data) + Decimal('0.68') + + + Limitations + ----------- + + The promise of high-precision summation of floats depends crucially on + IEEE-754 correct rounding. 
On platforms that do not provide that, all + promises of higher precision are null and void. + + ``sum`` supports mixed arithmetic with the following limitations: + + - mixing Fractions and Decimals raises TypeError; + - mixing floats with either Fractions or Decimals coerces to float, + which may lose precision; + - complex numbers are not supported. + + These limitations may be relaxed in future versions. + + """ + if not isinstance(start, numbers.Number): + raise TypeError('sum only accepts numbers') + total = start + data = iter(data) + x = None + if not isinstance(total, float): + # Non-float sum. If we find a float, we exit this loop and continue + # with the float code below. Until that happens, we keep adding. + for x in data: + if isinstance(x, float): + total = float(total) + break + total += x + else: + # No break, so we're done. + return total + # High-precision float sum. + assert isinstance(total, float) + partials = [] + add_partial(total, partials) + if x is not None: + add_partial(x, partials) + for x in data: + try: + # Don't call float() directly, as that converts strings and we + # don't want that. Also, like all dunder methods, we should call + # __float__ on the class, not the instance. + x = type(x).__float__(x) + except OverflowError: + x = float('inf') if x > 0 else float('-inf') + add_partial(x, partials) + return _sum(partials) + + +# === Private utilities === + +# Thanks to Raymond Hettinger for his recipe: +# http://code.activestate.com/recipes/393090/ +def add_partial(x, partials): + """Helper function for full-precision summation of binary floats. + + Add float x in place to the list partials, keeping the sum exact with no + rounding error. + + + Arguments + --------- + + x + Must be a float. + + partials + A list containing the partial sums. + + + Description + ----------- + + Initialise partials to be an empty list. Then for each float value ``x`` + you wish to add, call ``add_partial(x, partials)``. 
+ + When you are done, call the built-in ``sum(partials)`` to round the + result to the standard float precision. + + If any x is not a float, or partials is not initialised to an empty + list, results are undefined. + + The correctness of this algorithm depends on IEEE-754 arithmetic + guarantees, in particular, correct rounding. + + + Examples + -------- + + >>> partials = [] + >>> for x in (0.125, 1e100, 1e-50, 0.125, 1e100): + ... add_partial(x, partials) + >>> partials + [0.0, 1e-50, 0.25, 2e+100] + + """ + # Keep these as assertions so they can be optimized away. + assert isinstance(x, float) and isinstance(partials, list) + if not partials: + partials.append(0.0) # Holder for NAN/INF values. + if not math.isfinite(x): + partials[0] += x + return + # Rounded x+y stored in hi with the round-off stored in lo. Together + # hi+lo are exactly equal to x+y. The loop applies hi/lo summation to + # each partial so that the list of partial sums remains exact. Depends + # on IEEE-754 arithmetic guarantees. See proof of correctness at: + # www-2.cs.cmu.edu/afs/cs/project/quake/public/papers/robust-arithmetic.ps + i = 1 + for y in partials[1:]: + if abs(x) < abs(y): + x, y = y, x + hi = x + y + lo = y - (hi - x) + if lo: + partials[i] = lo + i += 1 + x = hi + assert i > 0 + partials[i:] = [x] + + +def _attach_to(target): + """Attach the decorated function to target. + + >>> def f(): pass + >>> + >>> @_attach_to(f) + ... def g(): pass + >>> + >>> f.g is g + True + + """ + def decorator(func): + setattr(target, func.__name__, func) + return func + return decorator + + +# === Measures of central tendency (averages) === + +def mean(data): + """mean(data) -> arithmetic mean of data + + Return the sample arithmetic mean of ``data``, a sequence or iterator + of real-valued numbers. + + The arithmetic mean is the sum of the data divided by the number of + data points. It is commonly called "the average", although it is only + one of many different mathematical averages. 
It is a measure of the + central location of the data. + + + Examples + -------- + + >>> mean([1, 2, 3, 4, 4]) + 2.8 + + >>> from fractions import Fraction as F + >>> mean([F(3, 7), F(1, 21), F(5, 3), F(1, 3)]) + Fraction(13, 21) + + >>> from decimal import Decimal as D + >>> mean([D("0.5"), D("0.75"), D("0.625"), D("0.375")]) + Decimal('0.5625') + + + Errors + ------ + + If ``data`` is empty, StatisticsError will be raised. + + + Additional Information + ---------------------- + + The mean is strongly affected by outliers and is not a robust estimator + for central location: the mean is not necessarily a typical example of + the data points. For more robust, although less efficient, measures of + central location, see ``median`` and ``mode``. + + The sample mean gives an unbiased estimate of the true population mean, + which means that on average, ``mean(sample)`` will equal the mean of + the entire population. If you call ``mean`` with the entire population, + the result returned is the population mean \N{GREEK SMALL LETTER MU}. + """ + if iter(data) is data: + data = list(data) + n = len(data) + if n < 1: + raise StatisticsError('mean requires at least one data point') + return sum(data)/n + + +# FIXME: investigate ways to calculate medians without sorting? Quickselect? +def median(data): + """Return the median (middle value) of numeric data. + + The median is a robust measure of central location, and is less affected + by the presence of outliers in your data. + + This uses the "mean-of-middle-two" method of calculating the median. When + the number of data points is odd, the middle data point is returned: + + >>> median([1, 3, 5]) + 3 + + When the number of data points is even, the median is interpolated by + taking the average of the two middle values: + + >>> median([1, 3, 5, 7]) + 4.0 + + This is suited for when your data is discrete, and you don't mind that + the median may not be an actual data point. 
Three other methods for + calculating median are provided as methods on the ``median`` function: + + * median.low + * median.high + * median.grouped + + See individual methods for details. + """ + # If you think that having four definitions of median is annoying, you + # ought to see the FIFTEEN definitions for quartiles! + data = sorted(data) + n = len(data) + if n == 0: + raise StatisticsError("no median for empty data") + if n%2 == 1: + return data[n//2] + else: + i = n//2 + return (data[i - 1] + data[i])/2 + + +@_attach_to(median) +def low(data): + """Return the low median of numeric data. + + The low median is always a member of the data set. When the number + of data points is odd, the middle value is returned. When it is + even, the smaller of the two middle values is returned. + + >>> median.low([1, 3, 5]) + 3 + >>> median.low([1, 3, 5, 7]) + 3 + + Use the low median when your data are discrete and you prefer the median + to be an actual data point rather than interpolated. + """ + data = sorted(data) + n = len(data) + if n == 0: + raise StatisticsError("no median for empty data") + if n%2 == 1: + return data[n//2] + else: + return data[n//2 - 1] + + +@_attach_to(median) +def high(data): + """Return the high median of data. + + The high median is always a member of the data set. When the number of + data points is odd, the middle value is returned. When it is even, the + larger of the two middle values is returned. + + >>> median.high([1, 3, 5]) + 3 + >>> median.high([1, 3, 5, 7]) + 5 + + Use the high median when your data are discrete and you prefer the median + to be an actual data point rather than interpolated. + """ + data = sorted(data) + n = len(data) + if n == 0: + raise StatisticsError("no median for empty data") + return data[n//2] + + +@_attach_to(median) +def grouped(data, interval=1): + """Return the 50th percentile (median) of grouped continuous data. 
+ + >>> median.grouped([1, 2, 2, 3, 4, 4, 4, 4, 4, 5]) + 3.7 + >>> median.grouped([52, 52, 53, 54]) + 52.5 + + This calculates the median as the 50th percentile, and should be + used when your data is continuous and grouped. In the above example, + the values 1, 2, 3, etc. actually represent the midpoint of classes + 0.5-1.5, 1.5-2.5, 2.5-3.5, etc. The middle value falls somewhere in + class 3.5-4.5, and interpolation is used to estimate it. + + Optional argument ``interval`` represents the class interval, and + defaults to 1. Changing the class interval naturally will change the + interpolated 50th percentile value: + + >>> median.grouped([1, 3, 3, 5, 7], interval=1) + 3.25 + >>> median.grouped([1, 3, 3, 5, 7], interval=2) + 3.5 + + This function does not check whether the data points are at least + ``interval`` apart. + """ + # References: + # http://www.ualberta.ca/~opscan/median.html + # https://mail.gnome.org/archives/gnumeric-list/2011-April/msg00018.html + # https://projects.gnome.org/gnumeric/doc/gnumeric-function-SSMEDIAN.shtml + data = sorted(data) + n = len(data) + if n == 0: + raise StatisticsError("no median for empty data") + elif n == 1: + return data[0] + # Find the value at the midpoint. Remember this corresponds to the + # centre of the class interval. + x = data[n//2] + for obj in (x, interval): + if isinstance(obj, (str, bytes)): + raise TypeError('expected number but got %r' % obj) + try: + L = x - interval/2 # The lower limit of the median interval. + except TypeError: + # Mixed type. For now we just coerce to float. + L = float(x) - float(interval)/2 + cf = data.index(x) # Number of values below the median interval. + # FIXME The following line could be more efficient for big lists. + f = data.count(x) # Number of data points in the median interval. 
+ return L + interval*(n/2 - cf)/f + +del low, high, grouped + + +def mode(data, max_modes=1): + """mode(data [, max_modes]) -> mode(s) + + Return the most common data point, or points, from discrete data. The mode + (when it exists) is the most typical value, and is a robust measure of + central location. + + + Arguments + --------- + + data + Non-empty iterable of data points, not necessarily numeric. + + max_modes + The maximum number of modes to return. Defaults to 1. Passing 0 or + None as ``max_modes`` will allow an unlimited number of modes. + + + Examples + -------- + + ``mode`` assumes discrete data, and by default returns a single value. + This is the standard treatment of the mode as commonly taught in schools: + + >>> mode([1, 1, 2, 3, 3, 3, 3, 4]) + 3 + + This also works with nominal (non-numeric) data: + + >>> mode(["red", "blue", "blue", "red", "green", "red", "red"]) + 'red' + + + If you suspect that your data has more than one mode, pass a positive + int as the ``max_modes`` argument, and up to that many modes will be + returned as a list. In this example, the data has two modes, both of which + have a frequency of three: + + >>> mode([5, 3, 2, 1, 5, 4, 2, 2, 5], max_modes=2) + [2, 5] + + + If you pass 0 or None as ``max_modes``, an unlimited number of modes may + be returned. + + + Errors + ------ + + If your data is empty, or if it has more modes than you specified as + ``max_modes`` (default of 1), then ``mode`` will raise StatisticsError. + """ + # Generate a table of sorted (value, frequency) pairs. + if data is None: + raise TypeError('None is not iterable') + table = collections.Counter(data).most_common() + if not table: + raise StatisticsError('empty data has no mode') + # Extract the modes (highest frequency). 
+ maxfreq = table[0][1] + for i in range(1, len(table)): + if table[i][1] != maxfreq: + table = table[:i] + break + if max_modes and len(table) > max_modes: + raise StatisticsError('got too many modes: %d' % len(table)) + if max_modes == 1: + assert len(table) == 1 + return table[0][0] + return [t[0] for t in table] + + +# === Measures of spread === + +# See http://mathworld.wolfram.com/Variance.html +# http://mathworld.wolfram.com/SampleVariance.html +# http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance +# +# Under no circumstances use the so-called "computational formula for +# variance", as that is only suitable for hand calculations with a small +# amount of low-precision data. It has terrible numeric properties. +# +# See a comparison of three computational methods here: +# http://www.johndcook.com/blog/2008/09/26/comparing-three-methods-of-computing-standard-deviation/ + +def _ss(data, c=None): + """Return sum of square deviations of sequence data. + + If ``c`` is None, the mean is calculated in one pass, and the deviations + from the mean are calculated in a second pass. Otherwise, deviations are + calculated from ``c`` as given. Use the second case with care, as it can + lead to garbage results. + """ + if c is None: + c = mean(data) + ss = sum((x-c)**2 for x in data) + # The following sum should mathematically equal zero, but due to rounding + # error may not. + ss -= sum((x-c) for x in data)**2/len(data) + assert not ss < 0, 'negative sum of square deviations: %f' % ss + return ss + + +def variance(data, xbar=None): + """variance(data [, xbar]) -> sample variance of numeric data + + Return the sample variance of ``data``, a sequence of real-valued numbers. + + Variance, or second moment about the mean, is a measure of the variability + (spread or dispersion) of data. A large variance indicates that the data + is spread out; a small variance indicates it is clustered closely around + the mean. 
+ + Use this function when your data is a sample from a population. To + calculate the variance from the entire population, see ``pvariance``. + + + Arguments + --------- + + data + sequence of numeric (non-complex) data with at least two values. + + xbar + (Optional) Mean of the sample data. If missing or None (the default), + the mean is automatically calculated. + + + Examples + -------- + + >>> data = [2.75, 1.75, 1.25, 0.25, 0.5, 1.25, 3.5] + >>> variance(data) + 1.3720238095238095 + + If you have already calculated the mean of your data, you can pass it as + the optional second argument ``xbar`` to avoid recalculating it: + + >>> m = mean(data) + >>> variance(data, m) + 1.3720238095238095 + + .. CAUTION:: Using arbitrary values for ``xbar`` may lead to invalid + or impossible results. + + + Decimals and Fractions are supported: + + >>> from decimal import Decimal as D + >>> variance([D("27.5"), D("30.25"), D("30.25"), D("34.5"), D("41.75")]) + Decimal('31.01875') + + >>> from fractions import Fraction as F + >>> variance([F(1, 6), F(1, 2), F(5, 3)]) + Fraction(67, 108) + + + Additional Information + ---------------------- + + This is the sample variance s\N{SUPERSCRIPT TWO} with Bessel's correction, + also known as variance with N-1 degrees of freedom. Provided the data + points are representative (e.g. independent and identically distributed), + the result will be an unbiased estimate of the population variance. + + If you somehow know the population mean \N{GREEK SMALL LETTER MU} you + should use it with the ``pvariance`` function to get the sample variance. + """ + if iter(data) is data: + data = list(data) + n = len(data) + if n < 2: + raise StatisticsError('variance requires at least two data points') + ss = _ss(data, xbar) + return ss/(n-1) + + +def pvariance(data, mu=None): + """pvariance(data [, mu]) -> population variance of numeric data + + Return the population variance of ``data``, a sequence of real-valued + numbers. 
+ + Variance, or second moment about the mean, is a measure of the variability + (spread or dispersion) of data. A large variance indicates that the data + is spread out; a small variance indicates it is clustered closely around + the mean. + + Use this function to calculate the variance from the entire population. + To estimate the variance from a sample, the ``variance`` function is + usually a better choice. + + Arguments + --------- + + data + non-empty sequence of numeric (non-complex) data. + + mu + (Optional) Mean of the population from which your data has been taken. + If ``mu`` is missing or None (the default), the data is presumed to be + the entire population, and the mean automatically calculated. + + + Examples + -------- + + >>> data = [0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25] + >>> pvariance(data) + 1.25 + + If you have already calculated the mean of the data, you can pass it as + the optional second argument to avoid recalculating it: + + >>> mu = mean(data) + >>> pvariance(data, mu) + 1.25 + + .. CAUTION:: Using arbitrary values for ``mu`` may lead to invalid + or impossible results. + + Decimals and Fractions are supported: + + >>> from decimal import Decimal as D + >>> pvariance([D("27.5"), D("30.25"), D("30.25"), D("34.5"), D("41.75")]) + Decimal('24.8150') + + >>> from fractions import Fraction as F + >>> pvariance([F(1, 4), F(5, 4), F(1, 2)]) + Fraction(13, 72) + + + Additional Information + ---------------------- + + When called with the entire population, this gives the population variance + \N{GREEK SMALL LETTER SIGMA}\N{SUPERSCRIPT TWO}. When called on a sample + instead, this is the biased sample variance s\N{SUPERSCRIPT TWO}, also + known as variance with N degrees of freedom. + + If you somehow know the true population mean \N{GREEK SMALL LETTER MU}, + you may use this function to calculate the sample variance, giving the + known population mean as the second argument. Provided the data points are + representative (e.g. 
independent and identically distributed), the result + will be an unbiased estimate of the population variance. + """ + if iter(data) is data: + data = list(data) + n = len(data) + if n < 1: + raise StatisticsError('pvariance requires at least one data point') + ss = _ss(data, mu) + return ss/n + + +def stdev(data, xbar=None): + """stdev(data [, xbar]) -> sample standard deviation of numeric data + + Return the square root of the sample variance. See ``variance`` for + arguments and other details. + + >>> stdev([1.5, 2.5, 2.5, 2.75, 3.25, 4.75]) + 1.0810874155219827 + + """ + var = variance(data, xbar) + try: + return var.sqrt() + except AttributeError: + return math.sqrt(var) + + +def pstdev(data, mu=None): + """pstdev(data [, mu]) -> population standard deviation of numeric data + + Return the square root of the population variance. See ``pvariance`` for + arguments and other details. + + >>> pstdev([1.5, 2.5, 2.5, 2.75, 3.25, 4.75]) + 0.986893273527251 + + """ + var = pvariance(data, mu) + try: + return var.sqrt() + except AttributeError: + return math.sqrt(var) + + diff -r b0cc8c9ab912 Lib/test/test_statistics.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Lib/test/test_statistics.py Wed Aug 14 00:01:53 2013 -0400 @@ -0,0 +1,982 @@ +"""Test suite for statistics module.""" + +#from test.support import run_unittest + +import decimal +import doctest +import math +import random +import types +import unittest + +from decimal import Decimal +from fractions import Fraction + +# Test helper. +from test.test_statistics_approx import NumericTestCase + +# Module to be tested. +import statistics + + +# === Tests for the statistics module === + +class GlobalsTest(unittest.TestCase): + module = statistics + expected_metadata = [ + "__doc__", "__all__", "__version__", "__date__", + "__author__", "__author_email__", + ] + + def testMeta(self): + # Test for the existence of metadata. 
+ for meta in self.expected_metadata: + self.assertTrue(hasattr(self.module, meta), + "%s not present" % meta) + + def testCheckAll(self): + # Check everything in __all__ exists and is public. + module = self.module + for name in module.__all__: + # No private names in __all__: + self.assertFalse(name.startswith("_"), + 'private name "%s" in __all__' % name) + # And anything in __all__ must exist: + self.assertTrue(hasattr(module, name), + 'missing name "%s" in __all__' % name) + + +class DocTests(unittest.TestCase): + def testDocTests(self): + failed, tried = doctest.testmod(statistics) + self.assertTrue(tried > 0) + self.assertTrue(failed == 0) + + +class StatsErrorTest(unittest.TestCase): + def testHasException(self): + self.assertTrue(hasattr(statistics, 'StatisticsError')) + self.assertTrue(issubclass(statistics.StatisticsError, ValueError)) + + +# === Tests for private utility functions === + +class AddPartialTest(unittest.TestCase): + def testInplace(self): + # Test that add_partial modifies list in place and returns None. + L = [] + result = statistics.add_partial(1.5, L) + self.assertEqual(L, [0.0, 1.5]) + self.assertTrue(result is None) + + def testAdd(self): + # Test that add_partial actually does add. + L = [] + statistics.add_partial(1.5, L) + statistics.add_partial(2.5, L) + self.assertEqual(sum(L), 4.0) + statistics.add_partial(1e120, L) + statistics.add_partial(1e-120, L) + statistics.add_partial(0.5, L) + self.assertEqual(sum(L), 1e120) + statistics.add_partial(-1e120, L) + self.assertEqual(sum(L), 4.5) + statistics.add_partial(-4.5, L) + self.assertEqual(sum(L), 1e-120) + + def testNan(self): + # Test that add_partial works as expected with NANs. 
+ L = [] + for x in (1.5, float('NAN'), 2.5): + statistics.add_partial(x, L) + self.assertTrue(math.isnan(sum(L))) + + def do_inf_test(self, infinity): + L = [] + statistics.add_partial(1.5, L) + statistics.add_partial(infinity, L) + statistics.add_partial(2.5, L) + total = sum(L) + # Result is an infinity of the correct sign. + self.assertTrue(math.isinf(total)) + self.assertTrue((total > 0) == (infinity > 0)) + # Adding another infinity doesn't change that. + statistics.add_partial(infinity, L) + total = sum(L) + self.assertTrue(math.isinf(total)) + self.assertTrue((total > 0) == (infinity > 0)) + # But adding an infinity of the opposite sign changes it to a NAN. + statistics.add_partial(-infinity, L) + self.assertTrue(math.isnan(sum(L))) + + def testInf(self): + # Test that add_partial works as expected with INFs. + inf = float('inf') + self.do_inf_test(inf) + self.do_inf_test(-inf) + + +# === Tests for public functions === + +class UnivariateCommonMixin: + # Common tests for most univariate functions that take a data argument. + + def testNoArgs(self): + # Fail if given no arguments. + self.assertRaises(TypeError, self.func) + + def testEmptyData(self): + # Fail when the data argument (first argument) is empty. + for empty in ([], (), iter([])): + self.assertRaises(statistics.StatisticsError, self.func, empty) + + def prepare_data(self): + """Return int data for various tests.""" + data = list(range(10)) + while data == sorted(data): + random.shuffle(data) + return data + + def testNoInPlaceModifications(self): + # Test that the function does not modify its input data. + data = self.prepare_data() + assert len(data) != 1 # Necessary to avoid infinite loop. + assert data != sorted(data) + saved = data[:] + assert data is not saved + _ = self.func(data) + self.assertListEqual(data, saved, "data has been modified") + + def testOrderDoesntMatter(self): + # Test that the order of data points doesn't change the result. 
+ + # CAUTION: due to floating point rounding errors, the result actually + # may depend on the order. Consider this test representing an ideal. + # To avoid this test failing, only test with exact values such as ints + # or Fractions. + data = [1, 2, 3, 3, 3, 4, 5, 6]*100 + expected = self.func(data) + random.shuffle(data) + actual = self.func(data) + self.assertEqual(expected, actual) + + def testTypeOfDataCollection(self): + # Test that the type of iterable data doesn't affect the result. + class MyList(list): + pass + class MyTuple(tuple): + pass + def generator(data): + return (obj for obj in data) + data = self.prepare_data() + expected = self.func(data) + for kind in (list, tuple, iter, MyList, MyTuple, generator): + result = self.func(kind(data)) + self.assertEqual(result, expected) + + def testRangeData(self): + # Test that functions work with range objects. + data = range(20, 50, 3) + expected = self.func(list(data)) + self.assertEqual(self.func(data), expected) + + def testBadArgTypes(self): + # Test that function raises when given data of the wrong type. + + # Don't roll the following into a loop like this: + # for bad in list_of_bad: + # self.check_for_type_error(bad) + # + # Since assertRaises doesn't show the arguments that caused the test + # failure, it is very difficult to debug these test failures when the + # following are in a loop. + self.check_for_type_error(None) + self.check_for_type_error(23) + self.check_for_type_error(42.0) + self.check_for_type_error(object()) + + def check_for_type_error(self, *args): + self.assertRaises(TypeError, self.func, *args) + + def testTypeOfDataElement(self): + # Check the type of data elements doesn't affect the numeric result. + # This is a weaker test than UnivariateTypeMixin.testTypesConserved, + # because it checks the numeric result by equality, but not by type. 
+ class MyFloat(float): + def __truediv__(self, other): + return type(self)(super().__truediv__(other)) + def __add__(self, other): + return type(self)(super().__add__(other)) + __radd__ = __add__ + + raw = self.prepare_data() + expected = self.func(raw) + for kind in (float, MyFloat, Decimal, Fraction): + data = [kind(x) for x in raw] + result = type(expected)(self.func(data)) + self.assertEqual(result, expected) + + +class UnivariateTypeMixin: + """Mixin class for type-conserving functions. + + This mixin class holds test(s) for functions which conserve the type of + individual data points. E.g. the mean of a list of Fractions should itself + be a Fraction. + + Not all tests to do with types need go in this class. Only those that + rely on the function returning the same type as its input data. + """ + def testTypesConserved(self): + # Test that functions keep the same type as their data points. + # (Excludes mixed data types.) This only tests the type of the return + # result, not the value. + class MyFloat(float): + def __truediv__(self, other): + return type(self)(super().__truediv__(other)) + def __sub__(self, other): + return type(self)(super().__sub__(other)) + def __rsub__(self, other): + return type(self)(super().__rsub__(other)) + def __pow__(self, other): + return type(self)(super().__pow__(other)) + def __add__(self, other): + return type(self)(super().__add__(other)) + __radd__ = __add__ + + data = self.prepare_data() + for kind in (float, Decimal, Fraction, MyFloat): + d = [kind(x) for x in data] + result = self.func(d) + self.assertIs(type(result), kind) + + +class TestSum(NumericTestCase, UnivariateCommonMixin, UnivariateTypeMixin): + # Test cases for statistics.sum() function. + + def setUp(self): + self.func = statistics.sum + + def testEmptyData(self): + # Override test for empty data. 
+ for data in ([], (), iter([])): + self.assertEqual(self.func(data), 0) + self.assertEqual(self.func(data, 23), 23) + self.assertEqual(self.func(data, 2.3), 2.3) + + def testInts(self): + self.assertEqual(self.func([1, 5, 3, -4, -8, 20, 42, 1]), 60) + self.assertEqual(self.func([4, 2, 3, -8, 7], 1000), 1008) + + def testFloats(self): + self.assertEqual(self.func([0.25]*20), 5.0) + self.assertEqual(self.func([0.125, 0.25, 0.5, 0.75], 1.5), 3.125) + + def testFractions(self): + F = Fraction + self.assertEqual(self.func([Fraction(1, 1000)]*500), Fraction(1, 2)) + + def testDecimals(self): + D = Decimal + data = [D("0.001"), D("5.246"), D("1.702"), D("-0.025"), + D("3.974"), D("2.328"), D("4.617"), D("2.843"), + ] + self.assertEqual(self.func(data), Decimal("20.686")) + + def testDecimalContext(self): + # Test that sum honours the context settings. + data = list(map(Decimal, "0.033 0.133 0.233 0.333 0.433".split())) + with decimal.localcontext( + decimal.Context(prec=1, rounding=decimal.ROUND_DOWN) + ): + self.assertEqual(self.func(data), Decimal("1")) + with decimal.localcontext( + decimal.Context(prec=2, rounding=decimal.ROUND_UP) + ): + self.assertEqual(self.func(data), Decimal("1.2")) + + def testFloatSum(self): + # Compare with the math.fsum function. + # Ideally we ought to get the exact same result, but sometimes + # we differ by a very slight amount :-( + data = [random.uniform(-100, 1000) for _ in range(1000)] + self.assertApproxEqual(self.func(data), math.fsum(data), rel=2e-16) + + def testStartArgument(self): + # Test that the optional start argument works correctly. + data = [random.uniform(1, 1000) for _ in range(100)] + t = self.func(data) + self.assertEqual(t+42, self.func(data, 42)) + self.assertEqual(t-23, self.func(data, -23)) + self.assertEqual(t+1e20, self.func(data, 1e20)) + + def testStringsFail(self): + # Sum of strings should fail. 
+ self.assertRaises(TypeError, self.func, [1, 2, 3], '999') + self.assertRaises(TypeError, self.func, [1, 2, 3, '999']) + + def testBytesFail(self): + # Sum of bytes should fail. + self.assertRaises(TypeError, self.func, [1, 2, 3], b'999') + self.assertRaises(TypeError, self.func, [1, 2, 3, b'999']) + + def testMixedSum(self): + # Mixed sums are allowed. + + # Careful here: order matters. Can't mix Fraction and Decimal directly, + # only after they're converted to float. + data = [1, 2, Fraction(1, 2), 3.0, Decimal("0.25")] + self.assertEqual(self.func(data), 6.75) + + +class SumInternalsTest(NumericTestCase): + # Test internals of the sum function. + + def testIgnoreInstanceFloatMethod(self): + # Test that __float__ methods on data instances are ignored. + + # Python typically calls __dunder__ methods on the class, not the + # instance. The ``sum`` implementation calls __float__ directly. To + # better match the behaviour of Python, we call it only on the class, + # not the instance. This test will fail if somebody "fixes" that code. + + # Create a fake __float__ method. + def __float__(self): + raise AssertionError('test fails') + + # Inject it into an instance. + class MyNumber(Fraction): + pass + x = MyNumber(3) + x.__float__ = types.MethodType(__float__, x) + + # Check it works as expected. + self.assertRaises(AssertionError, x.__float__) + self.assertEqual(float(x), 3.0) + # And now test the function. + self.assertEqual(statistics.sum([1.0, 2.0, x, 4.0]), 10.0) + + +class SumTortureTest(NumericTestCase): + def testTorture(self): + # Tim Peters' torture test for sum, and variants of same. + self.assertEqual(statistics.sum([1, 1e100, 1, -1e100]*10000), 20000.0) + self.assertEqual(statistics.sum([1e100, 1, 1, -1e100]*10000), 20000.0) + self.assertApproxEqual( + statistics.sum([1e-100, 1, 1e-100, -1]*10000), 2.0e-96, rel=5e-16 + ) + + +class SumSpecialValues(NumericTestCase): + # Test that sum works correctly with IEEE-754 special values. 
+ + def testNan(self): + for type_ in (float, Decimal): + nan = type_('nan') + result = statistics.sum([1, nan, 2]) + self.assertIs(type(result), type_) + self.assertTrue(math.isnan(result)) + + def check_infinity(self, x, inf): + """Check x is an infinity of the same type and sign as inf.""" + self.assertTrue(math.isinf(x)) + self.assertIs(type(x), type(inf)) + self.assertEqual(x > 0, inf > 0) + assert x == inf + + def do_test_inf(self, inf): + # Adding a single infinity gives infinity. + result = statistics.sum([1, 2, inf, 3]) + self.check_infinity(result, inf) + # Adding two infinities of the same sign also gives infinity. + result = statistics.sum([1, 2, inf, 3, inf, 4]) + self.check_infinity(result, inf) + + def testFloatInf(self): + inf = float('inf') + for sign in (+1, -1): + self.do_test_inf(sign*inf) + + def testDecimalInf(self): + inf = Decimal('inf') + for sign in (+1, -1): + self.do_test_inf(sign*inf) + + def testFloatMismatchedInf(self): + # Test that adding two infinities of opposite sign gives a NAN. + inf = float('inf') + result = statistics.sum([1, 2, inf, 3, -inf, 4]) + self.assertTrue(math.isnan(result)) + + def testDecimalMismatchedInf(self): + # Test behaviour of Decimal INFs with opposite sign. + inf = Decimal('inf') + data = [1, 2, inf, 3, -inf, 4] + sum = statistics.sum + with decimal.localcontext(decimal.ExtendedContext): + self.assertTrue(math.isnan(sum(data))) + with decimal.localcontext(decimal.BasicContext): + self.assertRaises(decimal.InvalidOperation, sum, data) + + +# === Tests for averages === + +class AverageMixin(UnivariateCommonMixin): + # Mixin class holding common tests for averages. + + def testSingleValue(self): + # Average of a single value is the value itself. + for x in (23, 42.5, 1.3e15, Fraction(15, 19), Decimal('0.28')): + self.assertEqual(self.func([x]), x) + + def testRepeatedSingleValue(self): + # The average of a single repeated value is the value itself. 
+ for x in (3.5, 17, 2.5e15, Fraction(61, 67), Decimal('4.9712')): + for count in (2, 5, 10, 20): + data = [x]*count + self.assertEqual(self.func(data), x) + + +class TestMean(NumericTestCase, AverageMixin, UnivariateTypeMixin): + def setUp(self): + self.func = statistics.mean + + def testTorturePep(self): + # "Torture Test" from PEP-450. + self.assertEqual(self.func([1e100, 1, 3, -1e100]), 1) + + def testInts(self): + # Test mean with ints. + data = [0, 1, 2, 3, 3, 3, 4, 5, 5, 6, 7, 7, 7, 7, 8, 9] + random.shuffle(data) + self.assertEqual(self.func(data), 4.8125) + + def testFloats(self): + # Test mean with floats. + data = [17.25, 19.75, 20.0, 21.5, 21.75, 23.25, 25.125, 27.5] + random.shuffle(data) + self.assertEqual(self.func(data), 22.015625) + + def testDecimals(self): + # Test mean with ints. + D = Decimal + data = [D("1.634"), D("2.517"), D("3.912"), D("4.072"), D("5.813")] + random.shuffle(data) + self.assertEqual(self.func(data), D("3.5896")) + + def testFractions(self): + # Test mean with Fractions. + F = Fraction + data = [F(1, 2), F(2, 3), F(3, 4), F(4, 5), F(5, 6), F(6, 7), F(7, 8)] + random.shuffle(data) + self.assertEqual(self.func(data), F(1479, 1960)) + + def testInf(self): + # Test mean with infinities. + raw = [1, 3, 5, 7, 9] # Use only ints, to avoid TypeError later. + for kind in (float, Decimal): + for sign in (1, -1): + inf = kind("inf")*sign + data = raw + [inf] + result = self.func(data) + self.assertTrue(math.isinf(result)) + self.assertEqual(result, inf) + + def testMismatchedInfs(self): + # Test mean with infinities of opposite sign. + data = [2, 4, 6, float('inf'), 1, 3, 5, float('-inf')] + result = self.func(data) + self.assertTrue(math.isnan(result)) + + def testNan(self): + # Test mean with NANs. + raw = [1, 3, 5, 7, 9] # Use only ints, to avoid TypeError later. 
+ for kind in (float, Decimal): + inf = kind("nan") + data = raw + [inf] + result = self.func(data) + self.assertTrue(math.isnan(result)) + + def testBigData(self): + # Test adding a large constant to every data point. + c = 1e9 + data = [3.4, 4.5, 4.9, 6.7, 6.8, 7.2, 8.0, 8.1, 9.4] + expected = self.func(data) + c + assert expected != c + result = self.func([x+c for x in data]) + self.assertEqual(result, expected) + + def testDoubledData(self): + # Mean of [a,b,c...z] should be same as for [a,a,b,b,c,c...z,z]. + data = [random.uniform(-3, 5) for _ in range(1000)] + expected = self.func(data) + actual = self.func(data*2) + self.assertApproxEqual(actual, expected) + + +class TestMedian(NumericTestCase, AverageMixin): + # Common tests for median and all median.* functions. + def setUp(self): + self.func = statistics.median + + def prepare_data(self): + """Overload method from UnivariateCommonMixin.""" + data = super().prepare_data() + if len(data)%2 != 1: + data.append(2) + return data + + def testEvenInts(self): + # Test median with an even number of int data points. + data = [1, 2, 3, 4, 5, 6] + assert len(data)%2 == 0 + self.assertEqual(self.func(data), 3.5) + + def testOddInts(self): + # Test median with an odd number of int data points. + data = [1, 2, 3, 4, 5, 6, 9] + assert len(data)%2 == 1 + self.assertEqual(self.func(data), 4) + + def testOddFractions(self): + # Test median works with an odd number of Fractions. + F = Fraction + data = [F(1, 7), F(2, 7), F(3, 7), F(4, 7), F(5, 7)] + assert len(data)%2 == 1 + random.shuffle(data) + self.assertEqual(self.func(data), F(3, 7)) + + def testEvenFractions(self): + # Test median works with an even number of Fractions. + F = Fraction + data = [F(1, 7), F(2, 7), F(3, 7), F(4, 7), F(5, 7), F(6, 7)] + assert len(data)%2 == 0 + random.shuffle(data) + self.assertEqual(self.func(data), F(1, 2)) + + def testOddDecimals(self): + # Test median works with an odd number of Decimals. 
+ D = Decimal + data = [D('2.5'), D('3.1'), D('4.2'), D('5.7'), D('5.8')] + assert len(data)%2 == 1 + random.shuffle(data) + self.assertEqual(self.func(data), D('4.2')) + + def testEvenDecimals(self): + # Test median works with an even number of Decimals. + D = Decimal + data = [D('1.2'), D('2.5'), D('3.1'), D('4.2'), D('5.7'), D('5.8')] + assert len(data)%2 == 0 + random.shuffle(data) + self.assertEqual(self.func(data), D('3.65')) + + +class TestMedianDataType(NumericTestCase, UnivariateTypeMixin): + # Test conservation of data element type for median. + def setUp(self): + self.func = statistics.median + + def prepare_data(self): + data = list(range(15)) + assert len(data)%2 == 1 + while data == sorted(data): + random.shuffle(data) + return data + + +class TestMedianLow(TestMedian, UnivariateTypeMixin): + def setUp(self): + self.func = statistics.median.low + + def testEvenInts(self): + # Test median.low with an even number of ints. + data = [1, 2, 3, 4, 5, 6] + assert len(data)%2 == 0 + self.assertEqual(self.func(data), 3) + + def testEvenFractions(self): + # Test median.low works with an even number of Fractions. + F = Fraction + data = [F(1, 7), F(2, 7), F(3, 7), F(4, 7), F(5, 7), F(6, 7)] + assert len(data)%2 == 0 + random.shuffle(data) + self.assertEqual(self.func(data), F(3, 7)) + + def testEvenDecimals(self): + # Test median.low works with an even number of Decimals. + D = Decimal + data = [D('1.1'), D('2.2'), D('3.3'), D('4.4'), D('5.5'), D('6.6')] + assert len(data)%2 == 0 + random.shuffle(data) + self.assertEqual(self.func(data), D('3.3')) + + +class TestMedianHigh(TestMedian, UnivariateTypeMixin): + def setUp(self): + self.func = statistics.median.high + + def testEvenInts(self): + # Test median.high with an even number of ints. + data = [1, 2, 3, 4, 5, 6] + assert len(data)%2 == 0 + self.assertEqual(self.func(data), 4) + + def testEvenFractions(self): + # Test median.high works with an even number of Fractions. 
+ F = Fraction + data = [F(1, 7), F(2, 7), F(3, 7), F(4, 7), F(5, 7), F(6, 7)] + assert len(data)%2 == 0 + random.shuffle(data) + self.assertEqual(self.func(data), F(4, 7)) + + def testEvenDecimals(self): + # Test median.high works with an even number of Decimals. + D = Decimal + data = [D('1.1'), D('2.2'), D('3.3'), D('4.4'), D('5.5'), D('6.6')] + assert len(data)%2 == 0 + random.shuffle(data) + self.assertEqual(self.func(data), D('4.4')) + + +class TestMedianGrouped(TestMedian): + # Test median.grouped. + # Doesn't conserve data element types, so don't use TestMedianType. + def setUp(self): + self.func = statistics.median.grouped + + def testOddNumberRepeated(self): + # Test median.grouped with repeated median values. + data = [12, 13, 14, 14, 14, 15, 15] + assert len(data)%2 == 1 + self.assertEqual(self.func(data), 14) + #--- + data = [12, 13, 14, 14, 14, 14, 15] + assert len(data)%2 == 1 + self.assertEqual(self.func(data), 13.875) + #--- + data = [5, 10, 10, 15, 20, 20, 20, 20, 25, 25, 30] + assert len(data)%2 == 1 + self.assertEqual(self.func(data, 5), 19.375) + #--- + data = [16, 18, 18, 18, 18, 20, 20, 20, 22, 22, 22, 24, 24, 26, 28] + assert len(data)%2 == 1 + self.assertApproxEqual(self.func(data, 2), 20.66666667, tol=1e-8) + + def testEvenNumberRepeated(self): + # Test median.grouped with repeated median values. + data = [5, 10, 10, 15, 20, 20, 20, 25, 25, 30] + assert len(data)%2 == 0 + self.assertApproxEqual(self.func(data, 5), 19.16666667, tol=1e-8) + #--- + data = [2, 3, 4, 4, 4, 5] + assert len(data)%2 == 0 + self.assertApproxEqual(self.func(data), 3.83333333, tol=1e-8) + #--- + data = [2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6] + assert len(data)%2 == 0 + self.assertEqual(self.func(data), 4.5) + #--- + data = [3, 4, 4, 4, 5, 5, 5, 5, 6, 6] + assert len(data)%2 == 0 + self.assertEqual(self.func(data), 4.75) + + def testRepeatedSingleValue(self): + # Override method from AverageMixin. 
+ # Yet again, failure of median.grouped to conserve the data type + # causes me headaches :-( + for x in (5.3, 68, 4.3e17, Fraction(29, 101), Decimal('32.9714')): + for count in (2, 5, 10, 20): + data = [x]*count + self.assertEqual(self.func(data), float(x)) + + def testOddFractions(self): + # Test median.grouped works with an odd number of Fractions. + F = Fraction + data = [F(5, 4), F(9, 4), F(13, 4), F(13, 4), F(17, 4)] + assert len(data)%2 == 1 + random.shuffle(data) + self.assertEqual(self.func(data), 3.0) + + def testEvenFractions(self): + # Test median.grouped works with an even number of Fractions. + F = Fraction + data = [F(5, 4), F(9, 4), F(13, 4), F(13, 4), F(17, 4), F(17, 4)] + assert len(data)%2 == 0 + random.shuffle(data) + self.assertEqual(self.func(data), 3.25) + + def testOddDecimals(self): + # Test median.grouped works with an odd number of Decimals. + D = Decimal + data = [D('5.5'), D('6.5'), D('6.5'), D('7.5'), D('8.5')] + assert len(data)%2 == 1 + random.shuffle(data) + self.assertEqual(self.func(data), 6.75) + + def testEvenDecimals(self): + # Test median.grouped works with an even number of Decimals. + D = Decimal + data = [D('5.5'), D('5.5'), D('6.5'), D('6.5'), D('7.5'), D('8.5')] + assert len(data)%2 == 0 + random.shuffle(data) + self.assertEqual(self.func(data), 6.5) + #--- + data = [D('5.5'), D('5.5'), D('6.5'), D('7.5'), D('7.5'), D('8.5')] + assert len(data)%2 == 0 + random.shuffle(data) + self.assertEqual(self.func(data), 7.0) + + def testInterval(self): + # Test median.grouped with interval argument. 
+ data = [2.25, 2.5, 2.5, 2.75, 2.75, 3.0, 3.0, 3.25, 3.5, 3.75] + self.assertEqual(self.func(data, 0.25), 2.875) + data = [2.25, 2.5, 2.5, 2.75, 2.75, 2.75, 3.0, 3.0, 3.25, 3.5, 3.75] + self.assertApproxEqual(self.func(data, 0.25), 2.83333333, tol=1e-8) + data = [220, 220, 240, 260, 260, 260, 260, 280, 280, 300, 320, 340] + self.assertEqual(self.func(data, 20), 265.0) + + +class TestMode(NumericTestCase, AverageMixin, UnivariateTypeMixin): + # Test cases for the discrete version of mode. + def setUp(self): + self.func = statistics.mode + + def prepare_data(self): + """Overload method from UnivariateCommonMixin.""" + # Make sure test data has exactly one mode. + return [1, 1, 1, 1, 3, 4, 7, 9, 0, 8, 2] + + def testRangeData(self): + # Override test from UnivariateCommonMixin. + data = range(20, 50, 3) + self.assertRaises(statistics.StatisticsError, self.func, data) + expected = self.func(list(data), max_modes=0) + self.assertEqual(self.func(data, max_modes=0), expected) + + def testNominalData(self): + # Test mode with nominal data. + data = 'abcbdb' + self.assertEqual(self.func(data), 'b') + data = 'fe fi fo fum fi fi'.split() + self.assertEqual(self.func(data), 'fi') + + def testDiscreteData(self): + # Test mode with discrete numeric data. + data = list(range(10)) + for i in range(10): + d = data + [i] + random.shuffle(d) + self.assertEqual(self.func(d), i) + + def testBimodalData(self): + # Test mode with bimodal data. + data = [1, 1, 2, 2, 2, 2, 3, 4, 5, 6, 6, 6, 6, 7, 8, 9, 9] + assert data.count(2) == data.count(6) == 4 + # Check for an exception with the default. + self.assertRaises(statistics.StatisticsError, self.func, data) + # Now check for correct results with two modes. + result = self.func(data, max_modes=2) + self.assertEqual(sorted(result), [2, 6]) + + def testTrimodalData(self): + # Test mode with trimodal data. 
+ data = list(range(10))*4 + [1, 5, 8] + assert data.count(1) == data.count(5) == data.count(8) == 5 + # Check for an exception with max_modes < 3. + self.assertRaises(statistics.StatisticsError, self.func, data, 1) + self.assertRaises(statistics.StatisticsError, self.func, data, 2) + # And check for the correct modes. + result = self.func(data, max_modes=3) + self.assertEqual(sorted(result), [1, 5, 8]) + + def testUniqueDataFailure(self): + # Test mode exception when data points are all unique. + data = list(range(10)) + self.assertRaises(statistics.StatisticsError, self.func, data) + + def testUniqueDataNoFailure(self): + # Test mode when the data points are all unique. + data = list(range(10)) + result = self.func(data, max_modes=0) + self.assertEqual(sorted(data), sorted(result)) + + def testNoneData(self): + # Test that mode raises TypeError if given None as data. + + # This test is necessary because the implementation of mode uses + # collections.Counter, which accepts None and returns an empty dict. + self.assertRaises(TypeError, self.func, None) + + +# === Tests for variances and standard deviations === + +class VarianceStdevMixin(UnivariateCommonMixin): + # Mixin class holding common tests for variance and std dev. + + # Subclasses should inherit from this before NumericTestClass, in order + # to see the rel attribute below. See testShiftData for an explanation. + + rel = 1e-12 + + def testSingleValue(self): + # Deviation of a single value is zero. + for x in (11, 19.8, 4.6e14, Fraction(21, 34), Decimal('8.392')): + self.assertEqual(self.func([x]), 0) + + def testRepeatedSingleValue(self): + # The deviation of a single repeated value is zero. + for x in (7.2, 49, 8.1e15, Fraction(3, 7), Decimal('62.4802')): + for count in (2, 3, 5, 15): + data = [x]*count + self.assertEqual(self.func(data), 0) + + def testDomainErrorRegression(self): + # Regression test for a domain error exception. + # (Thanks to Geremy Condra.) 
+ data = [0.123456789012345]*10000 + # All the items are identical, so variance should be exactly zero. + # We allow some small round-off error, but not much. + result = self.func(data) + self.assertApproxEqual(result, 0.0, tol=5e-17) + self.assertTrue(result >= 0) # A negative result must fail. + + def testShiftData(self): + # Test that shifting the data by a constant amount does not affect + # the variance or stdev. Or at least not much. + + # Due to rounding, this test should be considered an ideal. We allow + # some tolerance away from "no change at all" by setting tol and/or rel + # attributes. Subclasses may set tighter or looser error tolerances. + raw = [1.03, 1.27, 1.94, 2.04, 2.58, 3.14, 4.75, 4.98, 5.42, 6.78] + expected = self.func(raw) + # Don't set shift too high, the bigger it is, the more rounding error. + shift = 1e5 + data = [x + shift for x in raw] + self.assertApproxEqual(self.func(data), expected) + + def testShiftDataExact(self): + # Like testShiftData, but result is always exact. + raw = [1, 3, 3, 4, 5, 7, 9, 10, 11, 16] + assert all(x==int(x) for x in raw) + expected = self.func(raw) + shift = 10**9 + data = [x + shift for x in raw] + self.assertEqual(self.func(data), expected) + + def testIterListSame(self): + # Test that iter data and list data give the same result. + + # This is an explicit test that iterators and lists are treated the + # same; justification for this test over and above the similar test + # in UnivariateCommonMixin is that an earlier design had variance and + # friends swap between one- and two-pass algorithms, which would + # sometimes give different results. + data = [random.uniform(-3, 8) for _ in range(1000)] + expected = self.func(data) + self.assertEqual(self.func(iter(data)), expected) + + +class TestPVariance(VarianceStdevMixin, NumericTestCase, UnivariateTypeMixin): + # Tests for population variance. 
+ def setUp(self): + self.func = statistics.pvariance + + def testExactUniform(self): + # Test the variance against an exact result for uniform data. + data = list(range(10000)) + random.shuffle(data) + expected = (10000**2 - 1)/12 # Exact value. + self.assertEqual(self.func(data), expected) + + def testInts(self): + # Test population variance with int data. + data = [4, 7, 13, 16] + exact = 22.5 + self.assertEqual(self.func(data), exact) + + def testFractions(self): + # Test population variance with Fraction data. + F = Fraction + data = [F(1, 4), F(1, 4), F(3, 4), F(7, 4)] + exact = F(3, 8) + result = self.func(data) + self.assertEqual(result, exact) + self.assertTrue(isinstance(result, Fraction)) + + def testDecimals(self): + # Test population variance with Decimal data. + D = Decimal + data = [D("12.1"), D("12.2"), D("12.5"), D("12.9")] + exact = D('0.096875') + result = self.func(data) + self.assertEqual(result, exact) + self.assertTrue(isinstance(result, Decimal)) + + +class TestVariance(VarianceStdevMixin, NumericTestCase, UnivariateTypeMixin): + # Tests for sample variance. + def setUp(self): + self.func = statistics.variance + + def testSingleValue(self): + # Override method from VarianceStdevMixin. + for x in (35, 24.7, 8.2e15, Fraction(19, 30), Decimal('4.2084')): + self.assertRaises(statistics.StatisticsError, self.func, [x]) + + def testInts(self): + # Test sample variance with int data. + data = [4, 7, 13, 16] + exact = 30 + self.assertEqual(self.func(data), exact) + + def testFractions(self): + # Test sample variance with Fraction data. + F = Fraction + data = [F(1, 4), F(1, 4), F(3, 4), F(7, 4)] + exact = F(1, 2) + result = self.func(data) + self.assertEqual(result, exact) + self.assertTrue(isinstance(result, Fraction)) + + def testDecimals(self): + # Test sample variance with Decimal data. 
+ D = Decimal + data = [D(2), D(2), D(7), D(9)] + exact = 4*D('9.5')/D(3) + result = self.func(data) + self.assertEqual(result, exact) + self.assertTrue(isinstance(result, Decimal)) + + +class TestPStdev(VarianceStdevMixin, NumericTestCase): + # Tests for population standard deviation. + def setUp(self): + self.func = statistics.pstdev + + def testCompareToVariance(self): + # Test that stdev is, in fact, the square root of variance. + data = [random.uniform(-17, 24) for _ in range(1000)] + expected = math.sqrt(statistics.pvariance(data)) + self.assertEqual(self.func(data), expected) + + +class TestStdev(VarianceStdevMixin, NumericTestCase): + # Tests for sample standard deviation. + def setUp(self): + self.func = statistics.stdev + + def testSingleValue(self): + # Override method from VarianceStdevMixin. + for x in (81, 203.74, 3.9e14, Fraction(5, 21), Decimal('35.719')): + self.assertRaises(statistics.StatisticsError, self.func, [x]) + + def testCompareToVariance(self): + # Test that stdev is, in fact, the square root of variance. + data = [random.uniform(-2, 9) for _ in range(1000)] + expected = math.sqrt(statistics.variance(data)) + self.assertEqual(self.func(data), expected) + + +if __name__ == "__main__": + unittest.main() + diff -r b0cc8c9ab912 Lib/test/test_statistics_approx.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Lib/test/test_statistics_approx.py Wed Aug 14 00:01:53 2013 -0400 @@ -0,0 +1,596 @@ +"""Numeric approximated equal comparisons and unit testing. + +When doing numeric work, especially with floats, exact equality is often not +what you want. Due to round-off error, it is often a bad idea to try to +compare floats with equality. Instead the usual procedure is to test them +with some (hopefully small!) allowance for error. + +The ``approx_equal`` function allows you to specify either an absolute error +tolerance, or a relative error, or both. 
+ +Absolute error tolerances are simple, but you need to know the magnitude of +the quantities being compared: + +>>> approx_equal(12.345, 12.346, tol=1e-3) +True +>>> approx_equal(12.345e6, 12.346e6, tol=1e-3) # tol is unreasonably small. +False + +Relative errors are more suitable when the values you are comparing can vary +in magnitude: + +>>> approx_equal(12.345, 12.346, rel=1e-4) +True +>>> approx_equal(12.345e6, 12.346e6, rel=1e-4) +True + +but a naive implementation of relative error testing can run into trouble +around zero. + +If you supply both an absolute tolerance and a relative error, the comparison +succeeds if either individual test succeeds: + +>>> approx_equal(12.345e6, 12.346e6, tol=1e-3, rel=1e-4) +True + +For unit-testing, a TestCase subclass ``NumericTestCase`` is provided. In +addition to the standard ``TestCase.assertAlmostEqual``, a method +``assertApproxEqual`` is provided. See the class for further details. + + +This is a self-testing module. From the commandline, call: + + $ python3 -m test_approx + +to run the self-tests. +""" + +__all__ = ['approx_equal', 'NumericTestCase'] + +#from test.support import run_unittest + +from decimal import Decimal +from fractions import Fraction + +import collections +import doctest +import math +import unittest + + +# === Helper functions === + +def _calc_errors(actual, expected): + """Return the absolute and relative errors between two numbers. + + >>> _calc_errors(100, 75) + (25, 0.25) + >>> _calc_errors(100, 100) + (0, 0.0) + + Returns the (absolute error, relative error) between the two arguments. 
+ """ + base = max(abs(actual), abs(expected)) + abs_err = abs(actual - expected) + rel_err = abs_err/base if base else float('inf') + return (abs_err, rel_err) + + +# === Approximately equal to === + +def approx_equal(x, y, tol=1e-12, rel=1e-7): + """approx_equal(x, y [, tol [, rel]]) => True|False + + Return True if numbers x and y are approximately equal, to within some + margin of error, otherwise return False. Numbers which compare equal + will also compare approximately equal. + + x is approximately equal to y if the difference between them is less than + an absolute error tol or a relative error rel, whichever is bigger. + + If given, both tol and rel must be finite, non-negative numbers. If not + given, default values are tol=1e-12 and rel=1e-7. + + >>> approx_equal(1.2589, 1.2587, tol=0.0003, rel=0) + True + >>> approx_equal(1.2589, 1.2587, tol=0.0001, rel=0) + False + + Absolute error is defined as abs(x-y); if that is less than or equal to + tol, x and y are considered approximately equal. + + Relative error is defined as abs((x-y)/x) or abs((x-y)/y), whichever is + smaller, provided x or y are not zero. If that figure is less than or + equal to rel, x and y are considered approximately equal. + + Complex numbers are not directly supported. If you wish to compare to + complex numbers, extract their real and imaginary parts and compare them + individually. + + NANs always compare unequal, even with themselves. Infinities compare + approximately equal if they have the same sign (both positive or both + negative). Infinities with different signs compare unequal; so do + comparisons of infinities with finite numbers. + """ + if tol < 0 or rel < 0: + raise ValueError('error tolerances must be non-negative') + # NANs are never equal to anything, approximately or otherwise. + if math.isnan(x) or math.isnan(y): + return False + # Numbers which compare equal also compare approximately equal. 
+ if x == y: + # This includes the case of two infinities with the same sign. + return True + if math.isinf(x) or math.isinf(y): + # This includes the case of two infinities of opposite sign, or + # one infinity and one finite number. + return False + # Two finite numbers. + actual_error = abs(x - y) + allowed_error = max(tol, rel*max(abs(x), abs(y))) + return actual_error <= allowed_error + + +# === Unit test helper === + +# We prefer this for testing numeric values that may not be exactly equal, +# and avoid using TestCase.almost_equal, because it sucks :-) + +class NumericTestCase(unittest.TestCase): + # By default, we expect exact equality, unless overridden. + tol = rel = 0 + + def assertApproxEqual( + self, first, second, tol=None, rel=None, msg=None + ): + """Test passes if ``first`` and ``second`` are approximately equal. + + This test passes if ``first`` and ``second`` are equal to + within ``tol``, an absolute error, or ``rel``, a relative error. + + If either ``tol`` or ``rel`` are None or not given, they default to + test attributes of the same name (by default, 0). + + The objects may be either numbers, or sequences of numbers. Sequences + are tested element-by-element. + + >>> class MyTest(NumericTestCase): + ... def testNumber(self): + ... x = 1.0/6 + ... y = sum([x]*6) + ... self.assertApproxEqual(y, 1.0, tol=1e-15) + ... def testSequence(self): + ... a = [1.001, 1.001e-10, 1.001e10] + ... b = [1.0, 1e-10, 1e10] + ... self.assertApproxEqual(a, b, rel=1e-3) + ... 
+ >>> import unittest + >>> suite = unittest.TestLoader().loadTestsFromTestCase(MyTest) + >>> unittest.TextTestRunner().run(suite) + + + """ + if tol is None: + tol = self.tol + if rel is None: + rel = self.rel + if ( + isinstance(first, collections.Sequence) and + isinstance(second, collections.Sequence) + ): + check = self._check_approx_seq + else: + check = self._check_approx_num + check(first, second, tol, rel, msg) + + def _check_approx_seq(self, first, second, tol, rel, msg): + if len(first) != len(second): + standardMsg = ( + "sequences differ in length: %d items != %d items" + % (len(first), len(second)) + ) + msg = self._formatMessage(msg, standardMsg) + raise self.failureException(msg) + for i, (a,e) in enumerate(zip(first, second)): + self._check_approx_num(a, e, tol, rel, msg, i) + + def _check_approx_num(self, first, second, tol, rel, msg, idx=None): + if approx_equal(first, second, tol, rel): + # Test passes. Return early, we are done. + return None + # Otherwise we failed. + standardMsg = self._make_std_err_msg(first, second, tol, rel, idx) + msg = self._formatMessage(msg, standardMsg) + raise self.failureException(msg) + + @staticmethod + def _make_std_err_msg(first, second, tol, rel, idx): + # Create the standard error message for approx_equal failures. + assert first != second + template = ( + ' %r != %r\n' + ' values differ by more than tol=%r and rel=%r\n' + ' -> absolute error = %r\n' + ' -> relative error = %r' + ) + if idx is not None: + header = 'numeric sequences first differ at index %d.\n' % idx + template = header + template + # Calculate actual errors: + abs_err, rel_err = _calc_errors(first, second) + return template % (first, second, tol, rel, abs_err, rel_err) + + +# === Tests for approx_equal === + +class ApproxEqualSymmetryTest(unittest.TestCase): + # Test symmetry of approx_equal. + + def testRelativeSymmetry(self): + # Check that approx_equal treats relative error symmetrically. + # (a-b)/a is usually not equal to (a-b)/b. 
Ensure that this + # doesn't matter. + # + # Note: the reason for this test is that an early version + # of approx_equal was not symmetric. A relative error test + # would pass, or fail, depending on which value was passed + # as the first argument. + # + args1 = [2456, 37.8, -12.45, Decimal('2.54'), Fraction(17, 54)] + args2 = [2459, 37.2, -12.41, Decimal('2.59'), Fraction(15, 54)] + assert len(args1) == len(args2) + for a, b in zip(args1, args2): + self.do_relative_symmetry(a, b) + + def do_relative_symmetry(self, a, b): + a, b = min(a, b), max(a, b) + assert a < b + delta = b - a # The absolute difference between the values. + rel_err1, rel_err2 = abs(delta/a), abs(delta/b) + # Choose an error margin halfway between the two. + rel = (rel_err1 + rel_err2)/2 + # Now see that values a and b compare approx equal regardless of + # which is given first. + self.assertTrue(approx_equal(a, b, tol=0, rel=rel)) + self.assertTrue(approx_equal(b, a, tol=0, rel=rel)) + + def testSymmetry(self): + # Test that approx_equal(a, b) == approx_equal(b, a) + args = [-23, -2, 5, 107, 93568] + delta = 2 + for x in args: + for type_ in (int, float, Decimal, Fraction): + x = type_(x)*100 + y = x + delta + r = abs(delta/max(x, y)) + # There are five cases to check: + # 1) actual error <= tol, <= rel + self.do_symmetry_test(x, y, tol=delta, rel=r) + self.do_symmetry_test(x, y, tol=delta+1, rel=2*r) + # 2) actual error > tol, > rel + self.do_symmetry_test(x, y, tol=delta-1, rel=r/2) + # 3) actual error <= tol, > rel + self.do_symmetry_test(x, y, tol=delta, rel=r/2) + # 4) actual error > tol, <= rel + self.do_symmetry_test(x, y, tol=delta-1, rel=r) + self.do_symmetry_test(x, y, tol=delta-1, rel=2*r) + # 5) exact equality test + self.do_symmetry_test(x, x, tol=0, rel=0) + self.do_symmetry_test(x, y, tol=0, rel=0) + + def do_symmetry_test(self, a, b, tol, rel): + template = "approx_equal comparisons don't match for %r" + flag1 = approx_equal(a, b, tol, rel) + flag2 = approx_equal(b, a, tol, 
rel) + self.assertEqual(flag1, flag2, template.format((a, b, tol, rel))) + + +class ApproxEqualExactTest(unittest.TestCase): + # Test the approx_equal function with exactly equal values. + # Equal values should compare as approximately equal. + # Test cases for exactly equal values, which should compare approx + # equal regardless of the error tolerances given. + + def do_exactly_equal_test(self, x, tol, rel): + result = approx_equal(x, x, tol=tol, rel=rel) + self.assertTrue(result, 'equality failure for x=%r' % x) + result = approx_equal(-x, -x, tol=tol, rel=rel) + self.assertTrue(result, 'equality failure for x=%r' % -x) + + def testExactlyEqualInts(self): + # Test that equal int values are exactly equal. + for n in [42, 19740, 14974, 230, 1795, 700245, 36587]: + self.do_exactly_equal_test(n, 0, 0) + + def testExactlyEqualFloats(self): + # Test that equal float values are exactly equal. + for x in [0.42, 1.9740, 1497.4, 23.0, 179.5, 70.0245, 36.587]: + self.do_exactly_equal_test(x, 0, 0) + + def testExactlyEqualFractions(self): + # Test that equal Fraction values are exactly equal. + F = Fraction + for f in [F(1, 2), F(0), F(5, 3), F(9, 7), F(35, 36), F(3, 7)]: + self.do_exactly_equal_test(f, 0, 0) + + def testExactlyEqualDecimals(self): + # Test that equal Decimal values are exactly equal. + D = Decimal + for d in map(D, "8.2 31.274 912.04 16.745 1.2047".split()): + self.do_exactly_equal_test(d, 0, 0) + + def testExactlyEqualAbsolute(self): + # Test that equal values are exactly equal with an absolute error. + for n in [16, 1013, 1372, 1198, 971, 4]: + # Test as ints. + self.do_exactly_equal_test(n, 0.01, 0) + # Test as floats. + self.do_exactly_equal_test(n/10, 0.01, 0) + # Test as Fractions. + f = Fraction(n, 1234) + self.do_exactly_equal_test(f, 0.01, 0) + + def testExactlyEqualAbsoluteDecimals(self): + # Test equal Decimal values are exactly equal with an absolute error. 
class ApproxEqualUnequalTest(unittest.TestCase):
    # With both error tolerances set to zero, approx_equal acts as an
    # exact equality test, so values that differ must compare unequal.

    def do_exactly_unequal_test(self, x):
        # Check x and its negation, each against the value one greater.
        for value in (x, -x):
            self.assertFalse(
                approx_equal(value, value+1, tol=0, rel=0),
                'inequality failure for x=%r' % value
                )

    def testExactlyUnequalInts(self):
        # Test unequal int values are unequal with zero error tolerance.
        for number in (951, 572305, 478, 917, 17240):
            self.do_exactly_unequal_test(number)

    def testExactlyUnequalFloats(self):
        # Test unequal float values are unequal with zero error tolerance.
        for number in (9.51, 5723.05, 47.8, 9.17, 17.24):
            self.do_exactly_unequal_test(number)

    def testExactlyUnequalFractions(self):
        # Test that unequal Fractions are unequal with zero error tolerance.
        pairs = [(1, 5), (7, 9), (12, 11), (101, 99023)]
        for numerator, denominator in pairs:
            self.do_exactly_unequal_test(Fraction(numerator, denominator))

    def testExactlyUnequalDecimals(self):
        # Test that unequal Decimals are unequal with zero error tolerance.
        for text in "3.1415 298.12 3.47 18.996 0.00245".split():
            self.do_exactly_unequal_test(Decimal(text))
+ # Test cases when comparing two values that are not exactly equal. + + # === Absolute error tests === + + def do_approx_equal_abs_test(self, x, delta): + template = "Test failure for x={!r}, y={!r}" + for y in (x + delta, x - delta): + msg = template.format(x, y) + self.assertTrue(approx_equal(x, y, tol=2*delta, rel=0), msg) + self.assertFalse(approx_equal(x, y, tol=delta/2, rel=0), msg) + + def testApproxEqualAbsoluteInts(self): + # Test approximate equality of ints with an absolute error. + for n in [-10737, -1975, -7, -2, 0, 1, 9, 37, 423, 9874, 23789110]: + self.do_approx_equal_abs_test(n, 10) + self.do_approx_equal_abs_test(n, 2) + + def testApproxEqualAbsoluteFloats(self): + # Test approximate equality of floats with an absolute error. + for x in [-284.126, -97.1, -3.4, -2.15, 0.5, 1.0, 7.8, 4.23, 3817.4]: + self.do_approx_equal_abs_test(x, 1.5) + self.do_approx_equal_abs_test(x, 0.01) + self.do_approx_equal_abs_test(x, 0.0001) + + def testApproxEqualAbsoluteFractions(self): + # Test approximate equality of Fractions with an absolute error. + delta = Fraction(1, 29) + numerators = [-84, -15, -2, -1, 0, 1, 5, 17, 23, 34, 71] + for f in (Fraction(n, 29) for n in numerators): + self.do_approx_equal_abs_test(f, delta) + self.do_approx_equal_abs_test(f, float(delta)) + + def testApproxEqualAbsoluteDecimals(self): + # Test approximate equality of Decimals with an absolute error. + delta = Decimal("0.01") + for d in map(Decimal, "1.0 3.5 36.08 61.79 7912.3648".split()): + self.do_approx_equal_abs_test(d, delta) + self.do_approx_equal_abs_test(-d, delta) + + def testCrossZero(self): + # Test for the case of the two values having opposite signs. 
+ self.assertTrue(approx_equal(1e-5, -1e-5, tol=1e-4, rel=0)) + + # === Relative error tests === + + def do_approx_equal_rel_test(self, x, delta): + template = "Test failure for x={!r}, y={!r}" + for y in (x*(1+delta), x*(1-delta)): + msg = template.format(x, y) + self.assertTrue(approx_equal(x, y, tol=0, rel=2*delta), msg) + self.assertFalse(approx_equal(x, y, tol=0, rel=delta/2), msg) + + def testApproxEqualRelativeInts(self): + # Test approximate equality of ints with a relative error. + self.assertTrue(approx_equal(64, 47, tol=0, rel=0.36)) + self.assertTrue(approx_equal(64, 47, tol=0, rel=0.37)) + # --- + self.assertTrue(approx_equal(449, 512, tol=0, rel=0.125)) + self.assertTrue(approx_equal(448, 512, tol=0, rel=0.125)) + self.assertFalse(approx_equal(447, 512, tol=0, rel=0.125)) + + def testApproxEqualRelativeFloats(self): + # Test approximate equality of floats with a relative error. + for x in [-178.34, -0.1, 0.1, 1.0, 36.97, 2847.136, 9145.074]: + self.do_approx_equal_rel_test(x, 0.02) + self.do_approx_equal_rel_test(x, 0.0001) + + def testApproxEqualRelativeFractions(self): + # Test approximate equality of Fractions with a relative error. + F = Fraction + delta = Fraction(3, 8) + for f in [F(3, 84), F(17, 30), F(49, 50), F(92, 85)]: + for d in (delta, float(delta)): + self.do_approx_equal_rel_test(f, d) + self.do_approx_equal_rel_test(-f, d) + + def testApproxEqualRelativeDecimals(self): + # Test approximate equality of Decimals with a relative error. 
+ for d in map(Decimal, "0.02 1.0 5.7 13.67 94.138 91027.9321".split()): + self.do_approx_equal_rel_test(d, Decimal("0.001")) + self.do_approx_equal_rel_test(-d, Decimal("0.05")) + + # === Both absolute and relative error tests === + + # There are four cases to consider: + # 1) actual error <= both absolute and relative error + # 2) actual error <= absolute error but > relative error + # 3) actual error <= relative error but > absolute error + # 4) actual error > both absolute and relative error + + def do_check_both(self, a, b, tol, rel, tol_flag, rel_flag): + check = self.assertTrue if tol_flag else self.assertFalse + check(approx_equal(a, b, tol=tol, rel=0)) + check = self.assertTrue if rel_flag else self.assertFalse + check(approx_equal(a, b, tol=0, rel=rel)) + check = self.assertTrue if (tol_flag or rel_flag) else self.assertFalse + check(approx_equal(a, b, tol=tol, rel=rel)) + + def testApproxEqualBoth1(self): + # Test actual error <= both absolute and relative error. + self.do_check_both(7.955, 7.952, 0.004, 3.8e-4, True, True) + self.do_check_both(-7.387, -7.386, 0.002, 0.0002, True, True) + + def testApproxEqualBoth2(self): + # Test actual error <= absolute error but > relative error. + self.do_check_both(7.955, 7.952, 0.004, 3.7e-4, True, False) + + def testApproxEqualBoth3(self): + # Test actual error <= relative error but > absolute error. + self.do_check_both(7.955, 7.952, 0.001, 3.8e-4, False, True) + + def testApproxEqualBoth4(self): + # Test actual error > both absolute and relative error. + self.do_check_both(2.78, 2.75, 0.01, 0.001, False, False) + self.do_check_both(971.44, 971.47, 0.02, 3e-5, False, False) + + +class ApproxEqualSpecialsTest(unittest.TestCase): + # Test approx_equal with NANs and INFs and zeroes. 
class TestApproxEqualErrors(unittest.TestCase):
    # approx_equal is expected to reject negative error tolerances by
    # raising ValueError.

    def testBadTol(self):
        # Test negative tol raises.
        with self.assertRaises(ValueError):
            approx_equal(100, 100, -1, 0.1)

    def testBadRel(self):
        # Test negative rel raises.
        with self.assertRaises(ValueError):
            approx_equal(100, 100, 1, -0.1)
def load_tests(loader, tests, ignore):
    """Add this module's doctests to the suite unittest has collected.

    unittest calls this hook with the loader, the tests found so far and
    a third argument. Only ``tests`` is used here: it is extended in
    place with the doctest suite and then returned.
    """
    # DocTestSuite() with no argument inspects the module that calls it.
    doctest_suite = doctest.DocTestSuite()
    tests.addTests(doctest_suite)
    return tests