diff -r 5a9f2de4dc16 Lib/statistics.py --- a/Lib/statistics.py Sun Jan 03 19:37:07 2016 +0530 +++ b/Lib/statistics.py Sun Jan 03 19:38:53 2016 +0530 @@ -105,6 +105,7 @@ from fractions import Fraction from decimal import Decimal from itertools import groupby +from bisect import bisect_left, bisect_right @@ -305,6 +306,21 @@ return table +def _find_lteq(a, x): + 'Locate the leftmost value exactly equal to x' + i = bisect_left(a, x) + if i != len(a) and a[i] == x: + return i + raise ValueError + + +def _find_rteq(a, l, x): + 'Locate the rightmost value exactly equal to x' + i = bisect_right(a, x, lo=l) + if i != (len(a)+1) and a[i-1] == x: + return i-1 + raise ValueError + # === Measures of central tendency (averages) === def mean(data): @@ -442,9 +458,15 @@ except TypeError: # Mixed type. For now we just coerce to float. L = float(x) - float(interval)/2 - cf = data.index(x) # Number of values below the median interval. - # FIXME The following line could be more efficient for big lists. - f = data.count(x) # Number of data points in the median interval. + + # Uses bisection search to search for x in data with log(n) time complexity + # Find the position of leftmost occurence of x in data + l1 = _find_lteq(data, x) + # Find the position of rightmost occurence of x in data[l1...len(data)] + # Assuming always l1 <= l2 + l2 = _find_rteq(data, l1, x) + cf = l1 + f = l2 - l1 + 1 return L + interval*(n/2 - cf)/f