# Reconstructed from the Counter patch against Lib/collections.py
# (revision 68445).  Only the imports and the Counter class added by that
# patch are reproduced here; namedtuple() and the module self-test appear in
# the patch only as partial context and are not repeated.

from operator import itemgetter as _itemgetter
from keyword import iskeyword as _iskeyword
import sys as _sys
import heapq as _heapq
import itertools as _itertools

########################################################################
###  Counter  ##########################################################

def _iter_items(mapping):
    '''Return an iterator over the (key, value) pairs of *mapping*.

    Uses the lazy iteritems() method when the mapping provides one and
    falls back to items() otherwise, so mappings exposing only items()
    are accepted as well.  Raises AttributeError when *mapping* offers
    neither method.

    '''
    lazy_items = getattr(mapping, 'iteritems', None)
    if lazy_items is not None:
        return lazy_items()
    return iter(mapping.items())


class Counter(dict):
    '''Dict subclass for counting hashable items.  Like a bag or multiset.
    Elements are stored as dictionary keys and their counts are stored as
    the dictionary values.

    >>> c = Counter('abracadabra')      # count elements from a string
    >>> c.most_common(1)                # the single most common element
    [('a', 5)]
    >>> sorted(c)                       # list unique elements
    ['a', 'b', 'c', 'd', 'r']
    >>> ''.join(sorted(c.elements()))   # list elements with repetitions
    'aaaaabbcdrr'
    >>> sum(c.values())                 # total of all counts
    11
    >>> c['a']                          # count of letter 'a'
    5
    >>> for elem in 'shazam':           # update counts from an iterable
    ...     c[elem] += 1                # by adding 1 to each element's count
    >>> c['a']                          # now there are seven 'a'
    7
    >>> del c['r']                      # remove all 'r'
    >>> c['r']                          # now there are zero 'r'
    0

    >>> d = Counter('simsalabim')       # make another counter
    >>> c.update(d)                     # add in the second counter
    >>> c['a']                          # now there are nine 'a'
    9

    >>> c.clear()                       # empty the counter
    >>> c
    Counter()

    Note:  If a count is set to zero or reduced to zero, it will remain
    in the counter until the entry is deleted:

    >>> c = Counter('aaabbc')
    >>> c['b'] -= 2                     # reduce the count of 'b' by two
    >>> c.most_common()                 # 'b' is still in, but its count is zero
    [('a', 3), ('c', 1), ('b', 0)]

    '''

    def __init__(self, iterable=None, items=None):
        '''Create a new, empty Counter object.  And if given, count elements
        from an input iterable.  Or, initialize the count from an items list
        of (element, count) pairs.  When both are given, both contribute.

        >>> c = Counter('hocus pocus')              # count elements in an iterable
        >>> c = Counter(items=[('a', 4), ('b', 2)]) # take counts from an items list

        '''
        if iterable is not None:
            for elem in iterable:
                # __missing__ supplies the initial zero for unseen elements.
                self[elem] += 1
        if items is not None:
            for elem, count in items:
                self[elem] += count

    def __missing__(self, key):
        'The count of elements not in the Counter is zero.'
        # Needed so that self[missing_item] does not raise KeyError.
        # Nothing is stored: merely reading a count never adds a key.
        return 0

    def most_common(self, n=None):
        '''List the n most common elements and their counts from the most
        common to the least.  If n is None, then list all element counts.
        The relative order of elements with equal counts is unspecified.

        >>> Counter('abracadabra').most_common(1)
        [('a', 5)]
        >>> Counter('aaabbc').most_common()
        [('a', 3), ('b', 2), ('c', 1)]

        '''
        # Emulate Bag.sortedByCount from Smalltalk.
        by_count = _itemgetter(1)
        if n is None:
            return sorted(_iter_items(self), key=by_count, reverse=True)
        return _heapq.nlargest(n, _iter_items(self), key=by_count)

    def elements(self):
        '''Iterator over elements repeating each as many times as its count.

        >>> c = Counter('ABCABC')
        >>> sorted(c.elements())
        ['A', 'A', 'B', 'B', 'C', 'C']

        # Knuth's example of prime factors of 1836:  2**2 * 3**3 * 17**1
        >>> prime_factors = Counter(items=[(2, 2), (3, 3), (17, 1)])
        >>> sorted(prime_factors.elements())    # list individual factors
        [2, 2, 3, 3, 3, 17]
        >>> product = 1
        >>> for factor in prime_factors.elements():     # multiply them
        ...     product *= factor
        >>> product
        1836

        Note, if an element's count has been set to zero or a negative number,
        elements() will ignore it.

        '''
        # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
        # Prime factor example from Knuth's TAOCP Volume II section 4.6.3.
        # Each (elem, count) pair becomes repeat(elem, count); the runs are
        # then chained into one flat iterator.
        runs = _itertools.starmap(_itertools.repeat, _iter_items(self))
        return _itertools.chain.from_iterable(runs)

    # Override dict methods where necessary

    @classmethod
    def fromkeys(cls, iterable, v=None):
        'Always raises NotImplementedError; counters have no fromkeys().'
        # There is no equivalent method for counters because setting v=1
        # means that no element can have a count greater than one.
        raise NotImplementedError('Counter.fromkeys() is undefined')

    def update(self, mapping):
        '''Like dict.update() but add counts instead of replacing them.

        Source can be another dictionary or a Counter instance.  Any
        object providing iteritems() or items() is accepted; anything
        else raises AttributeError.

        >>> c = Counter('which')
        >>> d = Counter('witch')
        >>> c.update(d)                 # Add counts from d to those in c
        >>> c['h']                      # Count of 'h' is now three
        3

        '''
        for elem, count in _iter_items(mapping):
            self[elem] += count

    def copy(self):
        '''Like dict.copy() but returns an instance of the same class as
        self (a Counter, or the subclass being copied) instead of a dict.'''
        # Build through self.__class__ so that subclasses keep their type,
        # matching the class name that __repr__ reports.
        duplicate = self.__class__()
        duplicate.update(self)
        return duplicate

    def __repr__(self):
        'Return ClassName() when empty, else ClassName(items=[...]).'
        if not self:
            return '%s()' % self.__class__.__name__
        return '%s(items=%r)' % (self.__class__.__name__, self.most_common())