diff -r e0c0bcd60033 Doc/library/heapq.rst --- a/Doc/library/heapq.rst Tue Apr 23 13:48:29 2013 +0200 +++ b/Doc/library/heapq.rst Wed Apr 24 11:21:52 2013 -0700 @@ -7,6 +7,7 @@ .. sectionauthor:: Guido van Rossum .. sectionauthor:: François Pinard .. sectionauthor:: Raymond Hettinger +.. sectionauthor:: Daniel Wong **Source code:** :source:`Lib/heapq.py` @@ -31,12 +32,20 @@ These two make it possible to view the heap as a regular Python list without surprises: ``heap[0]`` is the smallest item, and ``heap.sort()`` maintains the -heap invariant! +heap invariant! As an alternative to these list-based heaps, this module also +provides the :class:`Heap` class, as well as the :class:`DynamicHeap` +class. -To create a heap, use a list initialized to ``[]``, or you can transform a -populated list into a heap via function :func:`heapify`. +Finally, this module provides a number of functions that implement algorithms +based on heaps. -The following functions are provided: +List-based heaps +---------------- + +To create a list-based heap, use a list initialized to ``[]``, or you can +transform a populated list into a heap via function :func:`heapify`. + +The following functions are provided for manipulating list-based heaps: .. function:: heappush(heap, item) @@ -78,6 +87,92 @@ on the heap. +Classes +------- + +.. versionadded:: 3.??? + +.. class:: Heap(elements=()) + + The :class:`Heap` class provides the same functionality as list-based heaps, + but it has a couple of benefits: + + 1. Unlike lists, there is no public method that allows you to violate the + heap invariant. Only sound operations are permitted. + + 2. The heap invariant can be customized. + + Subclasses may override any of the following to customize the heap invariant + used: + + .. attribute:: key + + A method that takes an element that might be inserted into the + :class:`Heap`, and returns some other value by which to sort the element. 
+ This is very much like the `key` parameter that :meth:`list.sort` + provides. + + .. attribute:: ASCENDING + + A boolean that indicates whether the top of the heap (where elements are + drawn from) should be the least or greatest element. + + The standard heap operations are available as instance methods: + + .. method:: push(element) + + Adds an element to the heap. + + .. method:: pop() + + Removes and returns the least element in the heap (or greatest if + `ASCENDING` is false). + + .. method:: top() + + Returns the element that :meth:`pop` would return, without altering the :class:`Heap` itself. + + +.. class:: DynamicHeap(elements=()) + + Like :class:`Heap`, except that keys can change after elements have been + inserted. + + This class has a few caveats that do not apply to :class:`Heap`: + + 1. It must be possible for elements to be keys in a dict. E.g. you should + not mutate elements while they are in a DynamicHeap. + + 2. Elements must be unique. + + 3. key is an abstract method (and this is an abstract class), which means + only subclasses that implement key can be instantiated. + + The first two restrictions could be removed in the future, as they are + imposed by the implementation, not because of some fundamental reason. + + :meth:`key` and `ASCENDING` can be overridden with the same effect as with + :class:`Heap`. + + In addition to the methods that :class:`Heap` provides, :class:`DynamicHeap` + provides the following methods: + + .. method:: update_element(element) + + Pass a value that is already in the DynamicHeap to signal that the key has + changed. + + .. method:: heapify() + + Restores the heap invariant when the keys of many elements change + value. This is generally less efficient than calling update_element when + you know the set of elements that have changed keys. Therefore, this + should be used sparingly if at all. + + +Heap-based algorithms +--------------------- + The module also offers three general purpose functions based on heaps. 
diff -r e0c0bcd60033 Lib/heapq.py --- a/Lib/heapq.py Tue Apr 23 13:48:29 2013 +0200 +++ b/Lib/heapq.py Wed Apr 24 11:21:52 2013 -0700 @@ -124,11 +124,232 @@ From all times, sorting has always been a Great Art! :-) """ -__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'merge', - 'nlargest', 'nsmallest', 'heappushpop'] +__all__ = ['Heap', 'DynamicHeap', + 'heappush', 'heappop', 'heapify', 'heapreplace', 'heappushpop', + 'merge', 'nlargest', 'nsmallest'] +import abc from itertools import islice, count, tee, chain + +class Heap: + """A collection that can efficiently get a smallest element. + + Provides two attributes that can be overridden to customize behavior: + + key method + A method that takes an element, and returns some other value by which + to sort the element, much like the key parameter that list.sort + provides. + + ASCENDING class attribute + A boolean that indicates whether the top of the heap (where + elements are drawn from) should be the least or greatest + element. + + This class provides the same functionality as some of the global functions + in this module, but instead of dealing with lists, Heap instances ensure + that the heap invariant is maintained. + """ + + # Override these to customize behavior. + # ------------------------------------- + + @staticmethod + def key(element): + """Override this to customize behavior.""" + return element + + # Override this to customize behavior. + ASCENDING = True + + # Public + # ------ + + def __init__(self, elements=()): + self._entries = [self._Entry(self, e) for e in elements] + heapify(self._entries) + + def push(self, element): + heappush(self._entries, self._Entry(self, element)) + + # TODO: raise new exceptions. + + def pop(self): + entry = heappop(self._entries) + return entry.element + + def top(self): + return self._entries[0].element + + # TODO: pushpop, poppush (aka replace). These are minor optimizations + # though. 
+ + # Private + # ------- + + class _Entry: + + RICH_COMPARISONS = frozenset(['eq', 'ne', 'lt', 'gt', 'le', 'ge']) + + def __init__(self, heap, element): + self.heap = heap + self.element = element + + def __repr__(self): + return '<_Entry element=%r>' % (self.element,) + + # Python 3, Y U NO KEEP __cmp__ MAGIC METHOD?! + def __lt__(self, other): + key = self.heap.key + if self.heap.ASCENDING: + return key(self.element) < key(other.element) + else: # descending + return key(self.element) > key(other.element) + + def __ge__(self, other): + return not (self < other) + + +# Friend of Heap -> may access Heap privates. +class DynamicHeap(Heap): + """Like Heap, except that keys can change after element insertion. + + This class has a couple of caveats that do not apply to Heap: + 1. It must be possible for elements to be keys in a dict. E.g. you should + not mutate elements while they are in a DynamicHeap. + 2. Elements must be unique. + 3. key is an abstract method (and this is an abstract class), which means + only subclasses that implement key can be instantiated. + The first two restrictions could be removed in the future, as they are + imposed by the implementation, not because of some fundamental reason. + + key and ASCENDING can be overridden with the same effect as with Heap. + + This class adds one interesting method: update_element (an instance + method). Pass a value that is already in the DynamicHeap to signal that the + key has changed. + + heapify is another additional instance method, although it is probably less + interesting. This restores the heap invariant when the keys of many + elements change value. This is generally less efficient than calling + update_element when you know the set of elements that have changed keys. + Therefore, this should be used sparingly if at all. + + Implementation note: Although this inherits from Heap, methods that mutate + the heap (push and pop) are overridden, because there is other internal + state to maintain. 
+ """ + + __metaclass__ = abc.ABCMeta + + @abc.abstractmethod + def key(self, element): + pass + + # Public + # ------ + + def __init__(self, elements=()): + super(DynamicHeap, self).__init__(elements) + self._index = dict((entry.element, idx) + for idx, entry in enumerate(self._entries)) + + def push(self, element): + idx = len(self._entries) + self._index[element] = idx + self._entries.append(self._Entry(self, element)) + idx = self._sift_toward_root(idx) + self._sift_toward_leaves(idx) + + def pop(self): + entry = self._entries[0] + + assert self._index[entry.element] == 0 + del self._index[entry.element] + + if len(self._entries) > 1: + self._entries[0] = self._entries.pop() + self._index[self._entries[0].element] = 0 + self._sift_toward_leaves(0) + else: + self._entries.pop() + + return entry.element + + # inheriting the implementation of top is fine. + + def update_element(self, element): + idx = self._index[element] + idx = self._sift_toward_root(idx) + self._sift_toward_leaves(idx) + + def heapify(self): + heapify(self._entries) + self._index = dict((entry.element, idx) + for idx, entry in enumerate(self._entries)) + + # Private + # ------- + + def _sift_toward_root(self, child_idx): + parent_idx = (child_idx - 1) // 2 + e = self._entries + while child_idx and (e[child_idx] < e[parent_idx]): + self._swap_with_parent(child_idx) + child_idx = parent_idx + parent_idx = (child_idx - 1) // 2 + return child_idx + + def _sift_toward_leaves(self, parent_idx): + def children(): + left_idx = 2 * parent_idx + 1 + return left_idx, left_idx + 1 + left_idx, right_idx = children() + e = self._entries + + while left_idx < len(e): + if e[left_idx] < e[parent_idx]: + # Must swap parent_idx with one of its children, but which + # child to swap with still needs to be determined. 
+ swap_right = (right_idx < len(e) and + e[right_idx] < e[parent_idx] and + e[right_idx] < e[left_idx]) + child_idx = right_idx if swap_right else left_idx + self._swap_with_parent(child_idx) + parent_idx = child_idx + left_idx, right_idx = children() + continue + + assert right_idx <= len(e) + if right_idx == len(e): + # No right (and left value was >= that of parent); therefore, + # we're done. + break + + if e[right_idx] >= e[parent_idx]: + # right and left exist, but neither value is less than that of + # parent; therefore, we're done. + break + + self._swap_with_parent(right_idx) + parent_idx = right_idx + left_idx, right_idx = children() + + return parent_idx + + def _swap_with_parent(self, child_idx): + parent_idx = (child_idx - 1) // 2 + e = self._entries + + child_entry = e[child_idx] + parent_entry = e[parent_idx] + + self._index[child_entry.element] = parent_idx + self._index[parent_entry.element] = child_idx + e[child_idx], e[parent_idx] = e[parent_idx], e[child_idx] + + def heappush(heap, item): """Push item onto heap, maintaining the heap invariant.""" heap.append(item) diff -r e0c0bcd60033 Lib/test/test_heapq.py --- a/Lib/test/test_heapq.py Tue Apr 23 13:48:29 2013 +0200 +++ b/Lib/test/test_heapq.py Wed Apr 24 11:21:52 2013 -0700 @@ -1,5 +1,6 @@ """Unittests for heapq.""" +import string import sys import random import unittest @@ -26,6 +27,157 @@ self.assertEqual(getattr(c_heapq, fname).__module__, '_heapq') +class HeapClassTest(TestCase): + + def testDefaultHeap(self): + heap = py_heapq.Heap() + self.assertRaises(IndexError, heap.top) + + data = list(range(5)) + random.shuffle(data) + heap = py_heapq.Heap(data) + + # Just for test coverage. + self.assertTrue(str(heap._entries[0])) + + self.assertEqual(heap.top(), 0) + self.assertEqual(heap.top(), 0) + self.assertEqual(heap.pop(), 0) + self.assertEqual(heap.top(), 1) + + self.assertEqual(heap.pop(), 1) + + # A little detour for push before we go into super pop mode. 
+ heap.push(-1) + self.assertEqual(heap.top(), -1) + self.assertEqual(heap.pop(), -1) + heap.push(0) + self.assertEqual(heap.top(), 0) + self.assertEqual(heap.pop(), 0) + heap.push(-1) + heap.push(-2) + heap.push(0) + self.assertEqual(heap.top(), -2) + self.assertEqual(heap.pop(), -2) + self.assertEqual(heap.top(), -1) + self.assertEqual(heap.pop(), -1) + self.assertEqual(heap.top(), 0) + self.assertEqual(heap.pop(), 0) + + # Pop until you can't. + self.assertEqual(heap.pop(), 2) + self.assertEqual(heap.pop(), 3) + self.assertEqual(heap.pop(), 4) + self.assertRaises(IndexError, heap.pop) + self.assertRaises(IndexError, heap.pop) # Try harder! + + def testOverrides(self): + class Person: + + def __init__(self, name, height): + self.name = name + self.height = height # arbitrary untis + + def __repr__(self): + return 'Person(%r, %r)' % (self.name, self.height) + + alice = Person('Alice', 5) + bob = Person('Bob', 6) + charlie = Person('Charlie', 4) + # Sorry, no Eve. + + class PersonBackwardsNameHeap(py_heapq.Heap): + @staticmethod + def key(person): + return person.name + ASCENDING = False + + heap = PersonBackwardsNameHeap() + heap.push(bob) + heap.push(alice) + heap.push(charlie) + + self.assertEqual(heap.pop(), charlie) + self.assertEqual(heap.pop(), bob) + self.assertEqual(heap.pop(), alice) + self.assertRaises(IndexError, heap.pop) + + +class DynamicHeapTest(TestCase): + + def test(self): + stock_prices = { + 'A': 10, + 'B': 5, + 'C': 7 + } + + class MostExpensiveStocksDynamicHeap(py_heapq.DynamicHeap): + + @staticmethod + def key(symbol): + return stock_prices[symbol] + + ASCENDING = False + + # No dynamic key changing; just static heap operations. 
+ stock_heap = MostExpensiveStocksDynamicHeap('ABC') + self.assertEqual(stock_heap.pop(), 'A') + self.assertEqual(stock_heap.pop(), 'C') + stock_heap.push('C') + self.assertEqual(stock_heap.top(), 'C') + self.assertEqual(stock_heap.pop(), 'C') + self.assertEqual(stock_heap.pop(), 'B') + self.assertRaises(IndexError, stock_heap.pop) + + # Verify internal state. + self.assertFalse(stock_heap._index) + self.assertFalse(stock_heap._entries) + + # Dynamic behavior. + stock_heap = MostExpensiveStocksDynamicHeap('ABC') + self.assertEqual(stock_heap.top(), 'A') + stock_prices['B'] = 20 # B's price just increase four-fold! + stock_heap.update_element('B') + self.assertEqual(stock_heap.top(), 'B') + stock_prices['B'] = 8 # Price correction on B + stock_heap.update_element('B') + self.assertEqual(stock_heap.pop(), 'A') + self.assertEqual(stock_heap.top(), 'B') + self.assertEqual(stock_heap.top(), 'B') + stock_prices['C'] = 9 # C's value gets a nice bump. + stock_heap.update_element('C') + self.assertEqual(stock_heap.top(), 'C') + stock_heap.push('A') + self.assertEqual(stock_heap.top(), 'A') + + # Next day, prices are all mixed up. + stock_prices = { + 'A': 5, + 'B': 10, + 'C': 7} + + stock_heap.heapify() + self.assertEqual(stock_heap.pop(), 'B') + self.assertEqual(stock_heap.pop(), 'C') + self.assertEqual(stock_heap.pop(), 'A') + self.assertRaises(IndexError, stock_heap.top) + self.assertRaises(IndexError, stock_heap.pop) + + # For coverage + stock_prices = dict( + (s, i) for i, s in enumerate(string.ascii_uppercase)) + symbols = list(string.ascii_uppercase) + random.shuffle(symbols) + stock_heap = MostExpensiveStocksDynamicHeap(symbols) + for char in string.ascii_uppercase[::-1]: + self.assertEqual(stock_heap.pop(), char) + self.assertRaises(IndexError, stock_heap.pop) + + # Purely for coverage. This adds no confidence that the code + # is actually correct. 
+ py_heapq.DynamicHeap.key(stock_heap, 'X') + class TestHeap: def test_push_pop(self): diff -r e0c0bcd60033 Misc/ACKS --- a/Misc/ACKS Tue Apr 23 13:48:29 2013 +0200 +++ b/Misc/ACKS Wed Apr 24 11:21:52 2013 -0700 @@ -1352,6 +1352,7 @@ Klaus-Juergen Wolf Dan Wolfe Richard Wolff +Daniel Wong Adam Woodbeck Steven Work Gordon Worley