Index: Lib/collections.py =================================================================== --- Lib/collections.py (revision 87161) +++ Lib/collections.py (working copy) @@ -6,7 +6,7 @@ import _abcoll __all__ += _abcoll.__all__ -from _collections import deque, defaultdict +from _collections import deque, defaultdict, _Counter from operator import itemgetter as _itemgetter from keyword import iskeyword as _iskeyword import sys as _sys @@ -334,50 +334,6 @@ ### Counter ######################################################################## -class Counter(dict): - '''Dict subclass for counting hashable items. Sometimes called a bag - or multiset. Elements are stored as dictionary keys and their counts - are stored as dictionary values. - - >>> c = Counter('abracadabra') # count elements from a string - - >>> c.most_common(3) # three most common elements - [('a', 5), ('r', 2), ('b', 2)] - >>> sorted(c) # list all unique elements - ['a', 'b', 'c', 'd', 'r'] - >>> ''.join(sorted(c.elements())) # list elements with repetitions - 'aaaaabbcdrr' - >>> sum(c.values()) # total of all counts - 11 - - >>> c['a'] # count of letter 'a' - 5 - >>> for elem in 'shazam': # update counts from an iterable - ... c[elem] += 1 # by adding 1 to each element's count - >>> c['a'] # now there are seven 'a' - 7 - >>> del c['r'] # remove all 'r' - >>> c['r'] # now there are zero 'r' - 0 - - >>> d = Counter('simsalabim') # make another counter - >>> c.update(d) # add in the second counter - >>> c['a'] # now there are nine 'a' - 9 - - >>> c.clear() # empty the counter - >>> c - Counter() - - Note: If a count is set to zero or reduced to zero, it will remain - in the counter until the entry is deleted or the counter is cleared: - - >>> c = Counter('aaabbc') - >>> c['b'] -= 2 # reduce the count of 'b' by two - >>> c.most_common() # 'b' is still in, but its count is zero - [('a', 3), ('c', 1), ('b', 0)] - - ''' # References: # http://en.wikipedia.org/wiki/Multiset # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html @@ -385,24 +341,10 @@ # http://code.activestate.com/recipes/259174/ # Knuth, TAOCP Vol. II section 4.6.3 - def __init__(self, iterable=None, **kwds): - '''Create a new, empty Counter object. And if given, count elements - from an input iterable. Or, initialize the count from another mapping - of elements to their counts. +class CounterBase(): + '''Mixin class inherited by Counter and PyCounter. + Not for direct instantiation.''' - >>> c = Counter() # a new, empty counter - >>> c = Counter('gallahad') # a new counter from an iterable - >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping - >>> c = Counter(a=4, b=2) # a new counter from keyword args - - ''' - self.update(iterable, **kwds) - - def __missing__(self, key): - 'The count of elements not in the Counter is zero.' - # Needed so that self[missing_item] does not raise KeyError - return 0 - def most_common(self, n=None): '''List the n most common elements and their counts from the most common to the least. If n is None, then list all element counts. @@ -438,50 +380,6 @@ # Emulate Bag.do from Smalltalk and Multiset.begin from C++. return _chain.from_iterable(_starmap(_repeat, self.items())) - # Override dict methods where necessary - - @classmethod - def fromkeys(cls, iterable, v=None): - # There is no equivalent method for counters because setting v=1 - # means that no element can have a count greater than one. - raise NotImplementedError( - 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.') - - def update(self, iterable=None, **kwds): - '''Like dict.update() but add counts instead of replacing them. - - Source can be an iterable, a dictionary, or another Counter instance. - - >>> c = Counter('which') - >>> c.update('witch') # add elements from another iterable - >>> d = Counter('watch') - >>> c.update(d) # add elements from another counter - >>> c['h'] # four 'h' in which, witch, and watch - 4 - - ''' - # The regular dict.update() operation makes no sense here because the - # replace behavior results in the some of original untouched counts - # being mixed-in with all of the other counts for a mismash that - # doesn't have a straight-forward interpretation in most counting - # contexts. Instead, we implement straight-addition. Both the inputs - # and outputs are allowed to contain zero and negative counts. - - if iterable is not None: - if isinstance(iterable, Mapping): - if self: - self_get = self.get - for elem, count in iterable.items(): - self[elem] = count + self_get(elem, 0) - else: - dict.update(self, iterable) # fast path when counter is empty - else: - self_get = self.get - for elem in iterable: - self[elem] = 1 + self_get(elem, 0) - if kwds: - self.update(kwds) - def subtract(self, iterable=None, **kwds): '''Like dict.update() but subtracts counts instead of replacing them. Counts can be reduced below zero. Both the inputs and outputs are @@ -511,7 +409,7 @@ def copy(self): 'Like dict.copy() but returns a Counter instance instead of a dict.' - return Counter(self) + return self.__class__(self) def __delitem__(self, elem): 'Like dict.__delitem__() but does not raise KeyError for missing values.' @@ -540,9 +438,10 @@ Counter({'b': 4, 'c': 2, 'a': 1}) ''' - if not isinstance(other, Counter): + cls = self.__class__ + if not isinstance(other, cls): return NotImplemented - result = Counter() + result = cls() for elem in set(self) | set(other): newcount = self[elem] + other[elem] if newcount > 0: @@ -556,9 +455,10 @@ Counter({'b': 2, 'a': 1}) ''' - if not isinstance(other, Counter): + cls = self.__class__ + if not isinstance(other, cls): return NotImplemented - result = Counter() + result = cls() for elem in set(self) | set(other): newcount = self[elem] - other[elem] if newcount > 0: @@ -572,9 +472,10 @@ Counter({'b': 3, 'c': 2, 'a': 1}) ''' - if not isinstance(other, Counter): + cls = self.__class__ + if not isinstance(other, cls): return NotImplemented - result = Counter() + result = cls() for elem in set(self) | set(other): p, q = self[elem], other[elem] newcount = q if p < q else p @@ -589,9 +490,10 @@ Counter({'b': 1}) ''' - if not isinstance(other, Counter): + cls = self.__class__ + if not isinstance(other, cls): return NotImplemented - result = Counter() + result = cls() if len(self) < len(other): self, other = other, self for elem in filter(self.__contains__, other): @@ -601,7 +503,168 @@ result[elem] = newcount return result +# Counter uses C methods for __init__, update, and __missing__ +class Counter(CounterBase, _Counter): + '''Dict subclass for counting hashable items. Sometimes called a bag + or multiset. Elements are stored as dictionary keys and their counts + are stored as dictionary values. + >>> c = Counter('abracadabra') # count elements from a string + + >>> c.most_common(3) # three most common elements + [('a', 5), ('r', 2), ('b', 2)] + >>> sorted(c) # list all unique elements + ['a', 'b', 'c', 'd', 'r'] + >>> ''.join(sorted(c.elements())) # list elements with repetitions + 'aaaaabbcdrr' + >>> sum(c.values()) # total of all counts + 11 + + >>> c['a'] # count of letter 'a' + 5 + >>> for elem in 'shazam': # update counts from an iterable + ... c[elem] += 1 # by adding 1 to each element's count + >>> c['a'] # now there are seven 'a' + 7 + >>> del c['r'] # remove all 'r' + >>> c['r'] # now there are zero 'r' + 0 + + >>> d = Counter('simsalabim') # make another counter + >>> c.update(d) # add in the second counter + >>> c['a'] # now there are nine 'a' + 9 + + >>> c.clear() # empty the counter + >>> c + Counter() + + Note: If a count is set to zero or reduced to zero, it will remain + in the counter until the entry is deleted or the counter is cleared: + + >>> c = Counter('aaabbc') + >>> c['b'] -= 2 # reduce the count of 'b' by two + >>> c.most_common() # 'b' is still in, but its count is zero + [('a', 3), ('c', 1), ('b', 0)] + + ''' + + # Override dict methods where necessary + + @classmethod + def fromkeys(cls, iterable, v=None): + # There is no equivalent method for counters because setting v=1 + # means that no element can have a count greater than one. + raise NotImplementedError( + 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.') + +# Pure Python Counter +class PyCounter(CounterBase, dict): + '''Dict subclass for counting hashable items. Sometimes called a bag + or multiset. Elements are stored as dictionary keys and their counts + are stored as dictionary values. + + >>> c = Counter('abracadabra') # count elements from a string + + >>> c.most_common(3) # three most common elements + [('a', 5), ('r', 2), ('b', 2)] + >>> sorted(c) # list all unique elements + ['a', 'b', 'c', 'd', 'r'] + >>> ''.join(sorted(c.elements())) # list elements with repetitions + 'aaaaabbcdrr' + >>> sum(c.values()) # total of all counts + 11 + + >>> c['a'] # count of letter 'a' + 5 + >>> for elem in 'shazam': # update counts from an iterable + ... c[elem] += 1 # by adding 1 to each element's count + >>> c['a'] # now there are seven 'a' + 7 + >>> del c['r'] # remove all 'r' + >>> c['r'] # now there are zero 'r' + 0 + + >>> d = Counter('simsalabim') # make another counter + >>> c.update(d) # add in the second counter + >>> c['a'] # now there are nine 'a' + 9 + + >>> c.clear() # empty the counter + >>> c + Counter() + + Note: If a count is set to zero or reduced to zero, it will remain + in the counter until the entry is deleted or the counter is cleared: + + >>> c = Counter('aaabbc') + >>> c['b'] -= 2 # reduce the count of 'b' by two + >>> c.most_common() # 'b' is still in, but its count is zero + [('a', 3), ('c', 1), ('b', 0)] + + ''' + + def __init__(self, iterable=None, **kwds): + '''Create a new, empty Counter object. And if given, count elements + from an input iterable. Or, initialize the count from another mapping + of elements to their counts. + + >>> c = Counter() # a new, empty counter + >>> c = Counter('gallahad') # a new counter from an iterable + >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping + >>> c = Counter(a=4, b=2) # a new counter from keyword args + + ''' + self.update(iterable, **kwds) + + def __missing__(self, key): + 'The count of elements not in the Counter is zero.' + # Needed so that self[missing_item] does not raise KeyError + return 0 + + # Override dict methods where necessary + @classmethod + def fromkeys(cls, iterable, v=None): + # There is no equivalent method for counters because setting v=1 + # means that no element can have a count greater than one. + raise NotImplementedError( + 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.') + + def update(self, iterable=None, **kwds): + '''Like dict.update() but add counts instead of replacing them. + + Source can be an iterable, a dictionary, or another Counter instance. + + >>> c = Counter('which') + >>> c.update('witch') # add elements from another iterable + >>> d = Counter('watch') + >>> c.update(d) # add elements from another counter + >>> c['h'] # four 'h' in which, witch, and watch + 4 + + ''' + # The regular dict.update() operation makes no sense here because the + # replace behavior results in the some of original untouched counts + # being mixed-in with all of the other counts for a mismash that + # doesn't have a straight-forward interpretation in most counting + # contexts. Instead, we implement straight-addition. Both the inputs + # and outputs are allowed to contain zero and negative counts. + + if iterable is not None: + if isinstance(iterable, Mapping): + if self: + self_get = self.get + for elem, count in iterable.items(): + self[elem] = count + self_get(elem, 0) + else: + dict.update(self, iterable) # fast path when counter is empty + else: + self_get = self.get + for elem in iterable: + self[elem] = 1 + self_get(elem, 0) + if kwds: + self.update(kwds) + ################################################################################ ### UserDict ################################################################################ Index: Lib/test/test_collections.py =================================================================== --- Lib/test/test_collections.py (revision 87161) +++ Lib/test/test_collections.py (working copy) @@ -3,7 +3,7 @@ import unittest, doctest, operator import inspect from test import support -from collections import namedtuple, Counter, OrderedDict +from collections import namedtuple, Counter, PyCounter, OrderedDict from test import mapping_tests import pickle, copy from random import randrange, shuffle @@ -686,6 +686,15 @@ self.assertEqual(c['d'], 1) self.assertEqual(c.setdefault('e', 5), 5) self.assertEqual(c['e'], 5) + c.update_fromsubs('abab',2) + self.assertEqual(c['ab'], 2) + self.assertEquals(c['ba'], 1) + c.update_fromsubs('cdcd', 2, 2) + self.assertEquals(c['cd'], 2) + self.assertEquals(c['dc'], 0) + c.update_fromsubs('efef', 2, 1, 1, 3) + self.assertEquals(c['ef'], 0) + self.assertEquals(c['fe'], 1) def test_copying(self): # Check that counters are copyable, deepcopyable, picklable, and @@ -775,6 +784,156 @@ c.subtract('aaaabbcce') self.assertEqual(c, Counter(a=-1, b=0, c=-1, d=1, e=-1)) +class TestPyCounter(unittest.TestCase): + + def test_basics(self): + c = PyCounter('abcaba') + self.assertEqual(c, PyCounter({'a':3 , 'b': 2, 'c': 1})) + self.assertEqual(c, PyCounter(a=3, b=2, c=1)) + self.assertIsInstance(c, dict) + self.assertIsInstance(c, Mapping) + self.assertTrue(issubclass(PyCounter, dict)) + self.assertTrue(issubclass(PyCounter, Mapping)) + self.assertEqual(len(c), 3) + self.assertEqual(sum(c.values()), 6) + self.assertEqual(sorted(c.values()), [1, 2, 3]) + self.assertEqual(sorted(c.keys()), ['a', 'b', 'c']) + self.assertEqual(sorted(c), ['a', 'b', 'c']) + self.assertEqual(sorted(c.items()), + [('a', 3), ('b', 2), ('c', 1)]) + self.assertEqual(c['b'], 2) + self.assertEqual(c['z'], 0) + self.assertEqual(c.__contains__('c'), True) + self.assertEqual(c.__contains__('z'), False) + self.assertEqual(c.get('b', 10), 2) + self.assertEqual(c.get('z', 10), 10) + self.assertEqual(c, dict(a=3, b=2, c=1)) + self.assertEqual(repr(c), "PyCounter({'a': 3, 'b': 2, 'c': 1})") + self.assertEqual(c.most_common(), [('a', 3), ('b', 2), ('c', 1)]) + for i in range(5): + self.assertEqual(c.most_common(i), + [('a', 3), ('b', 2), ('c', 1)][:i]) + self.assertEqual(''.join(sorted(c.elements())), 'aaabbc') + c['a'] += 1 # increment an existing value + c['b'] -= 2 # sub existing value to zero + del c['c'] # remove an entry + del c['c'] # make sure that del doesn't raise KeyError + c['d'] -= 2 # sub from a missing value + c['e'] = -5 # directly assign a missing value + c['f'] += 4 # add to a missing value + self.assertEqual(c, dict(a=4, b=0, d=-2, e=-5, f=4)) + self.assertEqual(''.join(sorted(c.elements())), 'aaaaffff') + self.assertEqual(c.pop('f'), 4) + self.assertNotIn('f', c) + for i in range(3): + elem, cnt = c.popitem() + self.assertNotIn(elem, c) + c.clear() + self.assertEqual(c, {}) + self.assertEqual(repr(c), 'PyCounter()') + self.assertRaises(NotImplementedError, PyCounter.fromkeys, 'abc') + self.assertRaises(TypeError, hash, c) + c.update(dict(a=5, b=3)) + c.update(c=1) + c.update(PyCounter('a' * 50 + 'b' * 30)) + c.update() # test case with no args + c.__init__('a' * 500 + 'b' * 300) + c.__init__('cdc') + c.__init__() + self.assertEqual(c, dict(a=555, b=333, c=3, d=1)) + self.assertEqual(c.setdefault('d', 5), 1) + self.assertEqual(c['d'], 1) + self.assertEqual(c.setdefault('e', 5), 5) + self.assertEqual(c['e'], 5) + + def test_copying(self): + # Check that counters are copyable, deepcopyable, picklable, and + #have a repr/eval round-trip + words = PyCounter('which witch had which witches wrist watch'.split()) + update_test = PyCounter() + update_test.update(words) + for i, dup in enumerate([ + words.copy(), + copy.copy(words), + copy.deepcopy(words), + pickle.loads(pickle.dumps(words, 0)), + pickle.loads(pickle.dumps(words, 1)), + pickle.loads(pickle.dumps(words, 2)), + pickle.loads(pickle.dumps(words, -1)), + eval(repr(words)), + update_test, + PyCounter(words), + ]): + msg = (i, dup, words) + self.assertTrue(dup is not words) + self.assertEqual(dup, words) + self.assertEqual(len(dup), len(words)) + self.assertEqual(type(dup), type(words)) + + def test_conversions(self): + # Convert to: set, list, dict + s = 'she sells sea shells by the sea shore' + self.assertEqual(sorted(PyCounter(s).elements()), sorted(s)) + self.assertEqual(sorted(PyCounter(s)), sorted(set(s))) + self.assertEqual(dict(PyCounter(s)), dict(PyCounter(s).items())) + self.assertEqual(set(PyCounter(s)), set(s)) + + def test_invariant_for_the_in_operator(self): + c = PyCounter(a=10, b=-2, c=0) + for elem in c: + self.assertTrue(elem in c) + self.assertIn(elem, c) + + def test_multiset_operations(self): + # Verify that adding a zero counter will strip zeros and negatives + c = PyCounter(a=10, b=-2, c=0) + PyCounter() + self.assertEqual(dict(c), dict(a=10)) + + elements = 'abcd' + for i in range(1000): + # test random pairs of multisets + p = PyCounter(dict((elem, randrange(-2,4)) for elem in elements)) + p.update(e=1, f=-1, g=0) + q = PyCounter(dict((elem, randrange(-2,4)) for elem in elements)) + q.update(h=1, i=-1, j=0) + for counterop, numberop in [ + (PyCounter.__add__, lambda x, y: max(0, x+y)), + (PyCounter.__sub__, lambda x, y: max(0, x-y)), + (PyCounter.__or__, lambda x, y: max(0,x,y)), + (PyCounter.__and__, lambda x, y: max(0, min(x,y))), + ]: + result = counterop(p, q) + for x in elements: + self.assertEqual(numberop(p[x], q[x]), result[x], + (counterop, x, p, q)) + # verify that results exclude non-positive counts + self.assertTrue(x>0 for x in result.values()) + + elements = 'abcdef' + for i in range(100): + # verify that random multisets with no repeats are exactly like sets + p = PyCounter(dict((elem, randrange(0, 2)) for elem in elements)) + q = PyCounter(dict((elem, randrange(0, 2)) for elem in elements)) + for counterop, setop in [ + (PyCounter.__sub__, set.__sub__), + (PyCounter.__or__, set.__or__), + (PyCounter.__and__, set.__and__), + ]: + counter_result = counterop(p, q) + set_result = setop(set(p.elements()), set(q.elements())) + self.assertEqual(counter_result, dict.fromkeys(set_result, 1)) + + def test_subtract(self): + c = PyCounter(a=-5, b=0, c=5, d=10, e=15,g=40) + c.subtract(a=1, b=2, c=-3, d=10, e=20, f=30, h=-50) + self.assertEqual(c, PyCounter(a=-6, b=-2, c=8, d=0, e=-5, f=-30, g=40, h=50)) + c = PyCounter(a=-5, b=0, c=5, d=10, e=15,g=40) + c.subtract(PyCounter(a=1, b=2, c=-3, d=10, e=20, f=30, h=-50)) + self.assertEqual(c, PyCounter(a=-6, b=-2, c=8, d=0, e=-5, f=-30, g=40, h=50)) + c = PyCounter('aaabbcd') + c.subtract('aaaabbcce') + self.assertEqual(c, PyCounter(a=-1, b=0, c=-1, d=1, e=-1)) + class TestOrderedDict(unittest.TestCase): def test_init(self): @@ -1029,7 +1188,7 @@ def test_main(verbose=None): NamedTupleDocs = doctest.DocTestSuite(module=collections) test_classes = [TestNamedTuple, NamedTupleDocs, TestOneTrickPonyABCs, - TestCollectionABCs, TestCounter, + TestCollectionABCs, TestCounter, TestPyCounter, TestOrderedDict, GeneralMappingTests, SubclassMappingTests] support.run_unittest(*test_classes) support.run_doctest(collections, verbose) Index: Modules/_collectionsmodule.c =================================================================== --- Modules/_collectionsmodule.c (revision 87161) +++ Modules/_collectionsmodule.c (working copy) @@ -1518,12 +1518,426 @@ PyObject_GC_Del, /* tp_free */ }; +/* counter type **********************************************************/ + +typedef struct { + PyDictObject dict; +} counterobject; + +static PyTypeObject counter_type; /* Forward */ + +PyDoc_STRVAR(counter_missing_doc, +"__missing__(key) # Called by __getitem__ for missing key. Return 0 \n\ +for counter.\n\ +"); + +static PyObject * +counter_missing(counterobject *ctr, PyObject *key) +{ + return PyLong_FromLong(0); +} + +static int +counter_updatefromlistortuple(PyObject *self, PyObject *list) +{ + PyObject **src; + PyObject *one, *obj, *num, *newnum; + Py_ssize_t n, i; + + n = PySequence_Fast_GET_SIZE(list); + one = PyLong_FromLong(1); + src = PySequence_Fast_ITEMS(list); + for (i = 0; i < n; i++) { + obj = src[i]; + num = PyDict_GetItem(self, obj); + if (num != NULL) { + newnum = PyNumber_Add(num, one); + PyDict_SetItem(self, obj, newnum); + Py_DECREF(newnum); + } + else + PyDict_SetItem(self, obj, one); + } + Py_DECREF(one); + return 0; +} + +static int +counter_updatefromiter(PyObject *self, PyObject *iterable) +{ + PyObject *(*iternext)(PyObject *); + PyObject *one, *it, *num, *newnum, *obj; + + it = PyObject_GetIter(iterable); + if (it == NULL) + return -1; + iternext = *it->ob_type->tp_iternext; + one = PyLong_FromLong(1); + for(;;) { + obj = iternext(it); + if (obj == NULL) { + if (PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_StopIteration)) + PyErr_Clear(); + else + goto error; + } + break; + } + num = PyDict_GetItem(self, obj); + if (num != NULL) { + newnum = PyNumber_Add(num, one); + PyDict_SetItem(self, obj, newnum); + Py_DECREF(newnum); + } + else { + PyDict_SetItem(self, obj, one); + } + Py_DECREF(obj); + } + Py_DECREF(it); + Py_DECREF(one); + return 0; + error: + Py_DECREF(it); + Py_DECREF(one); + return -1; +} + +static int +counter_updatefromdict(PyObject *self, PyObject *dict) +{ + if (!Py_SIZE(dict)) + return 0; + if (Py_SIZE(self)) { + PyObject *keys, *iter, *key, *value, *num, *newnum; + + keys = PyDict_Keys(dict); + if (keys == NULL) { + PyErr_SetString(PyExc_ValueError, "dict keys broken"); + return -1; + } + iter = PyObject_GetIter(keys); + Py_DECREF(keys); + if (iter == NULL) { + PyErr_SetString(PyExc_TypeError, "dict keys must be iterable"); + return -1; + } + for (key = PyIter_Next(iter); key; key = PyIter_Next(iter)) { + num = PyDict_GetItem(self, key); + value = PyDict_GetItem(dict,key); + if (num != NULL) { + newnum = PyNumber_Add(num, value); + PyDict_SetItem(self, key, newnum); + Py_DECREF(newnum); + } + else + PyDict_SetItem(self, key, value); + Py_DECREF(key); + if (PyErr_Occurred()) + return -1; + } + return 0; + } + else /* Nothing in self. Just merge */ + return PyDict_Merge(self, dict, 1); +} + +static int +counter_update_common(PyObject *self, PyObject *args, PyObject *kwds, char *methname) +{ + PyObject *arg = NULL; + int result = 0; + + if (!PyArg_UnpackTuple(args, methname, 0, 1, &arg)) + result = -1; + else if (arg != NULL) { + if (PyList_Check(arg) || PyTuple_Check(arg)) + result = counter_updatefromlistortuple(self, arg); + else if (PyDict_Check(arg)) + result = counter_updatefromdict(self, arg); + else + result = counter_updatefromiter(self, arg); + } + if (result == 0 && kwds != NULL) { + if (PyArg_ValidateKeywordArguments(kwds)) + return counter_updatefromdict(self, kwds); + else + return -1; + } + return result; +} + +PyDoc_STRVAR(counter_update_doc, +"update(arg, keywords) # Update the Counter with a list, tuple, iterator, \n\ +dictionary, or Counter.\n\ +"); + +static PyObject * +counter_update(PyObject *self, PyObject *args, PyObject *kwds) +{ + if (counter_update_common(self, args, kwds, "update") != -1) + Py_RETURN_NONE; + return NULL; +} + +PyDoc_STRVAR(counter_update_fromsubs_doc, +"update_fromsubs(seq, frame[, step[, lo[, hi]]])\n\ +Counts subarrays in the sequence seq.\n\ +frame is the length of the subarrays.\n\ +step is the step to move ahead after each subarray.\n\ +lo and hi are the respective starting and ending indices.\n\ +"); + +static PyObject * +counter_update_fromsubs(PyObject *self, PyObject *args, PyObject *kwds) +{ + PyObject *seq = NULL; + PyObject *it, *sub, *num, *newnum, *one; + PyObject *(*iternext)(PyObject *); + Py_ssize_t lo = 0; + Py_ssize_t hi = -1; + Py_ssize_t frame = 0; + Py_ssize_t step = 1; + Py_ssize_t ssize, i; + static char *keywords[] = {"seq", "frame", "step", "lo", "hi", NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, + "Oi|iii:update_fromsubs", + keywords, &seq, &frame, &step, &lo, &hi)) + return NULL; + if (seq == NULL || frame < 2 || !PySequence_Check(seq)) + return NULL; + if (frame < 2) { + PyErr_SetString(PyExc_IndexError, "frame must be 2 or greater"); + return NULL; + } + ssize = Py_SIZE(seq); + if (ssize < 2) + Py_RETURN_NONE; + if (lo < 0) { + lo += ssize; + if (lo < 0) { + PyErr_SetString(PyExc_IndexError, "lo out of range"); + return NULL; + } + } + else if (lo >= ssize) { + PyErr_SetString(PyExc_IndexError, "lo out of range"); + return NULL; + } + if (hi < 0) + hi += ssize + 1; + if (hi < 3 || hi > ssize) { + PyErr_SetString(PyExc_IndexError, "hi out of range"); + return NULL; + } + if (step < 1) { + PyErr_SetString(PyExc_IndexError, "step must be > 0"); + return NULL; + } + it = PyObject_GetIter(seq); + if (it == NULL) + return NULL; + iternext = *it->ob_type->tp_iternext; + hi -= (frame - 1); + one = PyLong_FromLong(1); + if (step == 1) { /* fast track for step = 1 */ + for (i = lo; i < hi; ++i) { + sub = PySequence_GetSlice(seq, i, i + frame); + num = PyDict_GetItem(self, sub); + if (num != NULL) { + newnum = PyNumber_Add(num, one); + PyDict_SetItem(self, sub, newnum); + Py_DECREF(newnum); + } + else + PyDict_SetItem(self, sub, one); + Py_DECREF(sub); + if (PyErr_Occurred() != NULL) + goto error; + } + } + else { + for (i = lo; i < hi; i += step) { + sub = PySequence_GetSlice(seq, i, i + frame); + num = PyDict_GetItem(self, sub); + if (num != NULL) { + newnum = PyNumber_Add(num, one); + PyDict_SetItem(self, sub, newnum); + Py_DECREF(newnum); + } + else + PyDict_SetItem(self, sub, one); + Py_DECREF(sub); + if (PyErr_Occurred() != NULL) + goto error; + } + } + Py_DECREF(one); + Py_DECREF(it); + Py_RETURN_NONE; + error: + Py_DECREF(one); + Py_DECREF(it); + return NULL; +} + +PyDoc_STRVAR(counter_copy_doc, "D.copy() -> a shallow copy of D."); + +static PyObject * +counter_copy(counterobject *ctr) +{ + /* This calls the object's class. That only works for subclasses + whose class constructor has the same signature. Subclasses that + define a different constructor signature must override copy(). + */ + + return PyObject_CallFunctionObjArgs((PyObject*)Py_TYPE(ctr), + ctr, NULL); +} + +static PyObject * +counter_reduce(counterobject *ctr) +{ + /* __reduce__ must return a 5-tuple as follows: + + - factory function + - tuple of args for the factory function + - additional state (here None) + - sequence iterator (here None) + - dictionary iterator (yielding successive (key, value) pairs + + This API is used by pickle.py and copy.py. + + For this to be useful with pickle.py, the default_factory + must be picklable; e.g., None, a built-in, or a global + function in a module or package. + + Both shallow and deep copying are supported, but for deep + copying, the default_factory must be deep-copyable; e.g. None, + or a built-in (functions are not copyable at this time). + + This only works for subclasses as long as their constructor + signature is compatible; the first argument must be the + optional default_factory, defaulting to None. + */ + PyObject *args = PyTuple_New(0); + PyObject *items, *iter, *result; + + items = PyObject_CallMethod((PyObject *)ctr, "items", "()"); + if (items == NULL) { + Py_DECREF(args); + return NULL; + } + iter = PyObject_GetIter(items); + if (iter == NULL) { + Py_DECREF(items); + Py_DECREF(args); + return NULL; + } + result = PyTuple_Pack(5, Py_TYPE(ctr), args, + Py_None, Py_None, iter); + Py_DECREF(iter); + Py_DECREF(items); + Py_DECREF(args); + return result; +} + +static PyMethodDef counter_methods[] = { + {"__missing__", (PyCFunction)counter_missing, METH_O, + counter_missing_doc}, + {"copy", (PyCFunction)counter_copy, METH_NOARGS, + counter_copy_doc}, + {"__copy__", (PyCFunction)counter_copy, METH_NOARGS, + counter_copy_doc}, + {"__reduce__", (PyCFunction)counter_reduce, METH_NOARGS, + reduce_doc}, + {"update", (PyCFunction)counter_update, METH_VARARGS | METH_KEYWORDS, + counter_update_doc}, + {"update_fromsubs",(PyCFunction)counter_update_fromsubs, + METH_VARARGS | METH_KEYWORDS, counter_update_fromsubs_doc}, + {NULL} +}; + +static void +counter_dealloc(counterobject *ctr) +{ + PyDict_Type.tp_dealloc((PyObject *)ctr); +} + +static int +counter_traverse(PyObject *self, visitproc visit, void *arg) +{ + return PyDict_Type.tp_traverse(self, visit,arg); +} + +static int +counter_tp_clear(counterobject *ctr) +{ + return PyDict_Type.tp_clear((PyObject *)ctr); +} + +static int +counter_init(PyObject *self, PyObject *args, PyObject *kwds) +{ + return counter_update_common(self, args, kwds, "Counter"); +} + +/* See comment in xxsubtype.c */ +#define DEFERRED_ADDRESS(ADDR) 0 + +static PyTypeObject counter_type = { + PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0) + "collections._Counter", /* tp_name */ + sizeof(counterobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)counter_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, + /* tp_flags */ + 0, /* tp_doc */ + counter_traverse, /* tp_traverse */ + (inquiry)counter_tp_clear, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset*/ + 0, /* tp_iter */ + 0, /* tp_iternext */ + counter_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + DEFERRED_ADDRESS(&PyDict_Type), /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + counter_init, /* tp_init */ + PyType_GenericAlloc, /* tp_alloc */ + 0, /* tp_new */ + PyObject_GC_Del, /* tp_free */ +}; + /* module level code ********************************************************/ PyDoc_STRVAR(module_doc, "High performance data structures.\n\ - deque: ordered collection accessible from endpoints only\n\ - defaultdict: dict subclass with a default value factory\n\ +- Counter: dict subclass for counting\n\ "); @@ -1565,5 +1979,12 @@ if (PyType_Ready(&dequereviter_type) < 0) return NULL; + counter_type.tp_base = &PyDict_Type; + if (PyType_Ready(&counter_type) < 0) + return NULL; + Py_INCREF(&counter_type); + PyModule_AddObject(m, "_Counter", (PyObject *)&counter_type); + + return m; }