Index: Lib/collections.py
===================================================================
--- Lib/collections.py	(revision 87132)
+++ Lib/collections.py	(working copy)
@@ -6,7 +6,7 @@
 import _abcoll
 __all__ += _abcoll.__all__
 
-from _collections import deque, defaultdict
+from _collections import deque, defaultdict, _Counter
 from operator import itemgetter as _itemgetter
 from keyword import iskeyword as _iskeyword
 import sys as _sys
@@ -334,7 +334,7 @@
 ###  Counter
 ########################################################################
 
-class Counter(dict):
+class Counter(_Counter):
     '''Dict subclass for counting hashable items.  Sometimes called a bag
     or multiset.  Elements are stored as dictionary keys and their counts
     are stored as dictionary values.
@@ -385,24 +385,6 @@
     #   http://code.activestate.com/recipes/259174/
     #   Knuth, TAOCP Vol. II section 4.6.3
 
-    def __init__(self, iterable=None, **kwds):
-        '''Create a new, empty Counter object.  And if given, count elements
-        from an input iterable.  Or, initialize the count from another mapping
-        of elements to their counts.
-
-        >>> c = Counter()                           # a new, empty counter
-        >>> c = Counter('gallahad')                 # a new counter from an iterable
-        >>> c = Counter({'a': 4, 'b': 2})           # a new counter from a mapping
-        >>> c = Counter(a=4, b=2)                   # a new counter from keyword args
-
-        '''
-        self.update(iterable, **kwds)
-
-    def __missing__(self, key):
-        'The count of elements not in the Counter is zero.'
-        # Needed so that self[missing_item] does not raise KeyError
-        return 0
-
     def most_common(self, n=None):
         '''List the n most common elements and their counts from the most
         common to the least.  If n is None, then list all element counts.
@@ -447,41 +429,6 @@
         raise NotImplementedError(
             'Counter.fromkeys() is undefined.  Use Counter(iterable) instead.')
 
-    def update(self, iterable=None, **kwds):
-        '''Like dict.update() but add counts instead of replacing them.
-
-        Source can be an iterable, a dictionary, or another Counter instance.
-
-        >>> c = Counter('which')
-        >>> c.update('witch')           # add elements from another iterable
-        >>> d = Counter('watch')
-        >>> c.update(d)                 # add elements from another counter
-        >>> c['h']                      # four 'h' in which, witch, and watch
-        4
-
-        '''
-        # The regular dict.update() operation makes no sense here because the
-        # replace behavior results in the some of original untouched counts
-        # being mixed-in with all of the other counts for a mismash that
-        # doesn't have a straight-forward interpretation in most counting
-        # contexts.  Instead, we implement straight-addition.  Both the inputs
-        # and outputs are allowed to contain zero and negative counts.
-
-        if iterable is not None:
-            if isinstance(iterable, Mapping):
-                if self:
-                    self_get = self.get
-                    for elem, count in iterable.items():
-                        self[elem] = count + self_get(elem, 0)
-                else:
-                    dict.update(self, iterable) # fast path when counter is empty
-            else:
-                self_get = self.get
-                for elem in iterable:
-                    self[elem] = 1 + self_get(elem, 0)
-        if kwds:
-            self.update(kwds)
-
     def subtract(self, iterable=None, **kwds):
         '''Like dict.update() but subtracts counts instead of replacing them.
         Counts can be reduced below zero.  Both the inputs and outputs are
Index: Lib/test/test_collections.py
===================================================================
--- Lib/test/test_collections.py	(revision 87132)
+++ Lib/test/test_collections.py	(working copy)
@@ -686,6 +686,18 @@
         self.assertEqual(c['d'], 1)
         self.assertEqual(c.setdefault('e', 5), 5)
         self.assertEqual(c['e'], 5)
+        c.update_fromsubs('abab', 2)
+        self.assertEqual(c['ab'], 2)
+        self.assertEqual(c['ba'], 1)
+        c.update_fromsubs('cdcd', 2, 2)
+        self.assertEqual(c['cd'], 2)
+        self.assertEqual(c['dc'], 0)
+        c.update_fromsubs('efef', 2, 1, 1, 3)
+        self.assertEqual(c['ef'], 0)
+        self.assertEqual(c['fe'], 1)
+        self.assertRaises(ValueError, c.update_fromsubs, 'abab', 0)
+        self.assertRaises(ValueError, c.update_fromsubs, 'abab', 2, 0)
+        self.assertRaises(TypeError, c.update_fromsubs, 42, 2)
 
     def test_copying(self):
         # Check that counters are copyable, deepcopyable, picklable, and
Index: Modules/_collectionsmodule.c
===================================================================
--- Modules/_collectionsmodule.c	(revision 87132)
+++ Modules/_collectionsmodule.c	(working copy)
@@ -1518,12 +1518,433 @@
     PyObject_GC_Del,                    /* tp_free */
 };
 
+/* counter type **********************************************************/
+
+typedef struct {
+    PyDictObject dict;
+} counterobject;
+
+static PyTypeObject counter_type; /* Forward */
+
+PyDoc_STRVAR(counter_missing_doc,
+"__missing__(key) # Called by __getitem__ for missing key.  Return 0 \n\
+for counter.\n\
+");
+
+static PyObject *
+counter_missing(counterobject *ctr, PyObject *key)
+{
+    /* A key that was never counted has a count of zero; nothing is
+       inserted into the dict. */
+    return PyLong_FromLong(0);
+}
+
+/* Add delta to the count stored under key.  A key that is not present yet
+   simply gets delta as its count.  Returns 0 on success, or -1 with an
+   exception set on failure. */
+static int
+counter_add(PyObject *self, PyObject *key, PyObject *delta)
+{
+    PyObject *oldcount, *newcount;
+    int status;
+
+    /* PyDict_GetItem() would silently swallow errors raised while hashing
+       or comparing the key, so use the error-reporting variant. */
+    oldcount = PyDict_GetItemWithError(self, key);
+    if (oldcount == NULL) {
+        if (PyErr_Occurred())
+            return -1;
+        return PyDict_SetItem(self, key, delta);
+    }
+    /* oldcount is a borrowed reference: keep it alive while __add__ runs,
+       since arbitrary code could drop the entry from the dict. */
+    Py_INCREF(oldcount);
+    newcount = PyNumber_Add(oldcount, delta);
+    Py_DECREF(oldcount);
+    if (newcount == NULL)
+        return -1;
+    status = PyDict_SetItem(self, key, newcount);
+    Py_DECREF(newcount);
+    return status;
+}
+
+/* Count every item of a list or tuple without creating an iterator.
+   Returns 0 on success, or -1 with an exception set on failure. */
+static int
+counter_updatefromlistortuple(PyObject *self, PyObject *list)
+{
+    PyObject *one, *obj;
+    Py_ssize_t i;
+    int status = 0;
+
+    one = PyLong_FromLong(1);
+    if (one == NULL)
+        return -1;
+    /* The size is re-read on every pass and each item is fetched by index
+       rather than through a cached item pointer: hashing, comparing or
+       adding can run arbitrary code that resizes a list argument. */
+    for (i = 0; i < PySequence_Fast_GET_SIZE(list); i++) {
+        obj = PySequence_Fast_GET_ITEM(list, i);
+        Py_INCREF(obj);
+        status = counter_add(self, obj, one);
+        Py_DECREF(obj);
+        if (status < 0)
+            break;
+    }
+    Py_DECREF(one);
+    return status;
+}
+
+/* Count every item produced by an arbitrary iterable.  Returns 0 on
+   success, or -1 with an exception set on failure. */
+static int
+counter_updatefromiter(PyObject *self, PyObject *iterable)
+{
+    PyObject *one, *it, *obj;
+    int status = 0;
+
+    it = PyObject_GetIter(iterable);
+    if (it == NULL)
+        return -1;
+    one = PyLong_FromLong(1);
+    if (one == NULL) {
+        Py_DECREF(it);
+        return -1;
+    }
+    while ((obj = PyIter_Next(it)) != NULL) {
+        status = counter_add(self, obj, one);
+        Py_DECREF(obj);
+        if (status < 0)
+            break;
+    }
+    /* PyIter_Next() returns NULL without an exception on exhaustion and
+       NULL with an exception set when the iterator itself failed. */
+    if (status == 0 && PyErr_Occurred())
+        status = -1;
+    Py_DECREF(one);
+    Py_DECREF(it);
+    return status;
+}
+
+/* Add the counts found in a dict (or a dict subclass such as another
+   Counter) to self.  Returns 0 on success, or -1 with an exception set
+   on failure. */
+static int
+counter_updatefromdict(PyObject *self, PyObject *dict)
+{
+    PyObject *keys, *key, *value;
+    Py_ssize_t i;
+    int status = 0;
+
+    /* PyDict_Size(), not Py_SIZE(): a dict is not a variable-size object. */
+    if (PyDict_Size(dict) == 0)
+        return 0;
+    if (PyDict_Size(self) == 0) {
+        /* Nothing in self.  Just merge */
+        return PyDict_Merge(self, dict, 1);
+    }
+
+    /* Walk a private snapshot of the keys so that the loop stays valid
+       even when dict is self or is changed by code run while adding. */
+    keys = PyDict_Keys(dict);
+    if (keys == NULL)
+        return -1;
+    for (i = 0; i < PyList_GET_SIZE(keys); i++) {
+        key = PyList_GET_ITEM(keys, i);
+        value = PyDict_GetItemWithError(dict, key);
+        if (value == NULL) {
+            if (PyErr_Occurred()) {
+                status = -1;
+                break;
+            }
+            continue;           /* key was removed after the snapshot */
+        }
+        Py_INCREF(value);
+        status = counter_add(self, key, value);
+        Py_DECREF(value);
+        if (status < 0)
+            break;
+    }
+    Py_DECREF(keys);
+    return status;
+}
+
+/* Shared implementation of Counter(...) and Counter.update(...): count
+   an optional positional argument, then any keyword arguments.  methname
+   is only used in error messages.  Returns 0 on success, or -1 with an
+   exception set on failure. */
+static int
+counter_update_common(PyObject *self, PyObject *args, PyObject *kwds,
+                      const char *methname)
+{
+    PyObject *arg = NULL;
+    int result = 0;
+
+    if (!PyArg_UnpackTuple(args, methname, 0, 1, &arg))
+        return -1;
+    /* None means "nothing to count", matching the iterable=None default
+       of the pure Python implementation that this code replaces. */
+    if (arg != NULL && arg != Py_None) {
+        /* NOTE(review): the Python implementation treated every Mapping
+           as a mapping of counts; here only real dicts are, and any
+           other mapping has its keys counted once each.  Confirm that
+           this difference is intended. */
+        if (PyList_Check(arg) || PyTuple_Check(arg))
+            result = counter_updatefromlistortuple(self, arg);
+        else if (PyDict_Check(arg))
+            result = counter_updatefromdict(self, arg);
+        else
+            result = counter_updatefromiter(self, arg);
+        if (result < 0)
+            return -1;
+    }
+    if (kwds != NULL) {
+        if (!PyArg_ValidateKeywordArguments(kwds))
+            return -1;
+        return counter_updatefromdict(self, kwds);
+    }
+    return result;
+}
+
+PyDoc_STRVAR(counter_update_doc,
+"update(arg, keywords) # Update the Counter with a list, tuple, iterator, \n\
+dictionary, or Counter.\n\
+");
+
+static PyObject *
+counter_update(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    if (counter_update_common(self, args, kwds, "update") < 0)
+        return NULL;
+    Py_RETURN_NONE;
+}
+
+PyDoc_STRVAR(counter_update_fromsubs_doc,
+"update_fromsubs(seq, frame[, step[, lo[, hi]]])\n\
+Counts subarrays in the sequence seq.\n\
+frame is the length of the subarrays.\n\
+step is the step to move ahead after each subarray.\n\
+lo and hi are the respective starting and ending indices.\n\
+");
+
+static PyObject *
+counter_update_fromsubs(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    PyObject *seq = NULL;
+    PyObject *sub, *one;
+    Py_ssize_t lo = 0;
+    Py_ssize_t hi = -1;
+    Py_ssize_t frame = 0;
+    Py_ssize_t step = 1;
+    Py_ssize_t ssize, last, i;
+    int status = 0;
+    static char *keywords[] = {"seq", "frame", "step", "lo", "hi", NULL};
+
+    /* The targets are Py_ssize_t, so the format unit must be "n"; "i"
+       stores a C int and leaves the rest of each variable untouched. */
+    if (!PyArg_ParseTupleAndKeywords(args, kwds,
+                                     "On|nnn:update_fromsubs",
+                                     keywords, &seq, &frame, &step,
+                                     &lo, &hi))
+        return NULL;
+    if (!PySequence_Check(seq)) {
+        PyErr_SetString(PyExc_TypeError,
+                        "update_fromsubs() requires a sequence");
+        return NULL;
+    }
+    if (frame < 1 || step < 1) {
+        PyErr_SetString(PyExc_ValueError,
+                        "frame and step must be positive");
+        return NULL;
+    }
+    ssize = PySequence_Size(seq);
+    if (ssize < 0)
+        return NULL;
+    if (lo < 0)
+        lo += ssize;
+    /* A negative hi counts back from one past the end, so the default of
+       -1 means "up to and including the last element". */
+    if (hi < 0)
+        hi = hi + ssize + 1;
+    if (lo < 0 || lo > ssize || hi < 0 || hi > ssize) {
+        PyErr_SetString(PyExc_ValueError,
+                        "lo or hi is out of range for seq");
+        return NULL;
+    }
+    one = PyLong_FromLong(1);
+    if (one == NULL)
+        return NULL;
+    /* last is the final index at which a whole frame still fits inside
+       seq[lo:hi]; when it is below lo there is nothing to count. */
+    last = hi - frame;
+    for (i = lo; i <= last; i += step) {
+        sub = PySequence_GetSlice(seq, i, i + frame);
+        if (sub == NULL) {
+            status = -1;
+            break;
+        }
+        status = counter_add(self, sub, one);
+        Py_DECREF(sub);
+        if (status < 0)
+            break;
+        /* Stop here rather than let a huge step overflow i. */
+        if (step > last - i)
+            break;
+    }
+    Py_DECREF(one);
+    if (status < 0)
+        return NULL;
+    Py_RETURN_NONE;
+}
+
+PyDoc_STRVAR(counter_copy_doc, "D.copy() -> a shallow copy of D.");
+
+static PyObject *
+counter_copy(counterobject *ctr)
+{
+    /* This calls the object's class.  That only works for subclasses
+       whose class constructor has the same signature.  Subclasses that
+       define a different constructor signature must override copy().
+    */
+
+    return PyObject_CallFunctionObjArgs((PyObject*)Py_TYPE(ctr),
+                                        ctr, NULL);
+}
+
+static PyObject *
+counter_reduce(counterobject *ctr)
+{
+    /* __reduce__ must return a 5-tuple as follows:
+
+       - factory function (here the object's class)
+       - tuple of args for the factory function (here empty)
+       - additional state (here None)
+       - sequence iterator (here None)
+       - dictionary iterator (yielding successive (key, value) pairs)
+
+       This API is used by pickle.py and copy.py.
+
+       This only works for subclasses as long as their constructor can
+       be called without arguments.
+    */
+    PyObject *args, *items, *iter, *result;
+
+    args = PyTuple_New(0);
+    if (args == NULL)
+        return NULL;
+    items = PyObject_CallMethod((PyObject *)ctr, "items", "()");
+    if (items == NULL) {
+        Py_DECREF(args);
+        return NULL;
+    }
+    iter = PyObject_GetIter(items);
+    if (iter == NULL) {
+        Py_DECREF(items);
+        Py_DECREF(args);
+        return NULL;
+    }
+    result = PyTuple_Pack(5, Py_TYPE(ctr), args,
+                          Py_None, Py_None, iter);
+    Py_DECREF(iter);
+    Py_DECREF(items);
+    Py_DECREF(args);
+    return result;
+}
+
+static PyMethodDef counter_methods[] = {
+    {"__missing__", (PyCFunction)counter_missing, METH_O,
+     counter_missing_doc},
+    {"copy", (PyCFunction)counter_copy, METH_NOARGS,
+     counter_copy_doc},
+    {"__copy__", (PyCFunction)counter_copy, METH_NOARGS,
+     counter_copy_doc},
+    {"__reduce__", (PyCFunction)counter_reduce, METH_NOARGS,
+     reduce_doc},
+    {"update", (PyCFunction)counter_update, METH_VARARGS | METH_KEYWORDS,
+     counter_update_doc},
+    {"update_fromsubs",(PyCFunction)counter_update_fromsubs,
+     METH_VARARGS | METH_KEYWORDS, counter_update_fromsubs_doc},
+    {NULL}
+};
+
+static void
+counter_dealloc(counterobject *ctr)
+{
+    PyDict_Type.tp_dealloc((PyObject *)ctr);
+}
+
+static int
+counter_traverse(PyObject *self, visitproc visit, void *arg)
+{
+    return PyDict_Type.tp_traverse(self, visit,arg);
+}
+
+static int
+counter_tp_clear(counterobject *ctr)
+{
+    return PyDict_Type.tp_clear((PyObject *)ctr);
+}
+
+static int
+counter_init(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    return counter_update_common(self, args, kwds, "Counter");
+}
+
+/* See comment in xxsubtype.c */
+#define DEFERRED_ADDRESS(ADDR) 0
+
+static PyTypeObject counter_type = {
+    PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
+    "collections._Counter",             /* tp_name */
+    sizeof(counterobject),              /* tp_basicsize */
+    0,                                  /* tp_itemsize */
+    /* methods */
+    (destructor)counter_dealloc,        /* tp_dealloc */
+    0,                                  /* tp_print */
+    0,                                  /* tp_getattr */
+    0,                                  /* tp_setattr */
+    0,                                  /* tp_reserved */
+    0,                                  /* tp_repr */
+    0,                                  /* tp_as_number */
+    0,                                  /* tp_as_sequence */
+    0,                                  /* tp_as_mapping */
+    0,                                  /* tp_hash */
+    0,                                  /* tp_call */
+    0,                                  /* tp_str */
+    PyObject_GenericGetAttr,            /* tp_getattro */
+    0,                                  /* tp_setattro */
+    0,                                  /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
+                                    /* tp_flags */
+    0,                                  /* tp_doc */
+    counter_traverse,                   /* tp_traverse */
+    (inquiry)counter_tp_clear,          /* tp_clear */
+    0,                                  /* tp_richcompare */
+    0,                                  /* tp_weaklistoffset*/
+    0,                                  /* tp_iter */
+    0,                                  /* tp_iternext */
+    counter_methods,                    /* tp_methods */
+    0,                                  /* tp_members */
+    0,                                  /* tp_getset */
+    DEFERRED_ADDRESS(&PyDict_Type),     /* tp_base */
+    0,                                  /* tp_dict */
+    0,                                  /* tp_descr_get */
+    0,                                  /* tp_descr_set */
+    0,                                  /* tp_dictoffset */
+    counter_init,                       /* tp_init */
+    PyType_GenericAlloc,                /* tp_alloc */
+    0,                                  /* tp_new */
+    PyObject_GC_Del,                    /* tp_free */
+};
+
 /* module level code ********************************************************/
 
 PyDoc_STRVAR(module_doc,
 "High performance data structures.\n\
 - deque:        ordered collection accessible from endpoints only\n\
 - defaultdict:  dict subclass with a default value factory\n\
+- Counter:      dict subclass for counting\n\
 ");
 
 
@@ -1565,5 +1986,11 @@
     if (PyType_Ready(&dequereviter_type) < 0)
         return NULL;
 
+    counter_type.tp_base = &PyDict_Type;
+    if (PyType_Ready(&counter_type) < 0)
+        return NULL;
+    Py_INCREF(&counter_type);
+    PyModule_AddObject(m, "_Counter", (PyObject *)&counter_type);
+
     return m;
 }