diff -r aeb3faaf4754 Doc/library/itertools.rst --- a/Doc/library/itertools.rst Mon Sep 02 17:01:10 2013 -0700 +++ b/Doc/library/itertools.rst Tue Sep 03 01:50:59 2013 +0100 @@ -48,6 +48,7 @@ ==================== ============================ ================================================= ============================================================= :func:`accumulate` p [,func] p0, p0+p1, p0+p1+p2, ... ``accumulate([1,2,3,4,5]) --> 1 3 6 10 15`` :func:`chain` p, q, ... p0, p1, ... plast, q0, q1, ... ``chain('ABC', 'DEF') --> A B C D E F`` +:func:`chunk` iterable, n (p0, p1, ..., pn-1) (pn, pn+1, ..., p2n-1) ... ``chunk('ABCDE', 3) --> (A, B, C), (D, E)`` :func:`compress` data, selectors (d[0] if s[0]), (d[1] if s[1]), ... ``compress('ABCDEF', [1,0,1,0,1,1]) --> A C E F`` :func:`dropwhile` pred, seq seq[n], seq[n+1], starting when pred fails ``dropwhile(lambda x: x<5, [1,4,6,4,1]) --> 6 4 1`` :func:`filterfalse` pred, seq elements of seq where pred(elem) is False ``filterfalse(lambda x: x%2, range(10)) --> 0 2 4 6 8`` @@ -166,6 +167,20 @@ yield element +.. function:: chunk(iterable, n) + + Make an iterator that splits *iterable* into chunks of *n* elements. Chunks + are tuples, and the last one may contain fewer than *n* (but at least one) + elements if there are not enough elements in the iterable to fill a whole + chunk. Equivalent to:: + + def chunk(iterable, n): + it = iter(iterable) + while True: + first = next(it) + yield tuple(chain((first,), islice(it, n - 1))) + + .. function:: combinations(iterable, r) Return *r* length subsequences of elements from the input *iterable*.
diff -r aeb3faaf4754 Lib/test/test_itertools.py
--- a/Lib/test/test_itertools.py	Mon Sep 02 17:01:10 2013 -0700
+++ b/Lib/test/test_itertools.py	Tue Sep 03 01:50:59 2013 +0100
@@ -179,6 +179,40 @@
             self.assertRaises(TypeError, list, oper(chain(2, 3)))
         self.pickletest(chain('abc', 'def'), compare=list('abcdef'))
 
+    def test_chunk(self):
+
+        def chunk2(iterable, n):
+            it = iter(iterable)
+            if not isinstance(n, int):
+                raise TypeError('n must be an integer')
+            if n < 1:
+                raise ValueError('n must be at least 1')
+
+            def g():
+                while True:
+                    first = next(it)
+                    yield tuple(chain((first,), islice(it, n - 1)))
+
+            return g()
+
+        # Raises after one element, i.e. part-way through the first chunk.
+        def fail_late():
+            yield 1
+            raise ZeroDivisionError
+
+        for c in (chunk, chunk2):
+            self.assertRaises(TypeError, c, 'abc')
+            self.assertRaises(ValueError, c, 'abc', -1)
+            self.assertRaises(ValueError, c, 'abc', 0)
+            self.assertRaises(TypeError, c, 0, 1)
+            self.assertEqual(list(c([], 1)), [])
+            self.assertEqual(list(c(range(5), 2)), [(0, 1), (2, 3), (4,)])
+            self.assertRaises(ZeroDivisionError, list, c(fail_late(), 2))
+
+    def test_chunk_reducible(self):
+        self.pickletest(chunk('ABCDE', 2),
+                        compare=[('A', 'B'), ('C', 'D'), ('E',)])
+
     def test_combinations(self):
         self.assertRaises(TypeError, combinations, 'abc')       # missing r argument
         self.assertRaises(TypeError, combinations, 'abc', 2, 1) # too many arguments
@@ -1296,6 +1330,7 @@
             self.assertRaises(StopIteration, next, f(lambda x:x, []))
             self.assertRaises(StopIteration, next, f(lambda x:x, StopNow()))
 
+
 class TestExamples(unittest.TestCase):
 
     def test_accumulate(self):
@@ -1319,6 +1354,10 @@
     def test_chain_from_iterable(self):
         self.assertEqual(''.join(chain.from_iterable(['ABC', 'DEF'])), 'ABCDEF')
 
+    def test_chunk(self):
+        self.assertEqual([''.join(chunk_) for chunk_ in chunk('ABCDE', 3)],
+                         ['ABC', 'DE'])
+
     def test_combinations(self):
         self.assertEqual(list(combinations('ABCD', 2)),
                          [('A','B'), ('A','C'), ('A','D'), ('B','C'), ('B','D'), ('C','D')])
@@ -1404,6 +1443,10 @@
         a = []
         self.makecycle(accumulate([1,2,a,3]), a)
 
+    def test_chunk(self):
+        a = []
+        self.makecycle(chunk(a, 1), a)
+
     def test_chain(self):
         a = []
         self.makecycle(chain(a), a)
@@ -1593,6 +1636,15 @@
             self.assertRaises(TypeError, list, chain(N(s)))
             self.assertRaises(ZeroDivisionError, list, chain(E(s)))
 
+    def test_chunk(self):
+        for s in ("123", "", range(1000), ('do', 1.2), range(2000,2200,5)):
+            for g in (G, I, Ig, S, L, R):
+                self.assertEqual(list(chain.from_iterable(chunk(g(s), 3))),
+                                 list(g(s)))
+            self.assertRaises(TypeError, chunk, X(s), 2)
+            self.assertRaises(TypeError, chunk, N(s), 2)
+            self.assertRaises(ZeroDivisionError, list, chunk(E(s), 2))
+
     def test_compress(self):
         for s in ("123", "", range(1000), ('do', 1.2), range(2000,2200,5)):
             n = len(s)
diff -r aeb3faaf4754 Modules/itertoolsmodule.c
--- a/Modules/itertoolsmodule.c	Mon Sep 02 17:01:10 2013 -0700
+++ b/Modules/itertoolsmodule.c	Tue Sep 03 01:50:59 2013 +0100
@@ -4447,6 +4447,207 @@
     PyObject_GC_Del,                    /* tp_free */
 };
 
+
+/* chunk object ***********************************************************/
+
+typedef struct {
+    PyObject_HEAD
+    PyObject *iterator;         /* source iterator (owned reference) */
+    int n;                      /* maximum chunk length, always >= 1 */
+} chunkobject;
+
+static PyTypeObject chunk_type;
+
+/* tp_new: parse (iterable, n), validate n and wrap iter(iterable). */
+static PyObject *
+chunk_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+    static char *kwargs[] = {"iterable", "n", NULL};
+    chunkobject *o;
+    PyObject *iterable;
+    PyObject *iterator;
+    int n;
+
+    /* Both arguments are required.  With "O|i" an omitted n would be
+       read uninitialized. */
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi:chunk", kwargs,
+                                     &iterable, &n)) {
+        return NULL;
+    }
+
+    if (n < 1) {
+        PyErr_SetString(PyExc_ValueError, "n has to be at least 1");
+        return NULL;
+    }
+
+    iterator = PyObject_GetIter(iterable);
+    if (iterator == NULL) {
+        return NULL;
+    }
+
+    o = (chunkobject *)type->tp_alloc(type, 0);
+    if (o == NULL) {
+        Py_DECREF(iterator);
+        return NULL;
+    }
+
+    /* PyObject_GetIter() returned a new reference which the object now
+       owns; an extra Py_INCREF here would leak the iterator. */
+    o->iterator = iterator;
+    o->n = n;
+    return (PyObject *)o;
+}
+
+static void
+chunk_dealloc(chunkobject *o)
+{
+    PyObject_GC_UnTrack(o);
+    Py_XDECREF(o->iterator);
+    Py_TYPE(o)->tp_free(o);
+}
+
+static int
+chunk_traverse(chunkobject *o, visitproc visit, void *arg)
+{
+    Py_VISIT(o->iterator);
+    return 0;
+}
+
+/* tp_iternext: return the next tuple of up to n elements, or NULL. */
+static PyObject *
+chunk_next(chunkobject *o)
+{
+    PyObject *element, *result;
+    int count = 0;
+
+    result = PyTuple_New(o->n);
+    if (result == NULL) {
+        return NULL;
+    }
+
+    while (count < o->n
+           && (element = PyIter_Next(o->iterator)) != NULL) {
+        /* Steals the reference to element; the tuple is still private. */
+        PyTuple_SET_ITEM(result, count, element);
+        count++;
+    }
+
+    /* PyIter_Next() returns NULL on exhaustion and on error alike.  An
+       error must win even when part of the chunk was already collected,
+       otherwise a result would be returned with an exception set. */
+    if (PyErr_Occurred()) {
+        Py_DECREF(result);
+        return NULL;
+    }
+
+    if (count == 0) {
+        /* Exhausted: tp_iternext may return NULL without setting
+           StopIteration. */
+        Py_DECREF(result);
+        return NULL;
+    }
+
+    if (count != o->n && _PyTuple_Resize(&result, count) < 0) {
+        /* On failure _PyTuple_Resize() releases the tuple itself. */
+        return NULL;
+    }
+
+    return result;
+}
+
+static PyObject *
+chunk_reduce(chunkobject *o)
+{
+    return Py_BuildValue("O(Oi)", Py_TYPE(o), o->iterator, o->n);
+}
+
+static PyObject *
+chunk_setstate(chunkobject *o, PyObject *state)
+{
+    PyObject *iterator, *old;
+    int n;
+
+    if (!PyArg_ParseTuple(state, "Oi", &iterator, &n)) {
+        return NULL;
+    }
+
+    /* Keep the invariants established by chunk_new(): chunk_next() calls
+       PyIter_Next() on the stored object and sizes its tuples from n. */
+    if (!PyIter_Check(iterator)) {
+        PyErr_SetString(PyExc_TypeError, "state must hold an iterator");
+        return NULL;
+    }
+    if (n < 1) {
+        PyErr_SetString(PyExc_ValueError, "n has to be at least 1");
+        return NULL;
+    }
+
+    /* Take the new reference before releasing the old one. */
+    Py_INCREF(iterator);
+    old = o->iterator;
+    o->iterator = iterator;
+    o->n = n;
+    Py_XDECREF(old);
+    Py_RETURN_NONE;
+}
+
+static PyMethodDef chunk_methods[] = {
+    {"__reduce__",      (PyCFunction)chunk_reduce,      METH_NOARGS,
+     reduce_doc},
+    {"__setstate__",    (PyCFunction)chunk_setstate,    METH_O,
+     setstate_doc},
+    {NULL,              NULL}   /* sentinel */
+};
+
+PyDoc_STRVAR(chunk_doc,
+"chunk(iterable, n) -> create an iterator which returns\n\
+elements from original iterable grouped in chunks of n.\n");
+
+static PyTypeObject chunk_type = {
+    PyVarObject_HEAD_INIT(NULL, 0)
+    "itertools.chunk",                  /* tp_name */
+    sizeof(chunkobject),                /* tp_basicsize */
+    0,                                  /* tp_itemsize */
+    /* methods */
+    (destructor)chunk_dealloc,          /* tp_dealloc */
+    0,                                  /* tp_print */
+    0,                                  /* tp_getattr */
+    0,                                  /* tp_setattr */
+    0,                                  /* tp_reserved */
+    0,                                  /* tp_repr */
+    0,                                  /* tp_as_number */
+    0,                                  /* tp_as_sequence */
+    0,                                  /* tp_as_mapping */
+    0,                                  /* tp_hash */
+    0,                                  /* tp_call */
+    0,                                  /* tp_str */
+    PyObject_GenericGetAttr,            /* tp_getattro */
+    0,                                  /* tp_setattro */
+    0,                                  /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
+        Py_TPFLAGS_BASETYPE,            /* tp_flags */
+    chunk_doc,                          /* tp_doc */
+    (traverseproc)chunk_traverse,       /* tp_traverse */
+    0,                                  /* tp_clear */
+    0,                                  /* tp_richcompare */
+    0,                                  /* tp_weaklistoffset */
+    PyObject_SelfIter,                  /* tp_iter */
+    (iternextfunc)chunk_next,           /* tp_iternext */
+    chunk_methods,                      /* tp_methods */
+    0,                                  /* tp_members */
+    0,                                  /* tp_getset */
+    0,                                  /* tp_base */
+    0,                                  /* tp_dict */
+    0,                                  /* tp_descr_get */
+    0,                                  /* tp_descr_set */
+    0,                                  /* tp_dictoffset */
+    0,                                  /* tp_init */
+    0,                                  /* tp_alloc */
+    chunk_new,                          /* tp_new */
+    PyObject_GC_Del,                    /* tp_free */
+};
+
+
 /* module level code ********************************************************/
 
 PyDoc_STRVAR(module_doc,
@@ -4513,6 +4714,7 @@
         &islice_type,
         &starmap_type,
         &chain_type,
+        &chunk_type,
         &compress_type,
         &filterfalse_type,
         &count_type,