Add itertools.chunkify(iterable, chunksize[, pad, partial=False]).

chunkify yields successive lists of chunksize items taken from the
iterable.  A trailing short chunk is discarded by default, yielded as-is
when partial is true, or filled up to chunksize with the pad value when
pad is given.  pad and partial are mutually exclusive.  A chunksize of 0
yields an endless stream of empty lists.

Index: Lib/test/test_itertools.py
===================================================================
--- Lib/test/test_itertools.py	(revision 59186)
+++ Lib/test/test_itertools.py	(working copy)
@@ -478,6 +478,40 @@
         for f in (ifilter, ifilterfalse, imap, takewhile, dropwhile, starmap):
             self.assertRaises(StopIteration, f(lambda x:x, []).next)
             self.assertRaises(StopIteration, f(lambda x:x, StopNow()).next)
+        self.assertRaises(StopIteration, chunkify([], 1).next)
+        self.assertRaises(StopIteration, chunkify([], 1, pad=None).next)
+
+    def test_chunkify(self):
+        #invalid chunksize
+        self.assertRaises(ValueError, chunkify, [], -1)
+        #contradictory kwargs
+        self.assertRaises(ValueError, chunkify, [], 0, partial=True, pad=True)
+        #non-iterable argument
+        self.assertRaises(TypeError, chunkify, 10, 1)
+        self.assertEqual([["a"], ["b"], ["c"]], list(chunkify("abc", 1)))
+        for item in chunkify("abc" * 100, 3):
+            self.assertEqual(len(item), 3)
+        self.assertEqual([["a", "b", "c"]], list(chunkify("abcd", 3)))
+        self.assertEqual([], chunkify([], 0).next())
+
+        self.assertEqual([["a", "b", "c"], ["d"]], list(chunkify("abcd", 3, partial=True)))
+        self.assertEqual([["a", "b", "c"]], list(chunkify("abc", 3, partial=True)))
+        self.assertEqual([["a", "b", "c"], ["d", None, None]], list(chunkify("abcd", 3, pad=None)))
+
+        #test with infinite iterator
+        chunks = chunkify(repeat(None), 4)
+        for ii in xrange(0, 10):
+            self.assertEqual([None]*4, chunks.next())
+
+        #an error raised part-way through a chunk must propagate
+        def broken():
+            yield "a"
+            raise ZeroDivisionError
+        self.assertRaises(ZeroDivisionError, list, chunkify(broken(), 3))
+        self.assertRaises(ZeroDivisionError, list,
+                          chunkify(broken(), 3, partial=True))
+        self.assertRaises(ZeroDivisionError, list,
+                          chunkify(broken(), 3, pad=None))
 
 class TestGC(unittest.TestCase):
 
@@ -533,6 +567,10 @@
     def test_takewhile(self):
         a = []
         self.makecycle(takewhile(bool, [1, 0, a, a]), a)
+
+    def test_chunkify(self):
+        a = []
+        self.makecycle(chunkify(a, 1), a)
 
 def R(seqn):
     'Regular generator'
@@ -734,6 +772,17 @@
             self.assertRaises(TypeError, tee, X(s))
             self.assertRaises(TypeError, list, tee(N(s))[0])
             self.assertRaises(ZeroDivisionError, list, tee(E(s))[0])
+
+    def test_chunkify(self):
+        for s in ("123", "", range(1000), ('do', 1.2), xrange(2000,2200,5)):
+            for g in (G, I, Ig, S, L, R):
+                self.assertEqual(
+                    list(chunkify(g(s), 1)),
+                    [[v] for v in g(s)]
+                )
+            self.assertRaises(TypeError, chunkify, X(s), 1)
+            self.assertRaises(TypeError, list, chunkify(N(s), 1))
+            self.assertRaises(ZeroDivisionError, list, chunkify(E(s), 1))
 
 class LengthTransparency(unittest.TestCase):
 
@@ -807,7 +856,7 @@
     def test_keywords_in_subclass(self):
         # count is not subclassable...
         for cls in (repeat, izip, ifilter, ifilterfalse, chain, imap,
-                    starmap, islice, takewhile, dropwhile, cycle):
+                    starmap, islice, takewhile, dropwhile, cycle, chunkify):
             class Subclass(cls):
                 def __init__(self, newarg=None, *args):
                     cls.__init__(self, *args)
Index: Modules/itertoolsmodule.c
===================================================================
--- Modules/itertoolsmodule.c	(revision 59186)
+++ Modules/itertoolsmodule.c	(working copy)
@@ -2745,6 +2745,180 @@
 	PyObject_GC_Del,		/* tp_free */
 };
 
+/* chunkify object ***********************************************************/
+
+typedef struct {
+	PyObject_HEAD
+	Py_ssize_t chunksize;	/* items per chunk, never negative */
+	PyObject *iter;		/* iterator */
+	char partial;		/* return partial blocks */
+	PyObject *pad;		/* value to pad partial blocks with */
+} chunkifyobject;
+
+static PyObject *
+chunkify_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+	chunkifyobject *co;
+	PyObject *iter;
+	Py_ssize_t chunksize = -1;
+	char partial = 0;
+	PyObject *pad = NULL;
+	static char *kwlist[] = {"iter", "chunksize", "pad", "partial", NULL};
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwds, "On|Ob:chunkify", kwlist,
+					 &iter, &chunksize, &pad, &partial))
+		return NULL;
+
+	if (partial && pad) {
+		PyErr_SetString(PyExc_ValueError,
+				"can't have both pad and partial set");
+		return NULL;
+	}
+	if (chunksize < 0) {
+		PyErr_SetString(PyExc_ValueError,
+				"chunksize must not be negative");
+		return NULL;
+	}
+
+	/* from here on iter is a new reference owned by this function */
+	iter = PyObject_GetIter(iter);
+	if (iter == NULL)
+		return NULL;
+
+	co = (chunkifyobject *)type->tp_alloc(type, 0);
+	if (co == NULL) {
+		Py_DECREF(iter);
+		return NULL;
+	}
+	co->iter = iter;
+	co->chunksize = chunksize;
+	co->partial = !!partial;
+	co->pad = pad;
+	Py_XINCREF(co->pad);
+	return (PyObject *)co;
+}
+
+static void
+chunkify_dealloc(chunkifyobject *co)
+{
+	PyObject_GC_UnTrack(co);
+	Py_XDECREF(co->iter);
+	Py_XDECREF(co->pad);
+	Py_TYPE(co)->tp_free(co);
+}
+
+static PyObject *
+chunkify_next(chunkifyobject *co)
+{
+	Py_ssize_t ii;
+	PyObject *item;
+	PyObject *rv;
+
+	assert(co->iter && PyIter_Check(co->iter) && co->chunksize >= 0);
+	rv = PyList_New(0);
+	if (rv == NULL)
+		return NULL;
+	for (ii = 0; ii < co->chunksize; ii++) {
+		item = PyIter_Next(co->iter);
+		if (item == NULL) {
+			/* NULL means exhausted *or* failed; a pending
+			   exception must never be hidden inside a chunk */
+			if (PyErr_Occurred())
+				goto fail;
+			/* nothing collected: end of iteration */
+			if (ii == 0)
+				goto fail;
+			/* let the partial list be returned */
+			if (co->partial)
+				break;
+			/* discard the partial list */
+			if (co->pad == NULL)
+				goto fail;
+			/* pad out the partial list */
+			for (; ii < co->chunksize; ii++) {
+				if (PyList_Append(rv, co->pad) < 0)
+					goto fail;
+			}
+			break;
+		}
+		if (PyList_Append(rv, item) < 0) {
+			Py_DECREF(item);
+			goto fail;
+		}
+		Py_DECREF(item);
+	}
+	return rv;
+
+fail:
+	Py_DECREF(rv);
+	return NULL;
+}
+
+static int
+chunkify_traverse(chunkifyobject *co, visitproc visit, void *arg)
+{
+	Py_VISIT(co->iter);
+	Py_VISIT(co->pad);
+	return 0;
+}
+
+PyDoc_STRVAR(chunkify_doc,
+	"chunkify(iterable, chunksize[, pad, partial=False]) -> chunkify object\n"
+	"\n"
+	"Iterates over the passed iterable, yielding lists of chunksize\n"
+	"items.  If chunksize is 0, yields an endless stream of empty\n"
+	"lists.  If pad is given, a trailing partial chunk is padded out\n"
+	"to chunksize with that value.  If partial is true, a trailing\n"
+	"partial chunk is yielded as-is.  pad and partial are mutually\n"
+	"exclusive.  If neither is set, a trailing partial chunk is\n"
+	"discarded."
+	);
+
+static PyTypeObject chunkify_type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	"itertools.chunkify",		/* tp_name */
+	sizeof(chunkifyobject),		/* tp_basicsize */
+	0,				/* tp_itemsize */
+	/* methods */
+	(destructor)chunkify_dealloc,	/* tp_dealloc */
+	0,				/* tp_print */
+	0,				/* tp_getattr */
+	0,				/* tp_setattr */
+	0,				/* tp_compare */
+	0,				/* tp_repr */
+	0,				/* tp_as_number */
+	0,				/* tp_as_sequence */
+	0,				/* tp_as_mapping */
+	0,				/* tp_hash */
+	0,				/* tp_call */
+	0,				/* tp_str */
+	PyObject_GenericGetAttr,	/* tp_getattro */
+	0,				/* tp_setattro */
+	0,				/* tp_as_buffer */
+	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
+		Py_TPFLAGS_BASETYPE,	/* tp_flags */
+	chunkify_doc,			/* tp_doc */
+	(traverseproc)chunkify_traverse,	/* tp_traverse */
+	0,				/* tp_clear */
+	0,				/* tp_richcompare */
+	0,				/* tp_weaklistoffset */
+	PyObject_SelfIter,		/* tp_iter */
+	(iternextfunc)chunkify_next,	/* tp_iternext */
+	0,				/* tp_methods */
+	0,				/* tp_members */
+	0,				/* tp_getset */
+	0,				/* tp_base */
+	0,				/* tp_dict */
+	0,				/* tp_descr_get */
+	0,				/* tp_descr_set */
+	0,				/* tp_dictoffset */
+	0,				/* tp_init */
+	0,				/* tp_alloc */
+	chunkify_new,			/* tp_new */
+	PyObject_GC_Del,		/* tp_free */
+};
+
+
 /* module level code ********************************************************/
 
 PyDoc_STRVAR(module_doc,
@@ -2769,7 +2943,7 @@
 takewhile(pred, seq) --> seq[0], seq[1], until pred fails\n\
 dropwhile(pred, seq) --> seq[n], seq[n+1], starting when pred fails\n\
 groupby(iterable[, keyfunc]) --> sub-iterators grouped by value of keyfunc(v)\n\
-");
+chunkify(iterable, chunksize[, pad, partial=False]) --> lists of chunksize items\n");
 
 
 static PyMethodDef module_methods[] = {
@@ -2798,6 +2972,7 @@
 		&iziplongest_type,
 		&repeat_type,
 		&groupby_type,
+		&chunkify_type,
 		NULL
 	};
 