diff -r 51ce9830d85a Lib/test/test_bytes.py
--- a/Lib/test/test_bytes.py	Sat Oct 13 11:58:23 2012 -0400
+++ b/Lib/test/test_bytes.py	Sun Oct 14 01:06:42 2012 +0300
@@ -288,8 +288,22 @@
         self.assertEqual(self.type2test(b"").join(lst), b"abc")
         self.assertEqual(self.type2test(b"").join(tuple(lst)), b"abc")
         self.assertEqual(self.type2test(b"").join(iter(lst)), b"abc")
-        self.assertEqual(self.type2test(b".").join([b"ab", b"cd"]), b"ab.cd")
-        # XXX more...
+        dot_join = self.type2test(b".:").join
+        self.assertEqual(dot_join([b"ab", b"cd"]), b"ab.:cd")
+        self.assertEqual(dot_join([memoryview(b"ab"), b"cd"]), b"ab.:cd")
+        self.assertEqual(dot_join([b"ab", memoryview(b"cd")]), b"ab.:cd")
+        self.assertEqual(dot_join([bytearray(b"ab"), b"cd"]), b"ab.:cd")
+        self.assertEqual(dot_join([b"ab", bytearray(b"cd")]), b"ab.:cd")
+        # Stress it with many items
+        seq = [b"abc"] * 1000
+        expected = b"abc" + b".:abc" * 999
+        self.assertEqual(dot_join(seq), expected)
+        # Error handling and cleanup when some item in the middle of the
+        # sequence has the wrong type.
+        with self.assertRaises(TypeError):
+            dot_join([bytearray(b"ab"), "cd", b"ef"])
+        with self.assertRaises(TypeError):
+            dot_join([memoryview(b"ab"), "cd", b"ef"])
 
     def test_count(self):
         b = self.type2test(b'mississippi')
diff -r 51ce9830d85a Objects/bytearrayobject.c
--- a/Objects/bytearrayobject.c	Sat Oct 13 11:58:23 2012 -0400
+++ b/Objects/bytearrayobject.c	Sun Oct 14 01:06:42 2012 +0300
@@ -2569,75 +2569,126 @@
 in between each pair, and return the result as a new bytearray.");
 
 static PyObject *
-bytearray_join(PyByteArrayObject *self, PyObject *it)
+bytearray_join(PyObject *self, PyObject *orig)
 {
-    PyObject *seq;
-    Py_ssize_t mysize = Py_SIZE(self);
-    Py_ssize_t i;
-    Py_ssize_t n;
-    PyObject **items;
-    Py_ssize_t totalsize = 0;
-    PyObject *result;
-    char *dest;
-
-    seq = PySequence_Fast(it, "can only join an iterable");
-    if (seq == NULL)
+    char *sep = PyByteArray_AS_STRING(self);
+    const Py_ssize_t seplen = PyByteArray_GET_SIZE(self);
+    PyObject *res = NULL;
+    char *p;
+    Py_ssize_t seqlen = 0;
+    Py_ssize_t sz = 0;
+    Py_ssize_t i, j, nitems, nbufs;
+    PyObject *seq, *item;
+    Py_buffer *buffers = NULL;
+#define NB_STATIC_BUFFERS 10
+    Py_buffer static_buffers[NB_STATIC_BUFFERS];
+
+    seq = PySequence_Fast(orig, "can only join an iterable");
+    if (seq == NULL) {
         return NULL;
-    n = PySequence_Fast_GET_SIZE(seq);
-    items = PySequence_Fast_ITEMS(seq);
-
-    /* Compute the total size, and check that they are all bytes */
-    /* XXX Shouldn't we use _getbuffer() on these items instead? */
-    for (i = 0; i < n; i++) {
-        PyObject *obj = items[i];
-        if (!PyByteArray_Check(obj) && !PyBytes_Check(obj)) {
-            PyErr_Format(PyExc_TypeError,
-                         "can only join an iterable of bytes "
-                         "(item %ld has type '%.100s')",
-                         /* XXX %ld isn't right on Win64 */
-                         (long)i, Py_TYPE(obj)->tp_name);
+    }
+
+    seqlen = PySequence_Size(seq);
+    if (seqlen == 0) {
+        Py_DECREF(seq);
+        return PyByteArray_FromStringAndSize("", 0);
+    }
+    if (seqlen > NB_STATIC_BUFFERS) {
+        buffers = PyMem_NEW(Py_buffer, seqlen);
+        if (buffers == NULL) {
+            Py_DECREF(seq);
+            return NULL;
+        }
+    }
+    else {
+        buffers = static_buffers;
+    }
+
+    /* There is at least one thing to join.
+     * Do a pre-pass to figure out the total amount of space we'll
+     * need (sz), and see whether all arguments are buffer-compatible.
+     */
+    for (i = nitems = nbufs = 0; i < seqlen; i++) {
+        Py_ssize_t itemlen;
+        item = PySequence_Fast_GET_ITEM(seq, i);
+        if (PyBytes_CheckExact(item)) {
+            Py_INCREF(item);
+            itemlen = PyBytes_GET_SIZE(item);
+        }
+        else {
+            if (_getbuffer(item, &buffers[nbufs]) < 0) {
+                PyErr_Format(PyExc_TypeError,
+                             "sequence item %zd: expected bytes, bytearray, "
+                             "or an object with a buffer interface, %.80s found",
+                             i, Py_TYPE(item)->tp_name);
+                goto error;
+            }
+            itemlen = buffers[nbufs].len;
+            nbufs++;
+        }
+        nitems = i + 1;   /* for error cleanup */
+        if (itemlen > PY_SSIZE_T_MAX - sz) {
+            PyErr_SetString(PyExc_OverflowError,
+                            "join() result is too long for bytes");
             goto error;
         }
-        if (i > 0)
-            totalsize += mysize;
-        totalsize += Py_SIZE(obj);
-        if (totalsize < 0) {
-            PyErr_NoMemory();
-            goto error;
+        sz += itemlen;
+        if (i != 0) {
+            if (seplen > PY_SSIZE_T_MAX - sz) {
+                PyErr_SetString(PyExc_OverflowError,
                                "join() result is too long for bytes");
+                goto error;
+            }
+            sz += seplen;
         }
     }
 
-    /* Allocate the result, and copy the bytes */
-    result = PyByteArray_FromStringAndSize(NULL, totalsize);
-    if (result == NULL)
+    /* Allocate result space. */
+    res = PyByteArray_FromStringAndSize((char *) NULL, sz);
+    if (res == NULL)
         goto error;
-    dest = PyByteArray_AS_STRING(result);
-    for (i = 0; i < n; i++) {
-        PyObject *obj = items[i];
-        Py_ssize_t size = Py_SIZE(obj);
-        char *buf;
-        if (PyByteArray_Check(obj))
-            buf = PyByteArray_AS_STRING(obj);
+
+    /* Catenate everything. */
+    p = PyByteArray_AS_STRING(res);
+    for (i = j = 0; i < seqlen; ++i) {
+        Py_ssize_t n;
+        char *q;
+        if (i) {
+            Py_MEMCPY(p, sep, seplen);
+            p += seplen;
+        }
+        item = PySequence_Fast_GET_ITEM(seq, i);
+        if (j < nbufs && buffers[j].obj == item) {
+            n = buffers[j].len;
+            q = buffers[j].buf;
+            j++;
+        }
+        else {
+            n = PyBytes_GET_SIZE(item);
+            q = PyBytes_AS_STRING(item);
+        }
+        Py_MEMCPY(p, q, n);
+        p += n;
+    }
+    goto done;
+
+error:
+    res = NULL;
+done:
+    for (i = j = 0; i < nitems; i++) {
+        item = PySequence_Fast_GET_ITEM(seq, i);
+        if (j < nbufs && buffers[j].obj == item)
+            PyBuffer_Release(&buffers[j++]);
         else
-            buf = PyBytes_AS_STRING(obj);
-        if (i) {
-            memcpy(dest, self->ob_bytes, mysize);
-            dest += mysize;
-        }
-        memcpy(dest, buf, size);
-        dest += size;
+            Py_DECREF(item);
     }
-
-    /* Done */
-    Py_DECREF(seq);
-    return result;
-
-    /* Error handling */
-  error:
-    Py_DECREF(seq);
-    return NULL;
+    if (buffers != static_buffers)
+        PyMem_FREE(buffers);
+    Py_DECREF(seq);
+    return res;
 }
+
 PyDoc_STRVAR(splitlines__doc__,
 "B.splitlines([keepends]) -> list of lines\n\
 \n\
diff -r 51ce9830d85a Objects/bytesobject.c
--- a/Objects/bytesobject.c	Sat Oct 13 11:58:23 2012 -0400
+++ b/Objects/bytesobject.c	Sun Oct 14 01:06:42 2012 +0300
@@ -1114,11 +1114,14 @@
     PyObject *res = NULL;
     char *p;
     Py_ssize_t seqlen = 0;
-    size_t sz = 0;
-    Py_ssize_t i;
+    Py_ssize_t sz = 0;
+    Py_ssize_t i, j, nitems, nbufs;
     PyObject *seq, *item;
-
-    seq = PySequence_Fast(orig, "");
+    Py_buffer *buffers = NULL;
+#define NB_STATIC_BUFFERS 10
+    Py_buffer static_buffers[NB_STATIC_BUFFERS];
+
+    seq = PySequence_Fast(orig, "can only join an iterable");
     if (seq == NULL) {
         return NULL;
     }
@@ -1136,64 +1139,99 @@
             return item;
         }
     }
-
-    /* There are at least two things to join, or else we have a subclass
-     * of the builtin types in the sequence.
-     * Do a pre-pass to figure out the total amount of space we'll
-     * need (sz), and see whether all argument are bytes.
-     */
-    /* XXX Shouldn't we use _getbuffer() on these items instead? */
-    for (i = 0; i < seqlen; i++) {
-        const size_t old_sz = sz;
-        item = PySequence_Fast_GET_ITEM(seq, i);
-        if (!PyBytes_Check(item) && !PyByteArray_Check(item)) {
-            PyErr_Format(PyExc_TypeError,
-                         "sequence item %zd: expected bytes,"
-                         " %.80s found",
-                         i, Py_TYPE(item)->tp_name);
-            Py_DECREF(seq);
-            return NULL;
-        }
-        sz += Py_SIZE(item);
-        if (i != 0)
-            sz += seplen;
-        if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
-            PyErr_SetString(PyExc_OverflowError,
-                            "join() result is too long for bytes");
+    if (seqlen > NB_STATIC_BUFFERS) {
+        buffers = PyMem_NEW(Py_buffer, seqlen);
+        if (buffers == NULL) {
             Py_DECREF(seq);
             return NULL;
         }
     }
+    else {
+        buffers = static_buffers;
+    }
+
+    /* There are at least two things to join, or else we have a subclass
+     * of the builtin types in the sequence.
+     * Do a pre-pass to figure out the total amount of space we'll
+     * need (sz), and see whether all arguments are buffer-compatible.
+     */
+    for (i = nitems = nbufs = 0; i < seqlen; i++) {
+        Py_ssize_t itemlen;
+        item = PySequence_Fast_GET_ITEM(seq, i);
+        if (PyBytes_CheckExact(item)) {
+            Py_INCREF(item);
+            itemlen = PyBytes_GET_SIZE(item);
+        }
+        else {
+            if (_getbuffer(item, &buffers[nbufs]) < 0) {
+                PyErr_Format(PyExc_TypeError,
+                             "sequence item %zd: expected bytes, bytearray, "
+                             "or an object with a buffer interface, %.80s found",
+                             i, Py_TYPE(item)->tp_name);
+                goto error;
+            }
+            itemlen = buffers[nbufs].len;
+            nbufs++;
+        }
+        nitems = i + 1;   /* for error cleanup */
+        if (itemlen > PY_SSIZE_T_MAX - sz) {
+            PyErr_SetString(PyExc_OverflowError,
+                            "join() result is too long for bytes");
+            goto error;
+        }
+        sz += itemlen;
+        if (i != 0) {
+            if (seplen > PY_SSIZE_T_MAX - sz) {
+                PyErr_SetString(PyExc_OverflowError,
                                "join() result is too long for bytes");
+                goto error;
+            }
+            sz += seplen;
+        }
+    }
 
     /* Allocate result space. */
-    res = PyBytes_FromStringAndSize((char*)NULL, sz);
-    if (res == NULL) {
-        Py_DECREF(seq);
-        return NULL;
-    }
+    res = PyBytes_FromStringAndSize((char *) NULL, sz);
+    if (res == NULL)
+        goto error;
 
     /* Catenate everything. */
-    /* I'm not worried about a PyByteArray item growing because there's
-       nowhere in this function where we release the GIL. */
     p = PyBytes_AS_STRING(res);
-    for (i = 0; i < seqlen; ++i) {
-        size_t n;
+    for (i = j = 0; i < seqlen; ++i) {
+        Py_ssize_t n;
         char *q;
         if (i) {
             Py_MEMCPY(p, sep, seplen);
             p += seplen;
         }
         item = PySequence_Fast_GET_ITEM(seq, i);
-        n = Py_SIZE(item);
-        if (PyBytes_Check(item))
+        if (j < nbufs && buffers[j].obj == item) {
+            n = buffers[j].len;
+            q = buffers[j].buf;
+            j++;
+        }
+        else {
+            n = PyBytes_GET_SIZE(item);
             q = PyBytes_AS_STRING(item);
-        else
-            q = PyByteArray_AS_STRING(item);
+        }
         Py_MEMCPY(p, q, n);
         p += n;
     }
-
-    Py_DECREF(seq);
+    goto done;
+
+error:
+    res = NULL;
+done:
+    for (i = j = 0; i < nitems; i++) {
+        item = PySequence_Fast_GET_ITEM(seq, i);
+        if (j < nbufs && buffers[j].obj == item)
+            PyBuffer_Release(&buffers[j++]);
+        else
+            Py_DECREF(item);
+    }
+    if (buffers != static_buffers)
+        PyMem_FREE(buffers);
+    Py_DECREF(seq);
    return res;
 }