diff -r b5ccdf7c032a Lib/test/test_bytes.py
--- a/Lib/test/test_bytes.py	Sun Aug 21 00:39:18 2011 +0200
+++ b/Lib/test/test_bytes.py	Sun Aug 21 16:32:09 2011 -0700
@@ -288,8 +288,22 @@
         self.assertEqual(self.type2test(b"").join(lst), b"abc")
         self.assertEqual(self.type2test(b"").join(tuple(lst)), b"abc")
         self.assertEqual(self.type2test(b"").join(iter(lst)), b"abc")
-        self.assertEqual(self.type2test(b".").join([b"ab", b"cd"]), b"ab.cd")
-        # XXX more...
+
+        abcd = [b"a", b"b", b"c", b"d"]
+        # bytes.join has a fast path for separators of at most 6 identical
+        # bytes; longer and mixed-byte separators use the generic loop.
+        seps = [b"." * i for i in range(1, 8)]
+        seps += [b".,", b"..,...", b"abcdef"]
+        for sep in seps:
+            self.assertEqual(self.type2test(sep).join(abcd),
+                             b"a" + sep + b"b" + sep + b"c" + sep + b"d")
+
+        # Items may mix bytes and bytearray.
+        mixed = [b"ab", bytearray(b"cd")]
+        self.assertEqual(self.type2test(b"..").join(mixed), b"ab..cd")
+
+        self.assertRaises(TypeError, self.type2test(b"").join, ['abcd'])
+        self.assertRaises(TypeError, self.type2test(b"").join, 123)
 
     def test_count(self):
         b = self.type2test(b'mississippi')
diff -r b5ccdf7c032a Objects/bytesobject.c
--- a/Objects/bytesobject.c	Sun Aug 21 00:39:18 2011 +0200
+++ b/Objects/bytesobject.c	Sun Aug 21 16:32:09 2011 -0700
@@ -1138,6 +1138,11 @@
 
     seqlen = PySequence_Size(seq);
     if (seqlen == 0) {
+        if (seplen == 0 && PyBytes_CheckExact(self)) {
+            Py_DECREF(seq);
+            Py_INCREF(self);
+            return (PyObject *)self;
+        }
         Py_DECREF(seq);
         return PyBytes_FromString("");
     }
@@ -1189,21 +1194,48 @@
     /* I'm not worried about a PyByteArray item growing because there's
        nowhere in this function where we release the GIL. */
     p = PyBytes_AS_STRING(res);
-    for (i = 0; i < seqlen; ++i) {
-        size_t n;
-        char *q;
-        if (i) {
-            Py_MEMCPY(p, sep, seplen);
-            p += seplen;
-        }
-        item = PySequence_Fast_GET_ITEM(seq, i);
-        n = Py_SIZE(item);
-        if (PyBytes_Check(item))
-            q = PyBytes_AS_STRING(item);
-        else
-            q = PyByteArray_AS_STRING(item);
-        Py_MEMCPY(p, q, n);
-        p += n;
+
+    /* Fast path for an empty separator, or a short one (at most 6 bytes)
+     * made of a single repeated byte: pre-fill sz bytes of the result with
+     * that byte, then copy each item over its slot, skipping seplen bytes
+     * between items.  sep[0..n-2] == sep[1..n-1] holds exactly when all
+     * n bytes of sep are equal. */
+    if (seplen <= 6 &&
+        (seplen <= 1 || memcmp(sep, sep + 1, seplen - 1) == 0)) {
+        const char *q;
+        size_t n;
+        if (seplen)
+            memset(p, sep[0], sz);
+        for (i = 0; i < seqlen; ++i) {
+            item = PySequence_Fast_GET_ITEM(seq, i);
+            n = Py_SIZE(item);
+            if (PyBytes_Check(item))
+                q = PyBytes_AS_STRING(item);
+            else
+                q = PyByteArray_AS_STRING(item);
+            Py_MEMCPY(p, q, n);
+            p += n + seplen;
+        }
+    }
+    else {
+        /* Generic path: copy the separator before every item but the
+         * first. */
+        const char *q;
+        size_t n;
+        for (i = 0; i < seqlen; ++i) {
+            if (i) {
+                Py_MEMCPY(p, sep, seplen);
+                p += seplen;
+            }
+            item = PySequence_Fast_GET_ITEM(seq, i);
+            n = Py_SIZE(item);
+            if (PyBytes_Check(item))
+                q = PyBytes_AS_STRING(item);
+            else
+                q = PyByteArray_AS_STRING(item);
+            Py_MEMCPY(p, q, n);
+            p += n;
+        }
     }
 
     Py_DECREF(seq);