Index: Objects/bytesobject.c =================================================================== --- Objects/bytesobject.c (revision 58048) +++ Objects/bytesobject.c (working copy) @@ -2104,7 +2104,7 @@ Py_LOCAL_INLINE(PyObject *) split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount) { - register Py_ssize_t i, j, count=0; + register Py_ssize_t i, j, count = 0; PyObject *str; PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); @@ -2113,7 +2113,7 @@ i = j = 0; while ((j < len) && (maxcount-- > 0)) { - for(; j list of bytes\n\ +"B.split([sep [, maxsplit]]) -> list of bytes\n\ \n\ Return a list of the bytes in the string B, using sep as the\n\ -delimiter. If maxsplit is given, at most maxsplit\n\ -splits are done."); +delimiter. If sep is not given, B is split on ASCII whitespace\n\ +characters (space, tab, return, newline, formfeed, vertical tab).\n\ +If maxsplit is given, at most maxsplit splits are done."); static PyObject * bytes_split(PyBytesObject *self, PyObject *args) { Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j; - Py_ssize_t maxsplit = -1, count=0; + Py_ssize_t maxsplit = -1, count = 0; const char *s = PyBytes_AS_STRING(self), *sub; - PyObject *list, *str, *subobj; + PyObject *list, *str, *subobj = Py_None; #ifdef USE_FAST Py_ssize_t pos; #endif - if (!PyArg_ParseTuple(args, "O|n:split", &subobj, &maxsplit)) + if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit)) return NULL; if (maxsplit < 0) maxsplit = PY_SSIZE_T_MAX; + + if (subobj == Py_None) + return split_whitespace(s, len, maxsplit); + if (PyBytes_Check(subobj)) { sub = PyBytes_AS_STRING(subobj); n = PyBytes_GET_SIZE(subobj); @@ -2167,7 +2211,7 @@ PyErr_SetString(PyExc_ValueError, "empty separator"); return NULL; } - else if (n == 1) + if (n == 1) return split_char(s, len, sub[0], maxsplit); list = PyList_New(PREALLOC_SIZE(maxsplit)); @@ -2293,26 +2337,71 @@ return NULL; } +Py_LOCAL_INLINE(PyObject *) +rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxcount) +{ + register Py_ssize_t i, j, count = 0; + PyObject *str; + PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); + + if (list == NULL) + return NULL; + + for (i = j = len - 1; i >= 0; ) { + /* find a token */ + while (i >= 0 && Py_UNICODE_ISSPACE(s[i])) + i--; + j = i; + while (i >= 0 && !Py_UNICODE_ISSPACE(s[i])) + i--; + if (j > i) { + if (maxcount-- <= 0) + break; + SPLIT_ADD(s, i + 1, j + 1); + while (i >= 0 && Py_UNICODE_ISSPACE(s[i])) + i--; + j = i; + } + } + if (j >= 0) { + SPLIT_ADD(s, 0, j + 1); + } + FIX_PREALLOC_SIZE(list); + if (PyList_Reverse(list) < 0) + goto onError; + + return list; + + onError: + Py_DECREF(list); + return NULL; +} + PyDoc_STRVAR(rsplit__doc__, "B.rsplit(sep [,maxsplit]) -> list of bytes\n\ \n\ Return a list of the sections in the byte B, using sep as the\n\ delimiter, starting at the end of the bytes and working\n\ -to the front. If maxsplit is given, at most maxsplit splits are\n\ -done."); +to the front. If sep is not given, B is split on ASCII whitespace\n\ +characters (space, tab, return, newline, formfeed, vertical tab).\n\ +If maxsplit is given, at most maxsplit splits are done."); static PyObject * bytes_rsplit(PyBytesObject *self, PyObject *args) { Py_ssize_t len = PyBytes_GET_SIZE(self), n, i, j; - Py_ssize_t maxsplit = -1, count=0; + Py_ssize_t maxsplit = -1, count = 0; const char *s = PyBytes_AS_STRING(self), *sub; - PyObject *list, *str, *subobj; + PyObject *list, *str, *subobj = Py_None; - if (!PyArg_ParseTuple(args, "O|n:rsplit", &subobj, &maxsplit)) + if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit)) return NULL; if (maxsplit < 0) maxsplit = PY_SSIZE_T_MAX; + + if (subobj == Py_None) + return rsplit_whitespace(s, len, maxsplit); + if (PyBytes_Check(subobj)) { sub = PyBytes_AS_STRING(subobj); n = PyBytes_GET_SIZE(subobj); Index: Lib/test/test_bytes.py =================================================================== --- Lib/test/test_bytes.py (revision 58048) +++ Lib/test/test_bytes.py (working copy) @@ -617,17 +617,35 @@ self.assertEqual(b.split(b'i'), [b'm', b'ss', b'ss', b'pp', b'']) self.assertEqual(b.split(b'ss'), [b'mi', b'i', b'ippi']) self.assertEqual(b.split(b'w'), [b]) - # require an arg (no magic whitespace split) - self.assertRaises(TypeError, b.split) + def test_split_whitespace(self): + for b in (b' arf barf ', b'arf\tbarf', b'arf\nbarf', b'arf\rbarf', + b'arf\fbarf', b'arf\vbarf'): + self.assertEqual(b.split(), [b'arf', b'barf']) + self.assertEqual(b.split(None), [b'arf', b'barf']) + self.assertEqual(b.split(None, 2), [b'arf', b'barf']) + self.assertEqual(b' a bb c '.split(None, 0), [b'a bb c ']) + self.assertEqual(b' a bb c '.split(None, 1), [b'a', b'bb c ']) + self.assertEqual(b' a bb c '.split(None, 2), [b'a', b'bb', b'c ']) + self.assertEqual(b' a bb c '.split(None, 3), [b'a', b'bb', b'c']) + def test_rsplit(self): b = b'mississippi' self.assertEqual(b.rsplit(b'i'), [b'm', b'ss', b'ss', b'pp', b'']) self.assertEqual(b.rsplit(b'ss'), [b'mi', b'i', b'ippi']) self.assertEqual(b.rsplit(b'w'), [b]) - # require an arg (no magic whitespace split) - self.assertRaises(TypeError, b.rsplit) + def test_rsplit_whitespace(self): + for b in (b' arf barf ', b'arf\tbarf', b'arf\nbarf', b'arf\rbarf', + b'arf\fbarf', b'arf\vbarf'): + self.assertEqual(b.rsplit(), [b'arf', b'barf']) + self.assertEqual(b.rsplit(None), [b'arf', b'barf']) + self.assertEqual(b.rsplit(None, 2), [b'arf', b'barf']) + self.assertEqual(b' a bb c '.rsplit(None, 0), [b' a bb c']) + self.assertEqual(b' a bb c '.rsplit(None, 1), [b' a bb', b'c']) + self.assertEqual(b' a bb c '.rsplit(None,2), [b' a', b'bb', b'c']) + self.assertEqual(b' a bb c '.rsplit(None, 3), [b'a', b'bb', b'c']) + def test_partition(self): b = b'mississippi' self.assertEqual(b.partition(b'ss'), (b'mi', b'ss', b'issippi'))