? launch ? patch-strsplit.diff ? rsplitbug.py ? ulog ? yo Index: Lib/test/string_tests.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Lib/test/string_tests.py,v retrieving revision 1.35 diff -c -r1.35 string_tests.py *** Lib/test/string_tests.py 15 Dec 2003 18:49:19 -0000 1.35 --- Lib/test/string_tests.py 29 Dec 2003 02:16:35 -0000 *************** *** 175,213 **** def test_split(self): self.checkequal(['this', 'is', 'the', 'split', 'function'], 'this is the split function', 'split') ! self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'split', '|') ! self.checkequal(['a', 'b', 'c|d'], 'a|b|c|d', 'split', '|', 2) self.checkequal(['a', 'b c d'], 'a b c d', 'split', None, 1) self.checkequal(['a', 'b', 'c d'], 'a b c d', 'split', None, 2) self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'split', None, 3) self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'split', None, 4) self.checkequal(['a b c d'], 'a b c d', 'split', None, 0) self.checkequal(['a', 'b', 'c d'], 'a b c d', 'split', None, 2) ! self.checkequal(['a', 'b', 'c', 'd'], 'a b c d ', 'split') self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//') self.checkequal(['endcase ', ''], 'endcase test', 'split', 'test') self.checkraises(TypeError, 'hello', 'split', 42, 42, 42) def test_rsplit(self): self.checkequal(['this', 'is', 'the', 'rsplit', 'function'], 'this is the rsplit function', 'rsplit') ! self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|') ! self.checkequal(['a|b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|', 2) self.checkequal(['a b c', 'd'], 'a b c d', 'rsplit', None, 1) self.checkequal(['a b', 'c', 'd'], 'a b c d', 'rsplit', None, 2) self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'rsplit', None, 3) self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'rsplit', None, 4) self.checkequal(['a b c d'], 'a b c d', 'rsplit', None, 0) - self.checkequal(['a, b, c', 'd'], 'a, b, c, d', 'rsplit', ', ', 1) - self.checkequal(['a, b', 'c', 'd'], 'a, b, c, d', 'rsplit', ', ', 2) - self.checkequal(['a', 'b', 'c', 'd'], 'a, b, c, d', 'rsplit', ', ', 3) - self.checkequal(['a', 'b', 'c', 'd'], 'a, b, c, d', 'rsplit', ', ', 4) - self.checkequal(['a, b, c, d'], 'a, b, c, d', 'rsplit', ', ', 0) self.checkequal(['a b', 'c', 'd'], 'a b c d', 'rsplit', None, 2) ! self.checkequal(['a\x00b', 'c'], 'a\x00b\x00c', 'rsplit', '\x00', 1) ! self.checkequal(['', ''], 'abcd', 'rsplit', 'abcd') self.checkequal([u'a b', u'c', u'd'], 'a b c d', 'rsplit', u' ', 2) def test_strip(self): self.checkequal('hello', ' hello ', 'strip') --- 175,256 ---- def test_split(self): self.checkequal(['this', 'is', 'the', 'split', 'function'], 'this is the split function', 'split') ! ! # by whitespace ! self.checkequal(['a', 'b', 'c', 'd'], 'a b c d ', 'split') self.checkequal(['a', 'b c d'], 'a b c d', 'split', None, 1) self.checkequal(['a', 'b', 'c d'], 'a b c d', 'split', None, 2) self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'split', None, 3) self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'split', None, 4) self.checkequal(['a b c d'], 'a b c d', 'split', None, 0) self.checkequal(['a', 'b', 'c d'], 'a b c d', 'split', None, 2) ! ! # by a char ! self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'split', '|') ! self.checkequal(['a', 'b|c|d'], 'a|b|c|d', 'split', '|', 1) ! self.checkequal(['a', 'b', 'c|d'], 'a|b|c|d', 'split', '|', 2) ! self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'split', '|', 3) ! self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'split', '|', 4) ! self.checkequal(['a|b|c|d'], 'a|b|c|d', 'split', '|', 0) ! self.checkequal(['a', '', 'b||c||d'], 'a||b||c||d', 'split', '|', 2) ! self.checkequal(['endcase ', ''], 'endcase |', 'split', '|') ! self.checkequal(['a', '', 'b\x00c\x00d'], 'a\x00\x00b\x00c\x00d', 'split', '\x00', 2) ! ! # by string self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//') + self.checkequal(['a', 'b//c//d'], 'a//b//c//d', 'split', '//', 1) + self.checkequal(['a', 'b', 'c//d'], 'a//b//c//d', 'split', '//', 2) + self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//', 3) + self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//', 4) + self.checkequal(['a//b//c//d'], 'a//b//c//d', 'split', '//', 0) + self.checkequal(['a', '', 'b////c////d'], 'a////b////c////d', 'split', '//', 2) self.checkequal(['endcase ', ''], 'endcase test', 'split', 'test') + # mixed use of str and unicode + self.checkequal([u'a', u'b', u'c d'], 'a b c d', 'split', u' ', 2) + + # argument type self.checkraises(TypeError, 'hello', 'split', 42, 42, 42) def test_rsplit(self): self.checkequal(['this', 'is', 'the', 'rsplit', 'function'], 'this is the rsplit function', 'rsplit') ! ! # by whitespace ! self.checkequal(['a', 'b', 'c', 'd'], 'a b c d ', 'rsplit') self.checkequal(['a b c', 'd'], 'a b c d', 'rsplit', None, 1) self.checkequal(['a b', 'c', 'd'], 'a b c d', 'rsplit', None, 2) self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'rsplit', None, 3) self.checkequal(['a', 'b', 'c', 'd'], 'a b c d', 'rsplit', None, 4) self.checkequal(['a b c d'], 'a b c d', 'rsplit', None, 0) self.checkequal(['a b', 'c', 'd'], 'a b c d', 'rsplit', None, 2) ! ! # by a char ! self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|') ! self.checkequal(['a|b|c', 'd'], 'a|b|c|d', 'rsplit', '|', 1) ! self.checkequal(['a|b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|', 2) ! self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|', 3) ! self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|', 4) ! self.checkequal(['a|b|c|d'], 'a|b|c|d', 'rsplit', '|', 0) ! self.checkequal(['a||b||c', '', 'd'], 'a||b||c||d', 'rsplit', '|', 2) ! self.checkequal(['', ' begincase'], '| begincase', 'rsplit', '|') ! self.checkequal(['a\x00\x00b', 'c', 'd'], 'a\x00\x00b\x00c\x00d', 'rsplit', '\x00', 2) ! ! # by string ! self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//') ! self.checkequal(['a//b//c', 'd'], 'a//b//c//d', 'rsplit', '//', 1) ! self.checkequal(['a//b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//', 2) ! self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//', 3) ! self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//', 4) ! self.checkequal(['a//b//c//d'], 'a//b//c//d', 'rsplit', '//', 0) ! self.checkequal(['a////b////c', '', 'd'], 'a////b////c////d', 'rsplit', '//', 2) ! self.checkequal(['', ' begincase'], 'test begincase', 'rsplit', 'test') ! ! # mixed use of str and unicode self.checkequal([u'a b', u'c', u'd'], 'a b c d', 'rsplit', u' ', 2) + + # argument type + self.checkraises(TypeError, 'hello', 'rsplit', 42, 42, 42) def test_strip(self): self.checkequal('hello', ' hello ', 'strip') Index: Objects/stringobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/stringobject.c,v retrieving revision 2.216 diff -c -r2.216 stringobject.c *** Objects/stringobject.c 22 Dec 2003 16:31:41 -0000 2.216 --- Objects/stringobject.c 29 Dec 2003 02:16:36 -0000 *************** *** 1282,1293 **** #define STRIPNAME(i) (stripformat[i]+3) static PyObject * split_whitespace(const char *s, int len, int maxsplit) { ! int i, j, err; ! PyObject* item; PyObject *list = PyList_New(0); if (list == NULL) --- 1282,1316 ---- #define STRIPNAME(i) (stripformat[i]+3) + #define SPLIT_APPEND(data, left, right) \ + str = PyString_FromStringAndSize((data) + (left), \ + (right) - (left)); \ + if (str == NULL) \ + goto onError; \ + if (PyList_Append(list, str)) { \ + Py_DECREF(str); \ + goto onError; \ + } \ + else \ + Py_DECREF(str); + + #define SPLIT_INSERT(data, left, right) \ + str = PyString_FromStringAndSize((data) + (left), \ + (right) - (left)); \ + if (str == NULL) \ + goto onError; \ + if (PyList_Insert(list, 0, str)) { \ + Py_DECREF(str); \ + goto onError; \ + } \ + else \ + Py_DECREF(str); static PyObject * split_whitespace(const char *s, int len, int maxsplit) { ! int i, j; ! PyObject *str; PyObject *list = PyList_New(0); if (list == NULL) *************** *** 1302,1334 **** if (j < i) { if (maxsplit-- <= 0) break; ! item = PyString_FromStringAndSize(s+j, (int)(i-j)); ! if (item == NULL) ! goto finally; ! err = PyList_Append(list, item); ! Py_DECREF(item); ! if (err < 0) ! goto finally; while (i < len && isspace(Py_CHARMASK(s[i]))) i++; j = i; } } if (j < len) { ! item = PyString_FromStringAndSize(s+j, (int)(len - j)); ! if (item == NULL) ! goto finally; ! err = PyList_Append(list, item); ! Py_DECREF(item); ! if (err < 0) ! goto finally; } return list; ! finally: Py_DECREF(list); return NULL; } PyDoc_STRVAR(split__doc__, "S.split([sep [,maxsplit]]) -> list of strings\n\ --- 1325,1373 ---- if (j < i) { if (maxsplit-- <= 0) break; ! SPLIT_APPEND(s, j, i); while (i < len && isspace(Py_CHARMASK(s[i]))) i++; j = i; } } if (j < len) { ! SPLIT_APPEND(s, j, len); } return list; ! onError: Py_DECREF(list); return NULL; } + static PyObject * + split_char(const char *s, int len, char ch, int maxcount) + { + register int i, j; + PyObject *str; + PyObject *list = PyList_New(0); + + if (list == NULL) + return NULL; + + for (i = j = 0; i < len; ) { + if (s[i] == ch) { + if (maxcount-- <= 0) + break; + SPLIT_APPEND(s, j, i); + i = j = i + 1; + } else + i++; + } + if (j <= len) { + SPLIT_APPEND(s, j, len); + } + return list; + + onError: + Py_DECREF(list); + return NULL; + } PyDoc_STRVAR(split__doc__, "S.split([sep [,maxsplit]]) -> list of strings\n\ *************** *** 1362,1371 **** --- 1401,1413 ---- #endif else if (PyObject_AsCharBuffer(subobj, &sub, &n)) return NULL; + if (n == 0) { PyErr_SetString(PyExc_ValueError, "empty separator"); return NULL; } + else if (n == 1) + return split_char(s, len, sub[0], maxsplit); list = PyList_New(0); if (list == NULL) *************** *** 1406,1413 **** static PyObject * rsplit_whitespace(const char *s, int len, int maxsplit) { ! int i, j, err; ! PyObject* item; PyObject *list = PyList_New(0); if (list == NULL) --- 1448,1455 ---- static PyObject * rsplit_whitespace(const char *s, int len, int maxsplit) { ! int i, j; ! PyObject *str; PyObject *list = PyList_New(0); if (list == NULL) *************** *** 1422,1454 **** if (j > i) { if (maxsplit-- <= 0) break; ! item = PyString_FromStringAndSize(s+i+1, (int)(j-i)); ! if (item == NULL) ! goto finally; ! err = PyList_Insert(list, 0, item); ! Py_DECREF(item); ! if (err < 0) ! goto finally; while (i >= 0 && isspace(Py_CHARMASK(s[i]))) i--; j = i; } } if (j >= 0) { ! item = PyString_FromStringAndSize(s, (int)(j + 1)); ! if (item == NULL) ! goto finally; ! err = PyList_Insert(list, 0, item); ! Py_DECREF(item); ! if (err < 0) ! goto finally; } return list; ! finally: Py_DECREF(list); return NULL; } PyDoc_STRVAR(rsplit__doc__, "S.rsplit([sep [,maxsplit]]) -> list of strings\n\ --- 1464,1512 ---- if (j > i) { if (maxsplit-- <= 0) break; ! SPLIT_INSERT(s, i + 1, j + 1); while (i >= 0 && isspace(Py_CHARMASK(s[i]))) i--; j = i; } } if (j >= 0) { ! SPLIT_INSERT(s, 0, j + 1); } return list; ! onError: Py_DECREF(list); return NULL; } + static PyObject * + rsplit_char(const char *s, int len, char ch, int maxcount) + { + register int i, j; + PyObject *str; + PyObject *list = PyList_New(0); + + if (list == NULL) + return NULL; + + for (i = j = len - 1; i >= 0; ) { + if (s[i] == ch) { + if (maxcount-- <= 0) + break; + SPLIT_INSERT(s, i + 1, j + 1); + j = i = i - 1; + } else + i--; + } + if (j >= -1) { + SPLIT_INSERT(s, 0, j + 1); + } + return list; + + onError: + Py_DECREF(list); + return NULL; + } PyDoc_STRVAR(rsplit__doc__, "S.rsplit([sep [,maxsplit]]) -> list of strings\n\ *************** *** 1483,1492 **** --- 1541,1553 ---- #endif else if (PyObject_AsCharBuffer(subobj, &sub, &n)) return NULL; + if (n == 0) { PyErr_SetString(PyExc_ValueError, "empty separator"); return NULL; } + else if (n == 1) + return rsplit_char(s, len, sub[0], maxsplit); list = PyList_New(0); if (list == NULL) *************** *** 3103,3119 **** Return a list of the lines in S, breaking at line boundaries.\n\ Line breaks are not included in the resulting list unless keepends\n\ is given and true."); - - #define SPLIT_APPEND(data, left, right) \ - str = PyString_FromStringAndSize(data + left, right - left); \ - if (!str) \ - goto onError; \ - if (PyList_Append(list, str)) { \ - Py_DECREF(str); \ - goto onError; \ - } \ - else \ - Py_DECREF(str); static PyObject* string_splitlines(PyStringObject *self, PyObject *args) --- 3164,3169 ----