diff -r 570ada02d0f0 Include/bytes_methods.h --- a/Include/bytes_methods.h Fri Apr 15 02:27:11 2016 +0000 +++ b/Include/bytes_methods.h Fri Apr 15 11:00:35 2016 +0300 @@ -21,6 +21,15 @@ extern void _Py_bytes_title(char *result extern void _Py_bytes_capitalize(char *result, const char *s, Py_ssize_t len); extern void _Py_bytes_swapcase(char *result, const char *s, Py_ssize_t len); +extern PyObject *_Py_bytes_find(const char *str, Py_ssize_t len, PyObject *args); +extern PyObject *_Py_bytes_index(const char *str, Py_ssize_t len, PyObject *args); +extern PyObject *_Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *args); +extern PyObject *_Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *args); +extern PyObject *_Py_bytes_count(const char *str, Py_ssize_t len, PyObject *args); +extern int _Py_bytes_contains(const char *str, Py_ssize_t len, PyObject *arg); +extern PyObject *_Py_bytes_startswith(const char *str, Py_ssize_t len, PyObject *args); +extern PyObject *_Py_bytes_endswith(const char *str, Py_ssize_t len, PyObject *args); + /* The maketrans() static method. */ extern PyObject* _Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to); @@ -37,7 +46,19 @@ extern const char _Py_upper__doc__[]; extern const char _Py_title__doc__[]; extern const char _Py_capitalize__doc__[]; extern const char _Py_swapcase__doc__[]; +extern const char _Py_count__doc__[]; +extern const char _Py_find__doc__[]; +extern const char _Py_index__doc__[]; +extern const char _Py_rfind__doc__[]; +extern const char _Py_rindex__doc__[]; +extern const char _Py_startswith__doc__[]; +extern const char _Py_endswith__doc__[]; extern const char _Py_maketrans__doc__[]; +extern const char _Py_expandtabs__doc__[]; +extern const char _Py_ljust__doc__[]; +extern const char _Py_rjust__doc__[]; +extern const char _Py_center__doc__[]; +extern const char _Py_zfill__doc__[]; /* this is needed because some docs are shared from the .o, not static */ #define PyDoc_STRVAR_shared(name,str) const char name[] = PyDoc_STR(str) diff -r 570ada02d0f0 Objects/bytearrayobject.c --- a/Objects/bytearrayobject.c Fri Apr 15 02:27:11 2016 +0000 +++ b/Objects/bytearrayobject.c Fri Apr 15 11:00:35 2016 +0300 @@ -1110,149 +1110,6 @@ bytearray_dealloc(PyByteArrayObject *sel #include "stringlib/transmogrify.h" -/* The following Py_LOCAL_INLINE and Py_LOCAL functions -were copied from the old char* style string object. */ - -/* helper macro to fixup start/end slice values */ -#define ADJUST_INDICES(start, end, len) \ - if (end > len) \ - end = len; \ - else if (end < 0) { \ - end += len; \ - if (end < 0) \ - end = 0; \ - } \ - if (start < 0) { \ - start += len; \ - if (start < 0) \ - start = 0; \ - } - -Py_LOCAL_INLINE(Py_ssize_t) -bytearray_find_internal(PyByteArrayObject *self, PyObject *args, int dir) -{ - PyObject *subobj; - char byte; - Py_buffer subbuf; - const char *sub; - Py_ssize_t len, sub_len; - Py_ssize_t start=0, end=PY_SSIZE_T_MAX; - Py_ssize_t res; - - if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex", - args, &subobj, &byte, &start, &end)) - return -2; - - if (subobj) { - if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0) - return -2; - - sub = subbuf.buf; - sub_len = subbuf.len; - } - else { - sub = &byte; - sub_len = 1; - } - len = PyByteArray_GET_SIZE(self); - - ADJUST_INDICES(start, end, len); - if (end - start < sub_len) - res = -1; - else if (sub_len == 1) { - if (dir > 0) - res = stringlib_find_char( - PyByteArray_AS_STRING(self) + start, end - start, - *sub); - else - res = stringlib_rfind_char( - PyByteArray_AS_STRING(self) + start, end - start, - *sub); - if (res >= 0) - res += start; - } - else { - if (dir > 0) - res = stringlib_find_slice( - PyByteArray_AS_STRING(self), len, - sub, sub_len, start, end); - else - res = stringlib_rfind_slice( - PyByteArray_AS_STRING(self), len, - sub, sub_len, start, end); - } - - if (subobj) - PyBuffer_Release(&subbuf); - - return res; -} - -PyDoc_STRVAR(find__doc__, -"B.find(sub[, start[, end]]) -> int\n\ -\n\ -Return the lowest index in B where subsection sub is found,\n\ -such that sub is contained within B[start,end]. Optional\n\ -arguments start and end are interpreted as in slice notation.\n\ -\n\ -Return -1 on failure."); - -static PyObject * -bytearray_find(PyByteArrayObject *self, PyObject *args) -{ - Py_ssize_t result = bytearray_find_internal(self, args, +1); - if (result == -2) - return NULL; - return PyLong_FromSsize_t(result); -} - -PyDoc_STRVAR(count__doc__, -"B.count(sub[, start[, end]]) -> int\n\ -\n\ -Return the number of non-overlapping occurrences of subsection sub in\n\ -bytes B[start:end]. Optional arguments start and end are interpreted\n\ -as in slice notation."); - -static PyObject * -bytearray_count(PyByteArrayObject *self, PyObject *args) -{ - PyObject *sub_obj; - const char *str = PyByteArray_AS_STRING(self), *sub; - Py_ssize_t sub_len; - char byte; - Py_ssize_t start = 0, end = PY_SSIZE_T_MAX; - - Py_buffer vsub; - PyObject *count_obj; - - if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte, - &start, &end)) - return NULL; - - if (sub_obj) { - if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0) - return NULL; - - sub = vsub.buf; - sub_len = vsub.len; - } - else { - sub = &byte; - sub_len = 1; - } - - ADJUST_INDICES(start, end, PyByteArray_GET_SIZE(self)); - - count_obj = PyLong_FromSsize_t( - stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX) - ); - - if (sub_obj) - PyBuffer_Release(&vsub); - - return count_obj; -} - /*[clinic input] bytearray.clear @@ -1286,218 +1143,6 @@ bytearray_copy_impl(PyByteArrayObject *s PyByteArray_GET_SIZE(self)); } -PyDoc_STRVAR(index__doc__, -"B.index(sub[, start[, end]]) -> int\n\ -\n\ -Like B.find() but raise ValueError when the subsection is not found."); - -static PyObject * -bytearray_index(PyByteArrayObject *self, PyObject *args) -{ - Py_ssize_t result = bytearray_find_internal(self, args, +1); - if (result == -2) - return NULL; - if (result == -1) { - PyErr_SetString(PyExc_ValueError, - "subsection not found"); - return NULL; - } - return PyLong_FromSsize_t(result); -} - - -PyDoc_STRVAR(rfind__doc__, -"B.rfind(sub[, start[, end]]) -> int\n\ -\n\ -Return the highest index in B where subsection sub is found,\n\ -such that sub is contained within B[start,end]. Optional\n\ -arguments start and end are interpreted as in slice notation.\n\ -\n\ -Return -1 on failure."); - -static PyObject * -bytearray_rfind(PyByteArrayObject *self, PyObject *args) -{ - Py_ssize_t result = bytearray_find_internal(self, args, -1); - if (result == -2) - return NULL; - return PyLong_FromSsize_t(result); -} - - -PyDoc_STRVAR(rindex__doc__, -"B.rindex(sub[, start[, end]]) -> int\n\ -\n\ -Like B.rfind() but raise ValueError when the subsection is not found."); - -static PyObject * -bytearray_rindex(PyByteArrayObject *self, PyObject *args) -{ - Py_ssize_t result = bytearray_find_internal(self, args, -1); - if (result == -2) - return NULL; - if (result == -1) { - PyErr_SetString(PyExc_ValueError, - "subsection not found"); - return NULL; - } - return PyLong_FromSsize_t(result); -} - - -static int -bytearray_contains(PyObject *self, PyObject *arg) -{ - Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError); - if (ival == -1 && PyErr_Occurred()) { - Py_buffer varg; - Py_ssize_t pos; - PyErr_Clear(); - if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0) - return -1; - pos = stringlib_find(PyByteArray_AS_STRING(self), Py_SIZE(self), - varg.buf, varg.len, 0); - PyBuffer_Release(&varg); - return pos >= 0; - } - if (ival < 0 || ival >= 256) { - PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)"); - return -1; - } - - return memchr(PyByteArray_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL; -} - - -/* Matches the end (direction >= 0) or start (direction < 0) of self - * against substr, using the start and end arguments. Returns - * -1 on error, 0 if not found and 1 if found. - */ -Py_LOCAL(int) -_bytearray_tailmatch(PyByteArrayObject *self, PyObject *substr, Py_ssize_t start, - Py_ssize_t end, int direction) -{ - Py_ssize_t len = PyByteArray_GET_SIZE(self); - const char* str; - Py_buffer vsubstr; - int rv = 0; - - str = PyByteArray_AS_STRING(self); - - if (PyObject_GetBuffer(substr, &vsubstr, PyBUF_SIMPLE) != 0) - return -1; - - ADJUST_INDICES(start, end, len); - - if (direction < 0) { - /* startswith */ - if (start+vsubstr.len > len) { - goto done; - } - } else { - /* endswith */ - if (end-start < vsubstr.len || start > len) { - goto done; - } - - if (end-vsubstr.len > start) - start = end - vsubstr.len; - } - if (end-start >= vsubstr.len) - rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len); - -done: - PyBuffer_Release(&vsubstr); - return rv; -} - - -PyDoc_STRVAR(startswith__doc__, -"B.startswith(prefix[, start[, end]]) -> bool\n\ -\n\ -Return True if B starts with the specified prefix, False otherwise.\n\ -With optional start, test B beginning at that position.\n\ -With optional end, stop comparing B at that position.\n\ -prefix can also be a tuple of bytes to try."); - -static PyObject * -bytearray_startswith(PyByteArrayObject *self, PyObject *args) -{ - Py_ssize_t start = 0; - Py_ssize_t end = PY_SSIZE_T_MAX; - PyObject *subobj; - int result; - - if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end)) - return NULL; - if (PyTuple_Check(subobj)) { - Py_ssize_t i; - for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { - result = _bytearray_tailmatch(self, - PyTuple_GET_ITEM(subobj, i), - start, end, -1); - if (result == -1) - return NULL; - else if (result) { - Py_RETURN_TRUE; - } - } - Py_RETURN_FALSE; - } - result = _bytearray_tailmatch(self, subobj, start, end, -1); - if (result == -1) { - if (PyErr_ExceptionMatches(PyExc_TypeError)) - PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes " - "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name); - return NULL; - } - else - return PyBool_FromLong(result); -} - -PyDoc_STRVAR(endswith__doc__, -"B.endswith(suffix[, start[, end]]) -> bool\n\ -\n\ -Return True if B ends with the specified suffix, False otherwise.\n\ -With optional start, test B beginning at that position.\n\ -With optional end, stop comparing B at that position.\n\ -suffix can also be a tuple of bytes to try."); - -static PyObject * -bytearray_endswith(PyByteArrayObject *self, PyObject *args) -{ - Py_ssize_t start = 0; - Py_ssize_t end = PY_SSIZE_T_MAX; - PyObject *subobj; - int result; - - if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end)) - return NULL; - if (PyTuple_Check(subobj)) { - Py_ssize_t i; - for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { - result = _bytearray_tailmatch(self, - PyTuple_GET_ITEM(subobj, i), - start, end, +1); - if (result == -1) - return NULL; - else if (result) { - Py_RETURN_TRUE; - } - } - Py_RETURN_FALSE; - } - result = _bytearray_tailmatch(self, subobj, start, end, +1); - if (result == -1) { - if (PyErr_ExceptionMatches(PyExc_TypeError)) - PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or " - "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name); - return NULL; - } - else - return PyBool_FromLong(result); -} - /*[clinic input] bytearray.translate @@ -1562,7 +1207,7 @@ bytearray_translate_impl(PyByteArrayObje result = PyByteArray_FromStringAndSize((char *)NULL, inlen); if (result == NULL) goto done; - output_start = output = PyByteArray_AsString(result); + output_start = output = PyByteArray_AS_STRING(result); input = PyByteArray_AS_STRING(input_obj); if (vdel.len == 0 && table_chars != NULL) { @@ -1632,503 +1277,6 @@ bytearray_maketrans_impl(Py_buffer *frm, } -/* find and count characters and substrings */ - -#define findchar(target, target_len, c) \ - ((char *)memchr((const void *)(target), c, target_len)) - - -/* Bytes ops must return a string, create a copy */ -Py_LOCAL(PyByteArrayObject *) -return_self(PyByteArrayObject *self) -{ - /* always return a new bytearray */ - return (PyByteArrayObject *)PyByteArray_FromStringAndSize( - PyByteArray_AS_STRING(self), - PyByteArray_GET_SIZE(self)); -} - -Py_LOCAL_INLINE(Py_ssize_t) -countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount) -{ - Py_ssize_t count=0; - const char *start=target; - const char *end=target+target_len; - - while ( (start=findchar(start, end-start, c)) != NULL ) { - count++; - if (count >= maxcount) - break; - start += 1; - } - return count; -} - - -/* Algorithms for different cases of string replacement */ - -/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */ -Py_LOCAL(PyByteArrayObject *) -replace_interleave(PyByteArrayObject *self, - const char *to_s, Py_ssize_t to_len, - Py_ssize_t maxcount) -{ - char *self_s, *result_s; - Py_ssize_t self_len, result_len; - Py_ssize_t count, i; - PyByteArrayObject *result; - - self_len = PyByteArray_GET_SIZE(self); - - /* 1 at the end plus 1 after every character; - count = min(maxcount, self_len + 1) */ - if (maxcount <= self_len) - count = maxcount; - else - /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */ - count = self_len + 1; - - /* Check for overflow */ - /* result_len = count * to_len + self_len; */ - assert(count > 0); - if (to_len > (PY_SSIZE_T_MAX - self_len) / count) { - PyErr_SetString(PyExc_OverflowError, - "replace string is too long"); - return NULL; - } - result_len = count * to_len + self_len; - - if (! (result = (PyByteArrayObject *) - PyByteArray_FromStringAndSize(NULL, result_len)) ) - return NULL; - - self_s = PyByteArray_AS_STRING(self); - result_s = PyByteArray_AS_STRING(result); - - if (to_len > 1) { - /* Lay the first one down (guaranteed this will occur) */ - Py_MEMCPY(result_s, to_s, to_len); - result_s += to_len; - count -= 1; - - for (i = 0; i < count; i++) { - *result_s++ = *self_s++; - Py_MEMCPY(result_s, to_s, to_len); - result_s += to_len; - } - } - else { - result_s[0] = to_s[0]; - result_s += to_len; - count -= 1; - for (i = 0; i < count; i++) { - *result_s++ = *self_s++; - result_s[0] = to_s[0]; - result_s += to_len; - } - } - - /* Copy the rest of the original string */ - Py_MEMCPY(result_s, self_s, self_len-i); - - return result; -} - -/* Special case for deleting a single character */ -/* len(self)>=1, len(from)==1, to="", maxcount>=1 */ -Py_LOCAL(PyByteArrayObject *) -replace_delete_single_character(PyByteArrayObject *self, - char from_c, Py_ssize_t maxcount) -{ - char *self_s, *result_s; - char *start, *next, *end; - Py_ssize_t self_len, result_len; - Py_ssize_t count; - PyByteArrayObject *result; - - self_len = PyByteArray_GET_SIZE(self); - self_s = PyByteArray_AS_STRING(self); - - count = countchar(self_s, self_len, from_c, maxcount); - if (count == 0) { - return return_self(self); - } - - result_len = self_len - count; /* from_len == 1 */ - assert(result_len>=0); - - if ( (result = (PyByteArrayObject *) - PyByteArray_FromStringAndSize(NULL, result_len)) == NULL) - return NULL; - result_s = PyByteArray_AS_STRING(result); - - start = self_s; - end = self_s + self_len; - while (count-- > 0) { - next = findchar(start, end-start, from_c); - if (next == NULL) - break; - Py_MEMCPY(result_s, start, next-start); - result_s += (next-start); - start = next+1; - } - Py_MEMCPY(result_s, start, end-start); - - return result; -} - -/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */ - -Py_LOCAL(PyByteArrayObject *) -replace_delete_substring(PyByteArrayObject *self, - const char *from_s, Py_ssize_t from_len, - Py_ssize_t maxcount) -{ - char *self_s, *result_s; - char *start, *next, *end; - Py_ssize_t self_len, result_len; - Py_ssize_t count, offset; - PyByteArrayObject *result; - - self_len = PyByteArray_GET_SIZE(self); - self_s = PyByteArray_AS_STRING(self); - - count = stringlib_count(self_s, self_len, - from_s, from_len, - maxcount); - - if (count == 0) { - /* no matches */ - return return_self(self); - } - - result_len = self_len - (count * from_len); - assert (result_len>=0); - - if ( (result = (PyByteArrayObject *) - PyByteArray_FromStringAndSize(NULL, result_len)) == NULL ) - return NULL; - - result_s = PyByteArray_AS_STRING(result); - - start = self_s; - end = self_s + self_len; - while (count-- > 0) { - offset = stringlib_find(start, end-start, - from_s, from_len, - 0); - if (offset == -1) - break; - next = start + offset; - - Py_MEMCPY(result_s, start, next-start); - - result_s += (next-start); - start = next+from_len; - } - Py_MEMCPY(result_s, start, end-start); - return result; -} - -/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */ -Py_LOCAL(PyByteArrayObject *) -replace_single_character_in_place(PyByteArrayObject *self, - char from_c, char to_c, - Py_ssize_t maxcount) -{ - char *self_s, *result_s, *start, *end, *next; - Py_ssize_t self_len; - PyByteArrayObject *result; - - /* The result string will be the same size */ - self_s = PyByteArray_AS_STRING(self); - self_len = PyByteArray_GET_SIZE(self); - - next = findchar(self_s, self_len, from_c); - - if (next == NULL) { - /* No matches; return the original bytes */ - return return_self(self); - } - - /* Need to make a new bytes */ - result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len); - if (result == NULL) - return NULL; - result_s = PyByteArray_AS_STRING(result); - Py_MEMCPY(result_s, self_s, self_len); - - /* change everything in-place, starting with this one */ - start = result_s + (next-self_s); - *start = to_c; - start++; - end = result_s + self_len; - - while (--maxcount > 0) { - next = findchar(start, end-start, from_c); - if (next == NULL) - break; - *next = to_c; - start = next+1; - } - - return result; -} - -/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */ -Py_LOCAL(PyByteArrayObject *) -replace_substring_in_place(PyByteArrayObject *self, - const char *from_s, Py_ssize_t from_len, - const char *to_s, Py_ssize_t to_len, - Py_ssize_t maxcount) -{ - char *result_s, *start, *end; - char *self_s; - Py_ssize_t self_len, offset; - PyByteArrayObject *result; - - /* The result bytes will be the same size */ - - self_s = PyByteArray_AS_STRING(self); - self_len = PyByteArray_GET_SIZE(self); - - offset = stringlib_find(self_s, self_len, - from_s, from_len, - 0); - if (offset == -1) { - /* No matches; return the original bytes */ - return return_self(self); - } - - /* Need to make a new bytes */ - result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, self_len); - if (result == NULL) - return NULL; - result_s = PyByteArray_AS_STRING(result); - Py_MEMCPY(result_s, self_s, self_len); - - /* change everything in-place, starting with this one */ - start = result_s + offset; - Py_MEMCPY(start, to_s, from_len); - start += from_len; - end = result_s + self_len; - - while ( --maxcount > 0) { - offset = stringlib_find(start, end-start, - from_s, from_len, - 0); - if (offset==-1) - break; - Py_MEMCPY(start+offset, to_s, from_len); - start += offset+from_len; - } - - return result; -} - -/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */ -Py_LOCAL(PyByteArrayObject *) -replace_single_character(PyByteArrayObject *self, - char from_c, - const char *to_s, Py_ssize_t to_len, - Py_ssize_t maxcount) -{ - char *self_s, *result_s; - char *start, *next, *end; - Py_ssize_t self_len, result_len; - Py_ssize_t count; - PyByteArrayObject *result; - - self_s = PyByteArray_AS_STRING(self); - self_len = PyByteArray_GET_SIZE(self); - - count = countchar(self_s, self_len, from_c, maxcount); - if (count == 0) { - /* no matches, return unchanged */ - return return_self(self); - } - - /* use the difference between current and new, hence the "-1" */ - /* result_len = self_len + count * (to_len-1) */ - assert(count > 0); - if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) { - PyErr_SetString(PyExc_OverflowError, "replace bytes is too long"); - return NULL; - } - result_len = self_len + count * (to_len - 1); - - if ( (result = (PyByteArrayObject *) - PyByteArray_FromStringAndSize(NULL, result_len)) == NULL) - return NULL; - result_s = PyByteArray_AS_STRING(result); - - start = self_s; - end = self_s + self_len; - while (count-- > 0) { - next = findchar(start, end-start, from_c); - if (next == NULL) - break; - - if (next == start) { - /* replace with the 'to' */ - Py_MEMCPY(result_s, to_s, to_len); - result_s += to_len; - start += 1; - } else { - /* copy the unchanged old then the 'to' */ - Py_MEMCPY(result_s, start, next-start); - result_s += (next-start); - Py_MEMCPY(result_s, to_s, to_len); - result_s += to_len; - start = next+1; - } - } - /* Copy the remainder of the remaining bytes */ - Py_MEMCPY(result_s, start, end-start); - - return result; -} - -/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */ -Py_LOCAL(PyByteArrayObject *) -replace_substring(PyByteArrayObject *self, - const char *from_s, Py_ssize_t from_len, - const char *to_s, Py_ssize_t to_len, - Py_ssize_t maxcount) -{ - char *self_s, *result_s; - char *start, *next, *end; - Py_ssize_t self_len, result_len; - Py_ssize_t count, offset; - PyByteArrayObject *result; - - self_s = PyByteArray_AS_STRING(self); - self_len = PyByteArray_GET_SIZE(self); - - count = stringlib_count(self_s, self_len, - from_s, from_len, - maxcount); - - if (count == 0) { - /* no matches, return unchanged */ - return return_self(self); - } - - /* Check for overflow */ - /* result_len = self_len + count * (to_len-from_len) */ - assert(count > 0); - if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) { - PyErr_SetString(PyExc_OverflowError, "replace bytes is too long"); - return NULL; - } - result_len = self_len + count * (to_len - from_len); - - if ( (result = (PyByteArrayObject *) - PyByteArray_FromStringAndSize(NULL, result_len)) == NULL) - return NULL; - result_s = PyByteArray_AS_STRING(result); - - start = self_s; - end = self_s + self_len; - while (count-- > 0) { - offset = stringlib_find(start, end-start, - from_s, from_len, - 0); - if (offset == -1) - break; - next = start+offset; - if (next == start) { - /* replace with the 'to' */ - Py_MEMCPY(result_s, to_s, to_len); - result_s += to_len; - start += from_len; - } else { - /* copy the unchanged old then the 'to' */ - Py_MEMCPY(result_s, start, next-start); - result_s += (next-start); - Py_MEMCPY(result_s, to_s, to_len); - result_s += to_len; - start = next+from_len; - } - } - /* Copy the remainder of the remaining bytes */ - Py_MEMCPY(result_s, start, end-start); - - return result; -} - - -Py_LOCAL(PyByteArrayObject *) -replace(PyByteArrayObject *self, - const char *from_s, Py_ssize_t from_len, - const char *to_s, Py_ssize_t to_len, - Py_ssize_t maxcount) -{ - if (maxcount < 0) { - maxcount = PY_SSIZE_T_MAX; - } else if (maxcount == 0 || PyByteArray_GET_SIZE(self) == 0) { - /* nothing to do; return the original bytes */ - return return_self(self); - } - - if (maxcount == 0 || - (from_len == 0 && to_len == 0)) { - /* nothing to do; return the original bytes */ - return return_self(self); - } - - /* Handle zero-length special cases */ - - if (from_len == 0) { - /* insert the 'to' bytes everywhere. */ - /* >>> "Python".replace("", ".") */ - /* '.P.y.t.h.o.n.' */ - return replace_interleave(self, to_s, to_len, maxcount); - } - - /* Except for "".replace("", "A") == "A" there is no way beyond this */ - /* point for an empty self bytes to generate a non-empty bytes */ - /* Special case so the remaining code always gets a non-empty bytes */ - if (PyByteArray_GET_SIZE(self) == 0) { - return return_self(self); - } - - if (to_len == 0) { - /* delete all occurrences of 'from' bytes */ - if (from_len == 1) { - return replace_delete_single_character( - self, from_s[0], maxcount); - } else { - return replace_delete_substring(self, from_s, from_len, maxcount); - } - } - - /* Handle special case where both bytes have the same length */ - - if (from_len == to_len) { - if (from_len == 1) { - return replace_single_character_in_place( - self, - from_s[0], - to_s[0], - maxcount); - } else { - return replace_substring_in_place( - self, from_s, from_len, to_s, to_len, maxcount); - } - } - - /* Otherwise use the more generic algorithms */ - if (from_len == 1) { - return replace_single_character(self, from_s[0], - to_s, to_len, maxcount); - } else { - /* len('from')>=2, len('to')>=1 */ - return replace_substring(self, from_s, from_len, to_s, to_len, maxcount); - } -} - - /*[clinic input] bytearray.replace @@ -2150,7 +1298,7 @@ bytearray_replace_impl(PyByteArrayObject Py_buffer *new, Py_ssize_t count) /*[clinic end generated code: output=d39884c4dc59412a input=aa379d988637c7fb]*/ { - return (PyObject *)replace((PyByteArrayObject *) self, + return (PyObject *)stringlib_replace((PyObject *) self, old->buf, old->len, new->buf, new->len, count); } @@ -2826,14 +1974,6 @@ Create a string of hexadecimal numbers f Example: bytearray([0xb9, 0x01, 0xef]).hex() -> 'b901ef'."); static PyObject * -bytearray_hex(PyBytesObject *self) -{ - char* argbuf = PyByteArray_AS_STRING(self); - Py_ssize_t arglen = PyByteArray_GET_SIZE(self); - return _Py_strhex(argbuf, arglen); -} - -static PyObject * _common_reduce(PyByteArrayObject *self, int proto) { PyObject *dict; @@ -2926,7 +2066,7 @@ static PySequenceMethods bytearray_as_se 0, /* sq_slice */ (ssizeobjargproc)bytearray_setitem, /* sq_ass_item */ 0, /* sq_ass_slice */ - (objobjproc)bytearray_contains, /* sq_contains */ + (objobjproc)stringlib_contains, /* sq_contains */ (binaryfunc)bytearray_iconcat, /* sq_inplace_concat */ (ssizeargfunc)bytearray_irepeat, /* sq_inplace_repeat */ }; @@ -2951,19 +2091,22 @@ bytearray_methods[] = { BYTEARRAY_APPEND_METHODDEF {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS, _Py_capitalize__doc__}, - {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__}, + {"center", (PyCFunction)stringlib_center, METH_VARARGS, _Py_center__doc__}, BYTEARRAY_CLEAR_METHODDEF BYTEARRAY_COPY_METHODDEF - {"count", (PyCFunction)bytearray_count, METH_VARARGS, count__doc__}, + {"count", (PyCFunction)stringlib_method_count, METH_VARARGS, + _Py_count__doc__}, BYTEARRAY_DECODE_METHODDEF - {"endswith", (PyCFunction)bytearray_endswith, METH_VARARGS, endswith__doc__}, + {"endswith", (PyCFunction)stringlib_endswith, METH_VARARGS, + _Py_endswith__doc__}, {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS, - expandtabs__doc__}, + _Py_expandtabs__doc__}, BYTEARRAY_EXTEND_METHODDEF - {"find", (PyCFunction)bytearray_find, METH_VARARGS, find__doc__}, + {"find", (PyCFunction)stringlib_method_find, METH_VARARGS, + _Py_find__doc__}, BYTEARRAY_FROMHEX_METHODDEF - {"hex", (PyCFunction)bytearray_hex, METH_NOARGS, hex__doc__}, - {"index", (PyCFunction)bytearray_index, METH_VARARGS, index__doc__}, + {"hex", (PyCFunction)stringlib_hex, METH_NOARGS, hex__doc__}, + {"index", (PyCFunction)stringlib_index, METH_VARARGS, _Py_index__doc__}, BYTEARRAY_INSERT_METHODDEF {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS, _Py_isalnum__doc__}, @@ -2980,7 +2123,7 @@ bytearray_methods[] = { {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS, _Py_isupper__doc__}, BYTEARRAY_JOIN_METHODDEF - {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__}, + {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, _Py_ljust__doc__}, {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__}, BYTEARRAY_LSTRIP_METHODDEF BYTEARRAY_MAKETRANS_METHODDEF @@ -2989,23 +2132,24 @@ bytearray_methods[] = { BYTEARRAY_REMOVE_METHODDEF BYTEARRAY_REPLACE_METHODDEF BYTEARRAY_REVERSE_METHODDEF - {"rfind", (PyCFunction)bytearray_rfind, METH_VARARGS, rfind__doc__}, - {"rindex", (PyCFunction)bytearray_rindex, METH_VARARGS, rindex__doc__}, - {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__}, + {"rfind", (PyCFunction)stringlib_method_rfind, METH_VARARGS, + _Py_rfind__doc__}, + {"rindex", (PyCFunction)stringlib_rindex, METH_VARARGS, _Py_rindex__doc__}, + {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, _Py_rjust__doc__}, BYTEARRAY_RPARTITION_METHODDEF BYTEARRAY_RSPLIT_METHODDEF BYTEARRAY_RSTRIP_METHODDEF BYTEARRAY_SPLIT_METHODDEF BYTEARRAY_SPLITLINES_METHODDEF - {"startswith", (PyCFunction)bytearray_startswith, METH_VARARGS , - startswith__doc__}, + {"startswith", (PyCFunction)stringlib_startswith, METH_VARARGS, + _Py_startswith__doc__}, BYTEARRAY_STRIP_METHODDEF {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS, _Py_swapcase__doc__}, {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__}, BYTEARRAY_TRANSLATE_METHODDEF {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__}, - {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__}, + {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, _Py_zfill__doc__}, {NULL} }; diff -r 570ada02d0f0 Objects/bytes_methods.c --- a/Objects/bytes_methods.c Fri Apr 15 02:27:11 2016 +0000 +++ b/Objects/bytes_methods.c Fri Apr 15 11:00:35 2016 +0300 @@ -387,3 +387,426 @@ PyObject * return res; } + +#define FASTSEARCH fastsearch +#define STRINGLIB(F) stringlib_##F +#define STRINGLIB_CHAR char +#define STRINGLIB_SIZEOF_CHAR 1 + +#include "stringlib/fastsearch.h" +#include "stringlib/count.h" +#include "stringlib/find.h" + +/* +Wraps stringlib_parse_args_finds() and additionally checks whether the +first argument is an integer in range(0, 256). + +If this is the case, writes the integer value to the byte parameter +and sets subobj to NULL. Otherwise, sets the first argument to subobj +and doesn't touch byte. The other parameters are similar to those of +stringlib_parse_args_finds(). +*/ + +Py_LOCAL_INLINE(int) +parse_args_finds_byte(const char *function_name, PyObject *args, + PyObject **subobj, char *byte, + Py_ssize_t *start, Py_ssize_t *end) +{ + PyObject *tmp_subobj; + Py_ssize_t ival; + PyObject *err; + + if(!stringlib_parse_args_finds(function_name, args, &tmp_subobj, + start, end)) + return 0; + + if (!PyNumber_Check(tmp_subobj)) { + *subobj = tmp_subobj; + return 1; + } + + ival = PyNumber_AsSsize_t(tmp_subobj, PyExc_OverflowError); + if (ival == -1) { + err = PyErr_Occurred(); + if (err && !PyErr_GivenExceptionMatches(err, PyExc_OverflowError)) { + PyErr_Clear(); + *subobj = tmp_subobj; + return 1; + } + } + + if (ival < 0 || ival > 255) { + PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)"); + return 0; + } + + *subobj = NULL; + *byte = (char)ival; + return 1; +} + +/* helper macro to fixup start/end slice values */ +#define ADJUST_INDICES(start, end, len) \ + if (end > len) \ + end = len; \ + else if (end < 0) { \ + end += len; \ + if (end < 0) \ + end = 0; \ + } \ + if (start < 0) { \ + start += len; \ + if (start < 0) \ + start = 0; \ + } + +Py_LOCAL_INLINE(Py_ssize_t) +_Py_bytes_find_internal(const char *str, Py_ssize_t len, const char *function_name, PyObject *args, int dir) +{ + PyObject *subobj; + char byte; + Py_buffer subbuf; + const char *sub; + Py_ssize_t sub_len; + Py_ssize_t start = 0, end = PY_SSIZE_T_MAX; + Py_ssize_t res; + + if (!parse_args_finds_byte(function_name, args, + &subobj, &byte, &start, &end)) + return -2; + + if (subobj) { + if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0) + return -2; + + sub = subbuf.buf; + sub_len = subbuf.len; + } + else { + sub = &byte; + sub_len = 1; + } + + ADJUST_INDICES(start, end, len); + if (end - start < sub_len) + res = -1; + else if (sub_len == 1) { + if (dir > 0) + res = stringlib_find_char( + str + start, end - start, + *sub); + else + res = stringlib_rfind_char( + str + start, end - start, + *sub); + if (res >= 0) + res += start; + } + else { + if (dir > 0) + res = stringlib_find_slice( + str, len, + sub, sub_len, start, end); + else + res = stringlib_rfind_slice( + str, len, + sub, sub_len, start, end); + } + + if (subobj) + PyBuffer_Release(&subbuf); + + return res; +} + +PyDoc_STRVAR_shared(_Py_find__doc__, +"B.find(sub[, start[, end]]) -> int\n\ +\n\ +Return the lowest index in B where subsection sub is found,\n\ +such that sub is contained within B[start,end]. Optional\n\ +arguments start and end are interpreted as in slice notation.\n\ +\n\ +Return -1 on failure."); + +PyObject * +_Py_bytes_find(const char *str, Py_ssize_t len, PyObject *args) +{ + Py_ssize_t result = _Py_bytes_find_internal(str, len, "find", args, +1); + if (result == -2) + return NULL; + return PyLong_FromSsize_t(result); +} + +PyDoc_STRVAR_shared(_Py_index__doc__, +"B.index(sub[, start[, end]]) -> int\n\ +\n\ +Like B.find() but raise ValueError when the subsection is not found."); + +PyObject * +_Py_bytes_index(const char *str, Py_ssize_t len, PyObject *args) +{ + Py_ssize_t result = _Py_bytes_find_internal(str, len, "index", args, +1); + if (result == -2) + return NULL; + if (result == -1) { + PyErr_SetString(PyExc_ValueError, + "subsection not found"); + return NULL; + } + return PyLong_FromSsize_t(result); +} + +PyDoc_STRVAR_shared(_Py_rfind__doc__, +"B.rfind(sub[, start[, end]]) -> int\n\ +\n\ +Return the highest index in B where subsection sub is found,\n\ +such that sub is contained within B[start,end]. Optional\n\ +arguments start and end are interpreted as in slice notation.\n\ +\n\ +Return -1 on failure."); + +PyObject * +_Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *args) +{ + Py_ssize_t result = _Py_bytes_find_internal(str, len, "rfind", args, -1); + if (result == -2) + return NULL; + return PyLong_FromSsize_t(result); +} + +PyDoc_STRVAR_shared(_Py_rindex__doc__, +"B.rindex(sub[, start[, end]]) -> int\n\ +\n\ +Like B.rfind() but raise ValueError when the subsection is not found."); + +PyObject * +_Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *args) +{ + Py_ssize_t result = _Py_bytes_find_internal(str, len, "rindex", args, -1); + if (result == -2) + return NULL; + if (result == -1) { + PyErr_SetString(PyExc_ValueError, + "subsection not found"); + return NULL; + } + return PyLong_FromSsize_t(result); +} + +PyDoc_STRVAR_shared(_Py_count__doc__, +"B.count(sub[, start[, end]]) -> int\n\ +\n\ +Return the number of non-overlapping occurrences of subsection sub in\n\ +bytes B[start:end]. Optional arguments start and end are interpreted\n\ +as in slice notation."); + +PyObject * +_Py_bytes_count(const char *str, Py_ssize_t len, PyObject *args) +{ + PyObject *sub_obj; + const char *sub; + Py_ssize_t sub_len; + char byte; + Py_ssize_t start = 0, end = PY_SSIZE_T_MAX; + + Py_buffer vsub; + PyObject *count_obj; + + if (!parse_args_finds_byte("count", args, + &sub_obj, &byte, &start, &end)) + return NULL; + + if (sub_obj) { + if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0) + return NULL; + + sub = vsub.buf; + sub_len = vsub.len; + } + else { + sub = &byte; + sub_len = 1; + } + + ADJUST_INDICES(start, end, len); + + count_obj = PyLong_FromSsize_t( + stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX) + ); + + if (sub_obj) + PyBuffer_Release(&vsub); + + return count_obj; +} + +int +_Py_bytes_contains(const char *str, Py_ssize_t len, PyObject *arg) +{ + Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError); + if (ival == -1 && PyErr_Occurred()) { + Py_buffer varg; + Py_ssize_t pos; + PyErr_Clear(); + if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0) + return -1; + pos = stringlib_find(str, len, + varg.buf, varg.len, 0); + PyBuffer_Release(&varg); + return pos >= 0; + } + if (ival < 0 || ival >= 256) { + PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)"); + return -1; + } + + return memchr(str, (int) ival, len) != NULL; +} + + +/* Matches the end (direction >= 0) or start (direction < 0) of the buffer + * against substr, using the start and end arguments. Returns + * -1 on error, 0 if not found and 1 if found. + */ +Py_LOCAL(int) +tailmatch(const char *str, Py_ssize_t len, PyObject *substr, + Py_ssize_t start, Py_ssize_t end, int direction) +{ + Py_buffer sub_view = {NULL, NULL}; + const char *sub; + Py_ssize_t slen; + + if (PyBytes_Check(substr)) { + sub = PyBytes_AS_STRING(substr); + slen = PyBytes_GET_SIZE(substr); + } + else { + if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0) + return -1; + sub = sub_view.buf; + slen = sub_view.len; + } + + ADJUST_INDICES(start, end, len); + + if (direction < 0) { + /* startswith */ + if (start + slen > len) + goto notfound; + } else { + /* endswith */ + if (end - start < slen || start > len) + goto notfound; + + if (end - slen > start) + start = end - slen; + } + if (end - start < slen) + goto notfound; + if (memcmp(str + start, sub, slen) != 0) + goto notfound; + + PyBuffer_Release(&sub_view); + return 1; + +notfound: + PyBuffer_Release(&sub_view); + return 0; +} + +Py_LOCAL(PyObject *) +_Py_bytes_tailmatch(const char *str, Py_ssize_t len, + const char *function_name, PyObject *args, + int direction) +{ + Py_ssize_t start = 0; + Py_ssize_t end = PY_SSIZE_T_MAX; + PyObject *subobj; + int result; + + if (!stringlib_parse_args_finds(function_name, args, &subobj, &start, &end)) + return NULL; + if (PyTuple_Check(subobj)) { + Py_ssize_t i; + for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { + result = tailmatch(str, len, PyTuple_GET_ITEM(subobj, i), + start, end, direction); + if (result == -1) + return NULL; + else if (result) { + Py_RETURN_TRUE; + } + } + Py_RETURN_FALSE; + } + result = tailmatch(str, len, subobj, start, end, direction); + if (result == -1) { + if (PyErr_ExceptionMatches(PyExc_TypeError)) + PyErr_Format(PyExc_TypeError, + "%s first arg must be bytes or a tuple of bytes, " + "not %s", + function_name, Py_TYPE(subobj)->tp_name); + return NULL; + } + else + return PyBool_FromLong(result); +} + +PyDoc_STRVAR_shared(_Py_startswith__doc__, +"B.startswith(prefix[, start[, end]]) -> bool\n\ +\n\ +Return True if B starts with the specified prefix, False otherwise.\n\ +With optional start, test B beginning at that position.\n\ +With optional end, stop comparing B at that position.\n\ +prefix can also be a tuple of bytes to try."); + +PyObject * +_Py_bytes_startswith(const char *str, Py_ssize_t len, PyObject *args) +{ + return _Py_bytes_tailmatch(str, len, "startswith", args, -1); +} + +PyDoc_STRVAR_shared(_Py_endswith__doc__, +"B.endswith(suffix[, start[, end]]) -> bool\n\ +\n\ +Return True if B ends with the specified suffix, False otherwise.\n\ +With optional start, test B beginning at that position.\n\ +With optional end, stop comparing B at that position.\n\ +suffix can also be a tuple of bytes to try."); + +PyObject * +_Py_bytes_endswith(const char *str, Py_ssize_t len, PyObject *args) +{ + return _Py_bytes_tailmatch(str, len, "endswith", args, +1); +} + +PyDoc_STRVAR_shared(_Py_expandtabs__doc__, +"B.expandtabs(tabsize=8) -> copy of B\n\ +\n\ +Return a copy of B where all tab characters are expanded using spaces.\n\ +If tabsize is not given, a tab size of 8 characters is assumed."); + +PyDoc_STRVAR_shared(_Py_ljust__doc__, +"B.ljust(width[, fillchar]) -> copy of B\n" +"\n" +"Return B left justified in a string of length width. Padding is\n" +"done using the specified fill character (default is a space)."); + +PyDoc_STRVAR_shared(_Py_rjust__doc__, +"B.rjust(width[, fillchar]) -> copy of B\n" +"\n" +"Return B right justified in a string of length width. Padding is\n" +"done using the specified fill character (default is a space)"); + +PyDoc_STRVAR_shared(_Py_center__doc__, +"B.center(width[, fillchar]) -> copy of B\n" +"\n" +"Return B centered in a string of length width. Padding is\n" +"done using the specified fill character (default is a space)."); + +PyDoc_STRVAR_shared(_Py_zfill__doc__, +"B.zfill(width) -> copy of B\n" +"\n" +"Pad a numeric string B with zeros on the left, to fill a field\n" +"of the specified width. B is never truncated."); + diff -r 570ada02d0f0 Objects/bytesobject.c --- a/Objects/bytesobject.c Fri Apr 15 02:27:11 2016 +0000 +++ b/Objects/bytesobject.c Fri Apr 15 11:00:35 2016 +0300 @@ -485,11 +485,11 @@ formatlong(PyObject *v, int flags, int p static int byte_converter(PyObject *arg, char *p) { - if (PyBytes_Check(arg) && PyBytes_Size(arg) == 1) { + if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) { *p = PyBytes_AS_STRING(arg)[0]; return 1; } - else if (PyByteArray_Check(arg) && PyByteArray_Size(arg) == 1) { + else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) { *p = PyByteArray_AS_STRING(arg)[0]; return 1; } @@ -1484,29 +1484,6 @@ bytes_repeat(PyBytesObject *a, Py_ssize_ return (PyObject *) op; } -static int -bytes_contains(PyObject *self, PyObject *arg) -{ - Py_ssize_t ival = PyNumber_AsSsize_t(arg, PyExc_ValueError); - if (ival == -1 && PyErr_Occurred()) { - Py_buffer varg; - Py_ssize_t pos; - PyErr_Clear(); - if (PyObject_GetBuffer(arg, &varg, PyBUF_SIMPLE) != 0) - return -1; - pos = stringlib_find(PyBytes_AS_STRING(self), Py_SIZE(self), - varg.buf, varg.len, 0); - PyBuffer_Release(&varg); - return pos >= 0; - } - if (ival < 0 || ival >= 256) { - PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)"); - return -1; - } - - return memchr(PyBytes_AS_STRING(self), (int) ival, Py_SIZE(self)) != NULL; -} - static PyObject * bytes_item(PyBytesObject *a, Py_ssize_t i) { @@ -1717,7 +1694,7 @@ static PySequenceMethods bytes_as_sequen 0, /*sq_slice*/ 0, /*sq_ass_item*/ 0, /*sq_ass_slice*/ - (objobjproc)bytes_contains /*sq_contains*/ + (objobjproc)stringlib_contains /*sq_contains*/ }; static PyMappingMethods bytes_as_mapping = { @@ -1891,159 +1868,6 @@ PyObject * return bytes_join((PyBytesObject*)sep, x); } -/* helper macro to fixup start/end slice values */ -#define ADJUST_INDICES(start, end, len) \ - if (end > len) \ - end = len; \ - else if (end < 0) { \ - end += len; \ - if (end < 0) \ - end = 0; \ - } \ - if (start < 0) { \ - start += len; \ - if (start < 0) \ - start = 0; \ - } - -Py_LOCAL_INLINE(Py_ssize_t) -bytes_find_internal(PyBytesObject *self, PyObject *args, int dir) -{ - PyObject *subobj; - char byte; - Py_buffer subbuf; - const char *sub; - Py_ssize_t len, sub_len; - Py_ssize_t start=0, end=PY_SSIZE_T_MAX; - Py_ssize_t res; - - if (!stringlib_parse_args_finds_byte("find/rfind/index/rindex", - args, &subobj, &byte, &start, &end)) - return -2; - - if (subobj) { - if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0) - return -2; - - sub = subbuf.buf; - sub_len = subbuf.len; - } - else { - sub = &byte; - sub_len = 1; - } - len = PyBytes_GET_SIZE(self); - - ADJUST_INDICES(start, end, len); - if (end - start < sub_len) - res = -1; - else if (sub_len == 1) { - if (dir > 0) - res = stringlib_find_char( - PyBytes_AS_STRING(self) + start, end - start, - *sub); - else - res = stringlib_rfind_char( - PyBytes_AS_STRING(self) + start, end - start, - *sub); - if (res >= 0) - res += start; - } - else { - if (dir > 0) - res = stringlib_find_slice( - PyBytes_AS_STRING(self), len, - sub, sub_len, start, end); - else - res = stringlib_rfind_slice( - PyBytes_AS_STRING(self), len, - sub, sub_len, start, end); - } - - if (subobj) - PyBuffer_Release(&subbuf); - - return res; -} - - -PyDoc_STRVAR(find__doc__, -"B.find(sub[, start[, end]]) -> int\n\ -\n\ -Return the lowest index in B where substring sub is found,\n\ -such that sub is contained within B[start:end]. Optional\n\ -arguments start and end are interpreted as in slice notation.\n\ -\n\ -Return -1 on failure."); - -static PyObject * -bytes_find(PyBytesObject *self, PyObject *args) -{ - Py_ssize_t result = bytes_find_internal(self, args, +1); - if (result == -2) - return NULL; - return PyLong_FromSsize_t(result); -} - - -PyDoc_STRVAR(index__doc__, -"B.index(sub[, start[, end]]) -> int\n\ -\n\ -Like B.find() but raise ValueError when the substring is not found."); - -static PyObject * -bytes_index(PyBytesObject *self, PyObject *args) -{ - Py_ssize_t result = bytes_find_internal(self, args, +1); - if (result == -2) - return NULL; - if (result == -1) { - PyErr_SetString(PyExc_ValueError, - "substring not found"); - return NULL; - } - return PyLong_FromSsize_t(result); -} - - -PyDoc_STRVAR(rfind__doc__, -"B.rfind(sub[, start[, end]]) -> int\n\ -\n\ -Return the highest index in B where substring sub is found,\n\ -such that sub is contained within B[start:end]. Optional\n\ -arguments start and end are interpreted as in slice notation.\n\ -\n\ -Return -1 on failure."); - -static PyObject * -bytes_rfind(PyBytesObject *self, PyObject *args) -{ - Py_ssize_t result = bytes_find_internal(self, args, -1); - if (result == -2) - return NULL; - return PyLong_FromSsize_t(result); -} - - -PyDoc_STRVAR(rindex__doc__, -"B.rindex(sub[, start[, end]]) -> int\n\ -\n\ -Like B.rfind() but raise ValueError when the substring is not found."); - -static PyObject * -bytes_rindex(PyBytesObject *self, PyObject *args) -{ - Py_ssize_t result = bytes_find_internal(self, args, -1); - if (result == -2) - return NULL; - if (result == -1) { - PyErr_SetString(PyExc_ValueError, - "substring not found"); - return NULL; - } - return PyLong_FromSsize_t(result); -} - Py_LOCAL_INLINE(PyObject *) do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj) @@ -2183,54 +2007,6 @@ bytes_rstrip_impl(PyBytesObject *self, P } -PyDoc_STRVAR(count__doc__, -"B.count(sub[, start[, end]]) -> int\n\ -\n\ -Return the number of non-overlapping occurrences of substring sub in\n\ -string B[start:end]. Optional arguments start and end are interpreted\n\ -as in slice notation."); - -static PyObject * -bytes_count(PyBytesObject *self, PyObject *args) -{ - PyObject *sub_obj; - const char *str = PyBytes_AS_STRING(self), *sub; - Py_ssize_t sub_len; - char byte; - Py_ssize_t start = 0, end = PY_SSIZE_T_MAX; - - Py_buffer vsub; - PyObject *count_obj; - - if (!stringlib_parse_args_finds_byte("count", args, &sub_obj, &byte, - &start, &end)) - return NULL; - - if (sub_obj) { - if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0) - return NULL; - - sub = vsub.buf; - sub_len = vsub.len; - } - else { - sub = &byte; - sub_len = 1; - } - - ADJUST_INDICES(start, end, PyBytes_GET_SIZE(self)); - - count_obj = PyLong_FromSsize_t( - stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX) - ); - - if (sub_obj) - PyBuffer_Release(&vsub); - - return count_obj; -} - - /*[clinic input] bytes.translate @@ -2312,7 +2088,7 @@ bytes_translate_impl(PyBytesObject *self PyBuffer_Release(&table_view); return NULL; } - output_start = output = PyBytes_AsString(result); + output_start = output = PyBytes_AS_STRING(result); input = PyBytes_AS_STRING(input_obj); if (dellen == 0 && table_chars != NULL) { @@ -2388,508 +2164,6 @@ bytes_maketrans_impl(Py_buffer *frm, Py_ return _Py_bytes_maketrans(frm, to); } -/* find and count characters and substrings */ - -#define findchar(target, target_len, c) \ - ((char *)memchr((const void *)(target), c, target_len)) - -/* String ops must return a string. */ -/* If the object is subclass of string, create a copy */ -Py_LOCAL(PyBytesObject *) -return_self(PyBytesObject *self) -{ - if (PyBytes_CheckExact(self)) { - Py_INCREF(self); - return self; - } - return (PyBytesObject *)PyBytes_FromStringAndSize( - PyBytes_AS_STRING(self), - PyBytes_GET_SIZE(self)); -} - -Py_LOCAL_INLINE(Py_ssize_t) -countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount) -{ - Py_ssize_t count=0; - const char *start=target; - const char *end=target+target_len; - - while ( (start=findchar(start, end-start, c)) != NULL ) { - count++; - if (count >= maxcount) - break; - start += 1; - } - return count; -} - - -/* Algorithms for different cases of string replacement */ - -/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */ -Py_LOCAL(PyBytesObject *) -replace_interleave(PyBytesObject *self, - const char *to_s, Py_ssize_t to_len, - Py_ssize_t maxcount) -{ - char *self_s, *result_s; - Py_ssize_t self_len, result_len; - Py_ssize_t count, i; - PyBytesObject *result; - - self_len = PyBytes_GET_SIZE(self); - - /* 1 at the end plus 1 after every character; - count = min(maxcount, self_len + 1) */ - if (maxcount <= self_len) - count = maxcount; - else - /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */ - count = self_len + 1; - - /* Check for overflow */ - /* result_len = count * to_len + self_len; */ - assert(count > 0); - if (to_len > (PY_SSIZE_T_MAX - self_len) / count) { - PyErr_SetString(PyExc_OverflowError, - "replacement bytes are too long"); - return NULL; - } - result_len = count * to_len + self_len; - - if (! (result = (PyBytesObject *) - PyBytes_FromStringAndSize(NULL, result_len)) ) - return NULL; - - self_s = PyBytes_AS_STRING(self); - result_s = PyBytes_AS_STRING(result); - - if (to_len > 1) { - /* Lay the first one down (guaranteed this will occur) */ - Py_MEMCPY(result_s, to_s, to_len); - result_s += to_len; - count -= 1; - - for (i = 0; i < count; i++) { - *result_s++ = *self_s++; - Py_MEMCPY(result_s, to_s, to_len); - result_s += to_len; - } - } - else { - result_s[0] = to_s[0]; - result_s += to_len; - count -= 1; - for (i = 0; i < count; i++) { - *result_s++ = *self_s++; - result_s[0] = to_s[0]; - result_s += to_len; - } - } - - /* Copy the rest of the original string */ - Py_MEMCPY(result_s, self_s, self_len-i); - - return result; -} - -/* Special case for deleting a single character */ -/* len(self)>=1, len(from)==1, to="", maxcount>=1 */ -Py_LOCAL(PyBytesObject *) -replace_delete_single_character(PyBytesObject *self, - char from_c, Py_ssize_t maxcount) -{ - char *self_s, *result_s; - char *start, *next, *end; - Py_ssize_t self_len, result_len; - Py_ssize_t count; - PyBytesObject *result; - - self_len = PyBytes_GET_SIZE(self); - self_s = PyBytes_AS_STRING(self); - - count = countchar(self_s, self_len, from_c, maxcount); - if (count == 0) { - return return_self(self); - } - - result_len = self_len - count; /* from_len == 1 */ - assert(result_len>=0); - - if ( (result = (PyBytesObject *) - PyBytes_FromStringAndSize(NULL, result_len)) == NULL) - return NULL; - result_s = PyBytes_AS_STRING(result); - - start = self_s; - end = self_s + self_len; - while (count-- > 0) { - next = findchar(start, end-start, from_c); - if (next == NULL) - break; - Py_MEMCPY(result_s, start, next-start); - result_s += (next-start); - start = next+1; - } - Py_MEMCPY(result_s, start, end-start); - - return result; -} - -/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */ - -Py_LOCAL(PyBytesObject *) -replace_delete_substring(PyBytesObject *self, - const char *from_s, Py_ssize_t from_len, - Py_ssize_t maxcount) { - char *self_s, *result_s; - char *start, *next, *end; - Py_ssize_t self_len, result_len; - Py_ssize_t count, offset; - PyBytesObject *result; - - self_len = PyBytes_GET_SIZE(self); - self_s = PyBytes_AS_STRING(self); - - count = stringlib_count(self_s, self_len, - from_s, from_len, - maxcount); - - if (count == 0) { - /* no matches */ - return return_self(self); - } - - result_len = self_len - (count * from_len); - assert (result_len>=0); - - if ( (result = (PyBytesObject *) - PyBytes_FromStringAndSize(NULL, result_len)) == NULL ) - return NULL; - - result_s = PyBytes_AS_STRING(result); - - start = self_s; - end = self_s + self_len; - while (count-- > 0) { - offset = stringlib_find(start, end-start, - from_s, from_len, - 0); - if (offset == -1) - break; - next = start + offset; - - Py_MEMCPY(result_s, start, next-start); - - result_s += (next-start); - start = next+from_len; - } - Py_MEMCPY(result_s, start, end-start); - return result; -} - -/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */ -Py_LOCAL(PyBytesObject *) -replace_single_character_in_place(PyBytesObject *self, - char from_c, char to_c, - Py_ssize_t maxcount) -{ - char *self_s, *result_s, *start, *end, *next; - Py_ssize_t self_len; - PyBytesObject *result; - - /* The result string will be the same size */ - self_s = PyBytes_AS_STRING(self); - self_len = PyBytes_GET_SIZE(self); - - next = findchar(self_s, self_len, from_c); - - if (next == NULL) { - /* No matches; return the original string */ - return return_self(self); - } - - /* Need to make a new string */ - result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len); - if (result == NULL) - return NULL; - result_s = PyBytes_AS_STRING(result); - Py_MEMCPY(result_s, self_s, self_len); - - /* change everything in-place, starting with this one */ - start = result_s + (next-self_s); - *start = to_c; - start++; - end = result_s + self_len; - - while (--maxcount > 0) { - next = findchar(start, end-start, from_c); - if (next == NULL) - break; - *next = to_c; - start = next+1; - } - - return result; -} - -/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */ -Py_LOCAL(PyBytesObject *) -replace_substring_in_place(PyBytesObject *self, - const char *from_s, Py_ssize_t from_len, - const char *to_s, Py_ssize_t to_len, - Py_ssize_t maxcount) -{ - char *result_s, *start, *end; - char *self_s; - Py_ssize_t self_len, offset; - PyBytesObject *result; - - /* The result string will be the same size */ - - self_s = PyBytes_AS_STRING(self); - self_len = PyBytes_GET_SIZE(self); - - offset = stringlib_find(self_s, self_len, - from_s, from_len, - 0); - if (offset == -1) { - /* No matches; return the original string */ - return return_self(self); - } - - /* Need to make a new string */ - result = (PyBytesObject *) PyBytes_FromStringAndSize(NULL, self_len); - if (result == NULL) - return NULL; - result_s = PyBytes_AS_STRING(result); - Py_MEMCPY(result_s, self_s, self_len); - - /* change everything in-place, starting with this one */ - start = result_s + offset; - Py_MEMCPY(start, to_s, from_len); - start += from_len; - end = result_s + self_len; - - while ( --maxcount > 0) { - offset = stringlib_find(start, end-start, - from_s, from_len, - 0); - if (offset==-1) - break; - Py_MEMCPY(start+offset, to_s, from_len); - start += offset+from_len; - } - - return result; -} - -/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */ -Py_LOCAL(PyBytesObject *) -replace_single_character(PyBytesObject *self, - char from_c, - const char *to_s, Py_ssize_t to_len, - Py_ssize_t maxcount) -{ - char *self_s, *result_s; - char *start, *next, *end; - Py_ssize_t self_len, result_len; - Py_ssize_t count; - PyBytesObject *result; - - self_s = PyBytes_AS_STRING(self); - self_len = PyBytes_GET_SIZE(self); - - count = countchar(self_s, self_len, from_c, maxcount); - if (count == 0) { - /* no matches, return unchanged */ - return return_self(self); - } - - /* use the difference between current and new, hence the "-1" */ - /* result_len = self_len + count * (to_len-1) */ - assert(count > 0); - if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) { - PyErr_SetString(PyExc_OverflowError, - "replacement bytes are too long"); - return NULL; - } - result_len = self_len + count * (to_len - 1); - - if ( (result = (PyBytesObject *) - PyBytes_FromStringAndSize(NULL, result_len)) == NULL) - return NULL; - result_s = PyBytes_AS_STRING(result); - - start = self_s; - end = self_s + self_len; - while (count-- > 0) { - next = findchar(start, end-start, from_c); - if (next == NULL) - break; - - if (next == start) { - /* replace with the 'to' */ - Py_MEMCPY(result_s, to_s, to_len); - result_s += to_len; - start += 1; - } else { - /* copy the unchanged old then the 'to' */ - Py_MEMCPY(result_s, start, next-start); - result_s += (next-start); - Py_MEMCPY(result_s, to_s, to_len); - result_s += to_len; - start = next+1; - } - } - /* Copy the remainder of the remaining string */ - Py_MEMCPY(result_s, start, end-start); - - return result; -} - -/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */ -Py_LOCAL(PyBytesObject *) -replace_substring(PyBytesObject *self, - const char *from_s, Py_ssize_t from_len, - const char *to_s, Py_ssize_t to_len, - Py_ssize_t maxcount) { - char *self_s, *result_s; - char *start, *next, *end; - Py_ssize_t self_len, result_len; - Py_ssize_t count, offset; - PyBytesObject *result; - - self_s = PyBytes_AS_STRING(self); - self_len = PyBytes_GET_SIZE(self); - - count = stringlib_count(self_s, self_len, - from_s, from_len, - maxcount); - - if (count == 0) { - /* no matches, return unchanged */ - return return_self(self); - } - - /* Check for overflow */ - /* result_len = self_len + count * (to_len-from_len) */ - assert(count > 0); - if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) { - PyErr_SetString(PyExc_OverflowError, - "replacement bytes are too long"); - return NULL; - } - result_len = self_len + count * (to_len-from_len); - - if ( (result = (PyBytesObject *) - PyBytes_FromStringAndSize(NULL, result_len)) == NULL) - return NULL; - result_s = PyBytes_AS_STRING(result); - - start = self_s; - end = self_s + self_len; - while (count-- > 0) { - offset = stringlib_find(start, end-start, - from_s, from_len, - 0); - if (offset == -1) - break; - next = start+offset; - if (next == start) { - /* replace with the 'to' */ - Py_MEMCPY(result_s, to_s, to_len); - result_s += to_len; - start += from_len; - } else { - /* copy the unchanged old then the 'to' */ - Py_MEMCPY(result_s, start, next-start); - result_s += (next-start); - Py_MEMCPY(result_s, to_s, to_len); - result_s += to_len; - start = next+from_len; - } - } - /* Copy the remainder of the remaining string */ - Py_MEMCPY(result_s, start, end-start); - - return result; -} - - -Py_LOCAL(PyBytesObject *) -replace(PyBytesObject *self, - const char *from_s, Py_ssize_t from_len, - const char *to_s, Py_ssize_t to_len, - Py_ssize_t maxcount) -{ - if (maxcount < 0) { - maxcount = PY_SSIZE_T_MAX; - } else if (maxcount == 0 || PyBytes_GET_SIZE(self) == 0) { - /* nothing to do; return the original string */ - return return_self(self); - } - - if (maxcount == 0 || - (from_len == 0 && to_len == 0)) { - /* nothing to do; return the original string */ - return return_self(self); - } - - /* Handle zero-length special cases */ - - if (from_len == 0) { - /* insert the 'to' string everywhere. */ - /* >>> "Python".replace("", ".") */ - /* '.P.y.t.h.o.n.' */ - return replace_interleave(self, to_s, to_len, maxcount); - } - - /* Except for "".replace("", "A") == "A" there is no way beyond this */ - /* point for an empty self string to generate a non-empty string */ - /* Special case so the remaining code always gets a non-empty string */ - if (PyBytes_GET_SIZE(self) == 0) { - return return_self(self); - } - - if (to_len == 0) { - /* delete all occurrences of 'from' string */ - if (from_len == 1) { - return replace_delete_single_character( - self, from_s[0], maxcount); - } else { - return replace_delete_substring(self, from_s, - from_len, maxcount); - } - } - - /* Handle special case where both strings have the same length */ - - if (from_len == to_len) { - if (from_len == 1) { - return replace_single_character_in_place( - self, - from_s[0], - to_s[0], - maxcount); - } else { - return replace_substring_in_place( - self, from_s, from_len, to_s, to_len, - maxcount); - } - } - - /* Otherwise use the more generic algorithms */ - if (from_len == 1) { - return replace_single_character(self, from_s[0], - to_s, to_len, maxcount); - } else { - /* len('from')>=2, len('to')>=1 */ - return replace_substring(self, from_s, from_len, to_s, to_len, - maxcount); - } -} - /*[clinic input] bytes.replace @@ -2912,154 +2186,13 @@ bytes_replace_impl(PyBytesObject*self, P Py_ssize_t count) /*[clinic end generated code: output=403dc9d7a83c5a1d input=b2fbbf0bf04de8e5]*/ { - return (PyObject *)replace((PyBytesObject *) self, + return (PyObject *)stringlib_replace((PyObject *) self, (const char *)old->buf, old->len, (const char *)new->buf, new->len, count); } /** End DALKE **/ -/* Matches the end (direction >= 0) or start (direction < 0) of self - * against substr, using the start and end arguments. Returns - * -1 on error, 0 if not found and 1 if found. - */ -Py_LOCAL(int) -_bytes_tailmatch(PyBytesObject *self, PyObject *substr, Py_ssize_t start, - Py_ssize_t end, int direction) -{ - Py_ssize_t len = PyBytes_GET_SIZE(self); - Py_ssize_t slen; - Py_buffer sub_view = {NULL, NULL}; - const char* sub; - const char* str; - - if (PyBytes_Check(substr)) { - sub = PyBytes_AS_STRING(substr); - slen = PyBytes_GET_SIZE(substr); - } - else { - if (PyObject_GetBuffer(substr, &sub_view, PyBUF_SIMPLE) != 0) - return -1; - sub = sub_view.buf; - slen = sub_view.len; - } - str = PyBytes_AS_STRING(self); - - ADJUST_INDICES(start, end, len); - - if (direction < 0) { - /* startswith */ - if (start+slen > len) - goto notfound; - } else { - /* endswith */ - if (end-start < slen || start > len) - goto notfound; - - if (end-slen > start) - start = end - slen; - } - if (end-start < slen) - goto notfound; - if (memcmp(str+start, sub, slen) != 0) - goto notfound; - - PyBuffer_Release(&sub_view); - return 1; - -notfound: - PyBuffer_Release(&sub_view); - return 0; -} - - -PyDoc_STRVAR(startswith__doc__, -"B.startswith(prefix[, start[, end]]) -> bool\n\ -\n\ -Return True if B starts with the specified prefix, False otherwise.\n\ -With optional start, test B beginning at that position.\n\ -With optional end, stop comparing B at that position.\n\ -prefix can also be a tuple of bytes to try."); - -static PyObject * -bytes_startswith(PyBytesObject *self, PyObject *args) -{ - Py_ssize_t start = 0; - Py_ssize_t end = PY_SSIZE_T_MAX; - PyObject *subobj; - int result; - - if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end)) - return NULL; - if (PyTuple_Check(subobj)) { - Py_ssize_t i; - for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { - result = _bytes_tailmatch(self, - PyTuple_GET_ITEM(subobj, i), - start, end, -1); - if (result == -1) - return NULL; - else if (result) { - Py_RETURN_TRUE; - } - } - Py_RETURN_FALSE; - } - result = _bytes_tailmatch(self, subobj, start, end, -1); - if (result == -1) { - if (PyErr_ExceptionMatches(PyExc_TypeError)) - PyErr_Format(PyExc_TypeError, "startswith first arg must be bytes " - "or a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name); - return NULL; - } - else - return PyBool_FromLong(result); -} - - -PyDoc_STRVAR(endswith__doc__, -"B.endswith(suffix[, start[, end]]) -> bool\n\ -\n\ -Return True if B ends with the specified suffix, False otherwise.\n\ -With optional start, test B beginning at that position.\n\ -With optional end, stop comparing B at that position.\n\ -suffix can also be a tuple of bytes to try."); - -static PyObject * -bytes_endswith(PyBytesObject *self, PyObject *args) -{ - Py_ssize_t start = 0; - Py_ssize_t end = PY_SSIZE_T_MAX; - PyObject *subobj; - int result; - - if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end)) - return NULL; - if (PyTuple_Check(subobj)) { - Py_ssize_t i; - for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { - result = _bytes_tailmatch(self, - PyTuple_GET_ITEM(subobj, i), - start, end, +1); - if (result == -1) - return NULL; - else if (result) { - Py_RETURN_TRUE; - } - } - Py_RETURN_FALSE; - } - result = _bytes_tailmatch(self, subobj, start, end, +1); - if (result == -1) { - if (PyErr_ExceptionMatches(PyExc_TypeError)) - PyErr_Format(PyExc_TypeError, "endswith first arg must be bytes or " - "a tuple of bytes, not %s", Py_TYPE(subobj)->tp_name); - return NULL; - } - else - return PyBool_FromLong(result); -} - /*[clinic input] bytes.decode @@ -3210,14 +2343,6 @@ Create a string of hexadecimal numbers f Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'."); static PyObject * -bytes_hex(PyBytesObject *self) -{ - char* argbuf = PyBytes_AS_STRING(self); - Py_ssize_t arglen = PyBytes_GET_SIZE(self); - return _Py_strhex(argbuf, arglen); -} - -static PyObject * bytes_getnewargs(PyBytesObject *v) { return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v)); @@ -3229,17 +2354,19 @@ bytes_methods[] = { {"__getnewargs__", (PyCFunction)bytes_getnewargs, METH_NOARGS}, {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS, _Py_capitalize__doc__}, - {"center", (PyCFunction)stringlib_center, METH_VARARGS, center__doc__}, - {"count", (PyCFunction)bytes_count, METH_VARARGS, count__doc__}, + {"center", (PyCFunction)stringlib_center, METH_VARARGS, _Py_center__doc__}, + {"count", (PyCFunction)stringlib_method_count, METH_VARARGS, + _Py_count__doc__}, BYTES_DECODE_METHODDEF - {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS, - endswith__doc__}, + {"endswith", (PyCFunction)stringlib_endswith, METH_VARARGS, + _Py_endswith__doc__}, {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS, - expandtabs__doc__}, - {"find", (PyCFunction)bytes_find, METH_VARARGS, find__doc__}, + _Py_expandtabs__doc__}, + {"find", (PyCFunction)stringlib_method_find, METH_VARARGS, + _Py_find__doc__}, BYTES_FROMHEX_METHODDEF - {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__}, - {"index", (PyCFunction)bytes_index, METH_VARARGS, index__doc__}, + {"hex", (PyCFunction)stringlib_hex, METH_NOARGS, hex__doc__}, + {"index", (PyCFunction)stringlib_index, METH_VARARGS, _Py_index__doc__}, {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS, _Py_isalnum__doc__}, {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS, @@ -3255,29 +2382,30 @@ bytes_methods[] = { {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS, _Py_isupper__doc__}, BYTES_JOIN_METHODDEF - {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, ljust__doc__}, + {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, _Py_ljust__doc__}, {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__}, BYTES_LSTRIP_METHODDEF BYTES_MAKETRANS_METHODDEF BYTES_PARTITION_METHODDEF BYTES_REPLACE_METHODDEF - {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, rfind__doc__}, - {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, rindex__doc__}, - {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, rjust__doc__}, + {"rfind", (PyCFunction)stringlib_method_rfind, METH_VARARGS, + _Py_rfind__doc__}, + {"rindex", (PyCFunction)stringlib_rindex, METH_VARARGS, _Py_rindex__doc__}, + {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, _Py_rjust__doc__}, BYTES_RPARTITION_METHODDEF BYTES_RSPLIT_METHODDEF BYTES_RSTRIP_METHODDEF BYTES_SPLIT_METHODDEF BYTES_SPLITLINES_METHODDEF - {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS, - startswith__doc__}, + {"startswith", (PyCFunction)stringlib_startswith, METH_VARARGS, + _Py_startswith__doc__}, BYTES_STRIP_METHODDEF {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS, _Py_swapcase__doc__}, {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__}, BYTES_TRANSLATE_METHODDEF {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__}, - {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, zfill__doc__}, + {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, _Py_zfill__doc__}, {NULL, NULL} /* sentinel */ }; diff -r 570ada02d0f0 Objects/stringlib/codecs.h --- a/Objects/stringlib/codecs.h Fri Apr 15 02:27:11 2016 +0000 +++ b/Objects/stringlib/codecs.h Fri Apr 15 11:00:35 2016 +0300 @@ -1,6 +1,8 @@ /* stringlib: codec implementations */ -#if STRINGLIB_IS_UNICODE +#if !STRINGLIB_IS_UNICODE +# error "codecs.h is specific to Unicode" +#endif /* Mask to quickly check whether a C 'long' contains a non-ASCII, UTF8-encoded char. */ @@ -823,5 +825,3 @@ STRINGLIB(utf32_encode)(const STRINGLIB_ #undef SWAB4 #endif - -#endif /* STRINGLIB_IS_UNICODE */ diff -r 570ada02d0f0 Objects/stringlib/ctype.h --- a/Objects/stringlib/ctype.h Fri Apr 15 02:27:11 2016 +0000 +++ b/Objects/stringlib/ctype.h Fri Apr 15 11:00:35 2016 +0300 @@ -1,5 +1,6 @@ -/* NOTE: this API is -ONLY- for use with single byte character strings. */ -/* Do not use it with Unicode. */ +#if STRINGLIB_IS_UNICODE +# error "ctype.h only compatible with byte-wise strings" +#endif #include "bytes_methods.h" diff -r 570ada02d0f0 Objects/stringlib/find.h --- a/Objects/stringlib/find.h Fri Apr 15 02:27:11 2016 +0000 +++ b/Objects/stringlib/find.h Fri Apr 15 11:00:35 2016 +0300 @@ -117,76 +117,3 @@ STRINGLIB(parse_args_finds)(const char * } #undef FORMAT_BUFFER_SIZE - -#if STRINGLIB_IS_UNICODE - -/* -Wraps stringlib_parse_args_finds() and additionally ensures that the -first argument is a unicode object. -*/ - -Py_LOCAL_INLINE(int) -STRINGLIB(parse_args_finds_unicode)(const char * function_name, PyObject *args, - PyObject **substring, - Py_ssize_t *start, Py_ssize_t *end) -{ - if(STRINGLIB(parse_args_finds)(function_name, args, substring, - start, end)) { - if (ensure_unicode(*substring) < 0) - return 0; - return 1; - } - return 0; -} - -#else /* !STRINGLIB_IS_UNICODE */ - -/* -Wraps stringlib_parse_args_finds() and additionally checks whether the -first argument is an integer in range(0, 256). - -If this is the case, writes the integer value to the byte parameter -and sets subobj to NULL. Otherwise, sets the first argument to subobj -and doesn't touch byte. The other parameters are similar to those of -stringlib_parse_args_finds(). -*/ - -Py_LOCAL_INLINE(int) -STRINGLIB(parse_args_finds_byte)(const char *function_name, PyObject *args, - PyObject **subobj, char *byte, - Py_ssize_t *start, Py_ssize_t *end) -{ - PyObject *tmp_subobj; - Py_ssize_t ival; - PyObject *err; - - if(!STRINGLIB(parse_args_finds)(function_name, args, &tmp_subobj, - start, end)) - return 0; - - if (!PyNumber_Check(tmp_subobj)) { - *subobj = tmp_subobj; - return 1; - } - - ival = PyNumber_AsSsize_t(tmp_subobj, PyExc_OverflowError); - if (ival == -1) { - err = PyErr_Occurred(); - if (err && !PyErr_GivenExceptionMatches(err, PyExc_OverflowError)) { - PyErr_Clear(); - *subobj = tmp_subobj; - return 1; - } - } - - if (ival < 0 || ival > 255) { - PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)"); - return 0; - } - - *subobj = NULL; - *byte = (char)ival; - return 1; -} - -#endif /* STRINGLIB_IS_UNICODE */ diff -r 570ada02d0f0 Objects/stringlib/find_max_char.h --- a/Objects/stringlib/find_max_char.h Fri Apr 15 02:27:11 2016 +0000 +++ b/Objects/stringlib/find_max_char.h Fri Apr 15 11:00:35 2016 +0300 @@ -1,6 +1,8 @@ /* Finding the optimal width of unicode characters in a buffer */ -#if STRINGLIB_IS_UNICODE +#if !STRINGLIB_IS_UNICODE +# error "find_max_char.h is specific to Unicode" +#endif /* Mask to quickly check whether a C 'long' contains a non-ASCII, UTF8-encoded char. */ @@ -129,5 +131,4 @@ STRINGLIB(find_max_char)(const STRINGLIB #undef MAX_CHAR_UCS4 #endif /* STRINGLIB_SIZEOF_CHAR == 1 */ -#endif /* STRINGLIB_IS_UNICODE */ diff -r 570ada02d0f0 Objects/stringlib/join.h --- a/Objects/stringlib/join.h Fri Apr 15 02:27:11 2016 +0000 +++ b/Objects/stringlib/join.h Fri Apr 15 11:00:35 2016 +0300 @@ -1,6 +1,6 @@ /* stringlib: bytes joining implementation */ -#if STRINGLIB_SIZEOF_CHAR != 1 +#if STRINGLIB_IS_UNICODE #error join.h only compatible with byte-wise strings #endif diff -r 570ada02d0f0 Objects/stringlib/localeutil.h --- a/Objects/stringlib/localeutil.h Fri Apr 15 02:27:11 2016 +0000 +++ b/Objects/stringlib/localeutil.h Fri Apr 15 11:00:35 2016 +0300 @@ -2,8 +2,8 @@ #include -#ifndef STRINGLIB_IS_UNICODE -# error "localeutil is specific to Unicode" +#if !STRINGLIB_IS_UNICODE +# error "localeutil.h is specific to Unicode" #endif typedef struct { diff -r 570ada02d0f0 Objects/stringlib/transmogrify.h --- a/Objects/stringlib/transmogrify.h Fri Apr 15 02:27:11 2016 +0000 +++ b/Objects/stringlib/transmogrify.h Fri Apr 15 11:00:35 2016 +0300 @@ -1,14 +1,76 @@ -/* NOTE: this API is -ONLY- for use with single byte character strings. */ -/* Do not use it with Unicode. */ +#if STRINGLIB_IS_UNICODE +# error "transmogrify.h only compatible with byte-wise strings" +#endif + +Py_LOCAL(PyObject *) +stringlib_method_find(PyObject *self, PyObject *args) +{ + return _Py_bytes_find(STRINGLIB_STR(self), STRINGLIB_LEN(self), args); +} + +Py_LOCAL(PyObject *) +stringlib_index(PyObject *self, PyObject *args) +{ + return _Py_bytes_index(STRINGLIB_STR(self), STRINGLIB_LEN(self), args); +} + +Py_LOCAL(PyObject *) +stringlib_method_rfind(PyObject *self, PyObject *args) +{ + return _Py_bytes_rfind(STRINGLIB_STR(self), STRINGLIB_LEN(self), args); +} + +Py_LOCAL(PyObject *) +stringlib_rindex(PyObject *self, PyObject *args) +{ + return _Py_bytes_rindex(STRINGLIB_STR(self), STRINGLIB_LEN(self), args); +} + +Py_LOCAL(PyObject *) +stringlib_method_count(PyObject *self, PyObject *args) +{ + return _Py_bytes_count(STRINGLIB_STR(self), STRINGLIB_LEN(self), args); +} + +Py_LOCAL(int) +stringlib_contains(PyObject *self, PyObject *arg) +{ + return _Py_bytes_contains(STRINGLIB_STR(self), STRINGLIB_LEN(self), arg); +} + +Py_LOCAL(PyObject *) +stringlib_startswith(PyObject *self, PyObject *args) +{ + return _Py_bytes_startswith(STRINGLIB_STR(self), STRINGLIB_LEN(self), args); +} + +Py_LOCAL(PyObject *) +stringlib_endswith(PyObject *self, PyObject *args) +{ + return _Py_bytes_endswith(STRINGLIB_STR(self), STRINGLIB_LEN(self), args); +} + +Py_LOCAL(PyObject *) +stringlib_hex(PyObject *self) +{ + return _Py_strhex(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +} + /* the more complicated methods. parts of these should be pulled out into the shared code in bytes_methods.c to cut down on duplicate code bloat. */ -PyDoc_STRVAR(expandtabs__doc__, -"B.expandtabs(tabsize=8) -> copy of B\n\ -\n\ -Return a copy of B where all tab characters are expanded using spaces.\n\ -If tabsize is not given, a tab size of 8 characters is assumed."); +Py_LOCAL_INLINE(PyObject *) +return_self(PyObject *self) +{ +#if !STRINGLIB_MUTABLE + if (STRINGLIB_CHECK_EXACT(self)) { + Py_INCREF(self); + return self; + } +#endif + return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); +} static PyObject* stringlib_expandtabs(PyObject *self, PyObject *args, PyObject *kwds) @@ -93,39 +155,25 @@ pad(PyObject *self, Py_ssize_t left, Py_ if (right < 0) right = 0; - if (left == 0 && right == 0 && STRINGLIB_CHECK_EXACT(self)) { -#if STRINGLIB_MUTABLE - /* We're defined as returning a copy; If the object is mutable - * that means we must make an identical copy. */ - return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); -#else - Py_INCREF(self); - return (PyObject *)self; -#endif /* STRINGLIB_MUTABLE */ + if (left == 0 && right == 0) { + return return_self(self); } - u = STRINGLIB_NEW(NULL, - left + STRINGLIB_LEN(self) + right); + u = STRINGLIB_NEW(NULL, left + STRINGLIB_LEN(self) + right); if (u) { if (left) memset(STRINGLIB_STR(u), fill, left); Py_MEMCPY(STRINGLIB_STR(u) + left, - STRINGLIB_STR(self), - STRINGLIB_LEN(self)); + STRINGLIB_STR(self), + STRINGLIB_LEN(self)); if (right) memset(STRINGLIB_STR(u) + left + STRINGLIB_LEN(self), - fill, right); + fill, right); } return u; } -PyDoc_STRVAR(ljust__doc__, -"B.ljust(width[, fillchar]) -> copy of B\n" -"\n" -"Return B left justified in a string of length width. Padding is\n" -"done using the specified fill character (default is a space)."); - static PyObject * stringlib_ljust(PyObject *self, PyObject *args) { @@ -135,27 +183,14 @@ stringlib_ljust(PyObject *self, PyObject if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar)) return NULL; - if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) { -#if STRINGLIB_MUTABLE - /* We're defined as returning a copy; If the object is mutable - * that means we must make an identical copy. */ - return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); -#else - Py_INCREF(self); - return (PyObject*) self; -#endif + if (STRINGLIB_LEN(self) >= width) { + return return_self(self); } return pad(self, 0, width - STRINGLIB_LEN(self), fillchar); } -PyDoc_STRVAR(rjust__doc__, -"B.rjust(width[, fillchar]) -> copy of B\n" -"\n" -"Return B right justified in a string of length width. Padding is\n" -"done using the specified fill character (default is a space)"); - static PyObject * stringlib_rjust(PyObject *self, PyObject *args) { @@ -165,27 +200,14 @@ stringlib_rjust(PyObject *self, PyObject if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar)) return NULL; - if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) { -#if STRINGLIB_MUTABLE - /* We're defined as returning a copy; If the object is mutable - * that means we must make an identical copy. */ - return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); -#else - Py_INCREF(self); - return (PyObject*) self; -#endif + if (STRINGLIB_LEN(self) >= width) { + return return_self(self); } return pad(self, width - STRINGLIB_LEN(self), 0, fillchar); } -PyDoc_STRVAR(center__doc__, -"B.center(width[, fillchar]) -> copy of B\n" -"\n" -"Return B centered in a string of length width. Padding is\n" -"done using the specified fill character (default is a space)."); - static PyObject * stringlib_center(PyObject *self, PyObject *args) { @@ -196,15 +218,8 @@ stringlib_center(PyObject *self, PyObjec if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar)) return NULL; - if (STRINGLIB_LEN(self) >= width && STRINGLIB_CHECK_EXACT(self)) { -#if STRINGLIB_MUTABLE - /* We're defined as returning a copy; If the object is mutable - * that means we must make an identical copy. */ - return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); -#else - Py_INCREF(self); - return (PyObject*) self; -#endif + if (STRINGLIB_LEN(self) >= width) { + return return_self(self); } marg = width - STRINGLIB_LEN(self); @@ -213,12 +228,6 @@ stringlib_center(PyObject *self, PyObjec return pad(self, left, marg - left, fillchar); } -PyDoc_STRVAR(zfill__doc__, -"B.zfill(width) -> copy of B\n" -"\n" -"Pad a numeric string B with zeros on the left, to fill a field\n" -"of the specified width. B is never truncated."); - static PyObject * stringlib_zfill(PyObject *self, PyObject *args) { @@ -231,21 +240,7 @@ stringlib_zfill(PyObject *self, PyObject return NULL; if (STRINGLIB_LEN(self) >= width) { - if (STRINGLIB_CHECK_EXACT(self)) { -#if STRINGLIB_MUTABLE - /* We're defined as returning a copy; If the object is mutable - * that means we must make an identical copy. */ - return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self)); -#else - Py_INCREF(self); - return (PyObject*) self; -#endif - } - else - return STRINGLIB_NEW( - STRINGLIB_STR(self), - STRINGLIB_LEN(self) - ); + return return_self(self); } fill = width - STRINGLIB_LEN(self); @@ -262,5 +257,489 @@ stringlib_zfill(PyObject *self, PyObject p[fill] = '0'; } - return (PyObject*) s; + return s; } + + +/* find and count characters and substrings */ + +#define findchar(target, target_len, c) \ + ((char *)memchr((const void *)(target), c, target_len)) + + +Py_LOCAL_INLINE(Py_ssize_t) +countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount) +{ + Py_ssize_t count=0; + const char *start=target; + const char *end=target+target_len; + + while ( (start=findchar(start, end-start, c)) != NULL ) { + count++; + if (count >= maxcount) + break; + start += 1; + } + return count; +} + + +/* Algorithms for different cases of string replacement */ + +/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */ +Py_LOCAL(PyObject *) +stringlib_replace_interleave(PyObject *self, + const char *to_s, Py_ssize_t to_len, + Py_ssize_t maxcount) +{ + const char *self_s; + char *result_s; + Py_ssize_t self_len, result_len; + Py_ssize_t count, i; + PyObject *result; + + self_len = STRINGLIB_LEN(self); + + /* 1 at the end plus 1 after every character; + count = min(maxcount, self_len + 1) */ + if (maxcount <= self_len) + count = maxcount; + else + /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */ + count = self_len + 1; + + /* Check for overflow */ + /* result_len = count * to_len + self_len; */ + assert(count > 0); + if (to_len > (PY_SSIZE_T_MAX - self_len) / count) { + PyErr_SetString(PyExc_OverflowError, + "replace bytes are too long"); + return NULL; + } + result_len = count * to_len + self_len; + + if (! (result = STRINGLIB_NEW(NULL, result_len)) ) + return NULL; + + self_s = STRINGLIB_STR(self); + result_s = STRINGLIB_STR(result); + + if (to_len > 1) { + /* Lay the first one down (guaranteed this will occur) */ + Py_MEMCPY(result_s, to_s, to_len); + result_s += to_len; + count -= 1; + + for (i = 0; i < count; i++) { + *result_s++ = *self_s++; + Py_MEMCPY(result_s, to_s, to_len); + result_s += to_len; + } + } + else { + result_s[0] = to_s[0]; + result_s += to_len; + count -= 1; + for (i = 0; i < count; i++) { + *result_s++ = *self_s++; + result_s[0] = to_s[0]; + result_s += to_len; + } + } + + /* Copy the rest of the original string */ + Py_MEMCPY(result_s, self_s, self_len-i); + + return result; +} + +/* Special case for deleting a single character */ +/* len(self)>=1, len(from)==1, to="", maxcount>=1 */ +Py_LOCAL(PyObject *) +stringlib_replace_delete_single_character(PyObject *self, + char from_c, Py_ssize_t maxcount) +{ + const char *self_s, *start, *next, *end; + char *result_s; + Py_ssize_t self_len, result_len; + Py_ssize_t count; + PyObject *result; + + self_len = STRINGLIB_LEN(self); + self_s = STRINGLIB_STR(self); + + count = countchar(self_s, self_len, from_c, maxcount); + if (count == 0) { + return return_self(self); + } + + result_len = self_len - count; /* from_len == 1 */ + assert(result_len>=0); + + if ( (result = STRINGLIB_NEW(NULL, result_len)) == NULL) + return NULL; + result_s = STRINGLIB_STR(result); + + start = self_s; + end = self_s + self_len; + while (count-- > 0) { + next = findchar(start, end-start, from_c); + if (next == NULL) + break; + Py_MEMCPY(result_s, start, next-start); + result_s += (next-start); + start = next+1; + } + Py_MEMCPY(result_s, start, end-start); + + return result; +} + +/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */ + +Py_LOCAL(PyObject *) +stringlib_replace_delete_substring(PyObject *self, + const char *from_s, Py_ssize_t from_len, + Py_ssize_t maxcount) +{ + const char *self_s, *start, *next, *end; + char *result_s; + Py_ssize_t self_len, result_len; + Py_ssize_t count, offset; + PyObject *result; + + self_len = STRINGLIB_LEN(self); + self_s = STRINGLIB_STR(self); + + count = stringlib_count(self_s, self_len, + from_s, from_len, + maxcount); + + if (count == 0) { + /* no matches */ + return return_self(self); + } + + result_len = self_len - (count * from_len); + assert (result_len>=0); + + if ( (result = STRINGLIB_NEW(NULL, result_len)) == NULL ) + return NULL; + + result_s = STRINGLIB_STR(result); + + start = self_s; + end = self_s + self_len; + while (count-- > 0) { + offset = stringlib_find(start, end-start, + from_s, from_len, + 0); + if (offset == -1) + break; + next = start + offset; + + Py_MEMCPY(result_s, start, next-start); + + result_s += (next-start); + start = next+from_len; + } + Py_MEMCPY(result_s, start, end-start); + return result; +} + +/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */ +Py_LOCAL(PyObject *) +stringlib_replace_single_character_in_place(PyObject *self, + char from_c, char to_c, + Py_ssize_t maxcount) +{ + const char *self_s, *end; + char *result_s, *start, *next; + Py_ssize_t self_len; + PyObject *result; + + /* The result string will be the same size */ + self_s = STRINGLIB_STR(self); + self_len = STRINGLIB_LEN(self); + + next = findchar(self_s, self_len, from_c); + + if (next == NULL) { + /* No matches; return the original bytes */ + return return_self(self); + } + + /* Need to make a new bytes */ + result = STRINGLIB_NEW(NULL, self_len); + if (result == NULL) + return NULL; + result_s = STRINGLIB_STR(result); + Py_MEMCPY(result_s, self_s, self_len); + + /* change everything in-place, starting with this one */ + start = result_s + (next-self_s); + *start = to_c; + start++; + end = result_s + self_len; + + while (--maxcount > 0) { + next = findchar(start, end-start, from_c); + if (next == NULL) + break; + *next = to_c; + start = next+1; + } + + return result; +} + +/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */ +Py_LOCAL(PyObject *) +stringlib_replace_substring_in_place(PyObject *self, + const char *from_s, Py_ssize_t from_len, + const char *to_s, Py_ssize_t to_len, + Py_ssize_t maxcount) +{ + const char *self_s, *end; + char *result_s, *start; + Py_ssize_t self_len, offset; + PyObject *result; + + /* The result bytes will be the same size */ + + self_s = STRINGLIB_STR(self); + self_len = STRINGLIB_LEN(self); + + offset = stringlib_find(self_s, self_len, + from_s, from_len, + 0); + if (offset == -1) { + /* No matches; return the original bytes */ + return return_self(self); + } + + /* Need to make a new bytes */ + result = STRINGLIB_NEW(NULL, self_len); + if (result == NULL) + return NULL; + result_s = STRINGLIB_STR(result); + Py_MEMCPY(result_s, self_s, self_len); + + /* change everything in-place, starting with this one */ + start = result_s + offset; + Py_MEMCPY(start, to_s, from_len); + start += from_len; + end = result_s + self_len; + + while ( --maxcount > 0) { + offset = stringlib_find(start, end-start, + from_s, from_len, + 0); + if (offset==-1) + break; + Py_MEMCPY(start+offset, to_s, from_len); + start += offset+from_len; + } + + return result; +} + +/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */ +Py_LOCAL(PyObject *) +stringlib_replace_single_character(PyObject *self, + char from_c, + const char *to_s, Py_ssize_t to_len, + Py_ssize_t maxcount) +{ + const char *self_s, *start, *next, *end; + char *result_s; + Py_ssize_t self_len, result_len; + Py_ssize_t count; + PyObject *result; + + self_s = STRINGLIB_STR(self); + self_len = STRINGLIB_LEN(self); + + count = countchar(self_s, self_len, from_c, maxcount); + if (count == 0) { + /* no matches, return unchanged */ + return return_self(self); + } + + /* use the difference between current and new, hence the "-1" */ + /* result_len = self_len + count * (to_len-1) */ + assert(count > 0); + if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) { + PyErr_SetString(PyExc_OverflowError, "replace bytes is too long"); + return NULL; + } + result_len = self_len + count * (to_len - 1); + + if ( (result = STRINGLIB_NEW(NULL, result_len)) == NULL) + return NULL; + result_s = STRINGLIB_STR(result); + + start = self_s; + end = self_s + self_len; + while (count-- > 0) { + next = findchar(start, end-start, from_c); + if (next == NULL) + break; + + if (next == start) { + /* replace with the 'to' */ + Py_MEMCPY(result_s, to_s, to_len); + result_s += to_len; + start += 1; + } else { + /* copy the unchanged old then the 'to' */ + Py_MEMCPY(result_s, start, next-start); + result_s += (next-start); + Py_MEMCPY(result_s, to_s, to_len); + result_s += to_len; + start = next+1; + } + } + /* Copy the remainder of the remaining bytes */ + Py_MEMCPY(result_s, start, end-start); + + return result; +} + +/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */ +Py_LOCAL(PyObject *) +stringlib_replace_substring(PyObject *self, + const char *from_s, Py_ssize_t from_len, + const char *to_s, Py_ssize_t to_len, + Py_ssize_t maxcount) +{ + const char *self_s, *start, *next, *end; + char *result_s; + Py_ssize_t self_len, result_len; + Py_ssize_t count, offset; + PyObject *result; + + self_s = STRINGLIB_STR(self); + self_len = STRINGLIB_LEN(self); + + count = stringlib_count(self_s, self_len, + from_s, from_len, + maxcount); + + if (count == 0) { + /* no matches, return unchanged */ + return return_self(self); + } + + /* Check for overflow */ + /* result_len = self_len + count * (to_len-from_len) */ + assert(count > 0); + if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) { + PyErr_SetString(PyExc_OverflowError, "replace bytes is too long"); + return NULL; + } + result_len = self_len + count * (to_len - from_len); + + if ( (result = STRINGLIB_NEW(NULL, result_len)) == NULL) + return NULL; + result_s = STRINGLIB_STR(result); + + start = self_s; + end = self_s + self_len; + while (count-- > 0) { + offset = stringlib_find(start, end-start, + from_s, from_len, + 0); + if (offset == -1) + break; + next = start+offset; + if (next == start) { + /* replace with the 'to' */ + Py_MEMCPY(result_s, to_s, to_len); + result_s += to_len; + start += from_len; + } else { + /* copy the unchanged old then the 'to' */ + Py_MEMCPY(result_s, start, next-start); + result_s += (next-start); + Py_MEMCPY(result_s, to_s, to_len); + result_s += to_len; + start = next+from_len; + } + } + /* Copy the remainder of the remaining bytes */ + Py_MEMCPY(result_s, start, end-start); + + return result; +} + + +Py_LOCAL(PyObject *) +stringlib_replace(PyObject *self, + const char *from_s, Py_ssize_t from_len, + const char *to_s, Py_ssize_t to_len, + Py_ssize_t maxcount) +{ + if (maxcount < 0) { + maxcount = PY_SSIZE_T_MAX; + } else if (maxcount == 0 || STRINGLIB_LEN(self) == 0) { + /* nothing to do; return the original bytes */ + return return_self(self); + } + + if (maxcount == 0 || + (from_len == 0 && to_len == 0)) { + /* nothing to do; return the original bytes */ + return return_self(self); + } + + /* Handle zero-length special cases */ + + if (from_len == 0) { + /* insert the 'to' bytes everywhere. */ + /* >>> b"Python".replace(b"", b".") */ + /* b'.P.y.t.h.o.n.' */ + return stringlib_replace_interleave(self, to_s, to_len, maxcount); + } + + /* Except for b"".replace(b"", b"A") == b"A" there is no way beyond this */ + /* point for an empty self bytes to generate a non-empty bytes */ + /* Special case so the remaining code always gets a non-empty bytes */ + if (STRINGLIB_LEN(self) == 0) { + return return_self(self); + } + + if (to_len == 0) { + /* delete all occurrences of 'from' bytes */ + if (from_len == 1) { + return stringlib_replace_delete_single_character( + self, from_s[0], maxcount); + } else { + return stringlib_replace_delete_substring(self, from_s, from_len, maxcount); + } + } + + /* Handle special case where both bytes have the same length */ + + if (from_len == to_len) { + if (from_len == 1) { + return stringlib_replace_single_character_in_place( + self, + from_s[0], + to_s[0], + maxcount); + } else { + return stringlib_replace_substring_in_place( + self, from_s, from_len, to_s, to_len, maxcount); + } + } + + /* Otherwise use the more generic algorithms */ + if (from_len == 1) { + return stringlib_replace_single_character(self, from_s[0], + to_s, to_len, maxcount); + } else { + /* len('from')>=2, len('to')>=1 */ + return stringlib_replace_substring(self, from_s, from_len, to_s, to_len, maxcount); + } +} diff -r 570ada02d0f0 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Fri Apr 15 02:27:11 2016 +0000 +++ b/Objects/unicodeobject.c Fri Apr 15 11:00:35 2016 +0300 @@ -11244,6 +11244,25 @@ PyUnicode_AppendAndDel(PyObject **pleft, Py_XDECREF(right); } +/* +Wraps stringlib_parse_args_finds() and additionally ensures that the +first argument is a unicode object. +*/ + +Py_LOCAL_INLINE(int) +parse_args_finds_unicode(const char * function_name, PyObject *args, + PyObject **substring, + Py_ssize_t *start, Py_ssize_t *end) +{ + if(stringlib_parse_args_finds(function_name, args, substring, + start, end)) { + if (ensure_unicode(*substring) < 0) + return 0; + return 1; + } + return 0; +} + PyDoc_STRVAR(count__doc__, "S.count(sub[, start[, end]]) -> int\n\ \n\ @@ -11262,8 +11281,7 @@ unicode_count(PyObject *self, PyObject * void *buf1, *buf2; Py_ssize_t len1, len2, iresult; - if (!stringlib_parse_args_finds_unicode("count", args, &substring, - &start, &end)) + if (!parse_args_finds_unicode("count", args, &substring, &start, &end)) return NULL; kind1 = PyUnicode_KIND(self); @@ -11445,8 +11463,7 @@ unicode_find(PyObject *self, PyObject *a Py_ssize_t end = 0; Py_ssize_t result; - if (!stringlib_parse_args_finds_unicode("find", args, &substring, - &start, &end)) + if (!parse_args_finds_unicode("find", args, &substring, &start, &end)) return NULL; if (PyUnicode_READY(self) == -1) @@ -11525,8 +11542,7 @@ unicode_index(PyObject *self, PyObject * Py_ssize_t start = 0; Py_ssize_t end = 0; - if (!stringlib_parse_args_finds_unicode("index", args, &substring, - &start, &end)) + if (!parse_args_finds_unicode("index", args, &substring, &start, &end)) return NULL; if (PyUnicode_READY(self) == -1) @@ -12555,8 +12571,7 @@ unicode_rfind(PyObject *self, PyObject * Py_ssize_t end = 0; Py_ssize_t result; - if (!stringlib_parse_args_finds_unicode("rfind", args, &substring, - &start, &end)) + if (!parse_args_finds_unicode("rfind", args, &substring, &start, &end)) return NULL; if (PyUnicode_READY(self) == -1) @@ -12584,8 +12599,7 @@ unicode_rindex(PyObject *self, PyObject Py_ssize_t end = 0; Py_ssize_t result; - if (!stringlib_parse_args_finds_unicode("rindex", args, &substring, - &start, &end)) + if (!parse_args_finds_unicode("rindex", args, &substring, &start, &end)) return NULL; if (PyUnicode_READY(self) == -1)