diff -r 828c9b920532 Objects/bytearrayobject.c --- a/Objects/bytearrayobject.c Fri Nov 13 15:18:26 2015 +0200 +++ b/Objects/bytearrayobject.c Sat Nov 14 00:07:59 2015 +0200 @@ -1159,16 +1159,15 @@ bytearray_find_internal(PyByteArrayObjec ADJUST_INDICES(start, end, len); if (end - start < sub_len) res = -1; - else if (sub_len == 1 -#ifndef HAVE_MEMRCHR - && dir > 0 -#endif - ) { - unsigned char needle = *sub; - int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH; - res = stringlib_fastsearch_memchr_1char( - PyByteArray_AS_STRING(self) + start, end - start, - needle, needle, mode); + else if (sub_len == 1) { + if (dir > 0) + res = stringlib_find_char( + PyByteArray_AS_STRING(self) + start, end - start, + *sub); + else + res = stringlib_rfind_char( + PyByteArray_AS_STRING(self) + start, end - start, + *sub); if (res >= 0) res += start; } diff -r 828c9b920532 Objects/bytesobject.c --- a/Objects/bytesobject.c Fri Nov 13 15:18:26 2015 +0200 +++ b/Objects/bytesobject.c Sat Nov 14 00:07:59 2015 +0200 @@ -1937,16 +1937,15 @@ bytes_find_internal(PyBytesObject *self, ADJUST_INDICES(start, end, len); if (end - start < sub_len) res = -1; - else if (sub_len == 1 -#ifndef HAVE_MEMRCHR - && dir > 0 -#endif - ) { - unsigned char needle = *sub; - int mode = (dir > 0) ? FAST_SEARCH : FAST_RSEARCH; - res = stringlib_fastsearch_memchr_1char( - PyBytes_AS_STRING(self) + start, end - start, - needle, needle, mode); + else if (sub_len == 1) { + if (dir > 0) + res = stringlib_find_char( + PyBytes_AS_STRING(self) + start, end - start, + *sub); + else + res = stringlib_rfind_char( + PyBytes_AS_STRING(self) + start, end - start, + *sub); if (res >= 0) res += start; } diff -r 828c9b920532 Objects/stringlib/fastsearch.h --- a/Objects/stringlib/fastsearch.h Fri Nov 13 15:18:26 2015 +0200 +++ b/Objects/stringlib/fastsearch.h Sat Nov 14 00:07:59 2015 +0200 @@ -32,53 +32,135 @@ #define STRINGLIB_BLOOM(mask, ch) \ ((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1))))) +#if STRINGLIB_SIZEOF_CHAR == 1 +# define MEMCHR_CUT_OFF 15 +#else +# define MEMCHR_CUT_OFF 40 +#endif Py_LOCAL_INLINE(Py_ssize_t) -STRINGLIB(fastsearch_memchr_1char)(const STRINGLIB_CHAR* s, Py_ssize_t n, - STRINGLIB_CHAR ch, unsigned char needle, - int mode) +STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch) { - if (mode == FAST_SEARCH) { - const STRINGLIB_CHAR *ptr = s; - const STRINGLIB_CHAR *e = s + n; - while (ptr < e) { - void *candidate = memchr((const void *) ptr, needle, (e - ptr) * sizeof(STRINGLIB_CHAR)); + const STRINGLIB_CHAR *p, *e; +#if STRINGLIB_SIZEOF_CHAR > 1 + const STRINGLIB_CHAR *s1, *e1; + unsigned char needle; +#endif + +#if STRINGLIB_SIZEOF_CHAR == 1 + if (n > MEMCHR_CUT_OFF) { + p = memchr(s, ch, n); + if (p != NULL) + return (p - s); + return -1; + } +#endif + p = s; + e = s + n; +#if STRINGLIB_SIZEOF_CHAR > 1 + /* use memchr if we can choose a needle without two many likely + false positives */ + needle = ch & 0xff; + /* If looking for a multiple of 256, we'd have too + many false positives looking for the '\0' byte in UCS2 + and UCS4 representations. */ + if (needle != 0) + while (e - p > MEMCHR_CUT_OFF) { + void *candidate = memchr(p, needle, + (e - p) * sizeof(STRINGLIB_CHAR)); if (candidate == NULL) return -1; - ptr = (const STRINGLIB_CHAR *) _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); - if (sizeof(STRINGLIB_CHAR) == 1 || *ptr == ch) - return (ptr - s); + s1 = p; + p = (const STRINGLIB_CHAR *) + _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); + if (*p == ch) + return (p - s); /* False positive */ - ptr++; + p++; + if (p - s1 > MEMCHR_CUT_OFF) + continue; + if (e - p <= MEMCHR_CUT_OFF) + break; + e1 = p + MEMCHR_CUT_OFF; + while (p != e1) { + if (*p == ch) + return (p - s); + p++; + } } - return -1; +#endif + while (p < e) { + if (*p == ch) + return (p - s); + p++; } + return -1; +} + +Py_LOCAL_INLINE(Py_ssize_t) +STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch) +{ + const STRINGLIB_CHAR *p; #ifdef HAVE_MEMRCHR /* memrchr() is a GNU extension, available since glibc 2.1.91. it doesn't seem as optimized as memchr(), but is still quite - faster than our hand-written loop in FASTSEARCH below */ - else if (mode == FAST_RSEARCH) { - while (n > 0) { - const STRINGLIB_CHAR *found; - void *candidate = memrchr((const void *) s, needle, n * sizeof(STRINGLIB_CHAR)); + faster than our hand-written loop below */ +#if STRINGLIB_SIZEOF_CHAR > 1 + const STRINGLIB_CHAR *s1; + Py_ssize_t n1; + unsigned char needle; +#endif + +#if STRINGLIB_SIZEOF_CHAR == 1 + if (n > MEMCHR_CUT_OFF) { + p = memrchr(s, ch, n * sizeof(STRINGLIB_CHAR)); + if (p != NULL) + return (p - s); + return -1; + } +#else + /* use memrchr if we can choose a needle without two many likely + false positives */ + needle = ch & 0xff; + /* If looking for a multiple of 256, we'd have too + many false positives looking for the '\0' byte in UCS2 + and UCS4 representations. */ + if (needle != 0) + while (n > MEMCHR_CUT_OFF) { + void *candidate = memrchr(s, needle, n * sizeof(STRINGLIB_CHAR)); if (candidate == NULL) return -1; - found = (const STRINGLIB_CHAR *) _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); - n = found - s; - if (sizeof(STRINGLIB_CHAR) == 1 || *found == ch) + n1 = n; + p = (const STRINGLIB_CHAR *) + _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); + n = p - s; + if (*p == ch) return n; /* False positive */ + if (n1 - n > MEMCHR_CUT_OFF) + continue; + if (n <= MEMCHR_CUT_OFF) + break; + s1 = p - MEMCHR_CUT_OFF; + while (p > s1) { + p--; + if (*p == ch) + return (p - s); + } + n = p - s; } - return -1; +#endif +#endif /* HAVE_MEMRCHR */ + p = s + n; + while (p > s) { + p--; + if (*p == ch) + return (p - s); } -#endif - else { - assert(0); /* Should never get here */ - return 0; - } + return -1; +} -#undef DO_MEMCHR -} +#undef MEMCHR_CUT_OFF Py_LOCAL_INLINE(Py_ssize_t) FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, @@ -99,25 +181,11 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_s if (m <= 0) return -1; /* use special case for 1-character strings */ - if (n > 10 && (mode == FAST_SEARCH -#ifdef HAVE_MEMRCHR - || mode == FAST_RSEARCH -#endif - )) { - /* use memchr if we can choose a needle without two many likely - false positives */ - unsigned char needle; - needle = p[0] & 0xff; -#if STRINGLIB_SIZEOF_CHAR > 1 - /* If looking for a multiple of 256, we'd have too - many false positives looking for the '\0' byte in UCS2 - and UCS4 representations. */ - if (needle != 0) -#endif - return STRINGLIB(fastsearch_memchr_1char) - (s, n, p[0], needle, mode); - } - if (mode == FAST_COUNT) { + if (mode == FAST_SEARCH) + return STRINGLIB(find_char)(s, n, p[0]); + else if (mode == FAST_RSEARCH) + return STRINGLIB(rfind_char)(s, n, p[0]); + else { /* FAST_COUNT */ for (i = 0; i < n; i++) if (s[i] == p[0]) { count++; @@ -125,14 +193,6 @@ FASTSEARCH(const STRINGLIB_CHAR* s, Py_s return maxcount; } return count; - } else if (mode == FAST_SEARCH) { - for (i = 0; i < n; i++) - if (s[i] == p[0]) - return i; - } else { /* FAST_RSEARCH */ - for (i = n - 1; i > -1; i--) - if (s[i] == p[0]) - return i; } return -1; } diff -r 828c9b920532 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Fri Nov 13 15:18:26 2015 +0200 +++ b/Objects/unicodeobject.c Sat Nov 14 00:07:59 2015 +0200 @@ -811,27 +811,26 @@ Py_LOCAL_INLINE(Py_ssize_t) findchar(con Py_ssize_t size, Py_UCS4 ch, int direction) { - int mode = (direction == 1) ? FAST_SEARCH : FAST_RSEARCH; - switch (kind) { case PyUnicode_1BYTE_KIND: - { - Py_UCS1 ch1 = (Py_UCS1) ch; - if (ch1 == ch) - return ucs1lib_fastsearch((Py_UCS1 *) s, size, &ch1, 1, 0, mode); - else - return -1; - } + if ((Py_UCS1) ch != ch) + return -1; + if (direction > 0) + return ucs1lib_find_char((Py_UCS1 *) s, size, (Py_UCS1) ch); + else + return ucs1lib_rfind_char((Py_UCS1 *) s, size, (Py_UCS1) ch); case PyUnicode_2BYTE_KIND: - { - Py_UCS2 ch2 = (Py_UCS2) ch; - if (ch2 == ch) - return ucs2lib_fastsearch((Py_UCS2 *) s, size, &ch2, 1, 0, mode); - else - return -1; - } + if ((Py_UCS2) ch != ch) + return -1; + if (direction > 0) + return ucs2lib_find_char((Py_UCS2 *) s, size, (Py_UCS2) ch); + else + return ucs2lib_rfind_char((Py_UCS2 *) s, size, (Py_UCS2) ch); case PyUnicode_4BYTE_KIND: - return ucs4lib_fastsearch((Py_UCS4 *) s, size, &ch, 1, 0, mode); + if (direction > 0) + return ucs4lib_find_char((Py_UCS4 *) s, size, ch); + else + return ucs4lib_rfind_char((Py_UCS4 *) s, size, ch); default: assert(0); return -1;