/*
 * Patch summary: Mercurial diff against Objects/stringlib/find_max_char.h.
 *
 * Hunks 1 and 2 move the `#if STRINGLIB_SIZEOF_CHAR == 1` guard upwards so
 * that it also encloses the C 'long' ASCII-mask block beginning at
 * `#if (SIZEOF_LONG == 8)`.
 *
 * Hunk 3:
 *   - replaces `#undef ASCII_CHAR_MASK` with `#undef UCS1_ASCII_CHAR_MASK`
 *     (presumably the name the mask is really defined under; the #define
 *     lines lie outside the hunks shown here -- TODO confirm);
 *   - removes the MASK_* and MAX_CHAR_* macros together with their #undefs;
 *   - rewrites the find_max_char variant used when STRINGLIB_SIZEOF_CHAR is
 *     not 1.  The old code tested the OR of four code units against a `mask`
 *     that was widened step by step, re-checking the same units via
 *     `continue`.  The new code advances `p` first and compares the OR-ed
 *     value (then each leftover unit) against the running max_char, which
 *     starts at 0x7F:
 *       size 2: a value above 0xFF returns 0xFFFF immediately;
 *       size 4: a value above 0xFFFF returns 0x10FFFF immediately, a value
 *               above 0xFF raises max_char to 0xFFFF;
 *       otherwise max_char is raised to 0xFF (asserting it was 0x7F).
 *
 * NOTE(review): in this copy the patch's line breaks appear to have been
 * collapsed; it presumably has to be re-flowed from the original before a
 * patch tool will accept it -- verify against the source of this file.
 */
diff -r 6fb1fb550319 Objects/stringlib/find_max_char.h --- a/Objects/stringlib/find_max_char.h Thu Oct 11 18:59:34 2012 -0700 +++ b/Objects/stringlib/find_max_char.h Fri Oct 12 10:41:04 2012 +0300 @@ -2,6 +2,8 @@ #if STRINGLIB_IS_UNICODE +#if STRINGLIB_SIZEOF_CHAR == 1 + /* Mask to quickly check whether a C 'long' contains a non-ASCII, UTF8-encoded char. */ #if (SIZEOF_LONG == 8) @@ -12,8 +14,6 @@ # error C 'long' size should be either 4 or 8! #endif -#if STRINGLIB_SIZEOF_CHAR == 1 - Py_LOCAL_INLINE(Py_UCS4) STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end) { @@ -41,93 +41,73 @@ return 127; } -#undef ASCII_CHAR_MASK +#undef UCS1_ASCII_CHAR_MASK #else /* STRINGLIB_SIZEOF_CHAR == 1 */ -#define MASK_ASCII 0xFFFFFF80 -#define MASK_UCS1 0xFFFFFF00 -#define MASK_UCS2 0xFFFF0000 - -#define MAX_CHAR_ASCII 0x7f -#define MAX_CHAR_UCS1 0xff -#define MAX_CHAR_UCS2 0xffff -#define MAX_CHAR_UCS4 0x10ffff - Py_LOCAL_INLINE(Py_UCS4) STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end) { + register Py_UCS4 max_char; + Py_ssize_t n = end - begin; + const STRINGLIB_CHAR *p = begin; + const STRINGLIB_CHAR *unrolled_end = begin + _Py_SIZE_ROUND_DOWN(n, 4); + + max_char = 0x7F; + while (p < unrolled_end) { + Py_UCS4 ch = p[0] | p[1] | p[2] | p[3]; + p += 4; + if (ch > max_char) { #if STRINGLIB_SIZEOF_CHAR == 2 - const Py_UCS4 mask_limit = MASK_UCS1; - const Py_UCS4 max_char_limit = MAX_CHAR_UCS2; + if (ch > 0xFF) { + /* Limit reached */ + return 0xFFFF; + } #elif STRINGLIB_SIZEOF_CHAR == 4 - const Py_UCS4 mask_limit = MASK_UCS2; - const Py_UCS4 max_char_limit = MAX_CHAR_UCS4; + if (ch > 0xFFFF) { + /* Limit reached */ + return 0x10FFFF; + } + else if (ch > 0xFF) { + max_char = 0xFFFF; + } #else #error Invalid STRINGLIB_SIZEOF_CHAR (must be 1, 2 or 4) #endif - register Py_UCS4 mask; - Py_ssize_t n = end - begin; - const STRINGLIB_CHAR *p = begin; - const STRINGLIB_CHAR *unrolled_end = begin + _Py_SIZE_ROUND_DOWN(n, 4); - Py_UCS4 
max_char; - - max_char = MAX_CHAR_ASCII; - mask = MASK_ASCII; - while (p < unrolled_end) { - STRINGLIB_CHAR bits = p[0] | p[1] | p[2] | p[3]; - if (bits & mask) { - if (mask == mask_limit) { - /* Limit reached */ - return max_char_limit; + else { + assert(max_char == 0x7F); + max_char = 0xFF; } - if (mask == MASK_ASCII) { - max_char = MAX_CHAR_UCS1; - mask = MASK_UCS1; - } - else { - /* mask can't be MASK_UCS2 because of mask_limit above */ - assert(mask == MASK_UCS1); - max_char = MAX_CHAR_UCS2; - mask = MASK_UCS2; - } - /* We check the new mask on the same chars in the next iteration */ - continue; } - p += 4; } while (p < end) { - if (p[0] & mask) { - if (mask == mask_limit) { + Py_UCS4 ch = *p++; + if (ch > max_char) { +#if STRINGLIB_SIZEOF_CHAR == 2 + if (ch > 0xFF) { /* Limit reached */ - return max_char_limit; + return 0xFFFF; } - if (mask == MASK_ASCII) { - max_char = MAX_CHAR_UCS1; - mask = MASK_UCS1; +#elif STRINGLIB_SIZEOF_CHAR == 4 + if (ch > 0xFFFF) { + /* Limit reached */ + return 0x10FFFF; } + else if (ch > 0xFF) { + max_char = 0xFFFF; + } +#else +#error Invalid STRINGLIB_SIZEOF_CHAR (must be 1, 2 or 4) +#endif else { - /* mask can't be MASK_UCS2 because of mask_limit above */ - assert(mask == MASK_UCS1); - max_char = MAX_CHAR_UCS2; - mask = MASK_UCS2; + assert(max_char == 0x7F); + max_char = 0xFF; } - /* We check the new mask on the same chars in the next iteration */ - continue; } - p++; } return max_char; } -#undef MASK_ASCII -#undef MASK_UCS1 -#undef MASK_UCS2 -#undef MAX_CHAR_ASCII -#undef MAX_CHAR_UCS1 -#undef MAX_CHAR_UCS2 -#undef MAX_CHAR_UCS4 - #endif /* STRINGLIB_SIZEOF_CHAR == 1 */ #endif /* STRINGLIB_IS_UNICODE */