diff -r b9a6592c6250 Objects/stringlib/codecs.h --- a/Objects/stringlib/codecs.h Fri Jun 22 14:11:58 2012 -0400 +++ b/Objects/stringlib/codecs.h Sat Jun 23 01:30:56 2012 +0300 @@ -15,6 +15,13 @@ # error C 'long' size should be either 4 or 8! #endif +/* Tests whether the byte value ch is a UTF-8 continuation byte (10xxxxxx). + When unsigned char is 8 bits wide, uses a signed char comparison; otherwise masks the top two bits. NOTE(review): the signed path assumes two's complement conversion, which is implementation-defined in C and not checked by the guard. + */ +#define IS_CONTINUATION_BYTE(ch) ((unsigned char)-1 == 0xFF ? \ + (signed char)(ch) < -0x40 : \ + ((ch) & 0xC0) == 0x80) + Py_LOCAL_INLINE(Py_UCS4) STRINGLIB(utf8_decode)(const char **inptr, const char *end, STRINGLIB_CHAR *dest, @@ -107,7 +114,7 @@ break; } ch2 = (unsigned char)s[1]; - if ((ch2 & 0xC0) != 0x80) + if (!IS_CONTINUATION_BYTE(ch2)) /* invalid continuation byte */ goto InvalidContinuation; ch = (ch << 6) + ch2 - @@ -131,8 +138,8 @@ } ch2 = (unsigned char)s[1]; ch3 = (unsigned char)s[2]; - if ((ch2 & 0xC0) != 0x80 || - (ch3 & 0xC0) != 0x80) { + if (!IS_CONTINUATION_BYTE(ch2) || + !IS_CONTINUATION_BYTE(ch3)) { /* invalid continuation byte */ goto InvalidContinuation; } @@ -172,9 +179,9 @@ ch2 = (unsigned char)s[1]; ch3 = (unsigned char)s[2]; ch4 = (unsigned char)s[3]; - if ((ch2 & 0xC0) != 0x80 || - (ch3 & 0xC0) != 0x80 || - (ch4 & 0xC0) != 0x80) { + if (!IS_CONTINUATION_BYTE(ch2) || + !IS_CONTINUATION_BYTE(ch3) || + !IS_CONTINUATION_BYTE(ch4)) { /* invalid continuation byte */ goto InvalidContinuation; } @@ -216,6 +223,7 @@ } #undef ASCII_CHAR_MASK +#undef IS_CONTINUATION_BYTE /* UTF-8 encoder specialized for a Unicode kind to avoid the slow