diff -r 402a227564f5 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Sun Dec 11 14:48:44 2016 -0800 +++ b/Objects/unicodeobject.c Mon Dec 12 19:21:38 2016 +0800 @@ -9214,9 +9214,7 @@ void *buf1, *buf2; Py_ssize_t len1, len2, result; - kind1 = PyUnicode_KIND(s1); - kind2 = PyUnicode_KIND(s2); - if (kind1 < kind2) + if (PyUnicode_MAX_CHAR_VALUE(s1) < PyUnicode_MAX_CHAR_VALUE(s2)) return -1; len1 = PyUnicode_GET_LENGTH(s1); @@ -9227,6 +9225,9 @@ buf1 = PyUnicode_DATA(s1); buf2 = PyUnicode_DATA(s2); + kind1 = PyUnicode_KIND(s1); + kind2 = PyUnicode_KIND(s2); + assert(kind1 >= kind2); if (len2 == 1) { Py_UCS4 ch = PyUnicode_READ(kind2, buf2, 0); result = findchar((const char *)buf1 + kind1*start, @@ -9246,7 +9247,7 @@ if (direction > 0) { switch (kind1) { case PyUnicode_1BYTE_KIND: - if (PyUnicode_IS_ASCII(s1) && PyUnicode_IS_ASCII(s2)) + if (PyUnicode_IS_ASCII(s1)) result = asciilib_find_slice(buf1, len1, buf2, len2, start, end); else result = ucs1lib_find_slice(buf1, len1, buf2, len2, start, end); @@ -9384,9 +9385,7 @@ if (ensure_unicode(str) < 0 || ensure_unicode(substr) < 0) return -1; - kind1 = PyUnicode_KIND(str); - kind2 = PyUnicode_KIND(substr); - if (kind1 < kind2) + if (PyUnicode_MAX_CHAR_VALUE(str) < PyUnicode_MAX_CHAR_VALUE(substr)) return 0; len1 = PyUnicode_GET_LENGTH(str); @@ -9397,6 +9396,9 @@ buf1 = PyUnicode_DATA(str); buf2 = PyUnicode_DATA(substr); + kind1 = PyUnicode_KIND(str); + kind2 = PyUnicode_KIND(substr); + assert(kind1 >= kind2); if (kind2 != kind1) { buf2 = _PyUnicode_AsKind(substr, kind1); if (!buf2) @@ -9405,7 +9407,7 @@ switch (kind1) { case PyUnicode_1BYTE_KIND: - if (PyUnicode_IS_ASCII(str) && PyUnicode_IS_ASCII(substr)) + if (PyUnicode_IS_ASCII(str)) result = asciilib_count( ((Py_UCS1*)buf1) + start, end - start, buf2, len2, PY_SSIZE_T_MAX @@ -10248,11 +10250,10 @@ if (PyUnicode_READY(substring) == -1) return NULL; - kind1 = PyUnicode_KIND(self); - kind2 = PyUnicode_KIND(substring); len1 = PyUnicode_GET_LENGTH(self); len2 = PyUnicode_GET_LENGTH(substring); - if (kind1 < kind2 || len1 < len2) { + if (PyUnicode_MAX_CHAR_VALUE(self) < PyUnicode_MAX_CHAR_VALUE(substring) || + len1 < len2) { out = PyList_New(1); if (out == NULL) return NULL; @@ -10262,6 +10263,9 @@ } buf1 = PyUnicode_DATA(self); buf2 = PyUnicode_DATA(substring); + kind1 = PyUnicode_KIND(self); + kind2 = PyUnicode_KIND(substring); + assert(kind1 >= kind2); if (kind2 != kind1) { buf2 = _PyUnicode_AsKind(substring, kind1); if (!buf2) @@ -10270,7 +10274,7 @@ switch (kind1) { case PyUnicode_1BYTE_KIND: - if (PyUnicode_IS_ASCII(self) && PyUnicode_IS_ASCII(substring)) + if (PyUnicode_IS_ASCII(self)) out = asciilib_split( self, buf1, len1, buf2, len2, maxcount); else @@ -10340,11 +10344,10 @@ if (PyUnicode_READY(substring) == -1) return NULL; - kind1 = PyUnicode_KIND(self); - kind2 = PyUnicode_KIND(substring); len1 = PyUnicode_GET_LENGTH(self); len2 = PyUnicode_GET_LENGTH(substring); - if (kind1 < kind2 || len1 < len2) { + if (PyUnicode_MAX_CHAR_VALUE(self) < PyUnicode_MAX_CHAR_VALUE(substring) || + len1 < len2) { out = PyList_New(1); if (out == NULL) return NULL; @@ -10354,6 +10357,9 @@ } buf1 = PyUnicode_DATA(self); buf2 = PyUnicode_DATA(substring); + kind1 = PyUnicode_KIND(self); + kind2 = PyUnicode_KIND(substring); + assert(kind1 >= kind2); if (kind2 != kind1) { buf2 = _PyUnicode_AsKind(substring, kind1); if (!buf2) @@ -10362,7 +10368,7 @@ switch (kind1) { case PyUnicode_1BYTE_KIND: - if (PyUnicode_IS_ASCII(self) && PyUnicode_IS_ASCII(substring)) + if (PyUnicode_IS_ASCII(self)) out = asciilib_rsplit( self, buf1, len1, buf2, len2, maxcount); else @@ -11206,10 +11212,7 @@ return -1; if (ensure_unicode(str) < 0) return -1; - - kind1 = PyUnicode_KIND(str); - kind2 = PyUnicode_KIND(substr); - if (kind1 < kind2) + if (PyUnicode_MAX_CHAR_VALUE(str) < PyUnicode_MAX_CHAR_VALUE(substr)) return 0; len1 = PyUnicode_GET_LENGTH(str); len2 = PyUnicode_GET_LENGTH(substr); @@ -11217,6 +11220,9 @@ return 0; buf1 = PyUnicode_DATA(str); buf2 = PyUnicode_DATA(substr); + kind1 = PyUnicode_KIND(str); + kind2 = PyUnicode_KIND(substr); + assert(kind1 >= kind2); if (len2 == 1) { Py_UCS4 ch = PyUnicode_READ(kind2, buf2, 0); result = findchar((const char *)buf1, kind1, len1, ch, 1) != -1; @@ -11418,9 +11424,7 @@ if (!parse_args_finds_unicode("count", args, &substring, &start, &end)) return NULL; - kind1 = PyUnicode_KIND(self); - kind2 = PyUnicode_KIND(substring); - if (kind1 < kind2) + if (PyUnicode_MAX_CHAR_VALUE(self) < PyUnicode_MAX_CHAR_VALUE(substring)) return PyLong_FromLong(0); len1 = PyUnicode_GET_LENGTH(self); @@ -11431,6 +11435,9 @@ buf1 = PyUnicode_DATA(self); buf2 = PyUnicode_DATA(substring); + kind1 = PyUnicode_KIND(self); + kind2 = PyUnicode_KIND(substring); + assert(kind1 >= kind2); if (kind2 != kind1) { buf2 = _PyUnicode_AsKind(substring, kind1); if (!buf2) @@ -12828,11 +12835,10 @@ if (ensure_unicode(str_obj) < 0 || ensure_unicode(sep_obj) < 0) return NULL; - kind1 = PyUnicode_KIND(str_obj); - kind2 = PyUnicode_KIND(sep_obj); len1 = PyUnicode_GET_LENGTH(str_obj); len2 = PyUnicode_GET_LENGTH(sep_obj); - if (kind1 < kind2 || len1 < len2) { + if (PyUnicode_MAX_CHAR_VALUE(str_obj) < PyUnicode_MAX_CHAR_VALUE(sep_obj) + || len1 < len2) { _Py_INCREF_UNICODE_EMPTY(); if (!unicode_empty) out = NULL; @@ -12844,6 +12850,9 @@ } buf1 = PyUnicode_DATA(str_obj); buf2 = PyUnicode_DATA(sep_obj); + kind1 = PyUnicode_KIND(str_obj); + kind2 = PyUnicode_KIND(sep_obj); + assert(kind1 >= kind2); if (kind2 != kind1) { buf2 = _PyUnicode_AsKind(sep_obj, kind1); if (!buf2) @@ -12852,7 +12861,7 @@ switch (kind1) { case PyUnicode_1BYTE_KIND: - if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sep_obj)) + if (PyUnicode_IS_ASCII(str_obj)) out = asciilib_partition(str_obj, buf1, len1, sep_obj, buf2, len2); else out = ucs1lib_partition(str_obj, buf1, len1, sep_obj, buf2, len2); @@ -12886,11 +12895,10 @@ if (ensure_unicode(str_obj) < 0 || ensure_unicode(sep_obj) < 0) return NULL; - kind1 = PyUnicode_KIND(str_obj); - kind2 = PyUnicode_KIND(sep_obj); len1 = PyUnicode_GET_LENGTH(str_obj); len2 = PyUnicode_GET_LENGTH(sep_obj); - if (kind1 < kind2 || len1 < len2) { + if (PyUnicode_MAX_CHAR_VALUE(str_obj) < PyUnicode_MAX_CHAR_VALUE(sep_obj) + || len1 < len2) { _Py_INCREF_UNICODE_EMPTY(); if (!unicode_empty) out = NULL; @@ -12902,6 +12910,9 @@ } buf1 = PyUnicode_DATA(str_obj); buf2 = PyUnicode_DATA(sep_obj); + kind1 = PyUnicode_KIND(str_obj); + kind2 = PyUnicode_KIND(sep_obj); + assert(kind1 >= kind2); if (kind2 != kind1) { buf2 = _PyUnicode_AsKind(sep_obj, kind1); if (!buf2) @@ -12910,7 +12921,7 @@ switch (kind1) { case PyUnicode_1BYTE_KIND: - if (PyUnicode_IS_ASCII(str_obj) && PyUnicode_IS_ASCII(sep_obj)) + if (PyUnicode_IS_ASCII(str_obj)) out = asciilib_rpartition(str_obj, buf1, len1, sep_obj, buf2, len2); else out = ucs1lib_rpartition(str_obj, buf1, len1, sep_obj, buf2, len2);