/*
 * Review notes. This is free text placed ahead of the first file header;
 * the patch text that follows is reproduced unchanged.
 *
 * NOTE(review): in this copy the original line breaks appear to have been
 * collapsed into spaces, so blank context lines cannot be recovered and the
 * text will presumably not apply as-is -- confirm against the source patch.
 *
 * Objects/stringlib/eq.h: unicode_eq() is rewritten.
 *   - Parameters are renamed from aa/bb to a/b and the two
 *     "register PyUnicodeObject *" locals are removed.
 *   - a == b now returns 1 right after the PyUnicode_READY check.
 *   - The length and kind of a are cached in the locals len and kind and
 *     compared against those of b; a mismatch returns 0, len == 0 returns 1.
 *   - The first and the last code unit are compared at the width selected
 *     by kind (Py_UCS1, Py_UCS2 or Py_UCS4); the old code compared only the
 *     first byte.
 *   - len <= 2 then returns 1, because both ends have already matched.
 *   - Otherwise memcmp() covers (len - 2) * kind bytes, starting kind bytes
 *     into each buffer, i.e. the code units between the two ends.
 *   - NOTE(review): the "default:" label has no break or return after its
 *     assert, so with assertions compiled out an unexpected kind falls
 *     through into the 1-byte case.
 *
 * Objects/unicodeobject.c:
 *   - Adds #include "stringlib/eq.h" just before "stringlib/undef.h".
 *   - Deletes the static helper unicode_compare_eq().
 *   - The Py_EQ / Py_NE branch of PyUnicode_RichCompare calls unicode_eq()
 *     instead of the deleted helper.
 *   - NOTE(review): unicode_eq() runs PyUnicode_READY on both arguments and
 *     returns 0 (after an assert) when either call reports -1; the deleted
 *     helper had no such step.
 *   - The last hunk's trailing context stops at "else", so the statement
 *     that follows it is not shown here.
 */
diff -r a8a238cf59c7 Objects/stringlib/eq.h --- a/Objects/stringlib/eq.h Wed Apr 03 13:20:02 2013 -0400 +++ b/Objects/stringlib/eq.h Wed Apr 03 23:05:20 2013 +0200 @@ -4,31 +4,71 @@ * unicode_eq() is called when the hash of two unicode objects is equal. */ Py_LOCAL_INLINE(int) -unicode_eq(PyObject *aa, PyObject *bb) +unicode_eq(PyObject *a, PyObject *b) { - register PyUnicodeObject *a = (PyUnicodeObject *)aa; - register PyUnicodeObject *b = (PyUnicodeObject *)bb; + Py_ssize_t len; + int kind; + Py_UCS1 *data1, *data2; if (PyUnicode_READY(a) == -1 || PyUnicode_READY(b) == -1) { assert(0 && "unicode_eq ready fail"); return 0; } - if (PyUnicode_GET_LENGTH(a) != PyUnicode_GET_LENGTH(b)) + /* a string is equal to itself */ + if (a == b) + return 1; + + len = PyUnicode_GET_LENGTH(a); + if (len != PyUnicode_GET_LENGTH(b)) return 0; - if (PyUnicode_GET_LENGTH(a) == 0) + if (len == 0) return 1; - if (PyUnicode_KIND(a) != PyUnicode_KIND(b)) + + kind = PyUnicode_KIND(a); + if (kind != PyUnicode_KIND(b)) return 0; - /* Just comparing the first byte is enough to see if a and b differ. - * If they are 2 byte or 4 byte character most differences will happen in - * the lower bytes anyways. 
- */ - if (PyUnicode_1BYTE_DATA(a)[0] != PyUnicode_1BYTE_DATA(b)[0]) - return 0; - if (PyUnicode_KIND(a) == PyUnicode_1BYTE_KIND && - PyUnicode_GET_LENGTH(a) == 1) + + data1 = PyUnicode_1BYTE_DATA(a); + data2 = PyUnicode_1BYTE_DATA(b); + switch(kind) + { + default: + assert(0 && "invalid kind"); + case PyUnicode_1BYTE_KIND: + { + if (data1[0] != data2[0]) + return 0; + if (data1[len-1] != data2[len-1]) + return 0; + break; + } + case PyUnicode_2BYTE_KIND: + { + Py_UCS2 *ucs2a, *ucs2b; + ucs2a = (Py_UCS2 *)data1; + ucs2b = (Py_UCS2 *)data2; + + if (ucs2a[0] != ucs2b[0]) + return 0; + if (ucs2a[len-1] != ucs2b[len-1]) + return 0; + break; + } + case PyUnicode_4BYTE_KIND: + { + Py_UCS4 *ucs4a, *ucs4b; + ucs4a = (Py_UCS4 *)data1; + ucs4b = (Py_UCS4 *)data2; + + if (ucs4a[0] != ucs4b[0]) + return 0; + if (ucs4a[len-1] != ucs4b[len-1]) + return 0; + break; + } + } + if (len <= 2) return 1; - return memcmp(PyUnicode_1BYTE_DATA(a), PyUnicode_1BYTE_DATA(b), - PyUnicode_GET_LENGTH(a) * PyUnicode_KIND(a)) == 0; + return memcmp(data1 + kind, data2 + kind, (len - 2) * kind) == 0; } diff -r a8a238cf59c7 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Wed Apr 03 13:20:02 2013 -0400 +++ b/Objects/unicodeobject.c Wed Apr 03 23:05:20 2013 +0200 @@ -613,6 +613,7 @@ make_bloom_mask(int kind, void* ptr, Py_ #include "stringlib/fastsearch.h" #include "stringlib/count.h" #include "stringlib/find.h" +#include "stringlib/eq.h" #include "stringlib/undef.h" /* --- Unicode Object ----------------------------------------------------- */ @@ -10285,32 +10286,6 @@ unicode_compare(PyObject *str1, PyObject return 1; } -static int -unicode_compare_eq(PyObject *str1, PyObject *str2) -{ - int kind; - void *data1, *data2; - Py_ssize_t len; - int cmp; - - /* a string is equal to itself */ - if (str1 == str2) - return 1; - - len = PyUnicode_GET_LENGTH(str1); - if (PyUnicode_GET_LENGTH(str2) != len) - return 0; - kind = PyUnicode_KIND(str1); - if (PyUnicode_KIND(str2) != kind) - return 0; - data1 = 
PyUnicode_DATA(str1); - data2 = PyUnicode_DATA(str2); - - cmp = memcmp(data1, data2, len * kind); - return (cmp == 0); -} - - int PyUnicode_Compare(PyObject *left, PyObject *right) { @@ -10371,7 +10346,7 @@ PyUnicode_RichCompare(PyObject *left, Py return NULL; if (op == Py_EQ || op == Py_NE) { - result = unicode_compare_eq(left, right); + result = unicode_eq(left, right); if (op == Py_EQ) v = TEST_COND(result); else