Index: Include/stringobject.h =================================================================== --- Include/stringobject.h (Revision 59441) +++ Include/stringobject.h (Arbeitskopie) @@ -58,7 +58,6 @@ PyAPI_FUNC(void) PyString_Concat(PyObject **, PyObject *); PyAPI_FUNC(void) PyString_ConcatAndDel(PyObject **, PyObject *); PyAPI_FUNC(int) _PyString_Resize(PyObject **, Py_ssize_t); -PyAPI_FUNC(int) _PyString_Eq(PyObject *, PyObject*); PyAPI_FUNC(PyObject *) PyString_Format(PyObject *, PyObject *); PyAPI_FUNC(PyObject *) _PyString_FormatLong(PyObject*, int, int, int, char**, int*); Index: Objects/dictobject.c =================================================================== --- Objects/dictobject.c (Revision 59441) +++ Objects/dictobject.c (Arbeitskopie) @@ -8,6 +8,7 @@ */ #include "Python.h" +#include "stringlib/eq.h" /* Set a key error with the specified argument, wrapping it in a @@ -327,25 +328,6 @@ return 0; } -/* Return 1 if two unicode objects are equal, 0 if not. */ -static int -unicode_eq(PyObject *aa, PyObject *bb) -{ - PyUnicodeObject *a = (PyUnicodeObject *)aa; - PyUnicodeObject *b = (PyUnicodeObject *)bb; - - if (a->length != b->length) - return 0; - if (a->length == 0) - return 1; - if (a->str[0] != b->str[0]) - return 0; - if (a->length == 1) - return 1; - return memcmp(a->str, b->str, a->length * sizeof(Py_UNICODE)) == 0; -} - - /* * Hacked up version of lookdict which can assume keys are always * unicodes; this assumption allows testing for errors during Index: Objects/stringlib/eq.h =================================================================== --- Objects/stringlib/eq.h (Revision 0) +++ Objects/stringlib/eq.h (Revision 0) @@ -0,0 +1,21 @@ +/* Fast unicode equal function optimized for dictobject.c and setobject.c */ + +/* Return 1 if two unicode objects are equal, 0 if not. + * unicode_eq() is called when the hash of two unicode objects is equal. 
+ */ +Py_LOCAL_INLINE(int) +unicode_eq(PyObject *aa, PyObject *bb) +{ + register PyUnicodeObject *a = (PyUnicodeObject *)aa; + register PyUnicodeObject *b = (PyUnicodeObject *)bb; + + if (a->length != b->length) + return 0; + if (a->length == 0) + return 1; + if (a->str[0] != b->str[0]) + return 0; + if (a->length == 1) + return 1; + return memcmp(a->str, b->str, a->length * sizeof(Py_UNICODE)) == 0; +} Eigenschaftsänderungen: Objects/stringlib/eq.h ___________________________________________________________________ Name: svn:keywords + Id Name: svn:eol-style + native Index: Objects/setobject.c =================================================================== --- Objects/setobject.c (Revision 59441) +++ Objects/setobject.c (Arbeitskopie) @@ -9,6 +9,7 @@ #include "Python.h" #include "structmember.h" +#include "stringlib/eq.h" /* Set a key error with the specified argument, wrapping it in a * tuple automatically so that tuple keys are not unpacked as the @@ -55,6 +56,7 @@ static PySetObject *free_sets[MAXFREESETS]; static int num_free_sets = 0; + /* The basic lookup function used by all operations. This is based on Algorithm D from Knuth Vol. 3, Sec. 6.4. @@ -144,12 +146,12 @@ } /* - * Hacked up version of set_lookkey which can assume keys are always strings; - * This means we can always use _PyString_Eq directly and not have to check to + * Hacked up version of set_lookkey which can assume keys are always unicode; + * This means we can always use unicode_eq directly and not have to check to * see if the comparison altered the table. 
*/ static setentry * -set_lookkey_string(PySetObject *so, PyObject *key, register long hash) +set_lookkey_unicode(PySetObject *so, PyObject *key, register long hash) { register Py_ssize_t i; register size_t perturb; @@ -158,11 +160,11 @@ setentry *table = so->table; register setentry *entry; - /* Make sure this function doesn't have to handle non-string keys, + /* Make sure this function doesn't have to handle non-unicode keys, including subclasses of str; e.g., one reason to subclass strings is to override __eq__, and for speed we don't cater to that here. */ - if (!PyString_CheckExact(key)) { + if (!PyUnicode_CheckExact(key)) { so->lookup = set_lookkey; return set_lookkey(so, key, hash); } @@ -173,7 +175,7 @@ if (entry->key == dummy) freeslot = entry; else { - if (entry->hash == hash && _PyString_Eq(entry->key, key)) + if (entry->hash == hash && unicode_eq(entry->key, key)) return entry; freeslot = NULL; } @@ -188,7 +190,7 @@ if (entry->key == key || (entry->hash == hash && entry->key != dummy - && _PyString_Eq(entry->key, key))) + && unicode_eq(entry->key, key))) return entry; if (entry->key == dummy && freeslot == NULL) freeslot = entry; @@ -375,8 +377,9 @@ register long hash; register Py_ssize_t n_used; - if (!PyString_CheckExact(key) || - (hash = ((PyStringObject *) key)->ob_shash) == -1) { + if (!PyUnicode_CheckExact(key) || + (hash = ((PyUnicodeObject *) key)->hash) == -1) + { hash = PyObject_Hash(key); if (hash == -1) return -1; @@ -422,8 +425,10 @@ PyObject *old_key; assert (PyAnySet_Check(so)); - if (!PyString_CheckExact(key) || - (hash = ((PyStringObject *) key)->ob_shash) == -1) { + + if (!PyUnicode_CheckExact(key) || + (hash = ((PyUnicodeObject *) key)->hash) == -1) + { hash = PyObject_Hash(key); if (hash == -1) return -1; @@ -668,8 +673,9 @@ long hash; setentry *entry; - if (!PyString_CheckExact(key) || - (hash = ((PyStringObject *) key)->ob_shash) == -1) { + if (!PyUnicode_CheckExact(key) || + (hash = ((PyUnicodeObject *) key)->hash) == -1) + { hash 
= PyObject_Hash(key); if (hash == -1) return -1; @@ -989,7 +995,7 @@ INIT_NONZERO_SET_SLOTS(so); } - so->lookup = set_lookkey_string; + so->lookup = set_lookkey_unicode; so->weakreflist = NULL; if (iterable != NULL) { @@ -1231,8 +1237,13 @@ while ((key = PyIter_Next(it)) != NULL) { int rv; setentry entry; - long hash = PyObject_Hash(key); + long hash; + if (!PyUnicode_CheckExact(key) || + (hash = ((PyUnicodeObject *) key)->hash) == -1) + { + hash = PyObject_Hash(key); + } if (hash == -1) { Py_DECREF(it); Py_DECREF(result); @@ -1352,8 +1363,13 @@ while ((key = PyIter_Next(it)) != NULL) { int rv; setentry entry; - long hash = PyObject_Hash(key); + long hash; + if (!PyUnicode_CheckExact(key) || + (hash = ((PyUnicodeObject *) key)->hash) == -1) + { + hash = PyObject_Hash(key); + } if (hash == -1) { Py_DECREF(key); Py_DECREF(it); Index: Objects/stringobject.c =================================================================== --- Objects/stringobject.c (Revision 59441) +++ Objects/stringobject.c (Arbeitskopie) @@ -877,16 +877,6 @@ return result; } -int -_PyString_Eq(PyObject *o1, PyObject *o2) -{ - PyStringObject *a = (PyStringObject*) o1; - PyStringObject *b = (PyStringObject*) o2; - return Py_Size(a) == Py_Size(b) - && *a->ob_sval == *b->ob_sval - && memcmp(a->ob_sval, b->ob_sval, Py_Size(a)) == 0; -} - static long string_hash(PyStringObject *a) { Index: Lib/test/test_set.py =================================================================== --- Lib/test/test_set.py (Revision 59441) +++ Lib/test/test_set.py (Arbeitskopie) @@ -7,6 +7,7 @@ import os from random import randrange, shuffle import sys +import warnings class PassThru(Exception): pass @@ -817,6 +818,44 @@ self.length = 3 self.repr = None +#------------------------------------------------------------------------------ + +class TestBasicOpsString(TestBasicOps): + def setUp(self): + self.case = "string set" + self.values = ["a", "b", "c"] + self.set = set(self.values) + self.dup = set(self.values) + self.length 
= 3 + self.repr = "{'a', 'c', 'b'}" + +#------------------------------------------------------------------------------ + +class TestBasicOpsBytes(TestBasicOps): + def setUp(self): + self.case = "bytes set" + self.values = [b"a", b"b", b"c"] + self.set = set(self.values) + self.dup = set(self.values) + self.length = 3 + self.repr = "{b'a', b'c', b'b'}" + +#------------------------------------------------------------------------------ + +class TestBasicOpsMixedStringBytes(TestBasicOps): + def setUp(self): + self.warning_filters = warnings.filters[:] + warnings.simplefilter('ignore', BytesWarning) + self.case = "string and bytes set" + self.values = ["a", "b", b"a", b"b"] + self.set = set(self.values) + self.dup = set(self.values) + self.length = 4 + self.repr = "{'a', b'a', 'b', b'b'}" + + def tearDown(self): + warnings.filters = self.warning_filters + #============================================================================== def baditer(): @@ -1581,6 +1620,9 @@ TestBasicOpsSingleton, TestBasicOpsTuple, TestBasicOpsTriple, + TestBasicOpsString, + TestBasicOpsBytes, + TestBasicOpsMixedStringBytes, TestBinaryOps, TestUpdateOps, TestMutate,