diff -r 11804c1747c7 Include/unicodeobject.h --- a/Include/unicodeobject.h Sat Jan 26 16:09:57 2008 +0100 +++ b/Include/unicodeobject.h Sat Jan 26 23:41:53 2008 +0100 @@ -397,8 +397,8 @@ typedef PY_UNICODE_TYPE Py_UNICODE; valid, and the substring must not be empty */ #define Py_UNICODE_MATCH(string, offset, substring) \ ((*((string)->str + (offset)) == *((substring)->str)) && \ - ((*((string)->str + (offset) + (substring)->length-1) == *((substring)->str + (substring)->length-1))) && \ - !memcmp((string)->str + (offset), (substring)->str, (substring)->length*sizeof(Py_UNICODE))) + ((*((string)->str + (offset) + Py_SIZE(substring)-1) == *((substring)->str + Py_SIZE(substring)-1))) && \ + !memcmp((string)->str + (offset), (substring)->str, Py_SIZE(substring)*sizeof(Py_UNICODE))) #ifdef __cplusplus extern "C" { @@ -407,9 +407,7 @@ extern "C" { /* --- Unicode Type ------------------------------------------------------- */ typedef struct { - PyObject_HEAD - Py_ssize_t length; /* Length of raw Unicode data in buffer */ - Py_UNICODE *str; /* Raw Unicode buffer */ + PyObject_VAR_HEAD long hash; /* Hash value; -1 if not set */ int state; /* != 0 if interned. In this case the two * references from the dictionary to this object @@ -417,7 +415,9 @@ typedef struct { PyObject *defenc; /* (Default) Encoded version as Python string, or NULL; this is used for implementing the buffer protocol */ + Py_UNICODE str[1]; /* Raw Unicode buffer */ } PyUnicodeObject; + PyAPI_DATA(PyTypeObject) PyUnicode_Type; PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type; @@ -432,9 +432,9 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_T /* Fast access macros */ #define PyUnicode_GET_SIZE(op) \ - (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->length)) + (assert(PyUnicode_Check(op)), Py_SIZE(op)) #define PyUnicode_GET_DATA_SIZE(op) \ - (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE))) + (assert(PyUnicode_Check(op)), Py_SIZE(op) * sizeof(Py_UNICODE)) #define PyUnicode_AS_UNICODE(op) \ (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->str)) #define PyUnicode_AS_DATA(op) \ diff -r 11804c1747c7 Objects/stringlib/eq.h --- a/Objects/stringlib/eq.h Sat Jan 26 16:09:57 2008 +0100 +++ b/Objects/stringlib/eq.h Sat Jan 26 23:41:53 2008 +0100 @@ -9,13 +9,13 @@ unicode_eq(PyObject *aa, PyObject *bb) register PyUnicodeObject *a = (PyUnicodeObject *)aa; register PyUnicodeObject *b = (PyUnicodeObject *)bb; - if (a->length != b->length) + if (Py_SIZE(a) != Py_SIZE(b)) return 0; - if (a->length == 0) + if (Py_SIZE(a) == 0) return 1; if (a->str[0] != b->str[0]) return 0; - if (a->length == 1) + if (Py_SIZE(a) == 1) return 1; - return memcmp(a->str, b->str, a->length * sizeof(Py_UNICODE)) == 0; + return memcmp(a->str, b->str, Py_SIZE(a) * sizeof(Py_UNICODE)) == 0; } diff -r 11804c1747c7 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Sat Jan 26 16:09:57 2008 +0100 +++ b/Objects/unicodeobject.c Sat Jan 26 23:41:53 2008 +0100 @@ -52,28 +52,27 @@ OF OR IN CONNECTION WITH THE USE OR PERF #include #endif -/* Limit for the Unicode object free list */ - -#define MAX_UNICODE_FREELIST_SIZE 1024 - -/* Limit for the Unicode object free list stay alive optimization. +#define LENGTH(uniobj) \ + Py_SIZE(uniobj) + +/* Number of free lists, one per unicode object size. The implementation will keep allocated Unicode memory intact for - all objects on the free list having a size less than this - limit. This reduces malloc() overhead for small Unicode objects. - - At worst this will result in MAX_UNICODE_FREELIST_SIZE * - (sizeof(PyUnicodeObject) + KEEPALIVE_SIZE_LIMIT + - malloc()-overhead) bytes of unused garbage. + objects having a size less than this limit, within a certain number + of objects for each size (as defined by the CAN_SAVE macro below). Setting the limit to 0 effectively turns the feature off. - Note: This is an experimental feature ! If you get core dumps when - using Unicode objects, turn this feature off. - -*/ - -#define KEEPALIVE_SIZE_LIMIT 9 +*/ + +#define MAX_SAVED_SIZE 150 + +/* We keep lots of small objects in the free lists, but less larger ones. */ + +#define CAN_SAVE(obj_length, list_size) \ + ((obj_length < 20 && list_size < 100) \ + || (obj_length < 60 && list_size < 6) \ + || (list_size < 1)) /* Endianness switches; defaults to little endian */ @@ -105,9 +104,8 @@ extern "C" { */ static PyObject *interned; -/* Free list for Unicode objects */ -static PyUnicodeObject *unicode_freelist; -static int unicode_freelist_size; +/* Free lists for Unicode objects */ +static PyUnicodeObject *unicode_freelist[MAX_SAVED_SIZE]; /* The empty Unicode object is shared to improve performance. */ static PyUnicodeObject *unicode_empty; @@ -185,204 +183,174 @@ Py_LOCAL_INLINE(int) unicode_member(Py_U /* --- Unicode Object ----------------------------------------------------- */ static -int unicode_resize(register PyUnicodeObject *unicode, - Py_ssize_t length) -{ - void *oldstr; - - /* Shortcut if there's nothing much to do. */ - if (unicode->length == length) - goto reset; - - /* Resizing shared object (unicode_empty or single character - objects) in-place is not allowed. Use PyUnicode_Resize() - instead ! */ - - if (unicode == unicode_empty || - (unicode->length == 1 && - unicode->str[0] < 256U && - unicode_latin1[unicode->str[0]] == unicode)) { - PyErr_SetString(PyExc_SystemError, - "can't resize shared unicode objects"); - return -1; - } - - /* We allocate one more byte to make sure the string is Ux0000 terminated. - The overallocation is also used by fastsearch, which assumes that it's - safe to look at str[length] (without making any assumptions about what - it contains). */ - - oldstr = unicode->str; - PyMem_RESIZE(unicode->str, Py_UNICODE, length + 1); - if (!unicode->str) { - unicode->str = (Py_UNICODE *)oldstr; - PyErr_NoMemory(); - return -1; - } - unicode->str[length] = 0; - unicode->length = length; - - reset: - /* Reset the object caches */ - if (unicode->defenc) { - Py_DECREF(unicode->defenc); - unicode->defenc = NULL; - } - unicode->hash = -1; - - return 0; -} +PyUnicodeObject *_PyUnicode_New(Py_ssize_t length); + +static +PyUnicodeObject *unicode_resize(register PyUnicodeObject *unicode, + Py_ssize_t length) +{ + PyUnicodeObject *v; + + /* Optimization for empty strings; yes, this sometimes happens. */ + if (length == 0 && unicode_empty != NULL) { + Py_DECREF(unicode); + Py_INCREF(unicode_empty); + return unicode_empty; + } + + /* Resizing unicode_empty and single character objects is not + possible since these are being shared. We simply return a fresh + copy with the same Unicode content. */ + if (LENGTH(unicode) != length && + (unicode == unicode_empty || LENGTH(unicode) == 1)) { + v = _PyUnicode_New(length); + if (v == NULL) + return NULL; + Py_UNICODE_COPY(v->str, unicode->str, + length < LENGTH(unicode) ? length : LENGTH(unicode)); + Py_DECREF(unicode); + return v; + } + + /* PyObject_REALLOC will almost always return a new memory block, so try + to find an existing one instead */ + if (length < MAX_SAVED_SIZE && (v = unicode_freelist[length])) { + unicode_freelist[length] = (PyUnicodeObject *) v->defenc; + v->defenc = NULL; + v->state = 0; + Py_UNICODE_COPY(v->str, unicode->str, + length < LENGTH(unicode) ? length : LENGTH(unicode)); + Py_DECREF(unicode); + goto reset; + } + + /* Adapted from similar code in tupleobject */ + _Py_DEC_REFTOTAL; + _Py_ForgetReference(unicode); + v = (PyUnicodeObject *) PyObject_REALLOC((char *) unicode, + sizeof(PyUnicodeObject) + length * sizeof(Py_UNICODE)); + if (v == NULL) { + PyObject_Del(unicode); + PyErr_NoMemory(); + return NULL; + } + Py_CLEAR(v->defenc); +reset: + LENGTH(v) = length; + v->str[length] = 0; + v->hash = -1; + _Py_NewReference(v); + return v; +} + /* We allocate one more byte to make sure the string is Ux0000 terminated; some code (e.g. new_identifier) relies on that. - XXX This allocator could further be enhanced by assuring that the - free list never reduces its size below 1. - */ static PyUnicodeObject *_PyUnicode_New(Py_ssize_t length) { - register PyUnicodeObject *unicode; - - /* Optimization for empty strings */ - if (length == 0 && unicode_empty != NULL) { - Py_INCREF(unicode_empty); - return unicode_empty; - } - - /* Unicode freelist & memory allocation */ - if (unicode_freelist) { - unicode = unicode_freelist; - unicode_freelist = *(PyUnicodeObject **)unicode; - unicode_freelist_size--; - if (unicode->str) { - /* Keep-Alive optimization: we only upsize the buffer, - never downsize it. */ - if ((unicode->length < length) && - unicode_resize(unicode, length) < 0) { - PyMem_DEL(unicode->str); - goto onError; - } - } - else { - unicode->str = PyMem_NEW(Py_UNICODE, length + 1); - } - PyObject_INIT(unicode, &PyUnicode_Type); - } - else { - unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type); - if (unicode == NULL) - return NULL; - unicode->str = PyMem_NEW(Py_UNICODE, length + 1); - } - - if (!unicode->str) { - PyErr_NoMemory(); - goto onError; - } - /* Initialize the first element to guard against cases where - * the caller fails before initializing str -- unicode_resize() - * reads str[0], and the Keep-Alive optimization can keep memory - * allocated for str alive across a call to unicode_dealloc(unicode). - * We don't want unicode_resize to read uninitialized memory in - * that case. - */ - unicode->str[0] = 0; - unicode->str[length] = 0; - unicode->length = length; - unicode->hash = -1; - unicode->state = 0; - unicode->defenc = NULL; - return unicode; - - onError: - _Py_ForgetReference((PyObject *)unicode); - PyObject_Del(unicode); - return NULL; + register PyUnicodeObject *unicode; + + /* Optimization for empty strings */ + if (length == 0 && unicode_empty != NULL) { + Py_INCREF(unicode_empty); + return unicode_empty; + } + + /* Unicode freelist & memory allocation */ + if (length < MAX_SAVED_SIZE + && (unicode = unicode_freelist[length])) { + _Py_NewReference(unicode); + unicode_freelist[length] = (PyUnicodeObject *) unicode->defenc; + } + else { + unicode = PyObject_NEW_VAR(PyUnicodeObject, &PyUnicode_Type, length); + if (!unicode) { + PyErr_NoMemory(); + return NULL; + } + } + + /* Initialize the first element to guard against cases where + * the caller fails before initializing str -- unicode_resize() + * reads str[0], and the Keep-Alive optimization can keep memory + * allocated for str alive across a call to unicode_dealloc(unicode). + * We don't want unicode_resize to read uninitialized memory in + * that case. + */ + unicode->str[0] = 0; + unicode->str[length] = 0; + LENGTH(unicode) = length; + unicode->hash = -1; + unicode->state = 0; + unicode->defenc = NULL; + return unicode; } static void unicode_dealloc(register PyUnicodeObject *unicode) { - switch (PyUnicode_CHECK_INTERNED(unicode)) { - case SSTATE_NOT_INTERNED: - break; - - case SSTATE_INTERNED_MORTAL: - /* revive dead object temporarily for DelItem */ - Py_REFCNT(unicode) = 3; - if (PyDict_DelItem(interned, (PyObject *)unicode) != 0) - Py_FatalError( - "deletion of interned unicode string failed"); - break; - - case SSTATE_INTERNED_IMMORTAL: - Py_FatalError("Immortal interned unicode string died."); - - default: - Py_FatalError("Inconsistent interned unicode string state."); - } - - if (PyUnicode_CheckExact(unicode) && - unicode_freelist_size < MAX_UNICODE_FREELIST_SIZE) { - /* Keep-Alive optimization */ - if (unicode->length >= KEEPALIVE_SIZE_LIMIT) { - PyMem_DEL(unicode->str); - unicode->str = NULL; - unicode->length = 0; - } - if (unicode->defenc) { - Py_DECREF(unicode->defenc); - unicode->defenc = NULL; - } - /* Add to free list */ - *(PyUnicodeObject **)unicode = unicode_freelist; - unicode_freelist = unicode; - unicode_freelist_size++; - } - else { - PyMem_DEL(unicode->str); - Py_XDECREF(unicode->defenc); + Py_ssize_t length = LENGTH(unicode); + + switch (PyUnicode_CHECK_INTERNED(unicode)) { + case SSTATE_NOT_INTERNED: + break; + + case SSTATE_INTERNED_MORTAL: + /* revive dead object temporarily for DelItem */ + Py_REFCNT(unicode) = 3; + if (PyDict_DelItem(interned, (PyObject *)unicode) != 0) + Py_FatalError( + "deletion of interned unicode string failed"); + break; + + case SSTATE_INTERNED_IMMORTAL: + Py_FatalError("Immortal interned unicode string died."); + + default: + Py_FatalError("Inconsistent interned unicode string state."); + } + + Py_CLEAR(unicode->defenc); + + if (PyUnicode_CheckExact(unicode) && length < MAX_SAVED_SIZE) { + PyUnicodeObject *v = unicode_freelist[length]; + if (!v || CAN_SAVE(length, LENGTH(v))) { + /* Keep track of number of items stacked on the freelist */ + LENGTH(unicode) = v ? LENGTH(v) + 1 : 1; + unicode->defenc = (PyObject *) v; + unicode_freelist[length] = unicode; + return; + } + } + Py_TYPE(unicode)->tp_free((PyObject *)unicode); - } } int PyUnicode_Resize(PyObject **unicode, Py_ssize_t length) { - register PyUnicodeObject *v; - - /* Argument checks */ - if (unicode == NULL) { - PyErr_BadInternalCall(); - return -1; - } - v = (PyUnicodeObject *)*unicode; - if (v == NULL || !PyUnicode_Check(v) || Py_REFCNT(v) != 1 || length < 0) { - PyErr_BadInternalCall(); - return -1; - } - - /* Resizing unicode_empty and single character objects is not - possible since these are being shared. We simply return a fresh - copy with the same Unicode content. */ - if (v->length != length && - (v == unicode_empty || v->length == 1)) { - PyUnicodeObject *w = _PyUnicode_New(length); - if (w == NULL) - return -1; - Py_UNICODE_COPY(w->str, v->str, - length < v->length ? length : v->length); - Py_DECREF(*unicode); - *unicode = (PyObject *)w; + register PyUnicodeObject *v; + + /* Argument checks */ + if (unicode == NULL) { + PyErr_BadInternalCall(); + return -1; + } + v = (PyUnicodeObject *)*unicode; + if (v == NULL || !PyUnicode_Check(v) || Py_REFCNT(v) != 1 || length < 0) { + PyErr_BadInternalCall(); + return -1; + } + + v = unicode_resize(v, length); + if (v == NULL) + return -1; + *unicode = (PyObject *) v; return 0; - } - - /* Note that we don't have to modify *unicode for unshared Unicode - objects, since we can modify them in-place. */ - return unicode_resize(v, length); } /* Internal API for use in unicodeobject.c only ! */ @@ -5123,13 +5091,13 @@ int PyUnicode_EncodeDecimal(Py_UNICODE * /* helper macro to fixup start/end slice values */ #define FIX_START_END(obj) \ if (start < 0) \ - start += (obj)->length; \ + start += LENGTH(obj); \ if (start < 0) \ start = 0; \ - if (end > (obj)->length) \ - end = (obj)->length; \ + if (end > LENGTH(obj)) \ + end = LENGTH(obj); \ if (end < 0) \ - end += (obj)->length; \ + end += LENGTH(obj); \ if (end < 0) \ end = 0; @@ -5154,7 +5122,7 @@ Py_ssize_t PyUnicode_Count(PyObject *str FIX_START_END(str_obj); result = stringlib_count( - str_obj->str + start, end - start, sub_obj->str, sub_obj->length + str_obj->str + start, end - start, sub_obj->str, LENGTH(sub_obj) ); Py_DECREF(sub_obj); @@ -5206,12 +5174,12 @@ int tailmatch(PyUnicodeObject *self, Py_ssize_t end, int direction) { - if (substring->length == 0) + if (LENGTH(substring) == 0) return 1; FIX_START_END(self); - end -= substring->length; + end -= LENGTH(substring); if (end < start) return 0; @@ -5261,11 +5229,11 @@ PyObject *fixup(PyUnicodeObject *self, PyUnicodeObject *u; - u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, self->length); + u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, LENGTH(self)); if (u == NULL) return NULL; - Py_UNICODE_COPY(u->str, self->str, self->length); + Py_UNICODE_COPY(u->str, self->str, LENGTH(self)); if (!fixfct(u) && PyUnicode_CheckExact(self)) { /* fixfct should return TRUE if it modified the buffer. If @@ -5281,7 +5249,7 @@ static static int fixupper(PyUnicodeObject *self) { - Py_ssize_t len = self->length; + Py_ssize_t len = LENGTH(self); Py_UNICODE *s = self->str; int status = 0; @@ -5302,7 +5270,7 @@ static static int fixlower(PyUnicodeObject *self) { - Py_ssize_t len = self->length; + Py_ssize_t len = LENGTH(self); Py_UNICODE *s = self->str; int status = 0; @@ -5323,7 +5291,7 @@ static static int fixswapcase(PyUnicodeObject *self) { - Py_ssize_t len = self->length; + Py_ssize_t len = LENGTH(self); Py_UNICODE *s = self->str; int status = 0; @@ -5344,7 +5312,7 @@ static static int fixcapitalize(PyUnicodeObject *self) { - Py_ssize_t len = self->length; + Py_ssize_t len = LENGTH(self); Py_UNICODE *s = self->str; int status = 0; @@ -5570,13 +5538,13 @@ PyUnicodeObject *pad(PyUnicodeObject *se return self; } - u = _PyUnicode_New(left + self->length + right); + u = _PyUnicode_New(left + LENGTH(self) + right); if (u) { if (left) Py_UNICODE_FILL(u->str, fill, left); - Py_UNICODE_COPY(u->str + left, self->str, self->length); + Py_UNICODE_COPY(u->str + left, self->str, LENGTH(self)); if (right) - Py_UNICODE_FILL(u->str + left + self->length, fill, right); + Py_UNICODE_FILL(u->str + left + LENGTH(self), fill, right); } return u; @@ -5600,7 +5568,7 @@ PyObject *split_whitespace(PyUnicodeObje { register Py_ssize_t i; register Py_ssize_t j; - Py_ssize_t len = self->length; + Py_ssize_t len = LENGTH(self); PyObject *str; for (i = j = 0; i < len; ) { @@ -5691,7 +5659,7 @@ PyObject *split_char(PyUnicodeObject *se { register Py_ssize_t i; register Py_ssize_t j; - Py_ssize_t len = self->length; + Py_ssize_t len = LENGTH(self); PyObject *str; for (i = j = 0; i < len; ) { @@ -5721,8 +5689,8 @@ PyObject *split_substring(PyUnicodeObjec { register Py_ssize_t i; register Py_ssize_t j; - Py_ssize_t len = self->length; - Py_ssize_t sublen = substring->length; + Py_ssize_t len = LENGTH(self); + Py_ssize_t sublen = LENGTH(substring); PyObject *str; for (i = j = 0; i <= len - sublen; ) { @@ -5751,7 +5719,7 @@ PyObject *rsplit_whitespace(PyUnicodeObj { register Py_ssize_t i; register Py_ssize_t j; - Py_ssize_t len = self->length; + Py_ssize_t len = LENGTH(self); PyObject *str; for (i = j = len - 1; i >= 0; ) { @@ -5790,7 +5758,7 @@ PyObject *rsplit_char(PyUnicodeObject *s { register Py_ssize_t i; register Py_ssize_t j; - Py_ssize_t len = self->length; + Py_ssize_t len = LENGTH(self); PyObject *str; for (i = j = len - 1; i >= 0; ) { @@ -5822,8 +5790,8 @@ PyObject *rsplit_substring(PyUnicodeObje { register Py_ssize_t i; register Py_ssize_t j; - Py_ssize_t len = self->length; - Py_ssize_t sublen = substring->length; + Py_ssize_t len = LENGTH(self); + Py_ssize_t sublen = LENGTH(substring); PyObject *str; for (i = len - sublen, j = len; i >= 0; ) { @@ -5867,10 +5835,10 @@ PyObject *split(PyUnicodeObject *self, if (substring == NULL) return split_whitespace(self,list,maxcount); - else if (substring->length == 1) + else if (LENGTH(substring) == 1) return split_char(self,list,substring->str[0],maxcount); - else if (substring->length == 0) { + else if (LENGTH(substring) == 0) { Py_DECREF(list); PyErr_SetString(PyExc_ValueError, "empty separator"); return NULL; @@ -5896,10 +5864,10 @@ PyObject *rsplit(PyUnicodeObject *self, if (substring == NULL) return rsplit_whitespace(self,list,maxcount); - else if (substring->length == 1) + else if (LENGTH(substring) == 1) return rsplit_char(self,list,substring->str[0],maxcount); - else if (substring->length == 0) { + else if (LENGTH(substring) == 0) { Py_DECREF(list); PyErr_SetString(PyExc_ValueError, "empty separator"); return NULL; @@ -5919,21 +5887,21 @@ PyObject *replace(PyUnicodeObject *self, if (maxcount < 0) maxcount = PY_SSIZE_T_MAX; - if (str1->length == str2->length) { + if (LENGTH(str1) == LENGTH(str2)) { /* same length */ Py_ssize_t i; - if (str1->length == 1) { + if (LENGTH(str1) == 1) { /* replace characters */ Py_UNICODE u1, u2; - if (!findchar(self->str, self->length, str1->str[0])) + if (!findchar(self->str, LENGTH(self), str1->str[0])) goto nothing; - u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, self->length); + u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, LENGTH(self)); if (!u) return NULL; - Py_UNICODE_COPY(u->str, self->str, self->length); + Py_UNICODE_COPY(u->str, self->str, LENGTH(self)); u1 = str1->str[0]; u2 = str2->str[0]; - for (i = 0; i < u->length; i++) + for (i = 0; i < LENGTH(u); i++) if (u->str[i] == u1) { if (--maxcount < 0) break; @@ -5941,20 +5909,20 @@ PyObject *replace(PyUnicodeObject *self, } } else { i = fastsearch( - self->str, self->length, str1->str, str1->length, FAST_SEARCH + self->str, LENGTH(self), str1->str, LENGTH(str1), FAST_SEARCH ); if (i < 0) goto nothing; - u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, self->length); + u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, LENGTH(self)); if (!u) return NULL; - Py_UNICODE_COPY(u->str, self->str, self->length); - while (i <= self->length - str1->length) + Py_UNICODE_COPY(u->str, self->str, LENGTH(self)); + while (i <= LENGTH(self) - LENGTH(str1)) if (Py_UNICODE_MATCH(self, i, str1)) { if (--maxcount < 0) break; - Py_UNICODE_COPY(u->str+i, str2->str, str2->length); - i += str1->length; + Py_UNICODE_COPY(u->str+i, str2->str, LENGTH(str2)); + i += LENGTH(str1); } else i++; } @@ -5965,23 +5933,23 @@ PyObject *replace(PyUnicodeObject *self, Py_UNICODE *p; /* replace strings */ - n = stringlib_count(self->str, self->length, str1->str, str1->length); + n = stringlib_count(self->str, LENGTH(self), str1->str, LENGTH(str1)); if (n > maxcount) n = maxcount; if (n == 0) goto nothing; - /* new_size = self->length + n * (str2->length - str1->length)); */ - delta = (str2->length - str1->length); + /* new_size = LENGTH(self) + n * (LENGTH(str2) - LENGTH(str1))); */ + delta = (LENGTH(str2) - LENGTH(str1)); if (delta == 0) { - new_size = self->length; + new_size = LENGTH(self); } else { - product = n * (str2->length - str1->length); - if ((product / (str2->length - str1->length)) != n) { + product = n * (LENGTH(str2) - LENGTH(str1)); + if ((product / (LENGTH(str2) - LENGTH(str1))) != n) { PyErr_SetString(PyExc_OverflowError, "replace string is too long"); return NULL; } - new_size = self->length + product; + new_size = LENGTH(self) + product; if (new_size < 0) { PyErr_SetString(PyExc_OverflowError, "replace string is too long"); @@ -5993,8 +5961,8 @@ PyObject *replace(PyUnicodeObject *self, return NULL; i = 0; p = u->str; - e = self->length - str1->length; - if (str1->length > 0) { + e = LENGTH(self) - LENGTH(str1); + if (LENGTH(str1) > 0) { while (n-- > 0) { /* look for next match */ j = i; @@ -6011,25 +5979,25 @@ PyObject *replace(PyUnicodeObject *self, p += j - i; } /* copy substitution string */ - if (str2->length > 0) { - Py_UNICODE_COPY(p, str2->str, str2->length); - p += str2->length; - } - i = j + str1->length; - } - if (i < self->length) + if (LENGTH(str2) > 0) { + Py_UNICODE_COPY(p, str2->str, LENGTH(str2)); + p += LENGTH(str2); + } + i = j + LENGTH(str1); + } + if (i < LENGTH(self)) /* copy tail [i:] */ - Py_UNICODE_COPY(p, self->str+i, self->length-i); + Py_UNICODE_COPY(p, self->str+i, LENGTH(self)-i); } else { /* interleave */ while (n > 0) { - Py_UNICODE_COPY(p, str2->str, str2->length); - p += str2->length; + Py_UNICODE_COPY(p, str2->str, LENGTH(str2)); + p += LENGTH(str2); if (--n <= 0) break; *p++ = self->str[i++]; } - Py_UNICODE_COPY(p, self->str+i, self->length-i); + Py_UNICODE_COPY(p, self->str+i, LENGTH(self)-i); } } return (PyObject *) u; @@ -6040,7 +6008,7 @@ nothing: Py_INCREF(self); return (PyObject *) self; } - return PyUnicode_FromUnicode(self->str, self->length); + return PyUnicode_FromUnicode(self->str, LENGTH(self)); } /* --- Unicode Object Methods --------------------------------------------- */ @@ -6150,12 +6118,12 @@ unicode_center(PyUnicodeObject *self, Py if (!PyArg_ParseTuple(args, "n|O&:center", &width, convert_uc, &fillchar)) return NULL; - if (self->length >= width && PyUnicode_CheckExact(self)) { + if (LENGTH(self) >= width && PyUnicode_CheckExact(self)) { Py_INCREF(self); return (PyObject*) self; } - marg = width - self->length; + marg = width - LENGTH(self); left = marg / 2 + (marg & width & 1); return (PyObject*) pad(self, left, marg - left, fillchar); @@ -6187,8 +6155,8 @@ unicode_compare(PyUnicodeObject *str1, P Py_UNICODE *s1 = str1->str; Py_UNICODE *s2 = str2->str; - len1 = str1->length; - len2 = str2->length; + len1 = LENGTH(str1); + len2 = LENGTH(str2); while (len1 > 0 && len2 > 0) { Py_UNICODE c1, c2; @@ -6221,8 +6189,8 @@ unicode_compare(PyUnicodeObject *str1, P Py_UNICODE *s1 = str1->str; Py_UNICODE *s2 = str2->str; - len1 = str1->length; - len2 = str2->length; + len1 = LENGTH(str1); + len2 = LENGTH(str2); while (len1 > 0 && len2 > 0) { Py_UNICODE c1, c2; @@ -6404,11 +6372,11 @@ PyObject *PyUnicode_Concat(PyObject *lef } /* Concat the two Unicode strings */ - w = _PyUnicode_New(u->length + v->length); + w = _PyUnicode_New(LENGTH(u) + LENGTH(v)); if (w == NULL) goto onError; - Py_UNICODE_COPY(w->str, u->str, u->length); - Py_UNICODE_COPY(w->str + u->length, v->str, v->length); + Py_UNICODE_COPY(w->str, u->str, LENGTH(u)); + Py_UNICODE_COPY(w->str + LENGTH(u), v->str, LENGTH(v)); Py_DECREF(u); Py_DECREF(v); @@ -6471,7 +6439,7 @@ unicode_count(PyUnicodeObject *self, PyO result = PyLong_FromSsize_t( stringlib_count(self->str + start, end - start, - substring->str, substring->length) + substring->str, LENGTH(substring)) ); Py_DECREF(substring); @@ -6536,7 +6504,7 @@ unicode_expandtabs(PyUnicodeObject *self /* First pass: determine size of output string */ i = j = old_j = 0; - e = self->str + self->length; + e = self->str + LENGTH(self); for (p = self->str; p < e; p++) if (*p == '\t') { if (tabsize > 0) { @@ -6628,7 +6596,7 @@ static PyObject * static PyObject * unicode_getitem(PyUnicodeObject *self, Py_ssize_t index) { - if (index < 0 || index >= self->length) { + if (index < 0 || index >= LENGTH(self)) { PyErr_SetString(PyExc_IndexError, "string index out of range"); return NULL; } @@ -7037,7 +7005,7 @@ static Py_ssize_t static Py_ssize_t unicode_length(PyUnicodeObject *self) { - return self->length; + return LENGTH(self); } PyDoc_STRVAR(ljust__doc__, @@ -7055,12 +7023,12 @@ unicode_ljust(PyUnicodeObject *self, PyO if (!PyArg_ParseTuple(args, "n|O&:ljust", &width, convert_uc, &fillchar)) return NULL; - if (self->length >= width && PyUnicode_CheckExact(self)) { + if (LENGTH(self) >= width && PyUnicode_CheckExact(self)) { Py_INCREF(self); return (PyObject*) self; } - return (PyObject*) pad(self, 0, width - self->length, fillchar); + return (PyObject*) pad(self, 0, width - LENGTH(self), fillchar); } PyDoc_STRVAR(lower__doc__, @@ -7244,8 +7212,8 @@ unicode_repeat(PyUnicodeObject *str, Py_ /* ensure # of chars needed doesn't overflow int and # of bytes * needed doesn't overflow size_t */ - nchars = len * str->length; - if (len && nchars / len != str->length) { + nchars = len * LENGTH(str); + if (len && nchars / len != LENGTH(str)) { PyErr_SetString(PyExc_OverflowError, "repeated string is too long"); return NULL; @@ -7262,13 +7230,13 @@ unicode_repeat(PyUnicodeObject *str, Py_ p = u->str; - if (str->length == 1 && len > 0) { + if (LENGTH(str) == 1 && len > 0) { Py_UNICODE_FILL(p, str->str[0], len); } else { Py_ssize_t done = 0; /* number of characters copied this far */ if (done < nchars) { - Py_UNICODE_COPY(p, str->str, str->length); - done = str->length; + Py_UNICODE_COPY(p, str->str, LENGTH(str)); + done = LENGTH(str); } while (done < nchars) { int n = (done <= nchars-done) ? done : nchars-done; @@ -7563,12 +7531,12 @@ unicode_rjust(PyUnicodeObject *self, PyO if (!PyArg_ParseTuple(args, "n|O&:rjust", &width, convert_uc, &fillchar)) return NULL; - if (self->length >= width && PyUnicode_CheckExact(self)) { + if (LENGTH(self) >= width && PyUnicode_CheckExact(self)) { Py_INCREF(self); return (PyObject*) self; } - return (PyObject*) pad(self, width - self->length, 0, fillchar); + return (PyObject*) pad(self, width - LENGTH(self), 0, fillchar); } PyObject *PyUnicode_Split(PyObject *s, @@ -7908,7 +7876,7 @@ static PyObject* static PyObject* unicode_translate(PyUnicodeObject *self, PyObject *table) { - return PyUnicode_TranslateCharmap(self->str, self->length, table, "ignore"); + return PyUnicode_TranslateCharmap(self->str, LENGTH(self), table, "ignore"); } PyDoc_STRVAR(upper__doc__, @@ -7938,7 +7906,7 @@ unicode_zfill(PyUnicodeObject *self, PyO if (!PyArg_ParseTuple(args, "n:zfill", &width)) return NULL; - if (self->length >= width) { + if (LENGTH(self) >= width) { if (PyUnicode_CheckExact(self)) { Py_INCREF(self); return (PyObject*) self; @@ -7950,7 +7918,7 @@ unicode_zfill(PyUnicodeObject *self, PyO ); } - fill = width - self->length; + fill = width - LENGTH(self); u = pad(self, fill, 0, '0'); @@ -7965,14 +7933,6 @@ unicode_zfill(PyUnicodeObject *self, PyO return (PyObject*) u; } - -#if 0 -static PyObject* -unicode_freelistsize(PyUnicodeObject *self) -{ - return PyLong_FromLong(unicode_freelist_size); -} -#endif PyDoc_STRVAR(startswith__doc__, "S.startswith(prefix[, start[, end]]) -> bool\n\ @@ -8080,7 +8040,7 @@ static PyObject * static PyObject * unicode_getnewargs(PyUnicodeObject *v) { - return Py_BuildValue("(u#)", v->str, v->length); + return Py_BuildValue("(u#)", v->str, LENGTH(v)); } @@ -8138,11 +8098,6 @@ static PyMethodDef unicode_methods[] = { {"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__}, #endif -#if 0 - /* This one is just used for debugging the implementation. */ - {"freelistsize", (PyCFunction) unicode_freelistsize, METH_NOARGS}, -#endif - {"__getnewargs__", (PyCFunction)unicode_getnewargs, METH_NOARGS}, {NULL, NULL} }; @@ -8198,7 +8153,7 @@ unicode_subscript(PyUnicodeObject* self, if (slicelength <= 0) { return PyUnicode_FromUnicode(NULL, 0); - } else if (start == 0 && step == 1 && slicelength == self->length && + } else if (start == 0 && step == 1 && slicelength == LENGTH(self) && PyUnicode_CheckExact(self)) { Py_INCREF(self); return (PyObject *)self; @@ -8936,22 +8891,13 @@ unicode_subtype_new(PyTypeObject *type, if (tmp == NULL) return NULL; assert(PyUnicode_Check(tmp)); - pnew = (PyUnicodeObject *) type->tp_alloc(type, n = tmp->length); - if (pnew == NULL) { + pnew = (PyUnicodeObject *) type->tp_alloc(type, n = LENGTH(tmp)); + if (pnew != NULL) { + Py_UNICODE_COPY(pnew->str, tmp->str, n+1); + LENGTH(pnew) = n; + pnew->hash = tmp->hash; Py_DECREF(tmp); - return NULL; - } - pnew->str = PyMem_NEW(Py_UNICODE, n+1); - if (pnew->str == NULL) { - _Py_ForgetReference((PyObject *)pnew); - PyObject_Del(pnew); - Py_DECREF(tmp); - return PyErr_NoMemory(); - } - Py_UNICODE_COPY(pnew->str, tmp->str, n+1); - pnew->length = n; - pnew->hash = tmp->hash; - Py_DECREF(tmp); + } return (PyObject *)pnew; } @@ -8968,7 +8914,7 @@ PyTypeObject PyUnicode_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) "str", /* tp_name */ sizeof(PyUnicodeObject), /* tp_size */ - 0, /* tp_itemsize */ + sizeof(Py_UNICODE), /* tp_itemsize */ /* Slots */ (destructor)unicode_dealloc, /* tp_dealloc */ 0, /* tp_print */ @@ -9027,8 +8973,6 @@ void _PyUnicode_Init(void) }; /* Init the implementation */ - unicode_freelist = NULL; - unicode_freelist_size = 0; unicode_empty = _PyUnicode_New(0); if (!unicode_empty) return; @@ -9051,29 +8995,29 @@ void void _PyUnicode_Fini(void) { - PyUnicodeObject *u; - int i; - - Py_XDECREF(unicode_empty); - unicode_empty = NULL; - - for (i = 0; i < 256; i++) { + PyUnicodeObject *u; + Py_ssize_t i; + + Py_XDECREF(unicode_empty); + unicode_empty = NULL; + + for (i = 0; i < 256; i++) { if (unicode_latin1[i]) { - Py_DECREF(unicode_latin1[i]); - unicode_latin1[i] = NULL; - } - } - - for (u = unicode_freelist; u != NULL;) { - PyUnicodeObject *v = u; - u = *(PyUnicodeObject **)u; - if (v->str) - PyMem_DEL(v->str); - Py_XDECREF(v->defenc); - PyObject_Del(v); - } - unicode_freelist = NULL; - unicode_freelist_size = 0; + Py_DECREF(unicode_latin1[i]); + unicode_latin1[i] = NULL; + } + } + + for (i = 0; i < MAX_SAVED_SIZE; i++) { + PyUnicodeObject *v; + u = unicode_freelist[i]; + while (u != NULL) { + v = (PyUnicodeObject *) u->defenc; + LENGTH(u) = i; + PyObject_Del(u); + u = v; + } + } } void @@ -9175,11 +9119,11 @@ void _Py_ReleaseInternedUnicodeStrings(v break; case SSTATE_INTERNED_IMMORTAL: Py_REFCNT(s) += 1; - immortal_size += s->length; + immortal_size += LENGTH(s); break; case SSTATE_INTERNED_MORTAL: Py_REFCNT(s) += 2; - mortal_size += s->length; + mortal_size += LENGTH(s); break; default: Py_FatalError("Inconsistent interned string state.");