diff -r c16405370731 Include/unicodeobject.h --- a/Include/unicodeobject.h Tue Feb 12 18:44:23 2008 +0100 +++ b/Include/unicodeobject.h Tue Feb 12 20:53:31 2008 +0100 @@ -406,8 +406,8 @@ extern const unsigned char _Py_ascii_whi valid, and the substring must not be empty */ #define Py_UNICODE_MATCH(string, offset, substring) \ ((*((string)->str + (offset)) == *((substring)->str)) && \ - ((*((string)->str + (offset) + (substring)->length-1) == *((substring)->str + (substring)->length-1))) && \ - !memcmp((string)->str + (offset), (substring)->str, (substring)->length*sizeof(Py_UNICODE))) + ((*((string)->str + (offset) + Py_SIZE(substring)-1) == *((substring)->str + Py_SIZE(substring)-1))) && \ + !memcmp((string)->str + (offset), (substring)->str, Py_SIZE(substring)*sizeof(Py_UNICODE))) #ifdef __cplusplus extern "C" { @@ -416,9 +416,7 @@ extern "C" { /* --- Unicode Type ------------------------------------------------------- */ typedef struct { - PyObject_HEAD - Py_ssize_t length; /* Length of raw Unicode data in buffer */ - Py_UNICODE *str; /* Raw Unicode buffer */ + PyObject_VAR_HEAD long hash; /* Hash value; -1 if not set */ int state; /* != 0 if interned. 
In this case the two * references from the dictionary to this object @@ -426,7 +424,9 @@ typedef struct { PyObject *defenc; /* (Default) Encoded version as Python string, or NULL; this is used for implementing the buffer protocol */ + Py_UNICODE str[1]; /* Raw Unicode buffer */ } PyUnicodeObject; + PyAPI_DATA(PyTypeObject) PyUnicode_Type; PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type; @@ -441,9 +441,9 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_T /* Fast access macros */ #define PyUnicode_GET_SIZE(op) \ - (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->length)) + (assert(PyUnicode_Check(op)), Py_SIZE(op)) #define PyUnicode_GET_DATA_SIZE(op) \ - (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE))) + (assert(PyUnicode_Check(op)), Py_SIZE(op) * sizeof(Py_UNICODE)) #define PyUnicode_AS_UNICODE(op) \ (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->str)) #define PyUnicode_AS_DATA(op) \ diff -r c16405370731 Objects/stringlib/eq.h --- a/Objects/stringlib/eq.h Tue Feb 12 18:44:23 2008 +0100 +++ b/Objects/stringlib/eq.h Tue Feb 12 20:53:31 2008 +0100 @@ -9,13 +9,13 @@ unicode_eq(PyObject *aa, PyObject *bb) register PyUnicodeObject *a = (PyUnicodeObject *)aa; register PyUnicodeObject *b = (PyUnicodeObject *)bb; - if (a->length != b->length) + if (Py_SIZE(a) != Py_SIZE(b)) return 0; - if (a->length == 0) + if (Py_SIZE(a) == 0) return 1; if (a->str[0] != b->str[0]) return 0; - if (a->length == 1) + if (Py_SIZE(a) == 1) return 1; - return memcmp(a->str, b->str, a->length * sizeof(Py_UNICODE)) == 0; + return memcmp(a->str, b->str, Py_SIZE(a) * sizeof(Py_UNICODE)) == 0; } diff -r c16405370731 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Tue Feb 12 18:44:23 2008 +0100 +++ b/Objects/unicodeobject.c Tue Feb 12 20:53:31 2008 +0100 @@ -52,28 +52,27 @@ OF OR IN CONNECTION WITH THE USE OR PERF #include #endif -/* Limit for the Unicode object free list */ - -#define PyUnicode_MAXFREELIST 1024 - -/* Limit for the Unicode object free 
list stay alive optimization. +/* Macro for getting the character length of a unicode object. */ +#define LENGTH(uniobj) \ + Py_SIZE(uniobj) + +/* Number of free lists, one per unicode object size. The implementation will keep allocated Unicode memory intact for - all objects on the free list having a size less than this - limit. This reduces malloc() overhead for small Unicode objects. - - At worst this will result in PyUnicode_MAXFREELIST * - (sizeof(PyUnicodeObject) + KEEPALIVE_SIZE_LIMIT + - malloc()-overhead) bytes of unused garbage. + objects having a size less than this limit, within a certain number + of objects for each size (as defined by the CAN_SAVE macro below). Setting the limit to 0 effectively turns the feature off. - - Note: This is an experimental feature ! If you get core dumps when - using Unicode objects, turn this feature off. - -*/ - -#define KEEPALIVE_SIZE_LIMIT 9 +*/ + +#define MAX_SAVED_SIZE 150 + +/* We keep lots of small objects in the free lists, but fewer larger ones. */ + +#define CAN_SAVE(obj_length, list_size) \ + (((obj_length) < 20 && (list_size) < 100) \ + || ((obj_length) < 60 && (list_size) < 6) \ + || ((list_size) < 1)) /* Endianness switches; defaults to little endian */ @@ -105,9 +104,8 @@ extern "C" { */ static PyObject *interned; -/* Free list for Unicode objects */ -static PyUnicodeObject *free_list; -static int numfree; +/* Free lists for Unicode objects */ +static PyUnicodeObject *unicode_freelist[MAX_SAVED_SIZE]; /* The empty Unicode object is shared to improve performance. */ static PyUnicodeObject *unicode_empty; @@ -244,60 +242,70 @@ Py_LOCAL_INLINE(int) unicode_member(Py_U /* --- Unicode Object ----------------------------------------------------- */ static -int unicode_resize(register PyUnicodeObject *unicode, - Py_ssize_t length) -{ - void *oldstr; - - /* Shortcut if there's nothing much to do. 
*/ - if (unicode->length == length) - goto reset; - - /* Resizing shared object (unicode_empty or single character - objects) in-place is not allowed. Use PyUnicode_Resize() - instead ! */ - - if (unicode == unicode_empty || - (unicode->length == 1 && - unicode->str[0] < 256U && - unicode_latin1[unicode->str[0]] == unicode)) { - PyErr_SetString(PyExc_SystemError, - "can't resize shared unicode objects"); - return -1; - } - - /* We allocate one more byte to make sure the string is Ux0000 terminated. - The overallocation is also used by fastsearch, which assumes that it's - safe to look at str[length] (without making any assumptions about what - it contains). */ - - oldstr = unicode->str; - PyMem_RESIZE(unicode->str, Py_UNICODE, length + 1); - if (!unicode->str) { - unicode->str = (Py_UNICODE *)oldstr; +PyUnicodeObject *_PyUnicode_New(Py_ssize_t length); + +static +PyUnicodeObject *unicode_resize(register PyUnicodeObject *unicode, + Py_ssize_t length) +{ + PyUnicodeObject *v; + + /* Optimization for empty strings; yes, this sometimes happens. */ + if (length == 0 && unicode_empty != NULL) { + Py_DECREF(unicode); + Py_INCREF(unicode_empty); + return unicode_empty; + } + + /* Resizing unicode_empty and single character objects is not + possible since these are being shared. We simply return a fresh + copy with the same Unicode content. */ + if (LENGTH(unicode) != length && + (unicode == unicode_empty || LENGTH(unicode) == 1)) { + v = _PyUnicode_New(length); + if (v == NULL) + return NULL; + Py_UNICODE_COPY(v->str, unicode->str, + length < LENGTH(unicode) ? length : LENGTH(unicode)); + Py_DECREF(unicode); + return v; + } + + /* PyObject_REALLOC will almost always return a new memory block, so try + to find an existing one instead */ + if (length < MAX_SAVED_SIZE && (v = unicode_freelist[length])) { + unicode_freelist[length] = (PyUnicodeObject *) v->defenc; + v->defenc = NULL; + v->state = 0; + Py_UNICODE_COPY(v->str, unicode->str, + length < LENGTH(unicode) ? 
length : LENGTH(unicode)); + Py_DECREF(unicode); + goto reset; + } + + /* Adapted from similar code in tupleobject */ + _Py_DEC_REFTOTAL; + _Py_ForgetReference(unicode); + v = (PyUnicodeObject *) PyObject_REALLOC((char *) unicode, + sizeof(PyUnicodeObject) + length * sizeof(Py_UNICODE)); + if (v == NULL) { + _Py_NewReference((PyObject *) unicode); /* realloc failed: the old block is still valid, so undo the ForgetReference and leave the caller's object intact (PyUnicode_Resize keeps *unicode on error) */ PyErr_NoMemory(); - return -1; - } - unicode->str[length] = 0; - unicode->length = length; - - reset: - /* Reset the object caches */ - if (unicode->defenc) { - Py_DECREF(unicode->defenc); - unicode->defenc = NULL; - } - unicode->hash = -1; - - return 0; -} + return NULL; + } + Py_CLEAR(v->defenc); +reset: + LENGTH(v) = length; + v->str[length] = 0; + v->hash = -1; + _Py_NewReference(v); + return v; +} + /* We allocate one more byte to make sure the string is Ux0000 terminated; some code (e.g. new_identifier) relies on that. - - XXX This allocator could further be enhanced by assuring that the - free list never reduces its size below 1. */ @@ -313,35 +321,19 @@ PyUnicodeObject *_PyUnicode_New(Py_ssize } /* Unicode freelist & memory allocation */ - if (free_list) { - unicode = free_list; - free_list = *(PyUnicodeObject **)unicode; - numfree--; - if (unicode->str) { - /* Keep-Alive optimization: we only upsize the buffer, - never downsize it. 
*/ - if ((unicode->length < length) && - unicode_resize(unicode, length) < 0) { - PyMem_DEL(unicode->str); - goto onError; - } - } - else { - unicode->str = PyMem_NEW(Py_UNICODE, length + 1); - } - PyObject_INIT(unicode, &PyUnicode_Type); - } - else { - unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type); - if (unicode == NULL) + if (length < MAX_SAVED_SIZE + && (unicode = unicode_freelist[length])) { + _Py_NewReference(unicode); + unicode_freelist[length] = (PyUnicodeObject *) unicode->defenc; + } + else { + unicode = PyObject_NEW_VAR(PyUnicodeObject, &PyUnicode_Type, length); + if (!unicode) { + PyErr_NoMemory(); return NULL; - unicode->str = PyMem_NEW(Py_UNICODE, length + 1); - } - - if (!unicode->str) { - PyErr_NoMemory(); - goto onError; - } + } + } + /* Initialize the first element to guard against cases where * the caller fails before initializing str -- unicode_resize() * reads str[0], and the Keep-Alive optimization can keep memory @@ -351,21 +343,18 @@ PyUnicodeObject *_PyUnicode_New(Py_ssize */ unicode->str[0] = 0; unicode->str[length] = 0; - unicode->length = length; + LENGTH(unicode) = length; unicode->hash = -1; unicode->state = 0; unicode->defenc = NULL; return unicode; - - onError: - _Py_ForgetReference((PyObject *)unicode); - PyObject_Del(unicode); - return NULL; } static void unicode_dealloc(register PyUnicodeObject *unicode) { + Py_ssize_t length = LENGTH(unicode); + switch (PyUnicode_CHECK_INTERNED(unicode)) { case SSTATE_NOT_INTERNED: break; @@ -385,28 +374,20 @@ void unicode_dealloc(register PyUnicodeO Py_FatalError("Inconsistent interned unicode string state."); } - if (PyUnicode_CheckExact(unicode) && - numfree < PyUnicode_MAXFREELIST) { - /* Keep-Alive optimization */ - if (unicode->length >= KEEPALIVE_SIZE_LIMIT) { - PyMem_DEL(unicode->str); - unicode->str = NULL; - unicode->length = 0; - } - if (unicode->defenc) { - Py_DECREF(unicode->defenc); - unicode->defenc = NULL; - } - /* Add to free list */ - *(PyUnicodeObject **)unicode = 
free_list; - free_list = unicode; - numfree++; - } - else { - PyMem_DEL(unicode->str); - Py_XDECREF(unicode->defenc); - Py_TYPE(unicode)->tp_free((PyObject *)unicode); - } + Py_CLEAR(unicode->defenc); + + if (PyUnicode_CheckExact(unicode) && length < MAX_SAVED_SIZE) { + PyUnicodeObject *v = unicode_freelist[length]; + if (!v || CAN_SAVE(length, LENGTH(v))) { + /* Keep track of number of items stacked on the freelist */ + LENGTH(unicode) = v ? LENGTH(v) + 1 : 1; + unicode->defenc = (PyObject *) v; + unicode_freelist[length] = unicode; + return; + } + } + + Py_TYPE(unicode)->tp_free((PyObject *)unicode); } int PyUnicode_Resize(PyObject **unicode, Py_ssize_t length) @@ -420,28 +401,15 @@ int PyUnicode_Resize(PyObject **unicode, } v = (PyUnicodeObject *)*unicode; if (v == NULL || !PyUnicode_Check(v) || Py_REFCNT(v) != 1 || length < 0) { - PyErr_BadInternalCall(); - return -1; - } - - /* Resizing unicode_empty and single character objects is not - possible since these are being shared. We simply return a fresh - copy with the same Unicode content. */ - if (v->length != length && - (v == unicode_empty || v->length == 1)) { - PyUnicodeObject *w = _PyUnicode_New(length); - if (w == NULL) - return -1; - Py_UNICODE_COPY(w->str, v->str, - length < v->length ? length : v->length); - Py_DECREF(*unicode); - *unicode = (PyObject *)w; - return 0; - } - - /* Note that we don't have to modify *unicode for unshared Unicode - objects, since we can modify them in-place. */ - return unicode_resize(v, length); + PyErr_BadInternalCall(); + return -1; + } + + v = unicode_resize(v, length); + if (v == NULL) + return -1; + *unicode = (PyObject *) v; + return 0; } /* Internal API for use in unicodeobject.c only ! 
*/ @@ -5182,13 +5150,13 @@ int PyUnicode_EncodeDecimal(Py_UNICODE * /* helper macro to fixup start/end slice values */ #define FIX_START_END(obj) \ if (start < 0) \ - start += (obj)->length; \ + start += LENGTH(obj); \ if (start < 0) \ start = 0; \ - if (end > (obj)->length) \ - end = (obj)->length; \ + if (end > LENGTH(obj)) \ + end = LENGTH(obj); \ if (end < 0) \ - end += (obj)->length; \ + end += LENGTH(obj); \ if (end < 0) \ end = 0; @@ -5213,7 +5181,7 @@ Py_ssize_t PyUnicode_Count(PyObject *str FIX_START_END(str_obj); result = stringlib_count( - str_obj->str + start, end - start, sub_obj->str, sub_obj->length + str_obj->str + start, end - start, sub_obj->str, LENGTH(sub_obj) ); Py_DECREF(sub_obj); @@ -5265,12 +5233,12 @@ int tailmatch(PyUnicodeObject *self, Py_ssize_t end, int direction) { - if (substring->length == 0) + if (LENGTH(substring) == 0) return 1; FIX_START_END(self); - end -= substring->length; + end -= LENGTH(substring); if (end < start) return 0; @@ -5320,11 +5288,11 @@ PyObject *fixup(PyUnicodeObject *self, PyUnicodeObject *u; - u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, self->length); + u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, LENGTH(self)); if (u == NULL) return NULL; - Py_UNICODE_COPY(u->str, self->str, self->length); + Py_UNICODE_COPY(u->str, self->str, LENGTH(self)); if (!fixfct(u) && PyUnicode_CheckExact(self)) { /* fixfct should return TRUE if it modified the buffer. 
If @@ -5340,7 +5308,7 @@ static static int fixupper(PyUnicodeObject *self) { - Py_ssize_t len = self->length; + Py_ssize_t len = LENGTH(self); Py_UNICODE *s = self->str; int status = 0; @@ -5361,7 +5329,7 @@ static static int fixlower(PyUnicodeObject *self) { - Py_ssize_t len = self->length; + Py_ssize_t len = LENGTH(self); Py_UNICODE *s = self->str; int status = 0; @@ -5382,7 +5350,7 @@ static static int fixswapcase(PyUnicodeObject *self) { - Py_ssize_t len = self->length; + Py_ssize_t len = LENGTH(self); Py_UNICODE *s = self->str; int status = 0; @@ -5403,7 +5371,7 @@ static static int fixcapitalize(PyUnicodeObject *self) { - Py_ssize_t len = self->length; + Py_ssize_t len = LENGTH(self); Py_UNICODE *s = self->str; int status = 0; @@ -5629,13 +5597,13 @@ PyUnicodeObject *pad(PyUnicodeObject *se return self; } - u = _PyUnicode_New(left + self->length + right); + u = _PyUnicode_New(left + LENGTH(self) + right); if (u) { if (left) Py_UNICODE_FILL(u->str, fill, left); - Py_UNICODE_COPY(u->str + left, self->str, self->length); + Py_UNICODE_COPY(u->str + left, self->str, LENGTH(self)); if (right) - Py_UNICODE_FILL(u->str + left + self->length, fill, right); + Py_UNICODE_FILL(u->str + left + LENGTH(self), fill, right); } return u; @@ -5659,7 +5627,7 @@ PyObject *split_whitespace(PyUnicodeObje { register Py_ssize_t i; register Py_ssize_t j; - Py_ssize_t len = self->length; + Py_ssize_t len = LENGTH(self); PyObject *str; register const Py_UNICODE *buf = self->str; @@ -5751,7 +5719,7 @@ PyObject *split_char(PyUnicodeObject *se { register Py_ssize_t i; register Py_ssize_t j; - Py_ssize_t len = self->length; + Py_ssize_t len = LENGTH(self); PyObject *str; register const Py_UNICODE *buf = self->str; @@ -5782,8 +5750,8 @@ PyObject *split_substring(PyUnicodeObjec { register Py_ssize_t i; register Py_ssize_t j; - Py_ssize_t len = self->length; - Py_ssize_t sublen = substring->length; + Py_ssize_t len = LENGTH(self); + Py_ssize_t sublen = LENGTH(substring); PyObject *str; for (i 
= j = 0; i <= len - sublen; ) { @@ -5812,7 +5780,7 @@ PyObject *rsplit_whitespace(PyUnicodeObj { register Py_ssize_t i; register Py_ssize_t j; - Py_ssize_t len = self->length; + Py_ssize_t len = LENGTH(self); PyObject *str; register const Py_UNICODE *buf = self->str; @@ -5852,7 +5820,7 @@ PyObject *rsplit_char(PyUnicodeObject *s { register Py_ssize_t i; register Py_ssize_t j; - Py_ssize_t len = self->length; + Py_ssize_t len = LENGTH(self); PyObject *str; register const Py_UNICODE *buf = self->str; @@ -5885,8 +5853,8 @@ PyObject *rsplit_substring(PyUnicodeObje { register Py_ssize_t i; register Py_ssize_t j; - Py_ssize_t len = self->length; - Py_ssize_t sublen = substring->length; + Py_ssize_t len = LENGTH(self); + Py_ssize_t sublen = LENGTH(substring); PyObject *str; for (i = len - sublen, j = len; i >= 0; ) { @@ -5930,10 +5898,10 @@ PyObject *split(PyUnicodeObject *self, if (substring == NULL) return split_whitespace(self,list,maxcount); - else if (substring->length == 1) + else if (LENGTH(substring) == 1) return split_char(self,list,substring->str[0],maxcount); - else if (substring->length == 0) { + else if (LENGTH(substring) == 0) { Py_DECREF(list); PyErr_SetString(PyExc_ValueError, "empty separator"); return NULL; @@ -5959,10 +5927,10 @@ PyObject *rsplit(PyUnicodeObject *self, if (substring == NULL) return rsplit_whitespace(self,list,maxcount); - else if (substring->length == 1) + else if (LENGTH(substring) == 1) return rsplit_char(self,list,substring->str[0],maxcount); - else if (substring->length == 0) { + else if (LENGTH(substring) == 0) { Py_DECREF(list); PyErr_SetString(PyExc_ValueError, "empty separator"); return NULL; @@ -5982,21 +5950,21 @@ PyObject *replace(PyUnicodeObject *self, if (maxcount < 0) maxcount = PY_SSIZE_T_MAX; - if (str1->length == str2->length) { + if (LENGTH(str1) == LENGTH(str2)) { /* same length */ Py_ssize_t i; - if (str1->length == 1) { + if (LENGTH(str1) == 1) { /* replace characters */ Py_UNICODE u1, u2; - if (!findchar(self->str, 
self->length, str1->str[0])) + if (!findchar(self->str, LENGTH(self), str1->str[0])) goto nothing; - u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, self->length); + u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, LENGTH(self)); if (!u) return NULL; - Py_UNICODE_COPY(u->str, self->str, self->length); + Py_UNICODE_COPY(u->str, self->str, LENGTH(self)); u1 = str1->str[0]; u2 = str2->str[0]; - for (i = 0; i < u->length; i++) + for (i = 0; i < LENGTH(u); i++) if (u->str[i] == u1) { if (--maxcount < 0) break; @@ -6004,20 +5972,20 @@ PyObject *replace(PyUnicodeObject *self, } } else { i = fastsearch( - self->str, self->length, str1->str, str1->length, FAST_SEARCH + self->str, LENGTH(self), str1->str, LENGTH(str1), FAST_SEARCH ); if (i < 0) goto nothing; - u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, self->length); + u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, LENGTH(self)); if (!u) return NULL; - Py_UNICODE_COPY(u->str, self->str, self->length); - while (i <= self->length - str1->length) + Py_UNICODE_COPY(u->str, self->str, LENGTH(self)); + while (i <= LENGTH(self) - LENGTH(str1)) if (Py_UNICODE_MATCH(self, i, str1)) { if (--maxcount < 0) break; - Py_UNICODE_COPY(u->str+i, str2->str, str2->length); - i += str1->length; + Py_UNICODE_COPY(u->str+i, str2->str, LENGTH(str2)); + i += LENGTH(str1); } else i++; } @@ -6028,23 +5996,23 @@ PyObject *replace(PyUnicodeObject *self, Py_UNICODE *p; /* replace strings */ - n = stringlib_count(self->str, self->length, str1->str, str1->length); + n = stringlib_count(self->str, LENGTH(self), str1->str, LENGTH(str1)); if (n > maxcount) n = maxcount; if (n == 0) goto nothing; - /* new_size = self->length + n * (str2->length - str1->length)); */ - delta = (str2->length - str1->length); + /* new_size = LENGTH(self) + n * (LENGTH(str2) - LENGTH(str1))); */ + delta = (LENGTH(str2) - LENGTH(str1)); if (delta == 0) { - new_size = self->length; + new_size = LENGTH(self); } else { - product = n * (str2->length - str1->length); - 
if ((product / (str2->length - str1->length)) != n) { + product = n * (LENGTH(str2) - LENGTH(str1)); + if ((product / (LENGTH(str2) - LENGTH(str1))) != n) { PyErr_SetString(PyExc_OverflowError, "replace string is too long"); return NULL; } - new_size = self->length + product; + new_size = LENGTH(self) + product; if (new_size < 0) { PyErr_SetString(PyExc_OverflowError, "replace string is too long"); @@ -6056,8 +6024,8 @@ PyObject *replace(PyUnicodeObject *self, return NULL; i = 0; p = u->str; - e = self->length - str1->length; - if (str1->length > 0) { + e = LENGTH(self) - LENGTH(str1); + if (LENGTH(str1) > 0) { while (n-- > 0) { /* look for next match */ j = i; @@ -6074,25 +6042,25 @@ PyObject *replace(PyUnicodeObject *self, p += j - i; } /* copy substitution string */ - if (str2->length > 0) { - Py_UNICODE_COPY(p, str2->str, str2->length); - p += str2->length; - } - i = j + str1->length; - } - if (i < self->length) + if (LENGTH(str2) > 0) { + Py_UNICODE_COPY(p, str2->str, LENGTH(str2)); + p += LENGTH(str2); + } + i = j + LENGTH(str1); + } + if (i < LENGTH(self)) /* copy tail [i:] */ - Py_UNICODE_COPY(p, self->str+i, self->length-i); + Py_UNICODE_COPY(p, self->str+i, LENGTH(self)-i); } else { /* interleave */ while (n > 0) { - Py_UNICODE_COPY(p, str2->str, str2->length); - p += str2->length; + Py_UNICODE_COPY(p, str2->str, LENGTH(str2)); + p += LENGTH(str2); if (--n <= 0) break; *p++ = self->str[i++]; } - Py_UNICODE_COPY(p, self->str+i, self->length-i); + Py_UNICODE_COPY(p, self->str+i, LENGTH(self)-i); } } return (PyObject *) u; @@ -6103,7 +6071,7 @@ nothing: Py_INCREF(self); return (PyObject *) self; } - return PyUnicode_FromUnicode(self->str, self->length); + return PyUnicode_FromUnicode(self->str, LENGTH(self)); } /* --- Unicode Object Methods --------------------------------------------- */ @@ -6213,12 +6181,12 @@ unicode_center(PyUnicodeObject *self, Py if (!PyArg_ParseTuple(args, "n|O&:center", &width, convert_uc, &fillchar)) return NULL; - if (self->length 
>= width && PyUnicode_CheckExact(self)) { + if (LENGTH(self) >= width && PyUnicode_CheckExact(self)) { Py_INCREF(self); return (PyObject*) self; } - marg = width - self->length; + marg = width - LENGTH(self); left = marg / 2 + (marg & width & 1); return (PyObject*) pad(self, left, marg - left, fillchar); @@ -6250,8 +6218,8 @@ unicode_compare(PyUnicodeObject *str1, P Py_UNICODE *s1 = str1->str; Py_UNICODE *s2 = str2->str; - len1 = str1->length; - len2 = str2->length; + len1 = LENGTH(str1); + len2 = LENGTH(str2); while (len1 > 0 && len2 > 0) { Py_UNICODE c1, c2; @@ -6284,8 +6252,8 @@ unicode_compare(PyUnicodeObject *str1, P Py_UNICODE *s1 = str1->str; Py_UNICODE *s2 = str2->str; - len1 = str1->length; - len2 = str2->length; + len1 = LENGTH(str1); + len2 = LENGTH(str2); while (len1 > 0 && len2 > 0) { Py_UNICODE c1, c2; @@ -6467,11 +6435,11 @@ PyObject *PyUnicode_Concat(PyObject *lef } /* Concat the two Unicode strings */ - w = _PyUnicode_New(u->length + v->length); + w = _PyUnicode_New(LENGTH(u) + LENGTH(v)); if (w == NULL) goto onError; - Py_UNICODE_COPY(w->str, u->str, u->length); - Py_UNICODE_COPY(w->str + u->length, v->str, v->length); + Py_UNICODE_COPY(w->str, u->str, LENGTH(u)); + Py_UNICODE_COPY(w->str + LENGTH(u), v->str, LENGTH(v)); Py_DECREF(u); Py_DECREF(v); @@ -6534,7 +6502,7 @@ unicode_count(PyUnicodeObject *self, PyO result = PyLong_FromSsize_t( stringlib_count(self->str + start, end - start, - substring->str, substring->length) + substring->str, LENGTH(substring)) ); Py_DECREF(substring); @@ -6599,7 +6567,7 @@ unicode_expandtabs(PyUnicodeObject *self /* First pass: determine size of output string */ i = j = old_j = 0; - e = self->str + self->length; + e = self->str + LENGTH(self); for (p = self->str; p < e; p++) if (*p == '\t') { if (tabsize > 0) { @@ -6691,7 +6659,7 @@ static PyObject * static PyObject * unicode_getitem(PyUnicodeObject *self, Py_ssize_t index) { - if (index < 0 || index >= self->length) { + if (index < 0 || index >= LENGTH(self)) { 
PyErr_SetString(PyExc_IndexError, "string index out of range"); return NULL; } @@ -7100,7 +7068,7 @@ static Py_ssize_t static Py_ssize_t unicode_length(PyUnicodeObject *self) { - return self->length; + return LENGTH(self); } PyDoc_STRVAR(ljust__doc__, @@ -7118,12 +7086,12 @@ unicode_ljust(PyUnicodeObject *self, PyO if (!PyArg_ParseTuple(args, "n|O&:ljust", &width, convert_uc, &fillchar)) return NULL; - if (self->length >= width && PyUnicode_CheckExact(self)) { + if (LENGTH(self) >= width && PyUnicode_CheckExact(self)) { Py_INCREF(self); return (PyObject*) self; } - return (PyObject*) pad(self, 0, width - self->length, fillchar); + return (PyObject*) pad(self, 0, width - LENGTH(self), fillchar); } PyDoc_STRVAR(lower__doc__, @@ -7307,8 +7275,8 @@ unicode_repeat(PyUnicodeObject *str, Py_ /* ensure # of chars needed doesn't overflow int and # of bytes * needed doesn't overflow size_t */ - nchars = len * str->length; - if (len && nchars / len != str->length) { + nchars = len * LENGTH(str); + if (len && nchars / len != LENGTH(str)) { PyErr_SetString(PyExc_OverflowError, "repeated string is too long"); return NULL; @@ -7325,13 +7293,13 @@ unicode_repeat(PyUnicodeObject *str, Py_ p = u->str; - if (str->length == 1 && len > 0) { + if (LENGTH(str) == 1 && len > 0) { Py_UNICODE_FILL(p, str->str[0], len); } else { Py_ssize_t done = 0; /* number of characters copied this far */ if (done < nchars) { - Py_UNICODE_COPY(p, str->str, str->length); - done = str->length; + Py_UNICODE_COPY(p, str->str, LENGTH(str)); + done = LENGTH(str); } while (done < nchars) { int n = (done <= nchars-done) ? 
done : nchars-done; @@ -7626,12 +7594,12 @@ unicode_rjust(PyUnicodeObject *self, PyO if (!PyArg_ParseTuple(args, "n|O&:rjust", &width, convert_uc, &fillchar)) return NULL; - if (self->length >= width && PyUnicode_CheckExact(self)) { + if (LENGTH(self) >= width && PyUnicode_CheckExact(self)) { Py_INCREF(self); return (PyObject*) self; } - return (PyObject*) pad(self, width - self->length, 0, fillchar); + return (PyObject*) pad(self, width - LENGTH(self), 0, fillchar); } PyObject *PyUnicode_Split(PyObject *s, @@ -7971,7 +7939,7 @@ static PyObject* static PyObject* unicode_translate(PyUnicodeObject *self, PyObject *table) { - return PyUnicode_TranslateCharmap(self->str, self->length, table, "ignore"); + return PyUnicode_TranslateCharmap(self->str, LENGTH(self), table, "ignore"); } PyDoc_STRVAR(upper__doc__, @@ -8001,7 +7969,7 @@ unicode_zfill(PyUnicodeObject *self, PyO if (!PyArg_ParseTuple(args, "n:zfill", &width)) return NULL; - if (self->length >= width) { + if (LENGTH(self) >= width) { if (PyUnicode_CheckExact(self)) { Py_INCREF(self); return (PyObject*) self; @@ -8013,7 +7981,7 @@ unicode_zfill(PyUnicodeObject *self, PyO ); } - fill = width - self->length; + fill = width - LENGTH(self); u = pad(self, fill, 0, '0'); @@ -8028,14 +7996,6 @@ unicode_zfill(PyUnicodeObject *self, PyO return (PyObject*) u; } - -#if 0 -static PyObject* -unicode_freelistsize(PyUnicodeObject *self) -{ - return PyLong_FromLong(numfree); -} -#endif PyDoc_STRVAR(startswith__doc__, "S.startswith(prefix[, start[, end]]) -> bool\n\ @@ -8143,7 +8103,7 @@ static PyObject * static PyObject * unicode_getnewargs(PyUnicodeObject *v) { - return Py_BuildValue("(u#)", v->str, v->length); + return Py_BuildValue("(u#)", v->str, LENGTH(v)); } @@ -8201,11 +8161,6 @@ static PyMethodDef unicode_methods[] = { {"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__}, #endif -#if 0 - /* This one is just used for debugging the implementation. 
*/ - {"freelistsize", (PyCFunction) unicode_freelistsize, METH_NOARGS}, -#endif - {"__getnewargs__", (PyCFunction)unicode_getnewargs, METH_NOARGS}, {NULL, NULL} }; @@ -8261,7 +8216,7 @@ unicode_subscript(PyUnicodeObject* self, if (slicelength <= 0) { return PyUnicode_FromUnicode(NULL, 0); - } else if (start == 0 && step == 1 && slicelength == self->length && + } else if (start == 0 && step == 1 && slicelength == LENGTH(self) && PyUnicode_CheckExact(self)) { Py_INCREF(self); return (PyObject *)self; @@ -8999,22 +8954,13 @@ unicode_subtype_new(PyTypeObject *type, if (tmp == NULL) return NULL; assert(PyUnicode_Check(tmp)); - pnew = (PyUnicodeObject *) type->tp_alloc(type, n = tmp->length); - if (pnew == NULL) { + pnew = (PyUnicodeObject *) type->tp_alloc(type, n = LENGTH(tmp)); + if (pnew != NULL) { /* on tp_alloc failure fall through: tmp must still be released below */ + Py_UNICODE_COPY(pnew->str, tmp->str, n+1); + LENGTH(pnew) = n; + pnew->hash = tmp->hash; + } - Py_DECREF(tmp); - return NULL; - } - pnew->str = PyMem_NEW(Py_UNICODE, n+1); - if (pnew->str == NULL) { - _Py_ForgetReference((PyObject *)pnew); - PyObject_Del(pnew); - Py_DECREF(tmp); - return PyErr_NoMemory(); - } - Py_UNICODE_COPY(pnew->str, tmp->str, n+1); - pnew->length = n; - pnew->hash = tmp->hash; Py_DECREF(tmp); return (PyObject *)pnew; } @@ -9031,7 +8977,7 @@ PyTypeObject PyUnicode_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) "str", /* tp_name */ sizeof(PyUnicodeObject), /* tp_size */ - 0, /* tp_itemsize */ + sizeof(Py_UNICODE), /* tp_itemsize */ /* Slots */ (destructor)unicode_dealloc, /* tp_dealloc */ 0, /* tp_print */ @@ -9090,8 +9036,6 @@ void _PyUnicode_Init(void) }; /* Init the implementation */ - free_list = NULL; - numfree = 0; unicode_empty = _PyUnicode_New(0); if (!unicode_empty) return; @@ -9115,28 +9059,28 @@ _PyUnicode_Fini(void) _PyUnicode_Fini(void) { PyUnicodeObject *u; - int i; + Py_ssize_t i; Py_XDECREF(unicode_empty); unicode_empty = NULL; for (i = 0; i < 256; i++) { - if (unicode_latin1[i]) { - Py_DECREF(unicode_latin1[i]); - unicode_latin1[i] =
NULL; - } - } - - for (u = free_list; u != NULL;) { - PyUnicodeObject *v = u; - u = *(PyUnicodeObject **)u; - if (v->str) - PyMem_DEL(v->str); - Py_XDECREF(v->defenc); - PyObject_Del(v); - } - free_list = NULL; - numfree = 0; + if (unicode_latin1[i]) { + Py_DECREF(unicode_latin1[i]); + unicode_latin1[i] = NULL; + } + } + + for (i = 0; i < MAX_SAVED_SIZE; i++) { + u = unicode_freelist[i]; + unicode_freelist[i] = NULL; /* detach the chain first so no dangling head survives finalization and a later re-initialization */ + while (u != NULL) { + PyUnicodeObject *v = (PyUnicodeObject *) u->defenc; /* defenc doubles as the free-list link */ + LENGTH(u) = i; + PyObject_Del(u); + u = v; + } + } } void @@ -9238,11 +9182,11 @@ void _Py_ReleaseInternedUnicodeStrings(v break; case SSTATE_INTERNED_IMMORTAL: Py_REFCNT(s) += 1; - immortal_size += s->length; + immortal_size += LENGTH(s); break; case SSTATE_INTERNED_MORTAL: Py_REFCNT(s) += 2; - mortal_size += s->length; + mortal_size += LENGTH(s); break; default: Py_FatalError("Inconsistent interned string state.");