Index: Python/ceval.c =================================================================== --- Python/ceval.c (revision 52618) +++ Python/ceval.c (working copy) @@ -4225,21 +4225,8 @@ PyErr_Format(exc, format_str, obj_str); } -static PyObject * -string_concatenate(PyObject *v, PyObject *w, - PyFrameObject *f, unsigned char *next_instr) +static void string_pre_drop_reference(PyObject *v, PyFrameObject *f, unsigned char *next_instr) { - /* This function implements 'variable += expr' when both arguments - are strings. */ - Py_ssize_t v_len = PyString_GET_SIZE(v); - Py_ssize_t w_len = PyString_GET_SIZE(w); - Py_ssize_t new_len = v_len + w_len; - if (new_len < 0) { - PyErr_SetString(PyExc_OverflowError, - "strings are too large to concat"); - return NULL; - } - if (v->ob_refcnt == 2) { /* In the common case, there are 2 references to the value * stored in 'variable' when the += is performed: one on the @@ -4278,8 +4265,28 @@ } } } +} - if (v->ob_refcnt == 1 && !PyString_CHECK_INTERNED(v)) { +static PyObject * +string_concatenate(PyObject *v, PyObject *w, + PyFrameObject *f, unsigned char *next_instr) +{ + /* This function implements 'variable += expr' when both arguments + are strings. */ + Py_ssize_t v_len = PyString_GET_SIZE(v); + Py_ssize_t w_len = PyString_GET_SIZE(w); + Py_ssize_t new_len = v_len + w_len; + if (new_len < 0) { + PyErr_SetString(PyExc_OverflowError, + "strings are too large to concat"); + return NULL; + } + + string_pre_drop_reference(v, f, next_instr); + + if (v->ob_refcnt == 1 + && !PyString_CHECK_INTERNED(v) + && !PyString_CHECK_SLICE_OR_CONCATENATED(v)) { /* Now we own the last reference to 'v', so we can resize it * in-place. */ @@ -4299,6 +4306,7 @@ } else { /* When in-place resizing is not an option. 
*/ + string_pre_drop_reference(w, f, next_instr); PyString_Concat(&v, w); return v; } Index: Python/pythonrun.c =================================================================== --- Python/pythonrun.c (revision 52618) +++ Python/pythonrun.c (working copy) @@ -37,6 +37,12 @@ #define PRINT_TOTAL_REFS() fprintf(stderr, \ "[%" PY_FORMAT_SIZE_T "d refs]\n", \ _Py_GetRefTotal()) +/* +extern Py_ssize_t slicesLiveUnrendered, slicesLiveRendered, slicesDeadUnrendered, slicesDeadRendered; +#define PRINT_TOTAL_REFS() fprintf(stderr, \ + "[%" PY_FORMAT_SIZE_T "d refs, slices: %" PY_FORMAT_SIZE_T "dlu %" PY_FORMAT_SIZE_T "dlr %" PY_FORMAT_SIZE_T "ddu %" PY_FORMAT_SIZE_T "ddr]\n", \ + _Py_GetRefTotal(), slicesLiveUnrendered, slicesLiveRendered, slicesDeadUnrendered, slicesDeadRendered) +*/ #endif #ifdef __cplusplus Index: Include/stringobject.h =================================================================== --- Include/stringobject.h (revision 52618) +++ Include/stringobject.h (working copy) @@ -36,22 +36,62 @@ PyObject_VAR_HEAD long ob_shash; int ob_sstate; - char ob_sval[1]; + char *ob_sval; + char ob_svalstorage[1]; /* Invariants: - * ob_sval contains space for 'ob_size+1' elements. - * ob_sval[ob_size] == 0. + * if ob_sval is not NULL: + * the memory ob_sval points to has space for 'ob_size+1' elements. + * ob_sval[ob_size] == 0. + * if ob_sval is NULL: + * calling PyString_AsString() on it will return non-NULL, and + * change ob_sval to non-NULL. * ob_shash is the hash of the string or -1 if not computed yet. - * ob_sstate != 0 iff the string object is in stringobject.c's + * (ob_sstate & 3) != 0 iff the string object is in stringobject.c's * 'interned' dictionary; in this case the two references * from 'interned' to this object are *not counted* in ob_refcnt. 
*/ } PyStringObject; -#define SSTATE_NOT_INTERNED 0 -#define SSTATE_INTERNED_MORTAL 1 -#define SSTATE_INTERNED_IMMORTAL 2 +#define PYSTRING_CONCATENATIONS (8) +#define PYSTRING_RIGHTRECURSIONDEPTH (12 * 1024) +typedef struct { + PyObject_VAR_HEAD + long ob_shash; + int ob_sstate; + char *ob_sval; /* this object matches a PyStringObject to this point */ + unsigned short ob_srightrecursiondepth; + unsigned short ob_sstringsindex; + PyStringObject *ob_sstrings[PYSTRING_CONCATENATIONS]; +} PyStringConcatenationObject; + +/* + * Please don't make this smaller than + * sizeof(PyStringSliceObject) - offsetof(PyStringObject, ob_svalstorage) + */ +#define PYSTRING_MINIMUM_SIZE_FOR_SLICE_OBJECT (20) + + +typedef struct { + PyObject_VAR_HEAD + long ob_shash; + int ob_sstate; + char *ob_sval; + unsigned short ob_srightrecursiondepth; /* this object matches a PyStringConcatenationObject to this point */ + PyStringObject *ob_slchild; + Py_ssize_t ob_slstart; + Py_ssize_t ob_slend; +} PyStringSliceObject; + + +#define SSTATE_NOT_INTERNED ( 0) +#define SSTATE_INTERNED_MORTAL (1<<0) /* 1 */ +#define SSTATE_INTERNED_IMMORTAL (1<<1) /* 2 */ +#define SSTATE_CONCATENATION (1<<2) /* 4 */ +#define SSTATE_SLICE (1<<3) /* 8 */ + + PyAPI_DATA(PyTypeObject) PyBaseString_Type; PyAPI_DATA(PyTypeObject) PyString_Type; @@ -83,12 +123,23 @@ PyAPI_FUNC(PyObject *) PyString_InternFromString(const char *); PyAPI_FUNC(void) _Py_ReleaseInternedStrings(void); -/* Use only if you know it's a string */ -#define PyString_CHECK_INTERNED(op) (((PyStringObject *)(op))->ob_sstate) +/* Use these only if you know it's a string */ +#define __PyString_STATE(op) (((PyStringObject *)(op))->ob_sstate) +#define PyString_CHECK_INTERNED(op) (__PyString_STATE(op) & ( SSTATE_INTERNED_MORTAL | SSTATE_INTERNED_IMMORTAL)) +#define PyString_CHECK_CONCATENATED(op) (__PyString_STATE(op) & SSTATE_CONCATENATION) +#define PyString_CHECK_SLICE(op) (__PyString_STATE(op) & SSTATE_SLICE) +#define 
PyString_CHECK_SLICE_OR_CONCATENATED(op) (__PyString_STATE(op) & (SSTATE_SLICE | SSTATE_CONCATENATION) ) +#define PyString_SET_INTERNED(op, val) (__PyString_STATE(op) = (__PyString_STATE(op) & ~(SSTATE_INTERNED_MORTAL | SSTATE_INTERNED_IMMORTAL)) | (val & (SSTATE_INTERNED_MORTAL | SSTATE_INTERNED_IMMORTAL))) +#define PyString_SET_CONCATENATED(op, val) (__PyString_STATE(op) = (__PyString_STATE(op) & ~SSTATE_CONCATENATION) | (val & SSTATE_CONCATENATION)) +#define PyString_SET_SLICE(op, val) (__PyString_STATE(op) = (__PyString_STATE(op) & ~SSTATE_SLICE) | (val & SSTATE_SLICE)) + + /* Macro, trading safety for speed */ -#define PyString_AS_STRING(op) (((PyStringObject *)(op))->ob_sval) +#define PyString_AS_STRING_DIRECT(op) (((PyStringObject *)(op))->ob_sval) +#define PyString_AS_STRING(op) ( PyString_AS_STRING_DIRECT(op) ? PyString_AS_STRING_DIRECT(op) : PyString_AsString((struct _object *)op) ) #define PyString_GET_SIZE(op) (((PyStringObject *)(op))->ob_size) +#define PyString_GET_RIGHT_RECURSION_DEPTH(op) (PyString_CHECK_SLICE_OR_CONCATENATED(op) ? (((PyStringConcatenationObject *)(op))->ob_srightrecursiondepth) : 0) /* _PyString_Join(sep, x) is like sep.join(x). sep must be PyStringObject*, x must be an iterable object. 
*/ Index: Objects/codeobject.c =================================================================== --- Objects/codeobject.c (revision 52618) +++ Objects/codeobject.c (working copy) @@ -71,7 +71,8 @@ /* Intern selected string constants */ for (i = PyTuple_Size(consts); --i >= 0; ) { PyObject *v = PyTuple_GetItem(consts, i); - if (!PyString_Check(v)) + if (!PyString_Check(v) + || (PyString_AS_STRING_DIRECT(v) == NULL)) continue; if (!all_name_chars((unsigned char *)PyString_AS_STRING(v))) continue; Index: Objects/unicodeobject.c =================================================================== --- Objects/unicodeobject.c (revision 52618) +++ Objects/unicodeobject.c (working copy) @@ -4252,6 +4252,7 @@ #define STRINGLIB_LEN PyUnicode_GET_SIZE #define STRINGLIB_NEW PyUnicode_FromUnicode +#define STRINGLIB_SLICE(s, i, j) PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(s) + (i), (j) - (i)) #define STRINGLIB_STR PyUnicode_AS_UNICODE Py_LOCAL_INLINE(int) Index: Objects/stringlib/partition.h =================================================================== --- Objects/stringlib/partition.h (revision 52618) +++ Objects/stringlib/partition.h (working copy) @@ -37,11 +37,11 @@ return out; } - PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, pos)); + PyTuple_SET_ITEM(out, 0, STRINGLIB_SLICE(str_obj, 0, pos)); Py_INCREF(sep_obj); PyTuple_SET_ITEM(out, 1, sep_obj); pos += sep_len; - PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str + pos, str_len - pos)); + PyTuple_SET_ITEM(out, 2, STRINGLIB_SLICE(str_obj, pos, str_len)); if (PyErr_Occurred()) { Py_DECREF(out); @@ -87,11 +87,11 @@ return out; } - PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, pos)); + PyTuple_SET_ITEM(out, 0, STRINGLIB_SLICE(str_obj, 0, pos)); Py_INCREF(sep_obj); PyTuple_SET_ITEM(out, 1, sep_obj); pos += sep_len; - PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str + pos, str_len - pos)); + PyTuple_SET_ITEM(out, 2, STRINGLIB_SLICE(str_obj, pos, str_len)); if (PyErr_Occurred()) { Py_DECREF(out); Index: Objects/stringlib/README.txt 
=================================================================== --- Objects/stringlib/README.txt (revision 52618) +++ Objects/stringlib/README.txt (working copy) @@ -28,6 +28,12 @@ creates a new string object +PyObject* STRINGLIB_SLICE(PyObject*, Py_ssize_t, Py_ssize_t) + + creates a new string object representing a slice of the object + passed in (must be a PyStringObject * or a PyUnicodeObject *, matching + which module it's being used from) + STRINGLIB_CHAR* STRINGLIB_STR(PyObject*) returns the pointer to the character data for the given string Index: Objects/stringobject.c =================================================================== --- Objects/stringobject.c (revision 52618) +++ Objects/stringobject.c (working copy) @@ -3,16 +3,36 @@ #define PY_SSIZE_T_CLEAN #include "Python.h" +/* for offsetof() macro on platforms that don't define it themselves */ +#include "../Include/structmember.h" #include +#ifndef min +#define min(a, b) ( ((a) < (b)) ? (a) : (b) ) +#endif /* min */ + +#ifndef max +#define max(a, b) ( ((a) > (b)) ? (a) : (b) ) +#endif /* max */ + + #ifdef COUNT_ALLOCS int null_strings, one_strings; #endif +#define PYSTRING_OBJECT_BASE_SIZE (offsetof(PyStringObject, ob_svalstorage)) + static PyStringObject *characters[UCHAR_MAX + 1]; static PyStringObject *nullstring; +/* +Py_ssize_t slicesLiveUnrendered = 0; +Py_ssize_t slicesLiveRendered = 0; +Py_ssize_t slicesDeadUnrendered = 0; +Py_ssize_t slicesDeadRendered = 0; +*/ + /* This dictionary holds all interned strings. Note that references to strings in this dictionary are *not* counted in the string's ob_refcnt. 
When the interned string reaches a refcnt of 0 the string deallocation @@ -72,12 +92,14 @@ } /* Inline PyObject_NewVar */ - op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size); + op = (PyStringObject *)PyObject_MALLOC( + PYSTRING_OBJECT_BASE_SIZE + size + 1); if (op == NULL) return PyErr_NoMemory(); PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; op->ob_sstate = SSTATE_NOT_INTERNED; + op->ob_sval = op->ob_svalstorage; if (str != NULL) Py_MEMCPY(op->ob_sval, str, size); op->ob_sval[size] = '\0'; @@ -127,12 +149,14 @@ } /* Inline PyObject_NewVar */ - op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size); + op = (PyStringObject *)PyObject_MALLOC( + PYSTRING_OBJECT_BASE_SIZE + size + 1); if (op == NULL) return PyErr_NoMemory(); PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; op->ob_sstate = SSTATE_NOT_INTERNED; + op->ob_sval = op->ob_svalstorage; Py_MEMCPY(op->ob_sval, str, size+1); /* share short strings */ if (size == 0) { @@ -231,7 +255,7 @@ if (!string) return NULL; - s = PyString_AsString(string); + s = PyString_AS_STRING_DIRECT(string); for (f = format; *f; f++) { if (*f == '%') { @@ -512,6 +536,66 @@ return NULL; } + +/* + * Internal, used only by stringobject.c. + * Call this when it's okay if the string is not zero-terminated + * (i.e. need not end with a '\0'). This means we don't need to render string + * slice objects. + * + * Note: in release builds, this function blindly assumes the + * object you passed in *is* some kind of PyStringObject *! + * + * Note: if the string is a concatenation object, this *will* + * render it. + * + * Note: if the string is zero length, you always *will* + * get a terminating zero. (Zero-length string slices don't + * bother using the slice object.) + */ +#define PYSTRING_AS_UNTERMINATED_STRING(x) (PyString_AS_STRING_DIRECT(x) ?
PyString_AS_STRING_DIRECT(x) : pystring_as_unterminated_string(x) ) +static /* const */ char * +pystring_as_unterminated_string(register PyStringObject *op) +{ +#ifdef Py_DEBUG + assert(PyString_Check(op)); +#endif /* Py_DEBUG */ + if (PyString_AS_STRING_DIRECT(op)) + return PyString_AS_STRING_DIRECT(op); + if (PyString_CHECK_SLICE(op) && ((PyStringSliceObject *)op)->ob_slchild != NULL) + return PyString_AS_STRING(((PyStringSliceObject *)op)->ob_slchild) + ((PyStringSliceObject *)op)->ob_slstart; + return PyString_AsString((PyObject *)op); +} + + + +/* + * *Carefully* deallocate the recursive tree of concatenation objects, + * being careful to *iterate* (*not* recurse) down the left-hand side. + */ +static void recursive_dealloc(PyStringConcatenationObject *concat) +{ + for (;;) { + PyStringConcatenationObject *next; + + if (concat == NULL) + return; + + if ((concat->ob_refcnt == 1) + && PyString_CHECK_CONCATENATED(concat) + && (concat->ob_sstringsindex)) { + next = (PyStringConcatenationObject *) + *concat->ob_sstrings; + *concat->ob_sstrings = NULL; + } + else + next = NULL; + + Py_DECREF(concat); + concat = next; + } +} + static void string_dealloc(PyObject *op) { @@ -533,6 +617,67 @@ default: Py_FatalError("Inconsistent interned string state."); } + + if (PyString_CHECK_SLICE(op)) { + PyStringSliceObject *slice = (PyStringSliceObject *)op; +#ifdef Py_DEBUG + /* exactly one should be non-NULL at all times. 
*/ + assert( (slice->ob_slchild != NULL) ^ (slice->ob_sval != NULL) ); +#endif /* Py_DEBUG */ + if (slice->ob_slchild != NULL) { + Py_DECREF(slice->ob_slchild); +#ifdef Py_DEBUG + slice->ob_slchild = NULL; +/* + slicesLiveUnrendered--; + slicesDeadUnrendered++; +*/ +#endif /* Py_DEBUG */ + } + + if (slice->ob_sval != NULL) { + PyObject_Free(slice->ob_sval); +#ifdef Py_DEBUG + slice->ob_sval = NULL; +/* + slicesLiveRendered--; + slicesDeadRendered++; +*/ +#endif /* Py_DEBUG */ + } + } + + if (PyString_CHECK_CONCATENATED(op)) { + PyStringConcatenationObject *concat + = (PyStringConcatenationObject *)op; + register PyStringObject **i; + if (concat->ob_sstringsindex) { + for (i = concat->ob_sstrings + concat->ob_sstringsindex - 1; + i > concat->ob_sstrings; + i--) { + if (*i) { + Py_DECREF(*i); +#ifdef Py_DEBUG + *i = NULL; +#endif /* Py_DEBUG */ + } + } + + if (*i) { + recursive_dealloc((PyStringConcatenationObject *)*i); +#ifdef Py_DEBUG + *i = NULL; +#endif /* Py_DEBUG */ + } + } + + if (concat->ob_sval != NULL) { + PyObject_Free(concat->ob_sval); +#ifdef Py_DEBUG + concat->ob_sval = NULL; +#endif /* Py_DEBUG */ + } + } op->ob_type->tp_free(op); } @@ -555,7 +700,7 @@ v = PyString_FromStringAndSize((char *)NULL, newlen); if (v == NULL) return NULL; - p = buf = PyString_AsString(v); + p = buf = PyString_AS_STRING_DIRECT(v); end = s + len; while (s < end) { if (*s != '\\') { @@ -695,8 +840,8 @@ static Py_ssize_t string_getsize(register PyObject *op) { - char *s; - Py_ssize_t len; + char *s; + Py_ssize_t len; if (PyString_AsStringAndSize(op, &s, &len)) return -1; return len; @@ -720,14 +865,117 @@ return ((PyStringObject *)op) -> ob_size; } + +static void recursiveConcatenate(char *buffer, + Py_ssize_t length, PyStringConcatenationObject *s) { + register PyStringObject **i; + + for (;;) { + /* + * optimized for the general case of 'a'+'b'+'c'+'d'+'e': + * in this case, we will never actually recurse, we will iterate + */ + if ((s->ob_sval != NULL) || 
PyString_CHECK_SLICE(s)) { + memcpy(buffer, PYSTRING_AS_UNTERMINATED_STRING((PyStringObject *)s), s->ob_size); + return; + } + + for (i = s->ob_sstrings + s->ob_sstringsindex - 1; + i >= s->ob_sstrings + 1; + i--) { + PyStringObject *child = *i; + char *childDestination; + length -= child->ob_size; + childDestination = buffer + length; + if ((child->ob_sval != NULL) || PyString_CHECK_SLICE(child)) + memcpy(childDestination, PYSTRING_AS_UNTERMINATED_STRING(child), child->ob_size); + else + recursiveConcatenate(childDestination, child->ob_size, + (PyStringConcatenationObject *)child); + } + + s = (PyStringConcatenationObject *)*s->ob_sstrings; + } +} + + /*const*/ char * PyString_AsString(register PyObject *op) { if (!PyString_Check(op)) return string_getbuffer(op); - return ((PyStringObject *)op) -> ob_sval; + + if (PyString_AS_STRING_DIRECT(op) == NULL) { + if (PyString_CHECK_SLICE(op)) { + register PyStringSliceObject *s + = (PyStringSliceObject *)op; + Py_ssize_t length = s->ob_slend - s->ob_slstart; + char *string = (char *)PyObject_Malloc(s->ob_size + 1); + /* NEEDSWORK check for failure here! 
*/ + + memcpy(string, PYSTRING_AS_UNTERMINATED_STRING((PyStringObject *)op), PyString_GET_SIZE(op)); + string[length] = 0; + + Py_DECREF(s->ob_slchild); + + s->ob_sval = string; + s->ob_slchild = NULL; + s->ob_srightrecursiondepth = 0; + /* + slicesLiveUnrendered--; + slicesLiveRendered++; + */ + } + else if (PyString_CHECK_CONCATENATED(op)) { + register PyStringConcatenationObject *s + = (PyStringConcatenationObject *)op; + register PyStringObject **i; + /* + * if the string is small, we'll just overwrite + * the concatenation parts of the structure with the string + */ + char smallStackBuffer[sizeof(PyStringConcatenationObject) + - PYSTRING_OBJECT_BASE_SIZE]; + int smallEnough = (s->ob_size + 1) < sizeof(smallStackBuffer); + register char *string; + + if (smallEnough) + string = smallStackBuffer; + else + string = (char *)PyObject_Malloc(s->ob_size + 1); + /* NEEDSWORK check for failure here! */ + + recursiveConcatenate(string, s->ob_size, s); + + string[s->ob_size] = 0; + + for (i = s->ob_sstrings + s->ob_sstringsindex - 1; + i >= s->ob_sstrings; + i--) { + Py_DECREF(*i); +#ifdef Py_DEBUG + *i = NULL; +#endif /* Py_DEBUG */ + } + + if (smallEnough) { + s->ob_sval = ((PyStringObject *)s)->ob_svalstorage; + memcpy(s->ob_sval, smallStackBuffer, s->ob_size + 1); + /* s is no longer a concatenation object! */ + PyString_SET_CONCATENATED(s, 0); + } + else { + s->ob_sval = string; + s->ob_sstringsindex = 0; + s->ob_srightrecursiondepth = 0; + } + } + } + + return PyString_AS_STRING_DIRECT(op); } + int PyString_AsStringAndSize(register PyObject *obj, register char **s, @@ -766,6 +1014,72 @@ return 0; } + +/* String slice a[i:j] consists of characters a[i] ... a[j-1] */ + +static PyObject * +string_slice(register PyStringObject *a, register Py_ssize_t i, + register Py_ssize_t j) + /* j -- may be negative! 
*/ +{ + PyStringSliceObject *op; + + if (i < 0) + i = 0; + if (j < 0) + j = 0; /* Avoid signed/unsigned bug in next line */ + if (j > a->ob_size) + j = a->ob_size; + if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) { + /* It's the same as a */ + Py_INCREF(a); + return (PyObject *)a; + } + if (j < i) + j = i; + + if (!a->ob_sval && PyString_CHECK_SLICE(a)) { + /* + * if you take the slice of an unrendered slice, + * just slice further into the original. + * (a rendered slice will have dropped its reference + * to the grandparent.) + */ + PyStringSliceObject *child = (PyStringSliceObject *)a; + PyStringObject *grandchild = child->ob_slchild; + i += child->ob_slstart; + i = min(i, grandchild->ob_size); + j += child->ob_slstart; + j = min(j, grandchild->ob_size); + a = grandchild; + } + if (i == j) + return PyString_FromStringAndSize(NULL, 0); + + if ( ((j-i) < PYSTRING_MINIMUM_SIZE_FOR_SLICE_OBJECT) + || ((PyString_GET_RIGHT_RECURSION_DEPTH(a) + 1) >= PYSTRING_RIGHTRECURSIONDEPTH) ) + return PyString_FromStringAndSize(PyString_AS_STRING(a) + i, j-i); + + op = (PyStringSliceObject *)PyObject_MALLOC(sizeof(PyStringSliceObject)); + if (op == NULL) + return PyErr_NoMemory(); + PyObject_INIT_VAR(op, &PyString_Type, j-i); + op->ob_shash = -1; + op->ob_sstate = SSTATE_NOT_INTERNED | SSTATE_SLICE; + op->ob_sval = NULL; + + op->ob_slstart = i; + op->ob_slend = j; + op->ob_slchild = a; + op->ob_srightrecursiondepth = PyString_GET_RIGHT_RECURSION_DEPTH(a) + 1; + Py_INCREF(a); +/* + slicesLiveUnrendered++; +*/ + return (PyObject *) op; +} + + /* -------------------------------------------------------------------- */ /* Methods */ @@ -774,7 +1088,8 @@ #define STRINGLIB_CMP memcmp #define STRINGLIB_LEN PyString_GET_SIZE #define STRINGLIB_NEW PyString_FromStringAndSize -#define STRINGLIB_STR PyString_AS_STRING +#define STRINGLIB_SLICE(s, i, j) string_slice((PyStringObject *)(s), (i), (j)) +#define STRINGLIB_STR(x) PYSTRING_AS_UNTERMINATED_STRING((PyStringObject *)x) #define 
STRINGLIB_EMPTY nullstring @@ -791,6 +1106,8 @@ Py_ssize_t i; char c; int quote; + char *data; + Py_ssize_t size; /* XXX Ought to check for interrupts when writing long strings */ if (! PyString_CheckExact(op)) { @@ -803,9 +1120,9 @@ Py_DECREF(op); return ret; } + data = PYSTRING_AS_UNTERMINATED_STRING(op); + size = op->ob_size; if (flags & Py_PRINT_RAW) { - char *data = op->ob_sval; - Py_ssize_t size = op->ob_size; while (size > INT_MAX) { /* Very long strings cannot be written atomically. * But don't write exactly INT_MAX bytes at a time @@ -816,23 +1133,24 @@ data += chunk_size; size -= chunk_size; } + if (size) #ifdef __VMS - if (size) fwrite(data, (int)size, 1, fp); + fwrite(data, (int)size, 1, fp); #else - fwrite(data, 1, (int)size, fp); + fwrite(data, 1, (int)size, fp); #endif return 0; } /* figure out which quote to use; single is preferred */ quote = '\''; - if (memchr(op->ob_sval, '\'', op->ob_size) && - !memchr(op->ob_sval, '"', op->ob_size)) + if (memchr(data, '\'', size) && + !memchr(data, '"', size)) quote = '"'; fputc(quote, fp); - for (i = 0; i < op->ob_size; i++) { - c = op->ob_sval[i]; + for (i = 0; i < size; i++) { + c = data[i]; if (c == quote || c == '\\') fprintf(fp, "\\%c", c); else if (c == '\t') @@ -868,22 +1186,25 @@ register Py_ssize_t i; register char c; register char *p; + register char *pStart; + register char *opValue; int quote; + opValue = PYSTRING_AS_UNTERMINATED_STRING(op); /* figure out which quote to use; single is preferred */ quote = '\''; if (smartquotes && - memchr(op->ob_sval, '\'', op->ob_size) && - !memchr(op->ob_sval, '"', op->ob_size)) + memchr(opValue, '\'', op->ob_size) && + !memchr(opValue, '"', op->ob_size)) quote = '"'; - p = PyString_AS_STRING(v); + p = pStart = PyString_AS_STRING_DIRECT(v); *p++ = quote; for (i = 0; i < op->ob_size; i++) { /* There's at least enough room for a hex escape and a closing quote. 
*/ - assert(newsize - (p - PyString_AS_STRING(v)) >= 5); - c = op->ob_sval[i]; + assert(newsize - (p - pStart) >= 5); + c = opValue[i]; if (c == quote || c == '\\') *p++ = '\\', *p++ = c; else if (c == '\t') @@ -902,11 +1223,10 @@ else *p++ = c; } - assert(newsize - (p - PyString_AS_STRING(v)) >= 1); + assert(newsize - (p - pStart) >= 1); *p++ = quote; *p = '\0'; - _PyString_Resize( - &v, (p - PyString_AS_STRING(v))); + _PyString_Resize(&v, (p - pStart)); return v; } } @@ -928,7 +1248,8 @@ else { /* Subtype -- return genuine string with the same value. */ PyStringObject *t = (PyStringObject *) s; - return PyString_FromStringAndSize(t->ob_sval, t->ob_size); + return PyString_FromStringAndSize(PyString_AS_STRING(t), + t->ob_size); } } @@ -938,11 +1259,21 @@ return a->ob_size; } + +static void +string_render_if_too_deep(register PyStringConcatenationObject *op) +{ + if (PyString_GET_RIGHT_RECURSION_DEPTH(op) + >= PYSTRING_RIGHTRECURSIONDEPTH) { + PyString_AsString((PyObject *)op); + } +} + static PyObject * string_concat(register PyStringObject *a, register PyObject *bb) { register Py_ssize_t size; - register PyStringObject *op; + register PyStringConcatenationObject *op; if (!PyString_Check(bb)) { #ifdef Py_USING_UNICODE if (PyUnicode_Check(bb)) @@ -970,17 +1301,85 @@ "strings are too large to concat"); return NULL; } - - /* Inline PyObject_NewVar */ - op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size); + + + /* + * if *left* side is already a concatenation object, + * and hasn't been rendered yet, + * and only has one reference, + * and has room, + * just append to it. 
+ */ + if (PyString_CHECK_CONCATENATED(a) + && (a->ob_sval == NULL) + && (a->ob_refcnt == 1)) { + op = (PyStringConcatenationObject *)a; + if (op->ob_sstringsindex < PYSTRING_CONCATENATIONS) { + Py_INCREF(b); + op->ob_sstrings[op->ob_sstringsindex++] = b; + op->ob_size += b->ob_size; + + op->ob_srightrecursiondepth + = max(op->ob_srightrecursiondepth, + PyString_GET_RIGHT_RECURSION_DEPTH(b) + 1); + string_render_if_too_deep(op); + + Py_INCREF(op); + return (PyObject *)op; + } + } + + /* + * else, + * if *right* side is already a concatenation object, + * and hasn't been rendered yet, + * and only has one reference, + * and has room, + * just append to it. + */ + if (PyString_CHECK_CONCATENATED(b) + && (b->ob_sval == NULL) + && (b->ob_refcnt == 1)) { + op = (PyStringConcatenationObject *)b; + if (op->ob_sstringsindex < PYSTRING_CONCATENATIONS) { + memmove(op->ob_sstrings + 1, op->ob_sstrings, + op->ob_sstringsindex * sizeof(PyStringObject *)); + Py_INCREF(a); + op->ob_sstrings[0] = a; + op->ob_sstringsindex++; + op->ob_size += a->ob_size; + + op->ob_srightrecursiondepth + = max(op->ob_srightrecursiondepth, + PyString_GET_RIGHT_RECURSION_DEPTH(op->ob_sstrings[1]) + + 1); + string_render_if_too_deep(op); + + Py_INCREF(op); + return (PyObject *)op; + } + } + + op = (PyStringConcatenationObject *)PyObject_MALLOC( + sizeof(PyStringConcatenationObject)); if (op == NULL) return PyErr_NoMemory(); PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; - op->ob_sstate = SSTATE_NOT_INTERNED; - Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size); - Py_MEMCPY(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size); - op->ob_sval[size] = '\0'; + op->ob_sstate = SSTATE_NOT_INTERNED | SSTATE_CONCATENATION; +#ifdef Py_DEBUG + memset(op->ob_sstrings, 0, sizeof(op->ob_sstrings)); +#endif /* Py_DEBUG */ + op->ob_sstringsindex = 2; + op->ob_sval = NULL; + op->ob_sstrings[0] = a; + op->ob_sstrings[1] = b; + Py_INCREF(a); + Py_INCREF(b); + + op->ob_srightrecursiondepth = 
PyString_GET_RIGHT_RECURSION_DEPTH(b) + 1; + string_render_if_too_deep(op); + return (PyObject *) op; #undef b } @@ -992,6 +1391,7 @@ register Py_ssize_t j; register Py_ssize_t size; register PyStringObject *op; + register char *avalue; size_t nbytes; if (n < 0) n = 0; @@ -1015,20 +1415,22 @@ return NULL; } op = (PyStringObject *) - PyObject_MALLOC(sizeof(PyStringObject) + nbytes); + PyObject_MALLOC(PYSTRING_OBJECT_BASE_SIZE + nbytes + 1); if (op == NULL) return PyErr_NoMemory(); PyObject_INIT_VAR(op, &PyString_Type, size); op->ob_shash = -1; op->ob_sstate = SSTATE_NOT_INTERNED; + op->ob_sval = op->ob_svalstorage; op->ob_sval[size] = '\0'; + avalue = PYSTRING_AS_UNTERMINATED_STRING(a); if (a->ob_size == 1 && n > 0) { - memset(op->ob_sval, a->ob_sval[0] , n); + memset(op->ob_sval, *avalue , n); return (PyObject *) op; } i = 0; if (i < size) { - Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size); + Py_MEMCPY(op->ob_sval, avalue, a->ob_size); i = a->ob_size; } while (i < size) { @@ -1039,29 +1441,7 @@ return (PyObject *) op; } -/* String slice a[i:j] consists of characters a[i] ... a[j-1] */ -static PyObject * -string_slice(register PyStringObject *a, register Py_ssize_t i, - register Py_ssize_t j) - /* j -- may be negative! 
*/ -{ - if (i < 0) - i = 0; - if (j < 0) - j = 0; /* Avoid signed/unsigned bug in next line */ - if (j > a->ob_size) - j = a->ob_size; - if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) { - /* It's the same as a */ - Py_INCREF(a); - return (PyObject *)a; - } - if (j < i) - j = i; - return PyString_FromStringAndSize(a->ob_sval + i, j-i); -} - static int string_contains(PyObject *str_obj, PyObject *sub_obj) { @@ -1089,7 +1469,7 @@ PyErr_SetString(PyExc_IndexError, "string index out of range"); return NULL; } - pchar = a->ob_sval[i]; + pchar = PYSTRING_AS_UNTERMINATED_STRING(a)[i]; v = (PyObject *)characters[pchar & UCHAR_MAX]; if (v == NULL) v = PyString_FromStringAndSize(&pchar, 1); @@ -1109,6 +1489,8 @@ Py_ssize_t len_a, len_b; Py_ssize_t min_len; PyObject *result; + char *aString; + char *bString; /* Make sure both arguments are strings. */ if (!(PyString_Check(a) && PyString_Check(b))) { @@ -1125,13 +1507,14 @@ goto out; } } + aString = PYSTRING_AS_UNTERMINATED_STRING(a); + bString = PYSTRING_AS_UNTERMINATED_STRING(b); if (op == Py_EQ) { /* Supporting Py_NE here as well does not save much time, since Py_NE is rarely used. */ if (a->ob_size == b->ob_size - && (a->ob_sval[0] == b->ob_sval[0] - && memcmp(a->ob_sval, b->ob_sval, - a->ob_size) == 0)) { + && (aString[0] == bString[0] + && !memcmp(aString, bString, a->ob_size))) { result = Py_True; } else { result = Py_False; @@ -1141,9 +1524,9 @@ len_a = a->ob_size; len_b = b->ob_size; min_len = (len_a < len_b) ? 
len_a : len_b; if (min_len > 0) { - c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval); + c = Py_CHARMASK(*aString) - Py_CHARMASK(*bString); if (c==0) - c = memcmp(a->ob_sval, b->ob_sval, min_len); + c = memcmp(aString, bString, min_len); }else c = 0; if (c == 0) @@ -1170,9 +1553,14 @@ { PyStringObject *a = (PyStringObject*) o1; PyStringObject *b = (PyStringObject*) o2; - return a->ob_size == b->ob_size - && *a->ob_sval == *b->ob_sval - && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0; + char *aString; + char *bString; + if (a->ob_size != b->ob_size) + return 0; + aString = PYSTRING_AS_UNTERMINATED_STRING(a); + bString = PYSTRING_AS_UNTERMINATED_STRING(b); + return (*aString == *bString) + && !memcmp(aString, bString, a->ob_size); } static long @@ -1185,7 +1573,7 @@ if (a->ob_shash != -1) return a->ob_shash; len = a->ob_size; - p = (unsigned char *) a->ob_sval; + p = (unsigned char *)PYSTRING_AS_UNTERMINATED_STRING(a); x = *p << 7; while (--len >= 0) x = (1000003*x) ^ *p++; @@ -1222,8 +1610,10 @@ if (slicelength <= 0) { return PyString_FromStringAndSize("", 0); } + else if (step == 1) + return string_slice(self, start, stop); else { - source_buf = PyString_AsString((PyObject*)self); + source_buf = PYSTRING_AS_UNTERMINATED_STRING(self); result_buf = (char *)PyMem_Malloc(slicelength); if (result_buf == NULL) return PyErr_NoMemory(); @@ -1254,7 +1644,7 @@ "accessing non-existent string segment"); return -1; } - *ptr = (void *)self->ob_sval; + *ptr = (void *)PYSTRING_AS_UNTERMINATED_STRING(self); return self->ob_size; } @@ -1282,7 +1672,7 @@ "accessing non-existent string segment"); return -1; } - *ptr = self->ob_sval; + *ptr = PyString_AS_STRING(self); return self->ob_size; } @@ -1343,8 +1733,7 @@ (maxsplit >= MAX_PREALLOC ? 
MAX_PREALLOC : maxsplit+1) #define SPLIT_APPEND(data, left, right) \ - str = PyString_FromStringAndSize((data) + (left), \ - (right) - (left)); \ + str = string_slice((data), (left), (right)); \ if (str == NULL) \ goto onError; \ if (PyList_Append(list, str)) { \ @@ -1355,8 +1744,7 @@ Py_DECREF(str); #define SPLIT_ADD(data, left, right) { \ - str = PyString_FromStringAndSize((data) + (left), \ - (right) - (left)); \ + str = string_slice((data), (left), (right)); \ if (str == NULL) \ goto onError; \ if (count < MAX_PREALLOC) { \ @@ -1380,7 +1768,7 @@ #define RSKIP_NONSPACE(s, i) { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; } Py_LOCAL_INLINE(PyObject *) -split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit) +split_whitespace(PyStringObject *self, const char *s, Py_ssize_t len, Py_ssize_t maxsplit) { Py_ssize_t i, j, count=0; PyObject *str; @@ -1396,7 +1784,7 @@ if (i==len) break; j = i; i++; SKIP_NONSPACE(s, i, len); - SPLIT_ADD(s, j, i); + SPLIT_ADD(self, j, i); } if (i < len) { @@ -1404,7 +1792,7 @@ /* Skip any remaining whitespace and copy to end of string */ SKIP_SPACE(s, i, len); if (i != len) - SPLIT_ADD(s, i, len); + SPLIT_ADD(self, i, len); } FIX_PREALLOC_SIZE(list); return list; @@ -1414,7 +1802,7 @@ } Py_LOCAL_INLINE(PyObject *) -split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount) +split_char(PyStringObject *self, const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount) { register Py_ssize_t i, j, count=0; PyObject *str; @@ -1428,14 +1816,14 @@ for(; j 0)) { for (; j+n <= len; j++) { if (Py_STRING_MATCH(s, j, sub, n)) { - SPLIT_ADD(s, i, j); + SPLIT_ADD(self, i, j); i = j = j + n; break; } } } #endif - SPLIT_ADD(s, i, len); + SPLIT_ADD(self, i, len); FIX_PREALLOC_SIZE(list); return list; @@ -1537,7 +1925,7 @@ Py_ssize_t sep_len; if (PyString_Check(sep_obj)) { - sep = PyString_AS_STRING(sep_obj); + sep = PYSTRING_AS_UNTERMINATED_STRING((PyStringObject *)sep_obj); sep_len = PyString_GET_SIZE(sep_obj); } #ifdef 
Py_USING_UNICODE @@ -1549,7 +1937,7 @@ return stringlib_partition( (PyObject*) self, - PyString_AS_STRING(self), PyString_GET_SIZE(self), + PYSTRING_AS_UNTERMINATED_STRING(self), PyString_GET_SIZE(self), sep_obj, sep, sep_len ); } @@ -1568,7 +1956,7 @@ Py_ssize_t sep_len; if (PyString_Check(sep_obj)) { - sep = PyString_AS_STRING(sep_obj); + sep = PYSTRING_AS_UNTERMINATED_STRING((PyStringObject *)sep_obj); sep_len = PyString_GET_SIZE(sep_obj); } #ifdef Py_USING_UNICODE @@ -1580,13 +1968,13 @@ return stringlib_rpartition( (PyObject*) self, - PyString_AS_STRING(self), PyString_GET_SIZE(self), + PYSTRING_AS_UNTERMINATED_STRING(self), PyString_GET_SIZE(self), sep_obj, sep, sep_len ); } Py_LOCAL_INLINE(PyObject *) -rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit) +rsplit_whitespace(PyStringObject *self, const char *s, Py_ssize_t len, Py_ssize_t maxsplit) { Py_ssize_t i, j, count=0; PyObject *str; @@ -1602,14 +1990,14 @@ if (i<0) break; j = i; i--; RSKIP_NONSPACE(s, i); - SPLIT_ADD(s, i + 1, j + 1); + SPLIT_ADD(self, i + 1, j + 1); } if (i >= 0) { /* Only occurs when maxsplit was reached */ /* Skip any remaining whitespace and copy to beginning of string */ RSKIP_SPACE(s, i); if (i >= 0) - SPLIT_ADD(s, 0, i + 1); + SPLIT_ADD(self, 0, i + 1); } FIX_PREALLOC_SIZE(list); @@ -1622,7 +2010,7 @@ } Py_LOCAL_INLINE(PyObject *) -rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount) +rsplit_char(PyStringObject *self, const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount) { register Py_ssize_t i, j, count=0; PyObject *str; @@ -1635,14 +2023,14 @@ while ((i >= 0) && (maxcount-- > 0)) { for (; i >= 0; i--) { if (s[i] == ch) { - SPLIT_ADD(s, i + 1, j + 1); + SPLIT_ADD(self, i + 1, j + 1); j = i = i - 1; break; } } } if (j >= -1) { - SPLIT_ADD(s, 0, j + 1); + SPLIT_ADD(self, 0, j + 1); } FIX_PREALLOC_SIZE(list); if (PyList_Reverse(list) < 0) @@ -1668,7 +2056,7 @@ { Py_ssize_t len = PyString_GET_SIZE(self), n, i, j; Py_ssize_t maxsplit = -1, 
count=0; - const char *s = PyString_AS_STRING(self), *sub; + const char *s = PYSTRING_AS_UNTERMINATED_STRING(self), *sub; PyObject *list, *str, *subobj = Py_None; if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit)) @@ -1676,9 +2064,9 @@ if (maxsplit < 0) maxsplit = PY_SSIZE_T_MAX; if (subobj == Py_None) - return rsplit_whitespace(s, len, maxsplit); + return rsplit_whitespace(self, s, len, maxsplit); if (PyString_Check(subobj)) { - sub = PyString_AS_STRING(subobj); + sub = PYSTRING_AS_UNTERMINATED_STRING((PyStringObject *)subobj); n = PyString_GET_SIZE(subobj); } #ifdef Py_USING_UNICODE @@ -1693,7 +2081,7 @@ return NULL; } else if (n == 1) - return rsplit_char(s, len, sub[0], maxsplit); + return rsplit_char(self, s, len, sub[0], maxsplit); list = PyList_New(PREALLOC_SIZE(maxsplit)); if (list == NULL) @@ -1705,14 +2093,14 @@ while ( (i >= 0) && (maxsplit-- > 0) ) { for (; i>=0; i--) { if (Py_STRING_MATCH(s, i, sub, n)) { - SPLIT_ADD(s, i + n, j); + SPLIT_ADD(self, i + n, j); j = i; i -= n; break; } } } - SPLIT_ADD(s, 0, j); + SPLIT_ADD(self, 0, j); FIX_PREALLOC_SIZE(list); if (PyList_Reverse(list) < 0) goto onError; @@ -1733,7 +2121,7 @@ static PyObject * string_join(PyStringObject *self, PyObject *orig) { - char *sep = PyString_AS_STRING(self); + char *sep = PYSTRING_AS_UNTERMINATED_STRING(self); const Py_ssize_t seplen = PyString_GET_SIZE(self); PyObject *res = NULL; char *p; @@ -1810,18 +2198,19 @@ } /* Catenate everything. 
*/ - p = PyString_AS_STRING(res); + p = PyString_AS_STRING_DIRECT(res); for (i = 0; i < seqlen; ++i) { size_t n; item = PySequence_Fast_GET_ITEM(seq, i); n = PyString_GET_SIZE(item); - Py_MEMCPY(p, PyString_AS_STRING(item), n); + Py_MEMCPY(p, PYSTRING_AS_UNTERMINATED_STRING((PyStringObject *)item), n); p += n; if (i < seqlen - 1) { Py_MEMCPY(p, sep, seplen); p += seplen; } } + *p = 0; Py_DECREF(seq); return res; @@ -1862,7 +2251,7 @@ _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) return -2; if (PyString_Check(subobj)) { - sub = PyString_AS_STRING(subobj); + sub = PYSTRING_AS_UNTERMINATED_STRING((PyStringObject *)subobj); sub_len = PyString_GET_SIZE(subobj); } #ifdef Py_USING_UNICODE @@ -1877,11 +2266,11 @@ if (dir > 0) return stringlib_find_slice( - PyString_AS_STRING(self), PyString_GET_SIZE(self), + PYSTRING_AS_UNTERMINATED_STRING(self), PyString_GET_SIZE(self), sub, sub_len, start, end); else return stringlib_rfind_slice( - PyString_AS_STRING(self), PyString_GET_SIZE(self), + PYSTRING_AS_UNTERMINATED_STRING(self), PyString_GET_SIZE(self), sub, sub_len, start, end); } @@ -1967,9 +2356,9 @@ Py_LOCAL_INLINE(PyObject *) do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj) { - char *s = PyString_AS_STRING(self); + char *s = PYSTRING_AS_UNTERMINATED_STRING(self); Py_ssize_t len = PyString_GET_SIZE(self); - char *sep = PyString_AS_STRING(sepobj); + char *sep = PYSTRING_AS_UNTERMINATED_STRING((PyStringObject *)sepobj); Py_ssize_t seplen = PyString_GET_SIZE(sepobj); Py_ssize_t i, j; @@ -1993,14 +2382,14 @@ return (PyObject*)self; } else - return PyString_FromStringAndSize(s+i, j-i); + return string_slice(self, i, j); } Py_LOCAL_INLINE(PyObject *) do_strip(PyStringObject *self, int striptype) { - char *s = PyString_AS_STRING(self); + char *s = PYSTRING_AS_UNTERMINATED_STRING(self); Py_ssize_t len = PyString_GET_SIZE(self), i, j; i = 0; @@ -2023,7 +2412,7 @@ return (PyObject*)self; } else - return PyString_FromStringAndSize(s+i, j-i); + return 
string_slice(self, i, j); } @@ -2139,7 +2528,7 @@ s = PyString_AS_STRING(newobj); - Py_MEMCPY(s, PyString_AS_STRING(self), n); + Py_MEMCPY(s, PYSTRING_AS_UNTERMINATED_STRING(self), n); for (i = 0; i < n; i++) { int c = Py_CHARMASK(s[i]); @@ -2172,7 +2561,7 @@ s = PyString_AS_STRING(newobj); - Py_MEMCPY(s, PyString_AS_STRING(self), n); + Py_MEMCPY(s, PYSTRING_AS_UNTERMINATED_STRING(self), n); for (i = 0; i < n; i++) { int c = Py_CHARMASK(s[i]); @@ -2192,7 +2581,7 @@ static PyObject* string_title(PyStringObject *self) { - char *s = PyString_AS_STRING(self), *s_new; + char *s, *s_new; Py_ssize_t i, n = PyString_GET_SIZE(self); int previous_is_cased = 0; PyObject *newobj; @@ -2200,6 +2589,7 @@ newobj = PyString_FromStringAndSize(NULL, n); if (newobj == NULL) return NULL; + s = PYSTRING_AS_UNTERMINATED_STRING(self); s_new = PyString_AsString(newobj); for (i = 0; i < n; i++) { int c = Py_CHARMASK(*s++); @@ -2227,13 +2617,14 @@ static PyObject * string_capitalize(PyStringObject *self) { - char *s = PyString_AS_STRING(self), *s_new; + char *s, *s_new; Py_ssize_t i, n = PyString_GET_SIZE(self); PyObject *newobj; newobj = PyString_FromStringAndSize(NULL, n); if (newobj == NULL) return NULL; + s = PYSTRING_AS_UNTERMINATED_STRING(self); s_new = PyString_AsString(newobj); if (0 < n) { int c = Py_CHARMASK(*s++); @@ -2266,7 +2657,7 @@ string_count(PyStringObject *self, PyObject *args) { PyObject *sub_obj; - const char *str = PyString_AS_STRING(self), *sub; + const char *str, *sub; Py_ssize_t sub_len; Py_ssize_t start = 0, end = PY_SSIZE_T_MAX; @@ -2293,6 +2684,7 @@ string_adjust_indices(&start, &end, PyString_GET_SIZE(self)); + str = PYSTRING_AS_UNTERMINATED_STRING(self); return PyInt_FromSsize_t( stringlib_count(str + start, end - start, sub, sub_len) ); @@ -2307,7 +2699,7 @@ static PyObject * string_swapcase(PyStringObject *self) { - char *s = PyString_AS_STRING(self), *s_new; + char *s = PYSTRING_AS_UNTERMINATED_STRING(self), *s_new; Py_ssize_t i, n = PyString_GET_SIZE(self); 
PyObject *newobj; @@ -2357,7 +2749,7 @@ return NULL; if (PyString_Check(tableobj)) { - table1 = PyString_AS_STRING(tableobj); + table1 = PYSTRING_AS_UNTERMINATED_STRING((PyStringObject *)tableobj); tablen = PyString_GET_SIZE(tableobj); } #ifdef Py_USING_UNICODE @@ -2384,7 +2776,7 @@ if (delobj != NULL) { if (PyString_Check(delobj)) { - del_table = PyString_AS_STRING(delobj); + del_table = PYSTRING_AS_UNTERMINATED_STRING((PyStringObject *)delobj); dellen = PyString_GET_SIZE(delobj); } #ifdef Py_USING_UNICODE @@ -2408,7 +2800,10 @@ if (result == NULL) return NULL; output_start = output = PyString_AsString(result); - input = PyString_AS_STRING(input_obj); +#ifdef Py_DEBUG + assert(PyString_Check(input_obj)); +#endif /* Py_DEBUG */ + input = PYSTRING_AS_UNTERMINATED_STRING((PyStringObject *)input_obj); if (dellen == 0) { /* If no deletions are required, use faster code */ @@ -2467,7 +2862,7 @@ return self; } return (PyStringObject *)PyString_FromStringAndSize( - PyString_AS_STRING(self), + PYSTRING_AS_UNTERMINATED_STRING(self), PyString_GET_SIZE(self)); } @@ -2614,8 +3009,8 @@ PyString_FromStringAndSize(NULL, result_len)) ) return NULL; - self_s = PyString_AS_STRING(self); - result_s = PyString_AS_STRING(result); + self_s = PYSTRING_AS_UNTERMINATED_STRING(self); + result_s = PyString_AS_STRING_DIRECT(result); /* TODO: special case single character, which doesn't need memcpy */ @@ -2649,7 +3044,7 @@ PyStringObject *result; self_len = PyString_GET_SIZE(self); - self_s = PyString_AS_STRING(self); + self_s = PYSTRING_AS_UNTERMINATED_STRING(self); count = countchar(self_s, self_len, from_c, maxcount); if (count == 0) { @@ -2662,7 +3057,7 @@ if ( (result = (PyStringObject *) PyString_FromStringAndSize(NULL, result_len)) == NULL) return NULL; - result_s = PyString_AS_STRING(result); + result_s = PyString_AS_STRING_DIRECT(result); start = self_s; end = self_s + self_len; @@ -2692,7 +3087,7 @@ PyStringObject *result; self_len = PyString_GET_SIZE(self); - self_s = 
PyString_AS_STRING(self); + self_s = PYSTRING_AS_UNTERMINATED_STRING(self); count = countstring(self_s, self_len, from_s, from_len, @@ -2711,7 +3106,7 @@ PyString_FromStringAndSize(NULL, result_len)) == NULL ) return NULL; - result_s = PyString_AS_STRING(result); + result_s = PyString_AS_STRING_DIRECT(result); start = self_s; end = self_s + self_len; @@ -2743,7 +3138,7 @@ PyStringObject *result; /* The result string will be the same size */ - self_s = PyString_AS_STRING(self); + self_s = PYSTRING_AS_UNTERMINATED_STRING(self); self_len = PyString_GET_SIZE(self); next = findchar(self_s, self_len, from_c); @@ -2757,7 +3152,7 @@ result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len); if (result == NULL) return NULL; - result_s = PyString_AS_STRING(result); + result_s = PyString_AS_STRING_DIRECT(result); Py_MEMCPY(result_s, self_s, self_len); /* change everything in-place, starting with this one */ @@ -2791,7 +3186,7 @@ /* The result string will be the same size */ - self_s = PyString_AS_STRING(self); + self_s = PYSTRING_AS_UNTERMINATED_STRING(self); self_len = PyString_GET_SIZE(self); offset = findstring(self_s, self_len, @@ -2806,7 +3201,7 @@ result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len); if (result == NULL) return NULL; - result_s = PyString_AS_STRING(result); + result_s = PyString_AS_STRING_DIRECT(result); Py_MEMCPY(result_s, self_s, self_len); /* change everything in-place, starting with this one */ @@ -2841,7 +3236,7 @@ Py_ssize_t count, product; PyStringObject *result; - self_s = PyString_AS_STRING(self); + self_s = PYSTRING_AS_UNTERMINATED_STRING(self); self_len = PyString_GET_SIZE(self); count = countchar(self_s, self_len, from_c, maxcount); @@ -2907,7 +3302,7 @@ Py_ssize_t count, offset, product; PyStringObject *result; - self_s = PyString_AS_STRING(self); + self_s = PYSTRING_AS_UNTERMINATED_STRING(self); self_len = PyString_GET_SIZE(self); count = countstring(self_s, self_len, @@ -2934,7 +3329,7 @@ if ( (result = 
(PyStringObject *) PyString_FromStringAndSize(NULL, result_len)) == NULL) return NULL; - result_s = PyString_AS_STRING(result); + result_s = PyString_AS_STRING_DIRECT(result); start = self_s; end = self_s + self_len; @@ -3055,7 +3450,7 @@ return NULL; if (PyString_Check(from)) { - from_s = PyString_AS_STRING(from); + from_s = PYSTRING_AS_UNTERMINATED_STRING((PyStringObject *)from); from_len = PyString_GET_SIZE(from); } #ifdef Py_USING_UNICODE @@ -3067,7 +3462,7 @@ return NULL; if (PyString_Check(to)) { - to_s = PyString_AS_STRING(to); + to_s = PYSTRING_AS_UNTERMINATED_STRING((PyStringObject *)to); to_len = PyString_GET_SIZE(to); } #ifdef Py_USING_UNICODE @@ -3099,7 +3494,7 @@ const char* str; if (PyString_Check(substr)) { - sub = PyString_AS_STRING(substr); + sub = PYSTRING_AS_UNTERMINATED_STRING((PyStringObject *)substr); slen = PyString_GET_SIZE(substr); } #ifdef Py_USING_UNICODE @@ -3109,7 +3504,7 @@ #endif else if (PyObject_AsCharBuffer(substr, &sub, &slen)) return -1; - str = PyString_AS_STRING(self); + str = PYSTRING_AS_UNTERMINATED_STRING(self); string_adjust_indices(&start, &end, len); @@ -3307,12 +3702,13 @@ /* First pass: determine size of output string */ i = j = 0; - e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); - for (p = PyString_AS_STRING(self); p < e; p++) + p = PYSTRING_AS_UNTERMINATED_STRING(self); + e = p + PyString_GET_SIZE(self); + for (; p < e; p++) { if (*p == '\t') { - if (tabsize > 0) - j += tabsize - (j % tabsize); - } + if (tabsize > 0) + j += tabsize - (j % tabsize); + } else { j++; if (*p == '\n' || *p == '\r') { @@ -3320,6 +3716,7 @@ j = 0; } } + } /* Second pass: create output string and fill it */ u = PyString_FromStringAndSize(NULL, i + j); @@ -3327,23 +3724,25 @@ return NULL; j = 0; - q = PyString_AS_STRING(u); + q = PyString_AS_STRING_DIRECT(u); - for (p = PyString_AS_STRING(self); p < e; p++) + for (p = PYSTRING_AS_UNTERMINATED_STRING(self); p < e; p++) + { if (*p == '\t') { - if (tabsize > 0) { - i = tabsize - (j % 
tabsize); - j += i; - while (i--) - *q++ = ' '; - } - } - else { + if (tabsize > 0) { + i = tabsize - (j % tabsize); + j += i; + while (i--) + *q++ = ' '; + } + } + else { j++; - *q++ = *p; + *q++ = *p; if (*p == '\n' || *p == '\r') j = 0; } + } return u; } @@ -3364,12 +3763,12 @@ } u = PyString_FromStringAndSize(NULL, - left + PyString_GET_SIZE(self) + right); + left + PyString_GET_SIZE(self) + right); if (u) { if (left) - memset(PyString_AS_STRING(u), fill, left); - Py_MEMCPY(PyString_AS_STRING(u) + left, - PyString_AS_STRING(self), + memset(PyString_AS_STRING_DIRECT(u), fill, left); + Py_MEMCPY(PyString_AS_STRING_DIRECT(u) + left, + PYSTRING_AS_UNTERMINATED_STRING(self), PyString_GET_SIZE(self)); if (right) memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self), @@ -3472,15 +3871,7 @@ return NULL; if (PyString_GET_SIZE(self) >= width) { - if (PyString_CheckExact(self)) { - Py_INCREF(self); - return (PyObject*) self; - } - else - return PyString_FromStringAndSize( - PyString_AS_STRING(self), - PyString_GET_SIZE(self) - ); + return (PyObject *)return_self(self); } fill = width - PyString_GET_SIZE(self); @@ -3490,7 +3881,7 @@ if (s == NULL) return NULL; - p = PyString_AS_STRING(s); + p = PyString_AS_STRING_DIRECT(s); if (p[fill] == '+' || p[fill] == '-') { /* move sign to beginning of string */ p[0] = p[fill]; @@ -3510,23 +3901,22 @@ string_isspace(PyStringObject *self) { register const unsigned char *p - = (unsigned char *) PyString_AS_STRING(self); + = (unsigned char *) PYSTRING_AS_UNTERMINATED_STRING(self); register const unsigned char *e; - /* Shortcut for single character strings */ - if (PyString_GET_SIZE(self) == 1 && - isspace(*p)) - return PyBool_FromLong(1); - /* Special case for empty strings */ if (PyString_GET_SIZE(self) == 0) - return PyBool_FromLong(0); + return PyBool_FromLong(0); + /* Shortcut for single character strings */ + if (PyString_GET_SIZE(self) == 1) + return PyBool_FromLong(isspace(*p)); + e = p + PyString_GET_SIZE(self); for (; p 
< e; p++) { - if (!isspace(*p)) - return PyBool_FromLong(0); - } + if (!isspace(*p)) + return PyBool_FromLong(0); + } return PyBool_FromLong(1); } @@ -3541,23 +3931,22 @@ string_isalpha(PyStringObject *self) { register const unsigned char *p - = (unsigned char *) PyString_AS_STRING(self); + = (unsigned char *) PYSTRING_AS_UNTERMINATED_STRING(self); register const unsigned char *e; - /* Shortcut for single character strings */ - if (PyString_GET_SIZE(self) == 1 && - isalpha(*p)) - return PyBool_FromLong(1); - /* Special case for empty strings */ if (PyString_GET_SIZE(self) == 0) - return PyBool_FromLong(0); + return PyBool_FromLong(0); + /* Shortcut for single character strings */ + if (PyString_GET_SIZE(self) == 1) + return PyBool_FromLong(isalpha(*p)); + e = p + PyString_GET_SIZE(self); for (; p < e; p++) { - if (!isalpha(*p)) - return PyBool_FromLong(0); - } + if (!isalpha(*p)) + return PyBool_FromLong(0); + } return PyBool_FromLong(1); } @@ -3572,23 +3961,22 @@ string_isalnum(PyStringObject *self) { register const unsigned char *p - = (unsigned char *) PyString_AS_STRING(self); + = (unsigned char *) PYSTRING_AS_UNTERMINATED_STRING(self); register const unsigned char *e; - /* Shortcut for single character strings */ - if (PyString_GET_SIZE(self) == 1 && - isalnum(*p)) - return PyBool_FromLong(1); - /* Special case for empty strings */ if (PyString_GET_SIZE(self) == 0) - return PyBool_FromLong(0); + return PyBool_FromLong(0); + /* Shortcut for single character strings */ + if (PyString_GET_SIZE(self) == 1) + return PyBool_FromLong(isalnum(*p)); + e = p + PyString_GET_SIZE(self); for (; p < e; p++) { - if (!isalnum(*p)) - return PyBool_FromLong(0); - } + if (!isalnum(*p)) + return PyBool_FromLong(0); + } return PyBool_FromLong(1); } @@ -3603,23 +3991,22 @@ string_isdigit(PyStringObject *self) { register const unsigned char *p - = (unsigned char *) PyString_AS_STRING(self); + = (unsigned char *) PYSTRING_AS_UNTERMINATED_STRING(self); register const unsigned char *e; 
- /* Shortcut for single character strings */ - if (PyString_GET_SIZE(self) == 1 && - isdigit(*p)) - return PyBool_FromLong(1); - /* Special case for empty strings */ if (PyString_GET_SIZE(self) == 0) - return PyBool_FromLong(0); + return PyBool_FromLong(0); + /* Shortcut for single character strings */ + if (PyString_GET_SIZE(self) == 1) + return PyBool_FromLong(isdigit(*p)); + e = p + PyString_GET_SIZE(self); for (; p < e; p++) { - if (!isdigit(*p)) - return PyBool_FromLong(0); - } + if (!isdigit(*p)) + return PyBool_FromLong(0); + } return PyBool_FromLong(1); } @@ -3634,26 +4021,26 @@ string_islower(PyStringObject *self) { register const unsigned char *p - = (unsigned char *) PyString_AS_STRING(self); + = (unsigned char *) PYSTRING_AS_UNTERMINATED_STRING(self); register const unsigned char *e; int cased; + /* Special case for empty strings */ + if (PyString_GET_SIZE(self) == 0) + return PyBool_FromLong(0); + /* Shortcut for single character strings */ if (PyString_GET_SIZE(self) == 1) - return PyBool_FromLong(islower(*p) != 0); + return PyBool_FromLong(islower(*p)); - /* Special case for empty strings */ - if (PyString_GET_SIZE(self) == 0) - return PyBool_FromLong(0); - e = p + PyString_GET_SIZE(self); cased = 0; for (; p < e; p++) { - if (isupper(*p)) - return PyBool_FromLong(0); - else if (!cased && islower(*p)) - cased = 1; - } + if (isupper(*p)) + return PyBool_FromLong(0); + else if (!cased && islower(*p)) + cased = 1; + } return PyBool_FromLong(cased); } @@ -3668,26 +4055,26 @@ string_isupper(PyStringObject *self) { register const unsigned char *p - = (unsigned char *) PyString_AS_STRING(self); + = (unsigned char *) PYSTRING_AS_UNTERMINATED_STRING(self); register const unsigned char *e; int cased; + /* Special case for empty strings */ + if (PyString_GET_SIZE(self) == 0) + return PyBool_FromLong(0); + /* Shortcut for single character strings */ if (PyString_GET_SIZE(self) == 1) - return PyBool_FromLong(isupper(*p) != 0); + return 
PyBool_FromLong(isupper(*p)); - /* Special case for empty strings */ - if (PyString_GET_SIZE(self) == 0) - return PyBool_FromLong(0); - e = p + PyString_GET_SIZE(self); cased = 0; for (; p < e; p++) { - if (islower(*p)) - return PyBool_FromLong(0); - else if (!cased && isupper(*p)) - cased = 1; - } + if (islower(*p)) + return PyBool_FromLong(0); + else if (!cased && isupper(*p)) + cased = 1; + } return PyBool_FromLong(cased); } @@ -3704,38 +4091,38 @@ string_istitle(PyStringObject *self, PyObject *uncased) { register const unsigned char *p - = (unsigned char *) PyString_AS_STRING(self); + = (unsigned char *) PYSTRING_AS_UNTERMINATED_STRING(self); register const unsigned char *e; int cased, previous_is_cased; + /* Special case for empty strings */ + if (PyString_GET_SIZE(self) == 0) + return PyBool_FromLong(0); + /* Shortcut for single character strings */ if (PyString_GET_SIZE(self) == 1) - return PyBool_FromLong(isupper(*p) != 0); + return PyBool_FromLong(isupper(*p)); - /* Special case for empty strings */ - if (PyString_GET_SIZE(self) == 0) - return PyBool_FromLong(0); - e = p + PyString_GET_SIZE(self); cased = 0; previous_is_cased = 0; for (; p < e; p++) { - register const unsigned char ch = *p; + register const unsigned char ch = *p; - if (isupper(ch)) { - if (previous_is_cased) - return PyBool_FromLong(0); - previous_is_cased = 1; - cased = 1; - } - else if (islower(ch)) { - if (!previous_is_cased) - return PyBool_FromLong(0); - previous_is_cased = 1; - cased = 1; - } - else - previous_is_cased = 0; + if (isupper(ch)) { + if (previous_is_cased) + return PyBool_FromLong(0); + previous_is_cased = 1; + cased = 1; + } + else if (islower(ch)) { + if (!previous_is_cased) + return PyBool_FromLong(0); + previous_is_cased = 1; + cased = 1; + } + else + previous_is_cased = 0; } return PyBool_FromLong(cased); } @@ -3762,7 +4149,7 @@ if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends)) return NULL; - data = PyString_AS_STRING(self); + data = 
PYSTRING_AS_UNTERMINATED_STRING(self); len = PyString_GET_SIZE(self); /* This does not use the preallocated list because splitlines is @@ -3795,11 +4182,11 @@ if (keepends) eol = i; } - SPLIT_APPEND(data, j, eol); + SPLIT_APPEND(self, j, eol); j = i; } if (j < len) { - SPLIT_APPEND(data, j, len); + SPLIT_APPEND(self, j, len); } return list; @@ -3817,9 +4204,32 @@ static PyObject * string_getnewargs(PyStringObject *v) { - return Py_BuildValue("(s#)", v->ob_sval, v->ob_size); + return Py_BuildValue("(s#)", PYSTRING_AS_UNTERMINATED_STRING(v), v->ob_size); } + +/* + * Technically, I feel like this should create a new object which contains a + * duplicate of the original object. Then again, I'm not sure this function + * should exist at all--it's simply + */ +PyDoc_STRVAR(simplify__doc__, + "S.simplify() -> S\n\ + \n\ + Simplifies the internal representation of a string, returning the.\n\ + original string. Only needed for specialized memory-use tuning."); + + +static PyObject * +string_simplify(PyStringObject *v) +{ + if (v->ob_sval == NULL) { + PyString_AsString((PyObject *)v); + } + Py_INCREF(v); + return (PyObject *)v; +} + static PyMethodDef string_methods[] = { @@ -3871,6 +4281,7 @@ {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS, splitlines__doc__}, {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS}, + {"simplify", (PyCFunction)string_simplify, METH_NOARGS, simplify__doc__}, {NULL, NULL} /* sentinel */ }; @@ -3888,14 +4299,15 @@ if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x)) return NULL; if (x == NULL) - return PyString_FromString(""); + return PyString_FromStringAndSize(NULL, 0); return PyObject_Str(x); } static PyObject * str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { - PyObject *tmp, *pnew; + PyObject *tmp; + PyStringObject *pnew; Py_ssize_t n; assert(PyType_IsSubtype(type, &PyString_Type)); @@ -3904,15 +4316,16 @@ return NULL; assert(PyString_CheckExact(tmp)); n = PyString_GET_SIZE(tmp); - 
pnew = type->tp_alloc(type, n); + pnew = (PyStringObject *)type->tp_alloc(type, n); if (pnew != NULL) { - Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1); - ((PyStringObject *)pnew)->ob_shash = + pnew->ob_sval = pnew->ob_svalstorage; + Py_MEMCPY(pnew->ob_sval, PYSTRING_AS_UNTERMINATED_STRING((PyStringObject *)tmp), n+1); + ((PyStringObject *)pnew)->ob_shash = ((PyStringObject *)tmp)->ob_shash; ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED; } Py_DECREF(tmp); - return pnew; + return (PyObject *)pnew; } static PyObject * @@ -4081,7 +4494,7 @@ { register PyObject *v; register PyStringObject *sv; - v = *pv; + v = (PyObject *)*pv; if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 || PyString_CHECK_INTERNED(v)) { *pv = 0; @@ -4089,11 +4502,33 @@ PyErr_BadInternalCall(); return -1; } + + if (PyString_CHECK_CONCATENATED(v) || PyString_CHECK_SLICE(v)) { + char *newString; + sv = (PyStringObject *) *pv; + sv->ob_size = newsize; + + if (sv->ob_sval == NULL) + return 0; + + newString = PyObject_Realloc(sv->ob_sval, + newsize + 1); + if (newString == NULL) { + PyObject_Del(*pv); + PyErr_NoMemory(); + return -1; + } + newString[newsize] = '\0'; + sv->ob_sval = newString; + return 0; + } + /* XXX UNREF/NEWREF interface should be more symmetrical */ _Py_DEC_REFTOTAL; _Py_ForgetReference(v); *pv = (PyObject *) - PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize); + PyObject_REALLOC((char *)v, + PYSTRING_OBJECT_BASE_SIZE + newsize + 1); if (*pv == NULL) { PyObject_Del(v); PyErr_NoMemory(); @@ -4102,6 +4537,7 @@ _Py_NewReference(*pv); sv = (PyStringObject *) *pv; sv->ob_size = newsize; + sv->ob_sval = sv->ob_svalstorage; sv->ob_sval[newsize] = '\0'; sv->ob_shash = -1; /* invalidate cached hash value */ return 0; @@ -4249,7 +4685,7 @@ return NULL; } llen = PyString_Size(result); - if (llen > INT_MAX) { + if (llen > PY_SSIZE_T_MAX) { PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong"); return NULL; } @@ -4445,13 
+4881,13 @@ return NULL; } orig_args = args; - fmt = PyString_AS_STRING(format); + fmt = PYSTRING_AS_UNTERMINATED_STRING((PyStringObject *)format); fmtcnt = PyString_GET_SIZE(format); reslen = rescnt = fmtcnt + 100; result = PyString_FromStringAndSize((char *)NULL, reslen); if (result == NULL) return NULL; - res = PyString_AsString(result); + res = PyString_AS_STRING_DIRECT(result); if (PyTuple_Check(args)) { arglen = PyTuple_GET_SIZE(args); argidx = 0; @@ -4470,7 +4906,7 @@ reslen += rescnt; if (_PyString_Resize(&result, reslen) < 0) return NULL; - res = PyString_AS_STRING(result) + res = PyString_AS_STRING_DIRECT(result) + reslen - rescnt; --rescnt; } @@ -4740,8 +5176,8 @@ "unsupported format character '%c' (0x%x) " "at index %zd", c, c, - (Py_ssize_t)(fmt - 1 - - PyString_AsString(format))); + (Py_ssize_t)(fmt - 1 + - PyString_AsString(format))); goto error; } if (sign) { @@ -4860,11 +5296,11 @@ args_owned = 1; /* Take what we have of the result and let the Unicode formatting function format the rest of the input. */ - rescnt = res - PyString_AS_STRING(result); + rescnt = res - PyString_AS_STRING_DIRECT(result); if (_PyString_Resize(&result, rescnt)) goto error; fmtcnt = PyString_GET_SIZE(format) - \ - (fmt - PyString_AS_STRING(format)); + (fmt - PYSTRING_AS_UNTERMINATED_STRING((PyStringObject *)format)); format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL); if (format == NULL) goto error; @@ -4924,7 +5360,7 @@ /* The two references in interned are not counted by refcnt. 
The string deallocator will take care of this */ s->ob_refcnt -= 2; - PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL; + PyString_SET_INTERNED(s, SSTATE_INTERNED_MORTAL); } void @@ -4932,7 +5368,7 @@ { PyString_InternInPlace(p); if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) { - PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL; + PyString_SET_INTERNED(*p, SSTATE_INTERNED_IMMORTAL); Py_INCREF(*p); } } @@ -4996,7 +5432,7 @@ default: Py_FatalError("Inconsistent interned string state."); } - s->ob_sstate = SSTATE_NOT_INTERNED; + PyString_SET_INTERNED(s->ob_sstate, SSTATE_NOT_INTERNED); } Py_DECREF(keys); PyDict_Clear(interned);