diff -r 96c7338bf39a Include/unicodeobject.h
--- a/Include/unicodeobject.h	Wed Oct 06 23:21:18 2010 +0200
+++ b/Include/unicodeobject.h	Thu Oct 07 01:21:22 2010 +0200
@@ -359,8 +359,8 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
 
 #define Py_UNICODE_MATCH(string, offset, substring) \
     ((*((string)->str + (offset)) == *((substring)->str)) && \
-    ((*((string)->str + (offset) + (substring)->length-1) == *((substring)->str + (substring)->length-1))) && \
-     !memcmp((string)->str + (offset), (substring)->str, (substring)->length*sizeof(Py_UNICODE)))
+    ((*((string)->str + (offset) + Py_SIZE(substring)-1) == *((substring)->str + Py_SIZE(substring)-1))) && \
+     !memcmp((string)->str + (offset), (substring)->str, Py_SIZE(substring)*sizeof(Py_UNICODE)))
 
 #ifdef __cplusplus
 extern "C" {
@@ -369,18 +369,18 @@ extern "C" {
 /* --- Unicode Type ------------------------------------------------------- */
 
 typedef struct {
-    PyObject_HEAD
-    Py_ssize_t length;          /* Length of raw Unicode data in buffer */
-    Py_UNICODE *str;            /* Raw Unicode buffer */
-    long hash;                  /* Hash value; -1 if not set */
-    int state;                  /* != 0 if interned. In this case the two
+    PyObject_VAR_HEAD           /* ob_size holds the string length */
+    long hash;                  /* Hash value; -1 if not set */
+    PyObject *defenc;           /* (Default) Encoded version as Python
+                                   string, or NULL; this is used for
+                                   implementing the buffer protocol */
+    unsigned char state;        /* != 0 if interned. In this case the two
                                  * references from the dictionary to this object
                                  * are *not* counted in ob_refcnt. */
-    PyObject *defenc;           /* (Default) Encoded version as Python
-                                   string, or NULL; this is used for
-                                   implementing the buffer protocol */
+    Py_UNICODE str[1];          /* Raw Unicode buffer (inline, over-allocated) */
 } PyUnicodeObject;
 
+
 PyAPI_DATA(PyTypeObject) PyUnicode_Type;
 PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
 
@@ -394,9 +394,9 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_T
 
 /* Fast access macros */
 #define PyUnicode_GET_SIZE(op) \
-    (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->length))
+    (assert(PyUnicode_Check(op)), Py_SIZE(op))
 #define PyUnicode_GET_DATA_SIZE(op) \
-    (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE)))
+    (assert(PyUnicode_Check(op)), Py_SIZE(op) * sizeof(Py_UNICODE))
 #define PyUnicode_AS_UNICODE(op) \
     (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->str))
 #define PyUnicode_AS_DATA(op) \
diff -r 96c7338bf39a Lib/test/test_io.py
--- a/Lib/test/test_io.py	Wed Oct 06 23:21:18 2010 +0200
+++ b/Lib/test/test_io.py	Thu Oct 07 01:21:22 2010 +0200
@@ -2479,9 +2479,9 @@ class MiscIOTest(unittest.TestCase):
         self.assertRaises(TypeError, self.BlockingIOError, 1, "", None)
         b = self.BlockingIOError(1, "")
         self.assertEqual(b.characters_written, 0)
-        class C(str):
+        class C:
             pass
-        c = C("")
+        c = C()
         b = self.BlockingIOError(1, c)
         c.b = b
         b.c = c
diff -r 96c7338bf39a Lib/test/test_sys.py
--- a/Lib/test/test_sys.py	Wed Oct 06 23:21:18 2010 +0200
+++ b/Lib/test/test_sys.py	Thu Oct 07 01:21:22 2010 +0200
@@ -893,10 +893,11 @@ class SizeofTest(unittest.TestCase):
         # unicode
         usize = len('\0'.encode('unicode-internal'))
         samples = ['', '1'*100]
+        ucode = {2: 'H', 4: 'I'}[usize]
         # we need to test for both sizes, because we don't know if the string
         # has been cached
         for s in samples:
-            basicsize =  size(h + 'PPliP') + usize * (len(s) + 1)
+            basicsize =  struct.calcsize(vh + 'lPb' + '%d%s' % (len(s) + 1, ucode))
             check(s, basicsize)
         # weakref
         import weakref
diff -r 96c7338bf39a Objects/stringlib/eq.h
--- a/Objects/stringlib/eq.h	Wed Oct 06 23:21:18 2010 +0200
+++ b/Objects/stringlib/eq.h	Thu Oct 07 01:21:22 2010 +0200
@@ -9,13 +9,13 @@ unicode_eq(PyObject *aa, PyObject *bb)
     register PyUnicodeObject *a = (PyUnicodeObject *)aa;
     register PyUnicodeObject *b = (PyUnicodeObject *)bb;
 
-    if (a->length != b->length)
+    if (PyUnicode_GET_SIZE(a) != PyUnicode_GET_SIZE(b))
         return 0;
-    if (a->length == 0)
+    if (PyUnicode_GET_SIZE(a) == 0)
         return 1;
     if (a->str[0] != b->str[0])
         return 0;
-    if (a->length == 1)
+    if (PyUnicode_GET_SIZE(a) == 1)
         return 1;
-    return memcmp(a->str, b->str, a->length * sizeof(Py_UNICODE)) == 0;
+    return memcmp(a->str, b->str, PyUnicode_GET_DATA_SIZE(a)) == 0;
 }
diff -r 96c7338bf39a Objects/unicodeobject.c
--- a/Objects/unicodeobject.c	Wed Oct 06 23:21:18 2010 +0200
+++ b/Objects/unicodeobject.c	Thu Oct 07 01:21:22 2010 +0200
@@ -46,32 +46,38 @@ OF OR IN CONNECTION WITH THE USE OR PERF
 #include "unicodeobject.h"
 #include "ucnhash.h"
 
+#include <stddef.h>
+
 #ifdef MS_WINDOWS
 #include <windows.h>
 #endif
 
-/* Limit for the Unicode object free list */
-
-#define PyUnicode_MAXFREELIST       1024
-
-/* Limit for the Unicode object free list stay alive optimization.
+/* PyUnicodeObject_SIZE gives the basic physical size of a unicode string;
+   any memory allocation for a string of length n should request
+   (PyUnicodeObject_SIZE + n * sizeof(Py_UNICODE)) bytes.
+
+   Using PyUnicodeObject_SIZE instead of sizeof(PyUnicodeObject) saves
+   3 bytes per string allocation on a typical system.
+*/
+#define PyUnicodeObject_SIZE (offsetof(PyUnicodeObject, str) + sizeof(Py_UNICODE))
+
+
+/* Number of free lists, one per unicode object size.
 
    The implementation will keep allocated Unicode memory intact for
-   all objects on the free list having a size less than this
-   limit. This reduces malloc() overhead for small Unicode objects.
-
-   At worst this will result in PyUnicode_MAXFREELIST *
-   (sizeof(PyUnicodeObject) + KEEPALIVE_SIZE_LIMIT +
-   malloc()-overhead) bytes of unused garbage.
+   objects having a size less than this limit, within a certain number
+   of objects for each size (as defined by the CAN_SAVE macro below).
 
    Setting the limit to 0 effectively turns the feature off.
-
-   Note: This is an experimental feature ! If you get core dumps when
-   using Unicode objects, turn this feature off.
-
 */
 
-#define KEEPALIVE_SIZE_LIMIT       9
+#define MAX_SAVED_SIZE 100
+
+/* Free-list admission test: lengths below 20 may stack up to 50 entries per
+   list, any other length at most 3 (many small objects, few larger ones). */
+#define CAN_SAVE(obj_length, list_size) \
+        (((obj_length) < 20 && (list_size) < 50) \
+        || ((list_size) < 3))
 
 /* Endianness switches; defaults to little endian */
 
@@ -103,9 +109,8 @@ extern "C" {
 */
 static PyObject *interned;
 
-/* Free list for Unicode objects */
-static PyUnicodeObject *free_list;
-static int numfree;
+/* Free lists for Unicode objects */
+static PyUnicodeObject *unicode_freelist[MAX_SAVED_SIZE];
 
 /* The empty Unicode object is shared to improve performance. */
 static PyUnicodeObject *unicode_empty;
@@ -257,61 +262,74 @@ Py_LOCAL_INLINE(int) unicode_member(Py_U
 /* --- Unicode Object ----------------------------------------------------- */
 
 static
-int unicode_resize(register PyUnicodeObject *unicode,
-                   Py_ssize_t length)
-{
-    void *oldstr;
-
-    /* Shortcut if there's nothing much to do. */
-    if (unicode->length == length)
+PyUnicodeObject *_PyUnicode_New(Py_ssize_t length);
+
+/* Resize 'unicode' to 'length' characters.  Consumes the caller's reference and
+   returns the result (often a new object); on NULL 'unicode' is left untouched. */
+static
+PyUnicodeObject *unicode_resize(register PyUnicodeObject *unicode,
+    Py_ssize_t length)
+{
+    PyUnicodeObject *v;
+
+    /* Optimization for empty strings; yes, this sometimes happens. */
+    if (length == 0 && unicode_empty != NULL) {
+        Py_DECREF(unicode);
+        Py_INCREF(unicode_empty);
+        return unicode_empty;
+    }
+
+    /* unicode_empty and single character objects may be shared, so they
+       are never resized in place: hand back a fresh copy instead. */
+    if (PyUnicode_GET_SIZE(unicode) != length &&
+        (unicode == unicode_empty || PyUnicode_GET_SIZE(unicode) == 1)) {
+        v = _PyUnicode_New(length);
+        if (v == NULL)
+            return NULL;
+        Py_UNICODE_COPY(v->str, unicode->str,
+            length < PyUnicode_GET_SIZE(unicode) ? length : PyUnicode_GET_SIZE(unicode));
+        Py_DECREF(unicode);
+        return v;
+    }
+
+    /* Realloc nearly always moves the block, so prefer a recycled one. */
+    if (length < MAX_SAVED_SIZE && (v = unicode_freelist[length])) {
+        unicode_freelist[length] = (PyUnicodeObject *) v->defenc;
+        v->defenc = NULL;
+        v->state = 0;
+        Py_UNICODE_COPY(v->str, unicode->str,
+            length < PyUnicode_GET_SIZE(unicode) ? length : PyUnicode_GET_SIZE(unicode));
+        Py_DECREF(unicode);
         goto reset;
-
-    /* Resizing shared object (unicode_empty or single character
-       objects) in-place is not allowed. Use PyUnicode_Resize()
-       instead ! */
-
-    if (unicode == unicode_empty ||
-        (unicode->length == 1 &&
-         unicode->str[0] < 256U &&
-         unicode_latin1[unicode->str[0]] == unicode)) {
-        PyErr_SetString(PyExc_SystemError,
-                        "can't resize shared str objects");
-        return -1;
-    }
-
-    /* We allocate one more byte to make sure the string is Ux0000 terminated.
-       The overallocation is also used by fastsearch, which assumes that it's
-       safe to look at str[length] (without making any assumptions about what
-       it contains). */
-
-    oldstr = unicode->str;
-    unicode->str = PyObject_REALLOC(unicode->str,
-                                    sizeof(Py_UNICODE) * (length + 1));
-    if (!unicode->str) {
-        unicode->str = (Py_UNICODE *)oldstr;
+    }
+
+    /* In-place path, adapted from tupleobject: unlink, realloc, relink below */
+    _Py_DEC_REFTOTAL;
+    _Py_ForgetReference((PyObject *)unicode);
+    v = (PyUnicodeObject *) PyObject_REALLOC((char *) unicode,
+        PyUnicodeObject_SIZE + length * sizeof(Py_UNICODE));
+    if (v == NULL) {
+        _Py_NewReference((PyObject *)unicode);  /* old block is still valid */
         PyErr_NoMemory();
-        return -1;
-    }
-    unicode->str[length] = 0;
-    unicode->length = length;
-
+        return NULL;
+    }
   reset:
+    Py_SIZE(v) = length;
+    v->str[length] = 0;
     /* Reset the object caches */
-    if (unicode->defenc) {
-        Py_CLEAR(unicode->defenc);
-    }
-    unicode->hash = -1;
-
-    return 0;
-}
-
-/* We allocate one more byte to make sure the string is
-   Ux0000 terminated; some code (e.g. new_identifier)
-   relies on that.
+    Py_CLEAR(v->defenc);
+    v->hash = -1;
+    _Py_NewReference((PyObject *)v);
+    return v;
+}
+
+/* We allocate one more character to make sure the string is Ux0000 terminated.
+   The overallocation is also used by fastsearch, which assumes that it's
+   safe to look at str[length] (without making any assumptions about what
+   it contains).
 
    XXX This allocator could further be enhanced by assuring that the
    free list never reduces its size below 1.
-
 */
 
 static
@@ -331,38 +349,22 @@ PyUnicodeObject *_PyUnicode_New(Py_ssize
     }
 
     /* Unicode freelist & memory allocation */
-    if (free_list) {
-        unicode = free_list;
-        free_list = *(PyUnicodeObject **)unicode;
-        numfree--;
-        if (unicode->str) {
-            /* Keep-Alive optimization: we only upsize the buffer,
-               never downsize it. */
-            if ((unicode->length < length) &&
-                unicode_resize(unicode, length) < 0) {
-                PyObject_DEL(unicode->str);
-                unicode->str = NULL;
-            }
-        }
-        else {
-            size_t new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);
-            unicode->str = (Py_UNICODE*) PyObject_MALLOC(new_size);
-        }
-        PyObject_INIT(unicode, &PyUnicode_Type);
+    if (length < MAX_SAVED_SIZE
+        && (unicode = unicode_freelist[length])) {
+        _Py_NewReference(unicode);
+        unicode_freelist[length] = (PyUnicodeObject *) unicode->defenc;
     }
     else {
-        size_t new_size;
-        unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type);
-        if (unicode == NULL)
+        /* Inline PyObject_NewVar */
+        unicode = (PyUnicodeObject *) PyObject_MALLOC(
+                   PyUnicodeObject_SIZE + length * sizeof(Py_UNICODE));
+        if (!unicode) {
+            PyErr_NoMemory();
             return NULL;
-        new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);
-        unicode->str = (Py_UNICODE*) PyObject_MALLOC(new_size);
-    }
-
-    if (!unicode->str) {
-        PyErr_NoMemory();
-        goto onError;
-    }
+        }
+        PyObject_INIT_VAR(unicode, &PyUnicode_Type, length);
+    }
+
     /* Initialize the first element to guard against cases where
      * the caller fails before initializing str -- unicode_resize()
      * reads str[0], and the Keep-Alive optimization can keep memory
@@ -372,23 +374,18 @@ PyUnicodeObject *_PyUnicode_New(Py_ssize
      */
     unicode->str[0] = 0;
     unicode->str[length] = 0;
-    unicode->length = length;
+    Py_SIZE(unicode) = length;
     unicode->hash = -1;
     unicode->state = 0;
     unicode->defenc = NULL;
     return unicode;
-
-  onError:
-    /* XXX UNREF/NEWREF interface should be more symmetrical */
-    _Py_DEC_REFTOTAL;
-    _Py_ForgetReference((PyObject *)unicode);
-    PyObject_Del(unicode);
-    return NULL;
 }
 
 static
 void unicode_dealloc(register PyUnicodeObject *unicode)
 {
+    Py_ssize_t length = PyUnicode_GET_SIZE(unicode);
+
     switch (PyUnicode_CHECK_INTERNED(unicode)) {
     case SSTATE_NOT_INTERNED:
         break;
@@ -408,27 +405,20 @@ void unicode_dealloc(register PyUnicodeO
         Py_FatalError("Inconsistent interned string state.");
     }
 
-    if (PyUnicode_CheckExact(unicode) &&
-        numfree < PyUnicode_MAXFREELIST) {
-        /* Keep-Alive optimization */
-        if (unicode->length >= KEEPALIVE_SIZE_LIMIT) {
-            PyObject_DEL(unicode->str);
-            unicode->str = NULL;
-            unicode->length = 0;
-        }
-        if (unicode->defenc) {
-            Py_CLEAR(unicode->defenc);
-        }
-        /* Add to free list */
-        *(PyUnicodeObject **)unicode = free_list;
-        free_list = unicode;
-        numfree++;
-    }
-    else {
-        PyObject_DEL(unicode->str);
-        Py_XDECREF(unicode->defenc);
-        Py_TYPE(unicode)->tp_free((PyObject *)unicode);
-    }
+    Py_CLEAR(unicode->defenc);
+
+    if (PyUnicode_CheckExact(unicode) && length < MAX_SAVED_SIZE) {
+        PyUnicodeObject *v = unicode_freelist[length];
+        if (!v || CAN_SAVE(length, PyUnicode_GET_SIZE(v))) {
+            /* Keep track of number of items stacked on the freelist */
+            Py_SIZE(unicode) = v ? PyUnicode_GET_SIZE(v) + 1 : 1;
+            unicode->defenc = (PyObject *) v;
+            unicode_freelist[length] = unicode;
+            return;
+        }
+    }
+
+    Py_TYPE(unicode)->tp_free((PyObject *)unicode);
 }
 
 static
@@ -447,29 +437,16 @@ int _PyUnicode_Resize(PyUnicodeObject **
         return -1;
     }
 
-    /* Resizing unicode_empty and single character objects is not
-       possible since these are being shared. We simply return a fresh
-       copy with the same Unicode content. */
-    if (v->length != length &&
-        (v == unicode_empty || v->length == 1)) {
-        PyUnicodeObject *w = _PyUnicode_New(length);
-        if (w == NULL)
-            return -1;
-        Py_UNICODE_COPY(w->str, v->str,
-                        length < v->length ? length : v->length);
-        Py_DECREF(*unicode);
-        *unicode = w;
-        return 0;
-    }
-
-    /* Note that we don't have to modify *unicode for unshared Unicode
-       objects, since we can modify them in-place. */
-    return unicode_resize(v, length);
+    v = unicode_resize(v, length);
+    if (v == NULL)
+        return -1;
+    *unicode = v;
+    return 0;
 }
 
 int PyUnicode_Resize(PyObject **unicode, Py_ssize_t length)
 {
-    return _PyUnicode_Resize((PyUnicodeObject **)unicode, length);
+    return _PyUnicode_Resize((PyUnicodeObject **) unicode, length);
 }
 
 PyObject *PyUnicode_FromUnicode(const Py_UNICODE *u,
@@ -794,7 +771,6 @@ PyUnicode_FromFormatV(const char *format
                 width = (width*10) + *f++ - '0';
             while (*++f && *f != '%' && !ISALPHA((unsigned)*f))
                 ;
-
             /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
              * they don't affect the amount of space we reserve.
              */
@@ -952,7 +928,6 @@ PyUnicode_FromFormatV(const char *format
     string = PyUnicode_FromUnicode(NULL, n);
     if (!string)
         goto fail;
-
     s = PyUnicode_AS_UNICODE(string);
     callresult = callresults;
 
@@ -6232,9 +6207,9 @@ Py_ssize_t PyUnicode_Count(PyObject *str
         return -1;
     }
 
-    ADJUST_INDICES(start, end, str_obj->length);
+    ADJUST_INDICES(start, end, PyUnicode_GET_SIZE(str_obj));
     result = stringlib_count(
-        str_obj->str + start, end - start, sub_obj->str, sub_obj->length,
+        str_obj->str + start, end - start, sub_obj->str, PyUnicode_GET_SIZE(sub_obj),
         PY_SSIZE_T_MAX
         );
 
@@ -6287,11 +6262,11 @@ int tailmatch(PyUnicodeObject *self,
               Py_ssize_t end,
               int direction)
 {
-    if (substring->length == 0)
+    if (PyUnicode_GET_SIZE(substring) == 0)
         return 1;
 
-    ADJUST_INDICES(start, end, self->length);
-    end -= substring->length;
+    ADJUST_INDICES(start, end, PyUnicode_GET_SIZE(self));
+    end -= PyUnicode_GET_SIZE(substring);
     if (end < start)
         return 0;
 
@@ -6341,11 +6316,11 @@ PyObject *fixup(PyUnicodeObject *self,
 
     PyUnicodeObject *u;
 
-    u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, self->length);
+    u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, PyUnicode_GET_SIZE(self));
     if (u == NULL)
         return NULL;
 
-    Py_UNICODE_COPY(u->str, self->str, self->length);
+    Py_UNICODE_COPY(u->str, self->str, PyUnicode_GET_SIZE(self));
 
     if (!fixfct(u) && PyUnicode_CheckExact(self)) {
         /* fixfct should return TRUE if it modified the buffer. If
@@ -6361,7 +6336,7 @@ PyObject *fixup(PyUnicodeObject *self,
 static
 int fixupper(PyUnicodeObject *self)
 {
-    Py_ssize_t len = self->length;
+    Py_ssize_t len = PyUnicode_GET_SIZE(self);
     Py_UNICODE *s = self->str;
     int status = 0;
 
@@ -6382,7 +6357,7 @@ int fixupper(PyUnicodeObject *self)
 static
 int fixlower(PyUnicodeObject *self)
 {
-    Py_ssize_t len = self->length;
+    Py_ssize_t len = PyUnicode_GET_SIZE(self);
     Py_UNICODE *s = self->str;
     int status = 0;
 
@@ -6403,7 +6378,7 @@ int fixlower(PyUnicodeObject *self)
 static
 int fixswapcase(PyUnicodeObject *self)
 {
-    Py_ssize_t len = self->length;
+    Py_ssize_t len = PyUnicode_GET_SIZE(self);
     Py_UNICODE *s = self->str;
     int status = 0;
 
@@ -6424,7 +6399,7 @@ int fixswapcase(PyUnicodeObject *self)
 static
 int fixcapitalize(PyUnicodeObject *self)
 {
-    Py_ssize_t len = self->length;
+    Py_ssize_t len = PyUnicode_GET_SIZE(self);
     Py_UNICODE *s = self->str;
     int status = 0;
 
@@ -6603,6 +6578,7 @@ PyUnicodeObject *pad(PyUnicodeObject *se
                      Py_UNICODE fill)
 {
     PyUnicodeObject *u;
+    Py_ssize_t length = PyUnicode_GET_SIZE(self);
 
     if (left < 0)
         left = 0;
@@ -6614,18 +6590,18 @@ PyUnicodeObject *pad(PyUnicodeObject *se
         return self;
     }
 
-    if (left > PY_SSIZE_T_MAX - self->length ||
-        right > PY_SSIZE_T_MAX - (left + self->length)) {
+    if (left > PY_SSIZE_T_MAX - length ||
+        right > PY_SSIZE_T_MAX - (left + length)) {
         PyErr_SetString(PyExc_OverflowError, "padded string is too long");
         return NULL;
     }
-    u = _PyUnicode_New(left + self->length + right);
+    u = _PyUnicode_New(left + length + right);
     if (u) {
         if (left)
             Py_UNICODE_FILL(u->str, fill, left);
-        Py_UNICODE_COPY(u->str + left, self->str, self->length);
+        Py_UNICODE_COPY(u->str + left, self->str, length);
         if (right)
-            Py_UNICODE_FILL(u->str + left + self->length, fill, right);
+            Py_UNICODE_FILL(u->str + left + length, fill, right);
     }
 
     return u;
@@ -6657,12 +6633,12 @@ PyObject *split(PyUnicodeObject *self,
 
     if (substring == NULL)
         return stringlib_split_whitespace(
-            (PyObject*) self,  self->str, self->length, maxcount
+            (PyObject*) self,  self->str, PyUnicode_GET_SIZE(self), maxcount
             );
 
     return stringlib_split(
-        (PyObject*) self,  self->str, self->length,
-        substring->str, substring->length,
+        (PyObject*) self,  self->str, PyUnicode_GET_SIZE(self),
+        substring->str, PyUnicode_GET_SIZE(substring),
         maxcount
         );
 }
@@ -6677,12 +6653,12 @@ PyObject *rsplit(PyUnicodeObject *self,
 
     if (substring == NULL)
         return stringlib_rsplit_whitespace(
-            (PyObject*) self,  self->str, self->length, maxcount
+            (PyObject*) self,  self->str, PyUnicode_GET_SIZE(self), maxcount
             );
 
     return stringlib_rsplit(
-        (PyObject*) self,  self->str, self->length,
-        substring->str, substring->length,
+        (PyObject*) self,  self->str, PyUnicode_GET_SIZE(self),
+        substring->str, PyUnicode_GET_SIZE(substring),
         maxcount
         );
 }
@@ -6697,26 +6673,26 @@ PyObject *replace(PyUnicodeObject *self,
 
     if (maxcount < 0)
         maxcount = PY_SSIZE_T_MAX;
-    else if (maxcount == 0 || self->length == 0)
+    else if (maxcount == 0 || PyUnicode_GET_SIZE(self) == 0)
         goto nothing;
 
-    if (str1->length == str2->length) {
+    if (PyUnicode_GET_SIZE(str1) == PyUnicode_GET_SIZE(str2)) {
         Py_ssize_t i;
         /* same length */
-        if (str1->length == 0)
+        if (PyUnicode_GET_SIZE(str1) == 0)
             goto nothing;
-        if (str1->length == 1) {
+        if (PyUnicode_GET_SIZE(str1) == 1) {
             /* replace characters */
             Py_UNICODE u1, u2;
-            if (!findchar(self->str, self->length, str1->str[0]))
+            if (!findchar(self->str, PyUnicode_GET_SIZE(self), str1->str[0]))
                 goto nothing;
-            u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, self->length);
+            u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, PyUnicode_GET_SIZE(self));
             if (!u)
                 return NULL;
-            Py_UNICODE_COPY(u->str, self->str, self->length);
+            Py_UNICODE_COPY(u->str, self->str, PyUnicode_GET_SIZE(self));
             u1 = str1->str[0];
             u2 = str2->str[0];
-            for (i = 0; i < u->length; i++)
+            for (i = 0; i < PyUnicode_GET_SIZE(u); i++)
                 if (u->str[i] == u1) {
                     if (--maxcount < 0)
                         break;
@@ -6724,27 +6700,28 @@ PyObject *replace(PyUnicodeObject *self,
                 }
         } else {
             i = stringlib_find(
-                self->str, self->length, str1->str, str1->length, 0
+                self->str, PyUnicode_GET_SIZE(self),
+                str1->str, PyUnicode_GET_SIZE(str1), 0
                 );
             if (i < 0)
                 goto nothing;
-            u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, self->length);
+            u = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, PyUnicode_GET_SIZE(self));
             if (!u)
                 return NULL;
-            Py_UNICODE_COPY(u->str, self->str, self->length);
+            Py_UNICODE_COPY(u->str, self->str, PyUnicode_GET_SIZE(self));
 
             /* change everything in-place, starting with this one */
-            Py_UNICODE_COPY(u->str+i, str2->str, str2->length);
-            i += str1->length;
+            Py_UNICODE_COPY(u->str+i, str2->str, PyUnicode_GET_SIZE(str2));
+            i += PyUnicode_GET_SIZE(str1);
 
             while ( --maxcount > 0) {
-                i = stringlib_find(self->str+i, self->length-i,
-                                   str1->str, str1->length,
+                i = stringlib_find(self->str+i, PyUnicode_GET_SIZE(self)-i,
+                                   str1->str, PyUnicode_GET_SIZE(str1),
                                    i);
                 if (i == -1)
                     break;
-                Py_UNICODE_COPY(u->str+i, str2->str, str2->length);
-                i += str1->length;
+                Py_UNICODE_COPY(u->str+i, str2->str, PyUnicode_GET_SIZE(str2));
+                i += PyUnicode_GET_SIZE(str1);
             }
         }
     } else {
@@ -6754,22 +6731,23 @@ PyObject *replace(PyUnicodeObject *self,
         Py_UNICODE *p;
 
         /* replace strings */
-        n = stringlib_count(self->str, self->length, str1->str, str1->length,
+        n = stringlib_count(self->str, PyUnicode_GET_SIZE(self),
+                            str1->str, PyUnicode_GET_SIZE(str1),
                             maxcount);
         if (n == 0)
             goto nothing;
-        /* new_size = self->length + n * (str2->length - str1->length)); */
-        delta = (str2->length - str1->length);
+        /* new_size = PyUnicode_GET_SIZE(self) + n * (PyUnicode_GET_SIZE(str2) - PyUnicode_GET_SIZE(str1))); */
+        delta = (PyUnicode_GET_SIZE(str2) - PyUnicode_GET_SIZE(str1));
         if (delta == 0) {
-            new_size = self->length;
+            new_size = PyUnicode_GET_SIZE(self);
         } else {
-            product = n * (str2->length - str1->length);
-            if ((product / (str2->length - str1->length)) != n) {
+            product = n * (PyUnicode_GET_SIZE(str2) - PyUnicode_GET_SIZE(str1));
+            if ((product / (PyUnicode_GET_SIZE(str2) - PyUnicode_GET_SIZE(str1))) != n) {
                 PyErr_SetString(PyExc_OverflowError,
                                 "replace string is too long");
                 return NULL;
             }
-            new_size = self->length + product;
+            new_size = PyUnicode_GET_SIZE(self) + product;
             if (new_size < 0) {
                 PyErr_SetString(PyExc_OverflowError,
                                 "replace string is too long");
@@ -6781,12 +6759,12 @@ PyObject *replace(PyUnicodeObject *self,
             return NULL;
         i = 0;
         p = u->str;
-        e = self->length - str1->length;
-        if (str1->length > 0) {
+        e = PyUnicode_GET_SIZE(self) - PyUnicode_GET_SIZE(str1);
+        if (PyUnicode_GET_SIZE(str1) > 0) {
             while (n-- > 0) {
                 /* look for next match */
-                j = stringlib_find(self->str+i, self->length-i,
-                                   str1->str, str1->length,
+                j = stringlib_find(self->str+i, PyUnicode_GET_SIZE(self)-i,
+                                   str1->str, PyUnicode_GET_SIZE(str1),
                                    i);
                 if (j == -1)
                     break;
@@ -6796,25 +6774,25 @@ PyObject *replace(PyUnicodeObject *self,
                     p += j - i;
                 }
                 /* copy substitution string */
-                if (str2->length > 0) {
-                    Py_UNICODE_COPY(p, str2->str, str2->length);
-                    p += str2->length;
+                if (PyUnicode_GET_SIZE(str2) > 0) {
+                    Py_UNICODE_COPY(p, str2->str, PyUnicode_GET_SIZE(str2));
+                    p += PyUnicode_GET_SIZE(str2);
                 }
-                i = j + str1->length;
-            }
-            if (i < self->length)
+                i = j + PyUnicode_GET_SIZE(str1);
+            }
+            if (i < PyUnicode_GET_SIZE(self))
                 /* copy tail [i:] */
-                Py_UNICODE_COPY(p, self->str+i, self->length-i);
+                Py_UNICODE_COPY(p, self->str+i, PyUnicode_GET_SIZE(self)-i);
         } else {
             /* interleave */
             while (n > 0) {
-                Py_UNICODE_COPY(p, str2->str, str2->length);
-                p += str2->length;
+                Py_UNICODE_COPY(p, str2->str, PyUnicode_GET_SIZE(str2));
+                p += PyUnicode_GET_SIZE(str2);
                 if (--n <= 0)
                     break;
                 *p++ = self->str[i++];
             }
-            Py_UNICODE_COPY(p, self->str+i, self->length-i);
+            Py_UNICODE_COPY(p, self->str+i, PyUnicode_GET_SIZE(self)-i);
         }
     }
     return (PyObject *) u;
@@ -6825,7 +6803,7 @@ PyObject *replace(PyUnicodeObject *self,
         Py_INCREF(self);
         return (PyObject *) self;
     }
-    return PyUnicode_FromUnicode(self->str, self->length);
+    return PyUnicode_FromUnicode(self->str, PyUnicode_GET_SIZE(self));
 }
 
 /* --- Unicode Object Methods --------------------------------------------- */
@@ -6935,12 +6913,12 @@ unicode_center(PyUnicodeObject *self, Py
     if (!PyArg_ParseTuple(args, "n|O&:center", &width, convert_uc, &fillchar))
         return NULL;
 
-    if (self->length >= width && PyUnicode_CheckExact(self)) {
+    if (PyUnicode_GET_SIZE(self) >= width && PyUnicode_CheckExact(self)) {
         Py_INCREF(self);
         return (PyObject*) self;
     }
 
-    marg = width - self->length;
+    marg = width - PyUnicode_GET_SIZE(self);
     left = marg / 2 + (marg & width & 1);
 
     return (PyObject*) pad(self, left, marg - left, fillchar);
@@ -6972,8 +6950,8 @@ unicode_compare(PyUnicodeObject *str1, P
     Py_UNICODE *s1 = str1->str;
     Py_UNICODE *s2 = str2->str;
 
-    len1 = str1->length;
-    len2 = str2->length;
+    len1 = PyUnicode_GET_SIZE(str1);
+    len2 = PyUnicode_GET_SIZE(str2);
 
     while (len1 > 0 && len2 > 0) {
         Py_UNICODE c1, c2;
@@ -7006,8 +6984,8 @@ unicode_compare(PyUnicodeObject *str1, P
     Py_UNICODE *s1 = str1->str;
     Py_UNICODE *s2 = str2->str;
 
-    len1 = str1->length;
-    len2 = str2->length;
+    len1 = PyUnicode_GET_SIZE(str1);
+    len2 = PyUnicode_GET_SIZE(str2);
 
     while (len1 > 0 && len2 > 0) {
         Py_UNICODE c1, c2;
@@ -7071,8 +7049,7 @@ PyObject *PyUnicode_RichCompare(PyObject
 
     if (PyUnicode_Check(left) && PyUnicode_Check(right)) {
         PyObject *v;
-        if (((PyUnicodeObject *) left)->length !=
-            ((PyUnicodeObject *) right)->length) {
+        if (PyUnicode_GET_SIZE(left) != PyUnicode_GET_SIZE(right)) {
             if (op == Py_EQ) {
                 Py_INCREF(Py_False);
                 return Py_False;
@@ -7175,11 +7152,11 @@ PyObject *PyUnicode_Concat(PyObject *lef
     }
 
     /* Concat the two Unicode strings */
-    w = _PyUnicode_New(u->length + v->length);
+    w = _PyUnicode_New(PyUnicode_GET_SIZE(u) + PyUnicode_GET_SIZE(v));
     if (w == NULL)
         goto onError;
-    Py_UNICODE_COPY(w->str, u->str, u->length);
-    Py_UNICODE_COPY(w->str + u->length, v->str, v->length);
+    Py_UNICODE_COPY(w->str, u->str, PyUnicode_GET_SIZE(u));
+    Py_UNICODE_COPY(w->str + PyUnicode_GET_SIZE(u), v->str, PyUnicode_GET_SIZE(v));
 
     Py_DECREF(u);
     Py_DECREF(v);
@@ -7238,10 +7215,10 @@ unicode_count(PyUnicodeObject *self, PyO
     if (substring == NULL)
         return NULL;
 
-    ADJUST_INDICES(start, end, self->length);
+    ADJUST_INDICES(start, end, PyUnicode_GET_SIZE(self));
     result = PyLong_FromSsize_t(
         stringlib_count(self->str + start, end - start,
-                        substring->str, substring->length,
+                        substring->str, PyUnicode_GET_SIZE(substring),
                         PY_SSIZE_T_MAX)
         );
 
@@ -7311,7 +7288,7 @@ unicode_expandtabs(PyUnicodeObject *self
     /* First pass: determine size of output string */
     i = 0; /* chars up to and including most recent \n or \r */
     j = 0; /* chars since most recent \n or \r (use in tab calculations) */
-    e = self->str + self->length; /* end of input */
+    e = self->str + PyUnicode_GET_SIZE(self); /* end of input */
     for (p = self->str; p < e; p++)
         if (*p == '\t') {
             if (tabsize > 0) {
@@ -7343,7 +7320,7 @@ unicode_expandtabs(PyUnicodeObject *self
 
     j = 0; /* same as in first pass */
     q = u->str; /* next output char */
-    qe = u->str + u->length; /* end of output */
+    qe = u->str + PyUnicode_GET_SIZE(u); /* end of output */
 
     for (p = self->str; p < e; p++)
         if (*p == '\t') {
@@ -7409,7 +7386,7 @@ unicode_find(PyUnicodeObject *self, PyOb
 static PyObject *
 unicode_getitem(PyUnicodeObject *self, Py_ssize_t index)
 {
-    if (index < 0 || index >= self->length) {
+    if (index < 0 || index >= PyUnicode_GET_SIZE(self)) {
         PyErr_SetString(PyExc_IndexError, "string index out of range");
         return NULL;
     }
@@ -7844,7 +7821,7 @@ unicode_join(PyObject *self, PyObject *d
 static Py_ssize_t
 unicode_length(PyUnicodeObject *self)
 {
-    return self->length;
+    return PyUnicode_GET_SIZE(self);
 }
 
 PyDoc_STRVAR(ljust__doc__,
@@ -7862,12 +7839,12 @@ unicode_ljust(PyUnicodeObject *self, PyO
     if (!PyArg_ParseTuple(args, "n|O&:ljust", &width, convert_uc, &fillchar))
         return NULL;
 
-    if (self->length >= width && PyUnicode_CheckExact(self)) {
+    if (PyUnicode_GET_SIZE(self) >= width && PyUnicode_CheckExact(self)) {
         Py_INCREF(self);
         return (PyObject*) self;
     }
 
-    return (PyObject*) pad(self, 0, width - self->length, fillchar);
+    return (PyObject*) pad(self, 0, width - PyUnicode_GET_SIZE(self), fillchar);
 }
 
 PyDoc_STRVAR(lower__doc__,
@@ -8050,8 +8027,8 @@ unicode_repeat(PyUnicodeObject *str, Py_
     /* ensure # of chars needed doesn't overflow int and # of bytes
      * needed doesn't overflow size_t
      */
-    nchars = len * str->length;
-    if (nchars / len != str->length) {
+    nchars = len * PyUnicode_GET_SIZE(str);
+    if (len && nchars / len != PyUnicode_GET_SIZE(str)) {
         PyErr_SetString(PyExc_OverflowError,
                         "repeated string is too long");
         return NULL;
@@ -8068,11 +8045,14 @@ unicode_repeat(PyUnicodeObject *str, Py_
 
     p = u->str;
 
-    if (str->length == 1) {
+    if (PyUnicode_GET_SIZE(str) == 1 && len > 0) {
         Py_UNICODE_FILL(p, str->str[0], len);
     } else {
-        Py_ssize_t done = str->length; /* number of characters copied this far */
-        Py_UNICODE_COPY(p, str->str, str->length);
+        Py_ssize_t done = 0; /* number of characters copied this far */
+        if (done < nchars) {
+            Py_UNICODE_COPY(p, str->str, PyUnicode_GET_SIZE(str));
+            done = PyUnicode_GET_SIZE(str);
+        }
         while (done < nchars) {
             Py_ssize_t n = (done <= nchars-done) ? done : nchars-done;
             Py_UNICODE_COPY(p+done, p, n);
@@ -8376,12 +8356,12 @@ unicode_rjust(PyUnicodeObject *self, PyO
     if (!PyArg_ParseTuple(args, "n|O&:rjust", &width, convert_uc, &fillchar))
         return NULL;
 
-    if (self->length >= width && PyUnicode_CheckExact(self)) {
+    if (PyUnicode_GET_SIZE(self) >= width && PyUnicode_CheckExact(self)) {
         Py_INCREF(self);
         return (PyObject*) self;
     }
 
-    return (PyObject*) pad(self, width - self->length, 0, fillchar);
+    return (PyObject*) pad(self, width - PyUnicode_GET_SIZE(self), 0, fillchar);
 }
 
 PyObject *PyUnicode_Split(PyObject *s,
@@ -8722,7 +8702,7 @@ are deleted.");
 static PyObject*
 unicode_translate(PyUnicodeObject *self, PyObject *table)
 {
-    return PyUnicode_TranslateCharmap(self->str, self->length, table, "ignore");
+    return PyUnicode_TranslateCharmap(self->str, PyUnicode_GET_SIZE(self), table, "ignore");
 }
 
 PyDoc_STRVAR(upper__doc__,
@@ -8752,7 +8732,7 @@ unicode_zfill(PyUnicodeObject *self, PyO
     if (!PyArg_ParseTuple(args, "n:zfill", &width))
         return NULL;
 
-    if (self->length >= width) {
+    if (PyUnicode_GET_SIZE(self) >= width) {
         if (PyUnicode_CheckExact(self)) {
             Py_INCREF(self);
             return (PyObject*) self;
@@ -8764,7 +8744,7 @@ unicode_zfill(PyUnicodeObject *self, PyO
                 );
     }
 
-    fill = width - self->length;
+    fill = width - PyUnicode_GET_SIZE(self);
 
     u = pad(self, fill, 0, '0');
 
@@ -8780,14 +8760,6 @@ unicode_zfill(PyUnicodeObject *self, PyO
     return (PyObject*) u;
 }
 
-#if 0
-static PyObject*
-unicode_freelistsize(PyUnicodeObject *self)
-{
-    return PyLong_FromLong(numfree);
-}
-#endif
-
 PyDoc_STRVAR(startswith__doc__,
              "S.startswith(prefix[, start[, end]]) -> bool\n\
 \n\
@@ -8907,8 +8879,8 @@ PyDoc_STRVAR(p_format__doc__,
 static PyObject *
 unicode__sizeof__(PyUnicodeObject *v)
 {
-    return PyLong_FromSsize_t(sizeof(PyUnicodeObject) +
-                              sizeof(Py_UNICODE) * (v->length + 1));
+    return PyLong_FromSsize_t(PyUnicodeObject_SIZE +
+                              sizeof(Py_UNICODE) * PyUnicode_GET_SIZE(v));
 }
 
 PyDoc_STRVAR(sizeof__doc__,
@@ -8917,7 +8889,7 @@ PyDoc_STRVAR(sizeof__doc__,
 static PyObject *
 unicode_getnewargs(PyUnicodeObject *v)
 {
-    return Py_BuildValue("(u#)", v->str, v->length);
+    return Py_BuildValue("(u#)", v->str, PyUnicode_GET_SIZE(v));
 }
 
 
@@ -9037,7 +9009,7 @@ unicode_subscript(PyUnicodeObject* self,
 
         if (slicelength <= 0) {
             return PyUnicode_FromUnicode(NULL, 0);
-        } else if (start == 0 && step == 1 && slicelength == self->length &&
+        } else if (start == 0 && step == 1 && slicelength == PyUnicode_GET_SIZE(self) &&
                    PyUnicode_CheckExact(self)) {
             Py_INCREF(self);
             return (PyObject *)self;
@@ -9671,22 +9643,13 @@ unicode_subtype_new(PyTypeObject *type, 
     if (tmp == NULL)
         return NULL;
     assert(PyUnicode_Check(tmp));
-    pnew = (PyUnicodeObject *) type->tp_alloc(type, n = tmp->length);
-    if (pnew == NULL) {
-        Py_DECREF(tmp);
-        return NULL;
-    }
-    pnew->str = (Py_UNICODE*) PyObject_MALLOC(sizeof(Py_UNICODE) * (n+1));
-    if (pnew->str == NULL) {
-        _Py_ForgetReference((PyObject *)pnew);
-        PyObject_Del(pnew);
-        Py_DECREF(tmp);
-        return PyErr_NoMemory();
-    }
-    Py_UNICODE_COPY(pnew->str, tmp->str, n+1);
-    pnew->length = n;
-    pnew->hash = tmp->hash;
+    pnew = (PyUnicodeObject *) type->tp_alloc(type, n = PyUnicode_GET_SIZE(tmp));
+    if (pnew != NULL) {
+        Py_UNICODE_COPY(pnew->str, tmp->str, n+1);
+        Py_SIZE(pnew) = n;
+        pnew->hash = tmp->hash;
+    } /* else: pnew is NULL; tmp must still be released below */
     Py_DECREF(tmp);
     return (PyObject *)pnew;
 }
 
@@ -9701,9 +9664,9 @@ static PyObject *unicode_iter(PyObject *
 
 PyTypeObject PyUnicode_Type = {
     PyVarObject_HEAD_INIT(&PyType_Type, 0)
-    "str",              /* tp_name */
-    sizeof(PyUnicodeObject),        /* tp_size */
-    0,                  /* tp_itemsize */
+    "str",                              /* tp_name */
+    sizeof(PyUnicodeObject),            /* tp_size */
+    sizeof(Py_UNICODE),                 /* tp_itemsize */
     /* Slots */
     (destructor)unicode_dealloc,    /* tp_dealloc */
     0,                  /* tp_print */
@@ -9762,8 +9725,6 @@ void _PyUnicode_Init(void)
     };
 
     /* Init the implementation */
-    free_list = NULL;
-    numfree = 0;
     unicode_empty = _PyUnicode_New(0);
     if (!unicode_empty)
         return;
@@ -9786,21 +9747,20 @@ void _PyUnicode_Init(void)
 int
 PyUnicode_ClearFreeList(void)
 {
-    int freelist_size = numfree;
-    PyUnicodeObject *u;
-
-    for (u = free_list; u != NULL;) {
-        PyUnicodeObject *v = u;
-        u = *(PyUnicodeObject **)u;
-        if (v->str)
-            PyObject_DEL(v->str);
-        Py_XDECREF(v->defenc);
-        PyObject_Del(v);
-        numfree--;
-    }
-    free_list = NULL;
-    assert(numfree == 0);
-    return freelist_size;
+    int i, freed_objects = 0;
+    for (i = 0; i < MAX_SAVED_SIZE; i++) {
+        PyUnicodeObject *u, *v;
+        u = unicode_freelist[i];
+        while (u != NULL) {
+            v = (PyUnicodeObject *) u->defenc;
+            Py_SIZE(u) = i;
+            PyObject_DEL(u);
+            u = v;
+            freed_objects++;
+        }
+        unicode_freelist[i] = NULL;
+    }
+    return freed_objects;
 }
 
 void
@@ -9919,11 +9879,11 @@ void _Py_ReleaseInternedUnicodeStrings(v
             break;
         case SSTATE_INTERNED_IMMORTAL:
             Py_REFCNT(s) += 1;
-            immortal_size += s->length;
+            immortal_size += PyUnicode_GET_SIZE(s);
             break;
         case SSTATE_INTERNED_MORTAL:
             Py_REFCNT(s) += 2;
-            mortal_size += s->length;
+            mortal_size += PyUnicode_GET_SIZE(s);
             break;
         default:
             Py_FatalError("Inconsistent interned string state.");