diff -r 51016ff7f8c9 Include/unicodeobject.h --- a/Include/unicodeobject.h Sun Mar 25 22:41:16 2012 -0400 +++ b/Include/unicodeobject.h Tue Mar 27 13:13:21 2012 +0200 @@ -291,6 +291,7 @@ typedef struct { PyObject_HEAD Py_ssize_t length; /* Number of code points in the string */ Py_hash_t hash; /* Hash value; -1 if not set */ + wchar_t *wstr; /* wchar_t representation (null-terminated) */ struct { /* SSTATE_NOT_INTERNED (0) @@ -300,7 +301,7 @@ typedef struct { If interned != SSTATE_NOT_INTERNED, the two references from the dictionary to this object are *not* counted in ob_refcnt. */ - unsigned int interned:2; + unsigned char interned:2; /* Character size: - PyUnicode_WCHAR_KIND (0): @@ -328,24 +329,23 @@ typedef struct { * all characters are in the range U+0000-U+10FFFF * at least one character is in the range U+10000-U+10FFFF */ - unsigned int kind:3; + unsigned char kind:3; /* Compact is with respect to the allocation scheme. Compact unicode objects only require one memory block while non-compact objects use one block for the PyUnicodeObject struct and another for its data buffer. */ - unsigned int compact:1; + unsigned char compact:1; /* The string only contains characters in the range U+0000-U+007F (ASCII) and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is set, use the PyASCIIObject structure. */ - unsigned int ascii:1; + unsigned char ascii:1; /* The ready flag indicates whether the object layout is initialized completely. This means that this is either a compact object, or the data pointer is filled out. The bit is redundant, and helps to minimize the test in PyUnicode_IS_READY(). */ - unsigned int ready:1; + unsigned char ready:1; } state; - wchar_t *wstr; /* wchar_t representation (null-terminated) */ -} PyASCIIObject; +} __attribute__((packed)) PyASCIIObject; /* Non-ASCII strings allocated through PyUnicode_New use the PyCompactUnicodeObject structure. state.compact is set, and the data @@ -357,7 +357,7 @@ typedef struct { char *utf8; /* UTF-8 representation (null-terminated) */ Py_ssize_t wstr_length; /* Number of code points in wstr, possible * surrogates count as two code points. */ -} PyCompactUnicodeObject; +} __attribute__((packed)) PyCompactUnicodeObject; /* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the PyUnicodeObject structure. The actual string data is initially in the wstr @@ -370,7 +370,7 @@ typedef struct { Py_UCS2 *ucs2; Py_UCS4 *ucs4; } data; /* Canonical, smallest-form Unicode buffer */ -} PyUnicodeObject; +} __attribute__((packed)) PyUnicodeObject; #endif PyAPI_DATA(PyTypeObject) PyUnicode_Type;