diff -r 59a189a15933 Objects/unicodeobject.c
--- a/Objects/unicodeobject.c	Thu Oct 25 17:23:54 2012 -0700
+++ b/Objects/unicodeobject.c	Sat Oct 27 01:47:23 2012 +0300
@@ -5286,61 +5286,6 @@
 
 /* --- Unicode Escape Codec ----------------------------------------------- */
 
-/* Helper function for PyUnicode_DecodeUnicodeEscape, determines
-   if all the escapes in the string make it still a valid ASCII string.
-   Returns -1 if any escapes were found which cause the string to
-   pop out of ASCII range.  Otherwise returns the length of the
-   required buffer to hold the string.
-   */
-static Py_ssize_t
-length_of_escaped_ascii_string(const char *s, Py_ssize_t size)
-{
-    const unsigned char *p = (const unsigned char *)s;
-    const unsigned char *end = p + size;
-    Py_ssize_t length = 0;
-
-    if (size < 0)
-        return -1;
-
-    for (; p < end; ++p) {
-        if (*p > 127) {
-            /* Non-ASCII */
-            return -1;
-        }
-        else if (*p != '\\') {
-            /* Normal character */
-            ++length;
-        }
-        else {
-            /* Backslash-escape, check next char */
-            ++p;
-            /* Escape sequence reaches till end of string or
-               non-ASCII follow-up. */
-            if (p >= end || *p > 127)
-                return -1;
-            switch (*p) {
-            case '\n':
-                /* backslash + \n result in zero characters */
-                break;
-            case '\\': case '\'': case '\"':
-            case 'b': case 'f': case 't':
-            case 'n': case 'r': case 'v': case 'a':
-                ++length;
-                break;
-            case '0': case '1': case '2': case '3':
-            case '4': case '5': case '6': case '7':
-            case 'x': case 'u': case 'U': case 'N':
-                /* these do not guarantee ASCII characters */
-                return -1;
-            default:
-                /* count the backslash + the other character */
-                length += 2;
-            }
-        }
-    }
-    return length;
-}
-
 static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
 
 PyObject *
@@ -5349,253 +5294,202 @@
                               const char *errors)
 {
     const char *starts = s;
-    Py_ssize_t startinpos;
-    Py_ssize_t endinpos;
-    int j;
     PyObject *v;
     const char *end;
-    char* message;
-    Py_UCS4 chr = 0xffffffff; /* in case 'getcode' messes up */
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
-    Py_ssize_t len;
-    Py_ssize_t i;
-
-    len = length_of_escaped_ascii_string(s, size);
-
-    /* After length_of_escaped_ascii_string() there are two alternatives,
-       either the string is pure ASCII with named escapes like \n, etc.
-       and we determined it's exact size (common case)
-       or it contains \x, \u, ... escape sequences.  then we create a
-       legacy wchar string and resize it at the end of this function. */
-    if (len >= 0) {
-        v = PyUnicode_New(len, 127);
-        if (!v)
-            goto onError;
-        assert(PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND);
-    }
-    else {
-        /* Escaped strings will always be longer than the resulting
-           Unicode string, so we start with size here and then reduce the
-           length after conversion to the true value.
-           (but if the error callback returns a long replacement string
-           we'll have to allocate more space) */
-        v = PyUnicode_New(size, 127);
-        if (!v)
-            goto onError;
-        len = size;
-    }
+    Py_ssize_t outpos;
+    enum PyUnicode_Kind kind = PyUnicode_1BYTE_KIND;
+    Py_UCS4 maxchar = 127;
+    void *data;
+
+    /* Escaped strings will always be longer than the resulting
+       Unicode string, so we start with size here and then reduce the
+       length after conversion to the true value.
+       (but if the error callback returns a long replacement string
+       we'll have to allocate more space) */
+    v = PyUnicode_New(size, 127);
+    if (!v)
+        goto onError;
 
     if (size == 0)
         return v;
-    i = 0;
+    outpos = 0;
     end = s + size;
 
+    assert(PyUnicode_KIND(v) == kind);
+    assert(PyUnicode_MAX_CHAR_VALUE(v) == maxchar);
+    data = PyUnicode_DATA(v);
+
     while (s < end) {
-        unsigned char c;
-        Py_UCS4 x;
-        int digits;
-
-        /* The only case in which i == ascii_length is a backslash
-           followed by a newline. */
-        assert(i <= len);
+        unsigned char c = (unsigned char) *s++;
+        Py_UCS4 ch;
+        int count;
+        Py_ssize_t startinpos;
+        Py_ssize_t endinpos;
+        const char* message;
 
         /* Non-escape characters are interpreted as Unicode ordinals */
-        if (*s != '\\') {
-            if (unicode_putchar(&v, &i, (unsigned char) *s++) < 0)
-                goto onError;
+        if (c != '\\') {
+            ch = c;
+          writechar:
+            assert(outpos < PyUnicode_GET_LENGTH(v));
+            if (ch > maxchar) {
+                if (unicode_widen(&v, outpos, ch) < 0)
+                    goto onError;
+                kind = PyUnicode_KIND(v);
+                maxchar = PyUnicode_MAX_CHAR_VALUE(v);
+                data = PyUnicode_DATA(v);
+            }
+            PyUnicode_WRITE(kind, data, outpos++, ch);
             continue;
         }
 
-        startinpos = s-starts;
+        startinpos = s - starts - 1;
         /* \ - Escapes */
-        s++;
-        c = *s++;
-        if (s > end)
-            c = '\0'; /* Invalid after \ */
-
-        /* The only case in which i == ascii_length is a backslash
-           followed by a newline. */
-        assert(i < len || (i == len && c == '\n'));
+        if (s >= end) {
+            message = "\\ at end of string";
+            goto error;
+        }
+        c = (unsigned char) *s++;
+
+        /* The output buffer may already be full here only when the escape
+           is a backslash followed by a newline, which emits no character. */
+        assert(outpos < PyUnicode_GET_LENGTH(v) ||
+            (outpos == PyUnicode_GET_LENGTH(v) && c == '\n'));
 
         switch (c) {
 
             /* \x escapes */
-#define WRITECHAR(ch)                                   \
-            do {                                        \
-                if (unicode_putchar(&v, &i, ch) < 0)    \
-                    goto onError;                       \
-            }while(0)
-
-        case '\n': break;
-        case '\\': WRITECHAR('\\'); break;
-        case '\'': WRITECHAR('\''); break;
-        case '\"': WRITECHAR('\"'); break;
-        case 'b': WRITECHAR('\b'); break;
+
+        case '\n': continue;
+        case '\\': PyUnicode_WRITE(kind, data, outpos++, '\\'); continue;
+        case '\'': PyUnicode_WRITE(kind, data, outpos++, '\''); continue;
+        case '\"': PyUnicode_WRITE(kind, data, outpos++, '\"'); continue;
+        case 'b': PyUnicode_WRITE(kind, data, outpos++, '\b'); continue;
         /* FF */
-        case 'f': WRITECHAR('\014'); break;
-        case 't': WRITECHAR('\t'); break;
-        case 'n': WRITECHAR('\n'); break;
-        case 'r': WRITECHAR('\r'); break;
+        case 'f': PyUnicode_WRITE(kind, data, outpos++, '\014'); continue;
+        case 't': PyUnicode_WRITE(kind, data, outpos++, '\t'); continue;
+        case 'n': PyUnicode_WRITE(kind, data, outpos++, '\n'); continue;
+        case 'r': PyUnicode_WRITE(kind, data, outpos++, '\r'); continue;
         /* VT */
-        case 'v': WRITECHAR('\013'); break;
+        case 'v': PyUnicode_WRITE(kind, data, outpos++, '\013'); continue;
         /* BEL, not classic C */
-        case 'a': WRITECHAR('\007'); break;
+        case 'a': PyUnicode_WRITE(kind, data, outpos++, '\007'); continue;
 
             /* \OOO (octal) escapes */
         case '0': case '1': case '2': case '3':
         case '4': case '5': case '6': case '7':
-            x = s[-1] - '0';
+            ch = c - '0';
             if (s < end && '0' <= *s && *s <= '7') {
-                x = (x<<3) + *s++ - '0';
+                ch = (ch<<3) + *s++ - '0';
                 if (s < end && '0' <= *s && *s <= '7')
-                    x = (x<<3) + *s++ - '0';
-            }
-            WRITECHAR(x);
-            break;
+                    ch = (ch<<3) + *s++ - '0';
+            }
+            goto writechar;
 
             /* hex escapes */
             /* \xXX */
         case 'x':
-            digits = 2;
+            count = 2;
             message = "truncated \\xXX escape";
             goto hexescape;
 
             /* \uXXXX */
         case 'u':
-            digits = 4;
+            count = 4;
             message = "truncated \\uXXXX escape";
             goto hexescape;
 
             /* \UXXXXXXXX */
         case 'U':
-            digits = 8;
+            count = 8;
             message = "truncated \\UXXXXXXXX escape";
-        hexescape:
-            chr = 0;
-            if (s+digits>end) {
-                endinpos = size;
-                if (unicode_decode_call_errorhandler(
-                        errors, &errorHandler,
-                        "unicodeescape", "end of string in escape sequence",
-                        &starts, &end, &startinpos, &endinpos, &exc, &s,
-                        &v, &i))
-                    goto onError;
-                goto nextByte;
-            }
-            for (j = 0; j < digits; ++j) {
-                c = (unsigned char) s[j];
-                if (!Py_ISXDIGIT(c)) {
-                    endinpos = (s+j+1)-starts;
-                    if (unicode_decode_call_errorhandler(
-                            errors, &errorHandler,
-                            "unicodeescape", message,
-                            &starts, &end, &startinpos, &endinpos, &exc, &s,
-                            &v, &i))
-                        goto onError;
-                    len = PyUnicode_GET_LENGTH(v);
-                    goto nextByte;
-                }
-                chr = (chr<<4) & ~0xF;
+          hexescape:
+            for (ch = 0; count--; ++s) {
+                if (s >= end)
+                    goto error;
+                c = (unsigned char)*s;
+                if (!Py_ISXDIGIT(c))
+                    goto error;
+                ch <<= 4;
                 if (c >= '0' && c <= '9')
-                    chr += c - '0';
+                    ch += c - '0';
                 else if (c >= 'a' && c <= 'f')
-                    chr += 10 + c - 'a';
+                    ch += c - ('a' - 10);
                 else
-                    chr += 10 + c - 'A';
-            }
-            s += j;
-            if (chr == 0xffffffff && PyErr_Occurred())
-                /* _decoding_error will have already written into the
-                   target buffer. */
-                break;
-        store:
-            /* when we get here, chr is a 32-bit unicode character */
-            if (chr <= MAX_UNICODE) {
-                WRITECHAR(chr);
-            } else {
-                endinpos = s-starts;
-                if (unicode_decode_call_errorhandler(
-                        errors, &errorHandler,
-                        "unicodeescape", "illegal Unicode character",
-                        &starts, &end, &startinpos, &endinpos, &exc, &s,
-                        &v, &i))
-                    goto onError;
-            }
-            break;
+                    ch += c - ('A' - 10);
+            }
+          store:
+            /* when we get here, ch is a 32-bit unicode character */
+            if (ch <= MAX_UNICODE)
+                goto writechar;
+            message = "illegal Unicode character";
+            goto error;
 
             /* \N{name} */
         case 'N':
-            message = "malformed \\N character escape";
             if (ucnhash_CAPI == NULL) {
                 /* load the unicode data module */
                 ucnhash_CAPI = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
                                                 PyUnicodeData_CAPSULE_NAME, 1);
-                if (ucnhash_CAPI == NULL)
-                    goto ucnhashError;
-            }
+                if (ucnhash_CAPI == NULL) {
+                    PyErr_SetString(
+                        PyExc_UnicodeError,
+                        "\\N escapes not supported (can't load unicodedata module)"
+                        );
+                    goto onError;
+                }
+            }
+            message = "malformed \\N character escape";
             if (*s == '{') {
-                const char *start = s+1;
+                const char *start = ++s;
+                size_t namelen;
                 /* look for the closing brace */
-                while (*s != '}' && s < end)
-                    s++;
-                if (s > start && s < end && *s == '}') {
+                do {
+                    if (s >= end)
+                        goto error;
+                } while (*s++ != '}');
+                namelen = s - start - 1;
+                if (namelen && namelen <= INT_MAX) {
                     /* found a name.  look it up in the unicode database */
+                    ch = 0xffffffff; /* in case 'getcode' messes up */
+                    if (ucnhash_CAPI->getcode(NULL, start, (int)namelen,
+                                            &ch, 0)) {
+                        goto store;
+                    }
                     message = "unknown Unicode character name";
-                    s++;
-                    if (ucnhash_CAPI->getcode(NULL, start, (int)(s-start-1),
-                                              &chr, 0))
-                        goto store;
                 }
             }
-            endinpos = s-starts;
-            if (unicode_decode_call_errorhandler(
-                    errors, &errorHandler,
-                    "unicodeescape", message,
-                    &starts, &end, &startinpos, &endinpos, &exc, &s,
-                    &v, &i))
-                goto onError;
-            break;
+            goto error;
 
         default:
-            if (s > end) {
-                message = "\\ at end of string";
-                s--;
-                endinpos = s-starts;
-                if (unicode_decode_call_errorhandler(
-                        errors, &errorHandler,
-                        "unicodeescape", message,
-                        &starts, &end, &startinpos, &endinpos, &exc, &s,
-                        &v, &i))
-                    goto onError;
-            }
-            else {
-                WRITECHAR('\\');
-                WRITECHAR(s[-1]);
-            }
-            break;
-        }
-      nextByte:
-        ;
-    }
-#undef WRITECHAR
-
-    if (unicode_resize(&v, i) < 0)
+            assert(outpos < PyUnicode_GET_LENGTH(v));
+            PyUnicode_WRITE(kind, data, outpos++, '\\');
+            ch = c;
+            goto writechar;
+        }
+
+      error:
+        endinpos = s-starts;
+        if (unicode_decode_call_errorhandler(
+                errors, &errorHandler,
+                "unicodeescape", message,
+                &starts, &end, &startinpos, &endinpos, &exc, &s,
+                &v, &outpos))
+            goto onError;
+        kind = PyUnicode_KIND(v);
+        maxchar = PyUnicode_MAX_CHAR_VALUE(v);
+        data = PyUnicode_DATA(v);
+        continue;
+    }
+
+    if (unicode_resize(&v, outpos) < 0)
         goto onError;
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
     return unicode_result(v);
 
-  ucnhashError:
-    PyErr_SetString(
-        PyExc_UnicodeError,
-        "\\N escapes not supported (can't load unicodedata module)"
-        );
-    Py_XDECREF(v);
-    Py_XDECREF(errorHandler);
-    Py_XDECREF(exc);
-    return NULL;
-
   onError:
     Py_XDECREF(v);
     Py_XDECREF(errorHandler);
@@ -5616,9 +5510,9 @@
     Py_ssize_t i, len;
     PyObject *repr;
     char *p;
-    int kind;
+    enum PyUnicode_Kind kind;
     void *data;
-    Py_ssize_t expandsize = 0;
+    Py_ssize_t expandsize;
 
     /* Initial allocation is based on the longest-possible character
        escape.
@@ -5637,11 +5531,9 @@
     len = PyUnicode_GET_LENGTH(unicode);
     kind = PyUnicode_KIND(unicode);
     data = PyUnicode_DATA(unicode);
-    switch (kind) {
-    case PyUnicode_1BYTE_KIND: expandsize = 4; break;
-    case PyUnicode_2BYTE_KIND: expandsize = 6; break;
-    case PyUnicode_4BYTE_KIND: expandsize = 10; break;
-    }
+    /* 4 byte characters can take up 10 bytes, 2 byte characters can take up 6
+       bytes, and 1 byte characters 4. */
+    expandsize = kind * 2 + 2;
 
     if (len == 0)
         return PyBytes_FromStringAndSize(NULL, 0);
@@ -5661,15 +5553,53 @@
     for (i = 0; i < len; i++) {
         Py_UCS4 ch = PyUnicode_READ(kind, data, i);
 
-        /* Escape backslashes */
-        if (ch == '\\') {
+        if (ch < 0x100) {
+            /* Copy printable US ASCII as-is */
+            if (ch >= ' ' && ch < 0x7F) {
+                if (ch != '\\') {
+                    *p++ = (char) ch;
+                    continue;
+                }
+                /* Escape backslashes */
+                else {
+                    *p++ = '\\';
+                    *p++ = (char) ch;
+                }
+            }
+
+            /* Map special whitespace to '\t', '\n', '\r' */
+            else if (ch == '\t') {
+                *p++ = '\\';
+                *p++ = 't';
+            }
+            else if (ch == '\n') {
+                *p++ = '\\';
+                *p++ = 'n';
+            }
+            else if (ch == '\r') {
+                *p++ = '\\';
+                *p++ = 'r';
+            }
+
+            /* Map non-printable US ASCII and 8-bit characters to '\xhh' */
+            else {
+                *p++ = '\\';
+                *p++ = 'x';
+                *p++ = Py_hexdigits[(ch >> 4) & 0x000F];
+                *p++ = Py_hexdigits[ch & 0x000F];
+            }
+        }
+        /* Map 16-bit characters to '\uxxxx' */
+        else if (ch < 0x10000) {
             *p++ = '\\';
-            *p++ = (char) ch;
-            continue;
-        }
-
+            *p++ = 'u';
+            *p++ = Py_hexdigits[(ch >> 12) & 0x000F];
+            *p++ = Py_hexdigits[(ch >> 8) & 0x000F];
+            *p++ = Py_hexdigits[(ch >> 4) & 0x000F];
+            *p++ = Py_hexdigits[ch & 0x000F];
+        }
         /* Map 21-bit characters to '\U00xxxxxx' */
-        else if (ch >= 0x10000) {
+        else {
             assert(ch <= MAX_UNICODE);
             *p++ = '\\';
             *p++ = 'U';
@@ -5681,44 +5611,7 @@
             *p++ = Py_hexdigits[(ch >> 8) & 0x0000000F];
             *p++ = Py_hexdigits[(ch >> 4) & 0x0000000F];
             *p++ = Py_hexdigits[ch & 0x0000000F];
-            continue;
-        }
-
-        /* Map 16-bit characters to '\uxxxx' */
-        if (ch >= 256) {
-            *p++ = '\\';
-            *p++ = 'u';
-            *p++ = Py_hexdigits[(ch >> 12) & 0x000F];
-            *p++ = Py_hexdigits[(ch >> 8) & 0x000F];
-            *p++ = Py_hexdigits[(ch >> 4) & 0x000F];
-            *p++ = Py_hexdigits[ch & 0x000F];
-        }
-
-        /* Map special whitespace to '\t', \n', '\r' */
-        else if (ch == '\t') {
-            *p++ = '\\';
-            *p++ = 't';
-        }
-        else if (ch == '\n') {
-            *p++ = '\\';
-            *p++ = 'n';
-        }
-        else if (ch == '\r') {
-            *p++ = '\\';
-            *p++ = 'r';
-        }
-
-        /* Map non-printable US ASCII to '\xhh' */
-        else if (ch < ' ' || ch >= 0x7F) {
-            *p++ = '\\';
-            *p++ = 'x';
-            *p++ = Py_hexdigits[(ch >> 4) & 0x000F];
-            *p++ = Py_hexdigits[ch & 0x000F];
-        }
-
-        /* Copy everything else as-is */
-        else
-            *p++ = (char) ch;
+        }
     }
 
     assert(p - PyBytes_AS_STRING(repr) > 0);
@@ -5748,14 +5641,14 @@
                                  const char *errors)
 {
     const char *starts = s;
-    Py_ssize_t startinpos;
-    Py_ssize_t endinpos;
     Py_ssize_t outpos;
     PyObject *v;
     const char *end;
-    const char *bs;
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
+    enum PyUnicode_Kind kind = PyUnicode_1BYTE_KIND;
+    Py_UCS4 maxchar = 127;
+    void *data;
 
     /* Escaped strings will always be longer than the resulting
        Unicode string, so we start with size here and then reduce the
@@ -5768,73 +5661,82 @@
         return v;
     outpos = 0;
     end = s + size;
+    assert(PyUnicode_KIND(v) == kind);
+    assert(PyUnicode_MAX_CHAR_VALUE(v) == maxchar);
+    data = PyUnicode_DATA(v);
+
     while (s < end) {
-        unsigned char c;
-        Py_UCS4 x;
-        int i;
+        unsigned char c = *s++;
+        Py_UCS4 ch;
         int count;
+        Py_ssize_t startinpos;
+        Py_ssize_t endinpos;
+        const char *message;
 
         /* Non-escape characters are interpreted as Unicode ordinals */
-        if (*s != '\\') {
-            if (unicode_putchar(&v, &outpos, (unsigned char)*s++) < 0)
-                goto onError;
+        if (c != '\\' || s >= end) {
+            ch = c;
+
+          writechar:
+            assert(outpos < PyUnicode_GET_LENGTH(v));
+            if (ch > maxchar) {
+                if (unicode_widen(&v, outpos, ch) < 0)
+                    goto onError;
+                kind = PyUnicode_KIND(v);
+                maxchar = PyUnicode_MAX_CHAR_VALUE(v);
+                data = PyUnicode_DATA(v);
+            }
+            PyUnicode_WRITE(kind, data, outpos++, ch);
             continue;
         }
-        startinpos = s-starts;
-
-        /* \u-escapes are only interpreted iff the number of leading
-           backslashes if odd */
-        bs = s;
-        for (;s < end;) {
-            if (*s != '\\')
-                break;
-            if (unicode_putchar(&v, &outpos, (unsigned char)*s++) < 0)
-                goto onError;
-        }
-        if (((s - bs) & 1) == 0 ||
-            s >= end ||
-            (*s != 'u' && *s != 'U')) {
-            continue;
-        }
-        outpos--;
-        count = *s=='u' ? 4 : 8;
-        s++;
+
+        c = *s++;
+        if (c == 'u') {
+            count = 4;
+            message = "truncated \\uXXXX escape";
+        }
+        else if (c == 'U') {
+            count = 8;
+            message = "truncated \\UXXXXXXXX escape";
+        }
+        else {
+            assert(outpos < PyUnicode_GET_LENGTH(v));
+            PyUnicode_WRITE(kind, data, outpos++, '\\');
+            ch = c;
+            goto writechar;
+        }
+        startinpos = s - starts - 2;
 
         /* \uXXXX with 4 hex digits, \Uxxxxxxxx with 8 */
-        for (x = 0, i = 0; i < count; ++i, ++s) {
+        for (ch = 0; count--; ++s) {
+            if (s >= end)
+                goto error;
             c = (unsigned char)*s;
-            if (!Py_ISXDIGIT(c)) {
-                endinpos = s-starts;
-                if (unicode_decode_call_errorhandler(
-                        errors, &errorHandler,
-                        "rawunicodeescape", "truncated \\uXXXX",
-                        &starts, &end, &startinpos, &endinpos, &exc, &s,
-                        &v, &outpos))
-                    goto onError;
-                goto nextByte;
-            }
-            x = (x<<4) & ~0xF;
+            if (!Py_ISXDIGIT(c))
+                goto error;
+            ch <<= 4;
             if (c >= '0' && c <= '9')
-                x += c - '0';
+                ch += c - '0';
             else if (c >= 'a' && c <= 'f')
-                x += 10 + c - 'a';
+                ch += c - ('a' - 10);
             else
-                x += 10 + c - 'A';
-        }
-        if (x <= MAX_UNICODE) {
-            if (unicode_putchar(&v, &outpos, x) < 0)
-                goto onError;
-        } else {
-            endinpos = s-starts;
-            if (unicode_decode_call_errorhandler(
-                    errors, &errorHandler,
-                    "rawunicodeescape", "\\Uxxxxxxxx out of range",
-                    &starts, &end, &startinpos, &endinpos, &exc, &s,
-                    &v, &outpos))
-                goto onError;
-        }
-      nextByte:
-        ;
+                ch += c - ('A' - 10);
+        }
+        if (ch <= MAX_UNICODE)
+            goto writechar;
+        message = "\\Uxxxxxxxx out of range";
+
+      error:
+        endinpos = s-starts;
+        if (unicode_decode_call_errorhandler(
+                errors, &errorHandler,
+                "rawunicodeescape", message,
+                &starts, &end, &startinpos, &endinpos, &exc, &s,
+                &v, &outpos))
+            goto onError;
+        kind = PyUnicode_KIND(v);
+        maxchar = PyUnicode_MAX_CHAR_VALUE(v);
+        data = PyUnicode_DATA(v);
     }
     if (unicode_resize(&v, outpos) < 0)
         goto onError;
@@ -5870,6 +5772,10 @@
     kind = PyUnicode_KIND(unicode);
     data = PyUnicode_DATA(unicode);
     len = PyUnicode_GET_LENGTH(unicode);
+    if (kind == PyUnicode_1BYTE_KIND) {
+        repr = PyBytes_FromStringAndSize(data, len);
+        return repr;
+    }
     /* 4 byte characters can take up 10 bytes, 2 byte characters can take up 6
        bytes, and 1 byte characters 4. */
     expandsize = kind * 2 + 2;