diff -r deb3e5857d8c Objects/unicodeobject.c --- a/Objects/unicodeobject.c Thu Oct 27 19:30:10 2016 +0900 +++ b/Objects/unicodeobject.c Fri Oct 28 01:15:11 2016 +0800 @@ -4703,16 +4703,16 @@ int kind; void *data; Py_ssize_t len; - PyObject *v; int inShift = 0; - Py_ssize_t i; + Py_ssize_t i, max_char_size; unsigned int base64bits = 0; unsigned long base64buffer = 0; - char * out; - char * start; + char *out; + _PyBytesWriter writer; if (PyUnicode_READY(str) == -1) return NULL; + kind = PyUnicode_KIND(str); data = PyUnicode_DATA(str); len = PyUnicode_GET_LENGTH(str); @@ -4720,14 +4720,21 @@ if (len == 0) return PyBytes_FromStringAndSize(NULL, 0); - /* It might be possible to tighten this worst case */ - if (len > PY_SSIZE_T_MAX / 8) + if (kind == PyUnicode_1BYTE_KIND || kind == PyUnicode_2BYTE_KIND) { + max_char_size = 3; + } else { + assert(kind == PyUnicode_4BYTE_KIND); + max_char_size = 6; + } + if (len > (PY_SSIZE_T_MAX - 2)/ max_char_size) { return PyErr_NoMemory(); - v = PyBytes_FromStringAndSize(NULL, len * 8); - if (v == NULL) - return NULL; - - start = out = PyBytes_AS_STRING(v); + } + + _PyBytesWriter_Init(&writer); + out = _PyBytesWriter_Alloc(&writer, len * max_char_size + 2); + if (out == NULL) + return NULL; + for (i = 0; i < len; ++i) { Py_UCS4 ch = PyUnicode_READ(kind, data, i); @@ -4754,7 +4761,7 @@ else { /* not in a shift sequence */ if (ch == '+') { *out++ = '+'; - *out++ = '-'; + *out++ = '-'; } else if (ENCODE_DIRECT(ch, !base64SetO, !base64WhiteSpace)) { *out++ = (char) ch; @@ -4787,13 +4794,13 @@ base64bits -= 6; } } - if (base64bits) - *out++= TO_BASE64(base64buffer << (6-base64bits) ); - if (inShift) + if (inShift) { + if (base64bits) { + *out++ = TO_BASE64(base64buffer << (6-base64bits)); + } *out++ = '-'; - if (_PyBytes_Resize(&v, out - start) < 0) - return NULL; - return v; + } + return _PyBytesWriter_Finish(&writer, out); } PyObject * PyUnicode_EncodeUTF7(const Py_UNICODE *s,