diff -r 3d900c9641c9 Include/complexobject.h --- a/Include/complexobject.h Tue May 08 22:24:47 2012 +0100 +++ b/Include/complexobject.h Wed May 09 01:42:32 2012 +0200 @@ -63,10 +63,11 @@ PyAPI_FUNC(Py_complex) PyComplex_AsCComp /* Format the object based on the format_spec, as defined in PEP 3101 (Advanced String Formatting). */ #ifndef Py_LIMITED_API -PyAPI_FUNC(PyObject *) _PyComplex_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, - Py_ssize_t end); +PyAPI_FUNC(int) _PyComplex_FormatWriter(PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, + Py_ssize_t end, + _PyUnicodeWriter *writer); #endif #ifdef __cplusplus diff -r 3d900c9641c9 Include/floatobject.h --- a/Include/floatobject.h Tue May 08 22:24:47 2012 +0100 +++ b/Include/floatobject.h Wed May 09 01:42:32 2012 +0200 @@ -112,10 +112,11 @@ PyAPI_FUNC(int) PyFloat_ClearFreeList(vo /* Format the object based on the format_spec, as defined in PEP 3101 (Advanced String Formatting). */ -PyAPI_FUNC(PyObject *) _PyFloat_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, - Py_ssize_t end); +PyAPI_FUNC(int) _PyFloat_FormatWriter(PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, + Py_ssize_t end, + _PyUnicodeWriter *writer); #endif /* Py_LIMITED_API */ #ifdef __cplusplus diff -r 3d900c9641c9 Include/longobject.h --- a/Include/longobject.h Tue May 08 22:24:47 2012 +0100 +++ b/Include/longobject.h Wed May 09 01:42:32 2012 +0200 @@ -154,11 +154,13 @@ PyAPI_FUNC(int) _PyLong_AsByteArray(PyLo PyAPI_FUNC(PyObject *) _PyLong_Format(PyObject *aa, int base); /* Format the object based on the format_spec, as defined in PEP 3101 - (Advanced String Formatting). */ -PyAPI_FUNC(PyObject *) _PyLong_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, - Py_ssize_t end); + (Advanced String Formatting). Return 0 on success, raise an exception + and return -1 on error. 
*/ +PyAPI_FUNC(int) _PyLong_FormatWriter(PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, + Py_ssize_t end, + _PyUnicodeWriter *writer); #endif /* Py_LIMITED_API */ /* These aren't really part of the long object, but they're handy. The diff -r 3d900c9641c9 Include/unicodeobject.h --- a/Include/unicodeobject.h Tue May 08 22:24:47 2012 +0100 +++ b/Include/unicodeobject.h Wed May 09 01:42:32 2012 +0200 @@ -865,12 +865,50 @@ PyAPI_FUNC(PyObject *) PyUnicode_FromFor ); #ifndef Py_LIMITED_API +typedef struct { + PyObject *buffer; + void *data; + enum PyUnicode_Kind kind; + Py_UCS4 maxchar; + Py_ssize_t pos; +} _PyUnicodeWriter; + +PyAPI_FUNC(int) +_PyUnicodeWriter_init(_PyUnicodeWriter *writer, + Py_ssize_t length, Py_UCS4 maxchar); + +PyAPI_FUNC(int) +_PyUnicodeWriter_prepare(_PyUnicodeWriter *writer, + Py_ssize_t length, Py_UCS4 maxchar); + +PyAPI_FUNC(int) +_PyUnicodeWriter_write_char(_PyUnicodeWriter *writer, Py_UCS4 ch); + +PyAPI_FUNC(int) +_PyUnicodeWriter_write_str( + _PyUnicodeWriter *writer, + PyObject *str); + +PyAPI_FUNC(int) +_PyUnicodeWriter_write_substr( + _PyUnicodeWriter *writer, + PyObject *str, Py_ssize_t start, Py_ssize_t length); + +PyAPI_FUNC(PyObject *) +_PyUnicodeWriter_finish(_PyUnicodeWriter *writer); + +PyAPI_FUNC(void) +_PyUnicodeWriter_dealloc(_PyUnicodeWriter *writer); +#endif + +#ifndef Py_LIMITED_API /* Format the object based on the format_spec, as defined in PEP 3101 (Advanced String Formatting). 
*/ -PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, - Py_ssize_t end); +PyAPI_FUNC(int) _PyUnicode_FormatWriter(PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, + Py_ssize_t end, + _PyUnicodeWriter *writer); #endif PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **); diff -r 3d900c9641c9 Objects/complexobject.c --- a/Objects/complexobject.c Tue May 08 22:24:47 2012 +0100 +++ b/Objects/complexobject.c Wed May 09 01:42:32 2012 +0200 @@ -699,11 +699,22 @@ static PyObject * complex__format__(PyObject* self, PyObject* args) { PyObject *format_spec; + _PyUnicodeWriter writer; + int ret; if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) - return NULL; - return _PyComplex_FormatAdvanced(self, format_spec, 0, - PyUnicode_GET_LENGTH(format_spec)); + return NULL; + + if (_PyUnicodeWriter_init(&writer, 0, 0) == -1) + return NULL; + ret = _PyComplex_FormatWriter(self, format_spec, 0, + PyUnicode_GET_LENGTH(format_spec), + &writer); + if (ret == -1) { + _PyUnicodeWriter_dealloc(&writer); + return NULL; + } + return _PyUnicodeWriter_finish(&writer); } #if 0 diff -r 3d900c9641c9 Objects/floatobject.c --- a/Objects/floatobject.c Tue May 08 22:24:47 2012 +0100 +++ b/Objects/floatobject.c Wed May 09 01:42:32 2012 +0200 @@ -278,6 +278,36 @@ float_repr(PyFloatObject *v) return result; } +extern void +_PyUnicode_CopyASCII(PyObject *to, Py_ssize_t to_start, + unsigned char *from, Py_ssize_t how_many); + +int +_PyFloat_Format2(PyFloatObject *v, _PyUnicodeWriter *writer) +{ + Py_ssize_t len; + char *buf; + + buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v), + 'r', 0, + Py_DTSF_ADD_DOT_0, + NULL); + if (!buf) { + PyErr_NoMemory(); + return -1; + } + len = strlen(buf); + if (_PyUnicodeWriter_prepare(writer, len, 127) == -1) { + PyMem_Free(buf); + return -1; + } + _PyUnicode_CopyASCII(writer->buffer, writer->pos, + (unsigned char*)buf, len); + writer->pos += len; + PyMem_Free(buf); + return 0; +} + /* 
Comparison is pretty much a nightmare. When comparing float to float, * we do it as straightforwardly (and long-windedly) as conceivable, so * that, e.g., Python x == y delivers the same result as the platform @@ -1703,11 +1733,22 @@ static PyObject * float__format__(PyObject *self, PyObject *args) { PyObject *format_spec; + _PyUnicodeWriter writer; + int ret; if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) return NULL; - return _PyFloat_FormatAdvanced(self, format_spec, 0, - PyUnicode_GET_LENGTH(format_spec)); + + if (_PyUnicodeWriter_init(&writer, 0, 0) == -1) + return NULL; + ret = _PyFloat_FormatWriter(self, format_spec, 0, + PyUnicode_GET_LENGTH(format_spec), + &writer); + if (ret == -1) { + _PyUnicodeWriter_dealloc(&writer); + return NULL; + } + return _PyUnicodeWriter_finish(&writer); } PyDoc_STRVAR(float__format__doc, diff -r 3d900c9641c9 Objects/longobject.c --- a/Objects/longobject.c Tue May 08 22:24:47 2012 +0100 +++ b/Objects/longobject.c Wed May 09 01:42:32 2012 +0200 @@ -1550,20 +1550,21 @@ divrem1(PyLongObject *a, digit n, digit string. (Return value is non-shared so that callers can modify the returned value if necessary.) 
*/ -static PyObject * -long_to_decimal_string(PyObject *aa) +static int +long_to_decimal_string_internal(PyObject *aa, + PyObject **p_output, _PyUnicodeWriter *writer) { PyLongObject *scratch, *a; PyObject *str; Py_ssize_t size, strlen, size_a, i, j; digit *pout, *pin, rem, tenpow; - unsigned char *p; int negative; + enum PyUnicode_Kind kind; a = (PyLongObject *)aa; if (a == NULL || !PyLong_Check(a)) { PyErr_BadInternalCall(); - return NULL; + return -1; } size_a = ABS(Py_SIZE(a)); negative = Py_SIZE(a) < 0; @@ -1580,13 +1581,13 @@ long_to_decimal_string(PyObject *aa) if (size_a > PY_SSIZE_T_MAX / PyLong_SHIFT) { PyErr_SetString(PyExc_OverflowError, "long is too large to format"); - return NULL; + return -1; } /* the expression size_a * PyLong_SHIFT is now safe from overflow */ size = 1 + size_a * PyLong_SHIFT / (3 * _PyLong_DECIMAL_SHIFT); scratch = _PyLong_New(size); if (scratch == NULL) - return NULL; + return -1; /* convert array of base _PyLong_BASE digits in pin to an array of base _PyLong_DECIMAL_BASE digits in pout, following Knuth (TAOCP, @@ -1609,7 +1610,7 @@ long_to_decimal_string(PyObject *aa) /* check for keyboard interrupt */ SIGCHECK({ Py_DECREF(scratch); - return NULL; + return -1; }); } /* pout should have at least one digit, so that the case when a = 0 @@ -1625,41 +1626,99 @@ long_to_decimal_string(PyObject *aa) tenpow *= 10; strlen++; } - str = PyUnicode_New(strlen, '9'); - if (str == NULL) { - Py_DECREF(scratch); + if (writer) { + if (_PyUnicodeWriter_prepare(writer, strlen, '9') == -1) { + /* scratch is still owned here: release it like the other error paths */ + Py_DECREF(scratch); + return -1; + } + kind = writer->kind; + str = NULL; + } + else { + str = PyUnicode_New(strlen, '9'); + if (str == NULL) { + Py_DECREF(scratch); + return -1; + } + kind = PyUnicode_KIND(str); + } + +#define WRITE_DIGITS(TYPE) \ + do { \ + if (writer) \ + p = (TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos + strlen; \ + else \ + p = (TYPE*)PyUnicode_DATA(str) + strlen; \ + \ + *p = '\0'; \ + /* pout[0] through pout[size-2] contribute exactly \ 
_PyLong_DECIMAL_SHIFT digits each */ \ + for (i=0; i < size - 1; i++) { \ + rem = pout[i]; \ + for (j = 0; j < _PyLong_DECIMAL_SHIFT; j++) { \ + *--p = '0' + rem % 10; \ + rem /= 10; \ + } \ + } \ + /* pout[size-1]: always produce at least one decimal digit */ \ + rem = pout[i]; \ + do { \ + *--p = '0' + rem % 10; \ + rem /= 10; \ + } while (rem != 0); \ + \ + /* and sign */ \ + if (negative) \ + *--p = '-'; \ + \ + /* check we've counted correctly */ \ + if (writer) \ + assert(p == ((TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos)); \ + else \ + assert(p == (TYPE*)PyUnicode_DATA(str)); \ + } while (0) + + /* fill the string right-to-left */ + if (kind == PyUnicode_1BYTE_KIND) { + Py_UCS1 *p; + WRITE_DIGITS(Py_UCS1); + } + else if (kind == PyUnicode_2BYTE_KIND) { + Py_UCS2 *p; + WRITE_DIGITS(Py_UCS2); + } + else { + assert (kind == PyUnicode_4BYTE_KIND); + Py_UCS4 *p; + WRITE_DIGITS(Py_UCS4); + } +#undef WRITE_DIGITS + + Py_DECREF(scratch); + if (writer) { + writer->pos += strlen; + } + else { + assert(_PyUnicode_CheckConsistency(str, 1)); + *p_output = (PyObject *)str; + } + return 0; +} + +static PyObject * +long_to_decimal_string(PyObject *aa) +{ + PyObject *v; + if (long_to_decimal_string_internal(aa, &v, NULL) == -1) return NULL; - } - - /* fill the string right-to-left */ - assert(PyUnicode_KIND(str) == PyUnicode_1BYTE_KIND); - p = PyUnicode_1BYTE_DATA(str) + strlen; - *p = '\0'; - /* pout[0] through pout[size-2] contribute exactly - _PyLong_DECIMAL_SHIFT digits each */ - for (i=0; i < size - 1; i++) { - rem = pout[i]; - for (j = 0; j < _PyLong_DECIMAL_SHIFT; j++) { - *--p = '0' + rem % 10; - rem /= 10; - } - } - /* pout[size-1]: always produce at least one decimal digit */ - rem = pout[i]; - do { - *--p = '0' + rem % 10; - rem /= 10; - } while (rem != 0); - - /* and sign */ - if (negative) - *--p = '-'; - - /* check we've counted correctly */ - assert(p == PyUnicode_1BYTE_DATA(str)); - assert(_PyUnicode_CheckConsistency(str, 1)); - Py_DECREF(scratch); - 
return (PyObject *)str; + return v; +} + +int +_PyLong_Format2(PyObject *aa, _PyUnicodeWriter *writer) +{ + return long_to_decimal_string_internal(aa, NULL, writer); } /* Convert a long int object to a string, using a given conversion base, @@ -4232,11 +4288,21 @@ static PyObject * long__format__(PyObject *self, PyObject *args) { PyObject *format_spec; + _PyUnicodeWriter writer; + int ret; if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) return NULL; - return _PyLong_FormatAdvanced(self, format_spec, 0, - PyUnicode_GET_LENGTH(format_spec)); + if (_PyUnicodeWriter_init(&writer, 0, 0) == -1) + return NULL; + ret = _PyLong_FormatWriter(self, format_spec, 0, + PyUnicode_GET_LENGTH(format_spec), + &writer); + if (ret == -1) { + _PyUnicodeWriter_dealloc(&writer); + return NULL; + } + return _PyUnicodeWriter_finish(&writer); } /* Return a pair (q, r) such that a = b * q + r, and diff -r 3d900c9641c9 Objects/stringlib/unicode_format.h --- a/Objects/stringlib/unicode_format.h Tue May 08 22:24:47 2012 +0100 +++ b/Objects/stringlib/unicode_format.h Wed May 09 01:42:32 2012 +0200 @@ -492,23 +492,26 @@ error: render_field calls fieldobj.__format__(format_spec) method, and appends to the output. + + Return 1 on success, raise an exception and return 0 on error. */ static int -render_field(PyObject *fieldobj, SubString *format_spec, unicode_writer_t *writer) +render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer) { int ok = 0; PyObject *result = NULL; PyObject *format_spec_object = NULL; - PyObject *(*formatter)(PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL; + int (*formatter) (PyObject *, PyObject *, Py_ssize_t, Py_ssize_t, _PyUnicodeWriter *writer) = NULL; /* If we know the type exactly, skip the lookup of __format__ and just call the formatter directly. 
*/ if (PyUnicode_CheckExact(fieldobj)) - formatter = _PyUnicode_FormatAdvanced; - else if (PyLong_CheckExact(fieldobj)) - formatter =_PyLong_FormatAdvanced; + formatter = _PyUnicode_FormatWriter; + else if (PyLong_CheckExact(fieldobj)) { + formatter = _PyLong_FormatWriter; + } else if (PyFloat_CheckExact(fieldobj)) - formatter = _PyFloat_FormatAdvanced; + formatter = _PyFloat_FormatWriter; /* XXX: for 2.6, convert format_spec to the appropriate type (unicode, str) */ @@ -516,29 +519,27 @@ render_field(PyObject *fieldobj, SubStri if (formatter) { /* we know exactly which formatter will be called when __format__ is looked up, so call it directly, instead. */ - result = formatter(fieldobj, format_spec->str, - format_spec->start, format_spec->end); + ok = formatter(fieldobj, format_spec->str, + format_spec->start, format_spec->end, + writer); + return (ok == 0); } - else { - /* We need to create an object out of the pointers we have, because - __format__ takes a string/unicode object for format_spec. */ - if (format_spec->str) - format_spec_object = PyUnicode_Substring(format_spec->str, - format_spec->start, - format_spec->end); - else - format_spec_object = PyUnicode_New(0, 0); - if (format_spec_object == NULL) - goto done; - result = PyObject_Format(fieldobj, format_spec_object); - } - if (result == NULL || PyUnicode_READY(result) == -1) + /* We need to create an object out of the pointers we have, because + __format__ takes a string/unicode object for format_spec. 
*/ + if (format_spec->str) + format_spec_object = PyUnicode_Substring(format_spec->str, + format_spec->start, + format_spec->end); + else + format_spec_object = PyUnicode_New(0, 0); + if (format_spec_object == NULL) goto done; - assert(PyUnicode_Check(result)); - - ok = (unicode_writer_write_str(writer, result, 0, PyUnicode_GET_LENGTH(result)) == 0); + result = PyObject_Format(fieldobj, format_spec_object); + if (result == NULL) + goto done; + ok = (_PyUnicodeWriter_write_str(writer, result) == 0); done: Py_XDECREF(format_spec_object); Py_XDECREF(result); @@ -803,7 +804,7 @@ do_conversion(PyObject *obj, Py_UCS4 con static int output_markup(SubString *field_name, SubString *format_spec, int format_spec_needs_expanding, Py_UCS4 conversion, - unicode_writer_t *writer, PyObject *args, PyObject *kwargs, + _PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs, int recursion_depth, AutoNumber *auto_number) { PyObject *tmp = NULL; @@ -864,7 +865,7 @@ done: */ static int do_markup(SubString *input, PyObject *args, PyObject *kwargs, - unicode_writer_t *writer, int recursion_depth, AutoNumber *auto_number) + _PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number) { MarkupIterator iter; int format_spec_needs_expanding; @@ -881,9 +882,9 @@ do_markup(SubString *input, PyObject *ar &field_name, &format_spec, &conversion, &format_spec_needs_expanding)) == 2) { - err = unicode_writer_write_str(writer, - literal.str, literal.start, - literal.end - literal.start); + err = _PyUnicodeWriter_write_substr(writer, + literal.str, literal.start, + literal.end - literal.start); if (err == -1) return 0; if (field_present) @@ -904,7 +905,7 @@ static PyObject * build_string(SubString *input, PyObject *args, PyObject *kwargs, int recursion_depth, AutoNumber *auto_number) { - unicode_writer_t writer; + _PyUnicodeWriter writer; Py_ssize_t initlen; /* check the recursion level */ @@ -915,16 +916,16 @@ build_string(SubString *input, PyObject } initlen = 
PyUnicode_GET_LENGTH(input->str) + 100; - if (unicode_writer_init(&writer, initlen, 127) == -1) + if (_PyUnicodeWriter_init(&writer, initlen, 127) == -1) return NULL; if (!do_markup(input, args, kwargs, &writer, recursion_depth, auto_number)) { - unicode_writer_dealloc(&writer); + _PyUnicodeWriter_dealloc(&writer); return NULL; } - return unicode_writer_finish(&writer); + return _PyUnicodeWriter_finish(&writer); } /************************************************************************/ diff -r 3d900c9641c9 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Tue May 08 22:24:47 2012 +0100 +++ b/Objects/unicodeobject.c Wed May 09 01:42:32 2012 +0200 @@ -1151,11 +1151,12 @@ _copy_characters(PyObject *to, Py_ssize_ int fast; assert(PyUnicode_Check(from)); + assert(PyUnicode_IS_READY(from)); + assert(0 <= from_start); + assert(from_start + how_many <= PyUnicode_GET_LENGTH(from)); + assert(PyUnicode_Check(to)); - assert(PyUnicode_IS_READY(from)); assert(PyUnicode_IS_READY(to)); - - assert(PyUnicode_GET_LENGTH(from) >= how_many); assert(to_start + how_many <= PyUnicode_GET_LENGTH(to)); assert(0 <= how_many); @@ -1318,6 +1319,58 @@ PyUnicode_CopyCharacters(PyObject *to, P return how_many; } +void +_PyUnicode_CopyASCII(PyObject *to, Py_ssize_t to_start, + unsigned char *from, Py_ssize_t how_many) +{ + enum PyUnicode_Kind to_kind; + void *to_data; +#ifdef Py_DEBUG + Py_ssize_t i; +#endif + + assert(PyUnicode_Check(to)); + assert(PyUnicode_IS_READY(to)); + assert(to_start + how_many <= PyUnicode_GET_LENGTH(to)); + assert(0 <= how_many); + + if (how_many == 0) + return; + + to_kind = PyUnicode_KIND(to); + to_data = PyUnicode_DATA(to); + +#ifdef Py_DEBUG + for (i=0; i < how_many; i++) + assert(from[i] <= 127); +#endif + + if (to_kind == PyUnicode_1BYTE_KIND) { + Py_MEMCPY((char*)to_data + to_kind * to_start, + from, + to_kind * how_many); + } + else if (to_kind == PyUnicode_2BYTE_KIND) + { + _PyUnicode_CONVERT_BYTES( + Py_UCS1, Py_UCS2, + from, + from + how_many, + 
PyUnicode_2BYTE_DATA(to) + to_start + ); + } + else + { + assert (to_kind == PyUnicode_4BYTE_KIND); + _PyUnicode_CONVERT_BYTES( + Py_UCS1, Py_UCS4, + from, + from + how_many, + PyUnicode_4BYTE_DATA(to) + to_start + ); + } +} + /* Find the maximum code point and count the number of surrogate pairs so a correct string length can be computed before converting a string to UCS4. This function counts single surrogates as a character and not as a pair. @@ -13200,37 +13253,38 @@ unicode_endswith(PyObject *self, return PyBool_FromLong(result); } -typedef struct { - PyObject *buffer; - void *data; - enum PyUnicode_Kind kind; - Py_UCS4 maxchar; - Py_ssize_t pos; -} unicode_writer_t; - Py_LOCAL_INLINE(void) -unicode_writer_update(unicode_writer_t *writer) +_PyUnicodeWriter_update(_PyUnicodeWriter *writer) { writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer); writer->data = PyUnicode_DATA(writer->buffer); writer->kind = PyUnicode_KIND(writer->buffer); } -Py_LOCAL(int) -unicode_writer_init(unicode_writer_t *writer, - Py_ssize_t length, Py_UCS4 maxchar) +int +_PyUnicodeWriter_init(_PyUnicodeWriter *writer, + Py_ssize_t length, Py_UCS4 maxchar) { writer->pos = 0; - writer->buffer = PyUnicode_New(length, maxchar); - if (writer->buffer == NULL) - return -1; - unicode_writer_update(writer); + if (length > 0) { + writer->buffer = PyUnicode_New(length, maxchar); + if (writer->buffer == NULL) + return -1; + _PyUnicodeWriter_update(writer); + } + else { + writer->buffer = NULL; + writer->maxchar = maxchar; + writer->data = NULL; + /* invalid kind */ + writer->kind = PyUnicode_4BYTE_KIND + 1; + } return 0; } Py_LOCAL_INLINE(int) -unicode_writer_prepare(unicode_writer_t *writer, - Py_ssize_t length, Py_UCS4 maxchar) +_PyUnicodeWriter_prepare_inline(_PyUnicodeWriter *writer, + Py_ssize_t length, Py_UCS4 maxchar) { Py_ssize_t newlen; PyObject *newbuffer; @@ -13241,6 +13295,18 @@ unicode_writer_prepare(unicode_writer_t } newlen = writer->pos + length; + if (writer->buffer == NULL) { + 
if (newlen == 0) + return 0; + + maxchar = Py_MAX(writer->maxchar, maxchar); + writer->buffer = PyUnicode_New(newlen, maxchar); + if (writer->buffer == NULL) + return -1; + _PyUnicodeWriter_update(writer); + return 0; + } + if (newlen > PyUnicode_GET_LENGTH(writer->buffer)) { /* overallocate 25% to limit the number of resize */ if (newlen <= (PY_SSIZE_T_MAX - newlen / 4)) @@ -13261,19 +13327,26 @@ unicode_writer_prepare(unicode_writer_t return -1; } writer->buffer = newbuffer; - unicode_writer_update(writer); + _PyUnicodeWriter_update(writer); } else if (maxchar > writer->maxchar) { if (unicode_widen(&writer->buffer, writer->pos, maxchar) < 0) return -1; - unicode_writer_update(writer); + _PyUnicodeWriter_update(writer); } return 0; } +int +_PyUnicodeWriter_prepare(_PyUnicodeWriter *writer, + Py_ssize_t length, Py_UCS4 maxchar) +{ + return _PyUnicodeWriter_prepare_inline(writer, length, maxchar); +} + Py_LOCAL_INLINE(int) -unicode_writer_write_str( - unicode_writer_t *writer, +_PyUnicodeWriter_write_substr_inline( + _PyUnicodeWriter *writer, PyObject *str, Py_ssize_t start, Py_ssize_t length) { Py_UCS4 maxchar; @@ -13290,9 +13363,10 @@ unicode_writer_write_str( return 0; maxchar = _PyUnicode_FindMaxChar(str, start, start + length); - if (unicode_writer_prepare(writer, length, maxchar) == -1) - return -1; - + if (_PyUnicodeWriter_prepare_inline(writer, length, maxchar) == -1) + return -1; + + assert(writer->buffer != NULL); assert((writer->pos + length) <= PyUnicode_GET_LENGTH(writer->buffer)); copy_characters(writer->buffer, writer->pos, str, start, length); @@ -13300,31 +13374,96 @@ unicode_writer_write_str( return 0; } +int +_PyUnicodeWriter_write_substr( + _PyUnicodeWriter *writer, + PyObject *str, Py_ssize_t start, Py_ssize_t length) +{ + return _PyUnicodeWriter_write_substr_inline(writer, str, start, length); +} + Py_LOCAL_INLINE(int) -unicode_writer_write_char( - unicode_writer_t *writer, +_PyUnicodeWriter_write_str_inline( + _PyUnicodeWriter *writer, + 
PyObject *str) +{ + Py_UCS4 maxchar; + Py_ssize_t length; + + assert(str != NULL); + assert(PyUnicode_Check(str)); + if (PyUnicode_READY(str) == -1) + return -1; + length = PyUnicode_GET_LENGTH(str); + if (length == 0) + return 0; + + maxchar = PyUnicode_MAX_CHAR_VALUE(str); + if (_PyUnicodeWriter_prepare_inline(writer, length, maxchar) == -1) + return -1; + + assert(writer->buffer != NULL); + assert((writer->pos + length) <= PyUnicode_GET_LENGTH(writer->buffer)); + copy_characters(writer->buffer, writer->pos, + str, 0, length); + writer->pos += length; + return 0; +} + +int +_PyUnicodeWriter_write_str( + _PyUnicodeWriter *writer, + PyObject *str) +{ + return _PyUnicodeWriter_write_str_inline(writer, str); +} + +Py_LOCAL_INLINE(int) +_PyUnicodeWriter_write_char_inline( + _PyUnicodeWriter *writer, Py_UCS4 ch) { - if (unicode_writer_prepare(writer, 1, ch) == -1) - return -1; + if (_PyUnicodeWriter_prepare_inline(writer, 1, ch) == -1) + return -1; + assert(writer->buffer != NULL); assert((writer->pos + 1) <= PyUnicode_GET_LENGTH(writer->buffer)); PyUnicode_WRITE(writer->kind, writer->data, writer->pos, ch); writer->pos += 1; return 0; } -Py_LOCAL(PyObject *) -unicode_writer_finish(unicode_writer_t *writer) -{ +int +_PyUnicodeWriter_write_char( + _PyUnicodeWriter *writer, + Py_UCS4 ch) +{ + return _PyUnicodeWriter_write_char_inline(writer, ch); +} + +/* Use the inlined version in unicodeobject.c */ +#define _PyUnicodeWriter_prepare _PyUnicodeWriter_prepare_inline +#define _PyUnicodeWriter_write_substr _PyUnicodeWriter_write_substr_inline +#define _PyUnicodeWriter_write_str _PyUnicodeWriter_write_str_inline +#define _PyUnicodeWriter_write_char _PyUnicodeWriter_write_char_inline + +PyObject * +_PyUnicodeWriter_finish(_PyUnicodeWriter *writer) +{ + if (writer->buffer == NULL) { + assert(writer->pos == 0); + Py_INCREF(unicode_empty); + return unicode_empty; + } if (PyUnicode_Resize(&writer->buffer, writer->pos) < 0) { Py_DECREF(writer->buffer); return NULL; } + 
assert(_PyUnicode_CheckConsistency(writer->buffer, 1)); return writer->buffer; } -Py_LOCAL(void) -unicode_writer_dealloc(unicode_writer_t *writer) +void +_PyUnicodeWriter_dealloc(_PyUnicodeWriter *writer) { Py_CLEAR(writer->buffer); } @@ -13346,14 +13485,27 @@ The substitutions are identified by brac static PyObject * unicode__format__(PyObject* self, PyObject* args) { - PyObject *format_spec, *out; + PyObject *format_spec; + _PyUnicodeWriter writer; + int ret; + Py_ssize_t len; if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) return NULL; - out = _PyUnicode_FormatAdvanced(self, format_spec, 0, - PyUnicode_GET_LENGTH(format_spec)); - return out; + if (PyUnicode_READY(self) == -1) + return NULL; + len = PyUnicode_GET_LENGTH(self); + if (_PyUnicodeWriter_init(&writer, len + 100, 127) == -1) + return NULL; + ret = _PyUnicode_FormatWriter(self, format_spec, 0, + PyUnicode_GET_LENGTH(format_spec), + &writer); + if (ret == -1) { + _PyUnicodeWriter_dealloc(&writer); + return NULL; + } + return _PyUnicodeWriter_finish(&writer); } PyDoc_STRVAR(p_format__doc__, @@ -13789,7 +13941,7 @@ PyUnicode_Format(PyObject *format, PyObj PyObject *uformat; void *fmt; enum PyUnicode_Kind kind, fmtkind; - unicode_writer_t writer; + _PyUnicodeWriter writer; if (format == NULL || args == NULL) { PyErr_BadInternalCall(); @@ -13806,7 +13958,7 @@ PyUnicode_Format(PyObject *format, PyObj fmtcnt = PyUnicode_GET_LENGTH(uformat); fmtpos = 0; - if (unicode_writer_init(&writer, fmtcnt + 100, 127) < 0) + if (_PyUnicodeWriter_init(&writer, fmtcnt + 100, 127) < 0) goto onError; if (PyTuple_Check(args)) { @@ -13832,7 +13984,7 @@ PyUnicode_Format(PyObject *format, PyObj } if (fmtcnt < 0) fmtpos--; - if (unicode_writer_write_str(&writer, uformat, nonfmtpos, fmtpos - nonfmtpos) < 0) + if (_PyUnicodeWriter_write_substr(&writer, uformat, nonfmtpos, fmtpos - nonfmtpos) < 0) goto onError; } else { @@ -13990,7 +14142,7 @@ PyUnicode_Format(PyObject *format, PyObj } if (c == '%') { - if 
(unicode_writer_write_char(&writer, '%') < 0) + if (_PyUnicodeWriter_write_char(&writer, '%') < 0) goto onError; continue; } @@ -14127,7 +14279,7 @@ PyUnicode_Format(PyObject *format, PyObj width = len; if (sign) { if (fill != ' ') { - if (unicode_writer_write_char(&writer, signchar) < 0) + if (_PyUnicodeWriter_write_char(&writer, signchar) < 0) goto onError; } if (width > len) @@ -14137,7 +14289,7 @@ PyUnicode_Format(PyObject *format, PyObj assert(PyUnicode_READ(kind, pbuf, pindex) == '0'); assert(PyUnicode_READ(kind, pbuf, pindex + 1) == c); if (fill != ' ') { - if (unicode_writer_prepare(&writer, 2, 127) < 0) + if (_PyUnicodeWriter_prepare(&writer, 2, 127) < 0) goto onError; PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '0'); PyUnicode_WRITE(writer.kind, writer.data, writer.pos+1, c); @@ -14152,7 +14304,7 @@ PyUnicode_Format(PyObject *format, PyObj if (width > len && !(flags & F_LJUST)) { Py_ssize_t sublen; sublen = width - len; - if (unicode_writer_prepare(&writer, sublen, fill) < 0) + if (_PyUnicodeWriter_prepare(&writer, sublen, fill) < 0) goto onError; FILL(writer.kind, writer.data, fill, writer.pos, sublen); writer.pos += sublen; @@ -14160,14 +14312,14 @@ PyUnicode_Format(PyObject *format, PyObj } if (fill == ' ') { if (sign) { - if (unicode_writer_write_char(&writer, signchar) < 0) + if (_PyUnicodeWriter_write_char(&writer, signchar) < 0) goto onError; } if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) { assert(PyUnicode_READ(kind, pbuf, pindex) == '0'); assert(PyUnicode_READ(kind, pbuf, pindex+1) == c); - if (unicode_writer_prepare(&writer, 2, 127) < 0) + if (_PyUnicodeWriter_prepare(&writer, 2, 127) < 0) goto onError; PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '0'); PyUnicode_WRITE(writer.kind, writer.data, writer.pos+1, c); @@ -14178,11 +14330,11 @@ PyUnicode_Format(PyObject *format, PyObj } /* Copy all characters, preserving len */ - if (unicode_writer_write_str(&writer, temp, pindex, len) < 0) + if 
(_PyUnicodeWriter_write_substr(&writer, temp, pindex, len) < 0) goto onError; if (width > len) { Py_ssize_t sublen = width - len; - if (unicode_writer_prepare(&writer, sublen, ' ') < 0) + if (_PyUnicodeWriter_prepare(&writer, sublen, ' ') < 0) goto onError; FILL(writer.kind, writer.data, ' ', writer.pos, sublen); writer.pos += sublen; @@ -14207,13 +14359,13 @@ PyUnicode_Format(PyObject *format, PyObj Py_DECREF(uformat); Py_XDECREF(temp); Py_XDECREF(second); - return unicode_writer_finish(&writer); + return _PyUnicodeWriter_finish(&writer); onError: Py_DECREF(uformat); Py_XDECREF(temp); Py_XDECREF(second); - unicode_writer_dealloc(&writer); + _PyUnicodeWriter_dealloc(&writer); if (args_owned) { Py_DECREF(args); } diff -r 3d900c9641c9 Python/formatter_unicode.c --- a/Python/formatter_unicode.c Tue May 08 22:24:47 2012 +0100 +++ b/Python/formatter_unicode.c Wed May 09 01:42:32 2012 +0200 @@ -316,21 +316,31 @@ calc_padding(Py_ssize_t nchars, Py_ssize /* Do the padding, and return a pointer to where the caller-supplied content goes. */ static Py_ssize_t -fill_padding(PyObject *s, Py_ssize_t start, Py_ssize_t nchars, +fill_padding(_PyUnicodeWriter *writer, Py_ssize_t start, Py_ssize_t nchars, Py_UCS4 fill_char, Py_ssize_t n_lpadding, Py_ssize_t n_rpadding) { + Py_ssize_t pos, r; + /* Pad on left. */ - if (n_lpadding) - PyUnicode_Fill(s, start, start + n_lpadding, fill_char); + if (n_lpadding) { + pos = writer->pos + start; + r = PyUnicode_Fill(writer->buffer, pos, pos + n_lpadding, fill_char); + if (r == -1) + return -1; + } /* Pad on right. */ - if (n_rpadding) - PyUnicode_Fill(s, start + nchars + n_lpadding, - start + nchars + n_lpadding + n_rpadding, fill_char); + if (n_rpadding) { + pos = writer->pos + start + nchars + n_lpadding; + r = PyUnicode_Fill(writer->buffer, pos, pos + n_rpadding, fill_char); + if (r == -1) + return -1; + } /* Pointer to the user content. 
*/ - return start + n_lpadding; + writer->pos += (start + n_lpadding); + return 0; } /************************************************************************/ @@ -541,7 +551,7 @@ calc_number_widths(NumberFieldWidths *sp as determined in calc_number_widths(). Return -1 on error, or 0 on success. */ static int -fill_number(PyObject *out, Py_ssize_t pos, const NumberFieldWidths *spec, +fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec, PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end, PyObject *prefix, Py_ssize_t p_start, Py_UCS4 fill_char, @@ -549,36 +559,41 @@ fill_number(PyObject *out, Py_ssize_t po { /* Used to keep track of digits, decimal, and remainder. */ Py_ssize_t d_pos = d_start; - unsigned int kind = PyUnicode_KIND(out); - void *data = PyUnicode_DATA(out); + const unsigned int kind = writer->kind; + const void *data = writer->data; Py_ssize_t r; if (spec->n_lpadding) { - PyUnicode_Fill(out, pos, pos + spec->n_lpadding, fill_char); - pos += spec->n_lpadding; + r = PyUnicode_Fill(writer->buffer, writer->pos, spec->n_lpadding, fill_char); + if (r == -1) + return -1; + writer->pos += r; } if (spec->n_sign == 1) { - PyUnicode_WRITE(kind, data, pos++, spec->sign); + if (_PyUnicodeWriter_write_char(writer, spec->sign) == -1) + return -1; } if (spec->n_prefix) { - if (PyUnicode_CopyCharacters(out, pos, + if (PyUnicode_CopyCharacters(writer->buffer, writer->pos, prefix, p_start, spec->n_prefix) < 0) return -1; if (toupper) { Py_ssize_t t; for (t = 0; t < spec->n_prefix; t++) { - Py_UCS4 c = PyUnicode_READ(kind, data, pos + t); + Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t); c = Py_TOUPPER(c); assert (c <= 127); - PyUnicode_WRITE(kind, data, pos + t, c); + PyUnicode_WRITE(kind, data, writer->pos + t, c); } } - pos += spec->n_prefix; + writer->pos += spec->n_prefix; } if (spec->n_spadding) { - PyUnicode_Fill(out, pos, pos + spec->n_spadding, fill_char); - pos += spec->n_spadding; + r = PyUnicode_Fill(writer->buffer, writer->pos, 
spec->n_spadding, fill_char); + if (r == -1) + return -1; + writer->pos += r; } /* Only for type 'c' special case, it has no digits. */ @@ -594,7 +609,7 @@ fill_number(PyObject *out, Py_ssize_t po return -1; } r = _PyUnicode_InsertThousandsGrouping( - out, pos, + writer->buffer, writer->pos, spec->n_grouped_digits, pdigits + kind * d_pos, spec->n_digits, spec->n_min_width, @@ -609,34 +624,37 @@ fill_number(PyObject *out, Py_ssize_t po if (toupper) { Py_ssize_t t; for (t = 0; t < spec->n_grouped_digits; t++) { - Py_UCS4 c = PyUnicode_READ(kind, data, pos + t); + Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t); c = Py_TOUPPER(c); if (c > 127) { PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit"); return -1; } - PyUnicode_WRITE(kind, data, pos + t, c); + PyUnicode_WRITE(kind, data, writer->pos + t, c); } } - pos += spec->n_grouped_digits; + writer->pos += spec->n_grouped_digits; if (spec->n_decimal) { - if (PyUnicode_CopyCharacters(out, pos, locale->decimal_point, 0, spec->n_decimal) < 0) + if (PyUnicode_CopyCharacters(writer->buffer, writer->pos, locale->decimal_point, 0, spec->n_decimal) < 0) return -1; - pos += spec->n_decimal; + writer->pos += spec->n_decimal; d_pos += 1; } if (spec->n_remainder) { - if (PyUnicode_CopyCharacters(out, pos, digits, d_pos, spec->n_remainder) < 0) + if (PyUnicode_CopyCharacters(writer->buffer, writer->pos, digits, d_pos, spec->n_remainder) < 0) return -1; - pos += spec->n_remainder; + writer->pos += spec->n_remainder; d_pos += spec->n_remainder; } if (spec->n_rpadding) { - PyUnicode_Fill(out, pos, pos + spec->n_rpadding, fill_char); - pos += spec->n_rpadding; + /* pass a count and check the result, as the left and sign padding calls in this function do */ + r = PyUnicode_Fill(writer->buffer, writer->pos, spec->n_rpadding, fill_char); + if (r == -1) + return -1; + writer->pos += r; } return 0; } @@ -707,17 +722,20 @@ free_locale_info(LocaleInfo *locale_info /*********** string formatting ******************************************/ /************************************************************************/ -static 
PyObject * -format_string_internal(PyObject *value, const InternalFormatSpec *format) +static int +format_string_internal(PyObject *value, const InternalFormatSpec *format, + _PyUnicodeWriter *writer) { Py_ssize_t lpad; Py_ssize_t rpad; Py_ssize_t total; - Py_ssize_t pos; - Py_ssize_t len = PyUnicode_GET_LENGTH(value); - PyObject *result = NULL; + Py_ssize_t len; + int result = -1; Py_UCS4 maxchar; + assert(PyUnicode_IS_READY(value)); + len = PyUnicode_GET_LENGTH(value); + /* sign is not allowed on strings */ if (format->sign != '\0') { PyErr_SetString(PyExc_ValueError, @@ -754,21 +772,23 @@ format_string_internal(PyObject *value, maxchar = Py_MAX(maxchar, format->fill_char); /* allocate the resulting string */ - result = PyUnicode_New(total, maxchar); - if (result == NULL) + if (_PyUnicodeWriter_prepare(writer, total, maxchar) == -1) goto done; /* Write into that space. First the padding. */ - pos = fill_padding(result, 0, len, - format->fill_char=='\0'?' ':format->fill_char, - lpad, rpad); + result = fill_padding(writer, 0, len, + format->fill_char=='\0'?' ':format->fill_char, + lpad, rpad); + if (result == -1) + goto done; /* Then the source string. 
*/ - if (PyUnicode_CopyCharacters(result, pos, value, 0, len) < 0) - Py_CLEAR(result); + if (_PyUnicodeWriter_write_substr(writer, value, 0, len) == -1) + goto done; + writer->pos += rpad; + result = 0; done: - assert(!result || _PyUnicode_CheckConsistency(result, 1)); return result; } @@ -778,13 +798,12 @@ done: /************************************************************************/ typedef PyObject* -(*IntOrLongToString)(PyObject *value, int base); +(*IntOrLongToString)(PyObject *value, int base, _PyUnicodeWriter *writer); -static PyObject * -format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, - IntOrLongToString tostring) +static int +format_long_internal(PyObject *value, const InternalFormatSpec *format, _PyUnicodeWriter *writer) { - PyObject *result = NULL; + int result = -1; Py_UCS4 maxchar = 127; PyObject *tmp = NULL; Py_ssize_t inumeric_chars; @@ -798,7 +817,6 @@ format_int_or_long_internal(PyObject *va Py_ssize_t prefix = 0; NumberFieldWidths spec; long x; - int err; /* Locale settings, either from the actual locale or from a hard-code pseudo-locale */ @@ -878,7 +896,7 @@ format_int_or_long_internal(PyObject *va n_prefix = leading_chars_to_skip; /* Do the hard part, converting to a string in a given base */ - tmp = tostring(value, base); + tmp = _PyLong_Format(value, base); if (tmp == NULL || PyUnicode_READY(tmp) == -1) goto done; @@ -914,23 +932,19 @@ format_int_or_long_internal(PyObject *va &locale, format, &maxchar); /* Allocate the memory. */ - result = PyUnicode_New(n_total, maxchar); - if (!result) + if (_PyUnicodeWriter_prepare(writer, n_total, maxchar) == -1) goto done; /* Populate the memory. */ - err = fill_number(result, 0, &spec, - tmp, inumeric_chars, inumeric_chars + n_digits, - tmp, prefix, - format->fill_char == '\0' ? 
' ' : format->fill_char, - &locale, format->type == 'X'); - if (err) - Py_CLEAR(result); + result = fill_number(writer, &spec, + tmp, inumeric_chars, inumeric_chars + n_digits, + tmp, prefix, + format->fill_char == '\0' ? ' ' : format->fill_char, + &locale, format->type == 'X'); done: Py_XDECREF(tmp); free_locale_info(&locale); - assert(!result || _PyUnicode_CheckConsistency(result, 1)); return result; } @@ -945,9 +959,10 @@ strtounicode(char *charbuffer, Py_ssize_ } /* much of this is taken from unicodeobject.c */ -static PyObject * +static int format_float_internal(PyObject *value, - const InternalFormatSpec *format) + const InternalFormatSpec *format, + _PyUnicodeWriter *writer) { char *buf = NULL; /* buffer returned from PyOS_double_to_string */ Py_ssize_t n_digits; @@ -962,12 +977,11 @@ format_float_internal(PyObject *value, Py_ssize_t index; NumberFieldWidths spec; int flags = 0; - PyObject *result = NULL; + int result = -1; Py_UCS4 maxchar = 127; Py_UCS4 sign_char = '\0'; int float_type; /* Used to see if we have a nan, inf, or regular float. */ PyObject *unicode_tmp = NULL; - int err; /* Locale settings, either from the actual locale or from a hard-code pseudo-locale */ @@ -1055,24 +1069,20 @@ format_float_internal(PyObject *value, &locale, format, &maxchar); /* Allocate the memory. */ - result = PyUnicode_New(n_total, maxchar); - if (result == NULL) + if (_PyUnicodeWriter_prepare(writer, n_total, maxchar) == -1) goto done; /* Populate the memory. */ - err = fill_number(result, 0, &spec, - unicode_tmp, index, index + n_digits, - NULL, 0, - format->fill_char == '\0' ? ' ' : format->fill_char, - &locale, 0); - if (err) - Py_CLEAR(result); + result = fill_number(writer, &spec, + unicode_tmp, index, index + n_digits, + NULL, 0, + format->fill_char == '\0' ? 
' ' : format->fill_char, + &locale, 0); done: PyMem_Free(buf); Py_DECREF(unicode_tmp); free_locale_info(&locale); - assert(!result || _PyUnicode_CheckConsistency(result, 1)); return result; } @@ -1080,9 +1090,10 @@ done: /*********** complex formatting *****************************************/ /************************************************************************/ -static PyObject * +static int format_complex_internal(PyObject *value, - const InternalFormatSpec *format) + const InternalFormatSpec *format, + _PyUnicodeWriter *writer) { double re; double im; @@ -1106,11 +1117,8 @@ format_complex_internal(PyObject *value, NumberFieldWidths re_spec; NumberFieldWidths im_spec; int flags = 0; - PyObject *result = NULL; + int result = -1; Py_UCS4 maxchar = 127; - int rkind; - void *rdata; - Py_ssize_t index; Py_UCS4 re_sign_char = '\0'; Py_UCS4 im_sign_char = '\0'; int re_float_type; /* Used to see if we have a nan, inf, or regular float. */ @@ -1122,7 +1130,6 @@ format_complex_internal(PyObject *value, Py_ssize_t total; PyObject *re_unicode_tmp = NULL; PyObject *im_unicode_tmp = NULL; - int err; /* Locale settings, either from the actual locale or from a hard-code pseudo-locale */ @@ -1261,47 +1268,47 @@ format_complex_internal(PyObject *value, if (lpad || rpad) maxchar = Py_MAX(maxchar, format->fill_char); - result = PyUnicode_New(total, maxchar); - if (result == NULL) + if (_PyUnicodeWriter_prepare(writer, total, maxchar) == -1) goto done; - rkind = PyUnicode_KIND(result); - rdata = PyUnicode_DATA(result); /* Populate the memory. First, the padding. */ - index = fill_padding(result, 0, - n_re_total + n_im_total + 1 + add_parens * 2, - format->fill_char=='\0' ? ' ' : format->fill_char, - lpad, rpad); + result = fill_padding(writer, 0, + n_re_total + n_im_total + 1 + add_parens * 2, + format->fill_char=='\0' ? 
' ' : format->fill_char, + lpad, rpad); + if (result == -1) + goto done; - if (add_parens) - PyUnicode_WRITE(rkind, rdata, index++, '('); + if (add_parens) { + if (_PyUnicodeWriter_write_char(writer, '(') == -1) + goto done; + } if (!skip_re) { - err = fill_number(result, index, &re_spec, - re_unicode_tmp, i_re, i_re + n_re_digits, - NULL, 0, - 0, - &locale, 0); - if (err) { - Py_CLEAR(result); + result = fill_number(writer, &re_spec, + re_unicode_tmp, i_re, i_re + n_re_digits, + NULL, 0, + 0, + &locale, 0); + if (result == -1) goto done; - } - index += n_re_total; } - err = fill_number(result, index, &im_spec, - im_unicode_tmp, i_im, i_im + n_im_digits, - NULL, 0, - 0, - &locale, 0); - if (err) { - Py_CLEAR(result); + result = fill_number(writer, &im_spec, + im_unicode_tmp, i_im, i_im + n_im_digits, + NULL, 0, + 0, + &locale, 0); + if (result == -1) goto done; + if (_PyUnicodeWriter_write_char(writer, 'j') == -1) + goto done; + + if (add_parens) { + if (_PyUnicodeWriter_write_char(writer, ')') == -1) + goto done; } - index += n_im_total; - PyUnicode_WRITE(rkind, rdata, index++, 'j'); - - if (add_parens) - PyUnicode_WRITE(rkind, rdata, index++, ')'); + writer->pos += rpad; + result = 0; done: PyMem_Free(re_buf); @@ -1309,61 +1316,85 @@ done: Py_XDECREF(re_unicode_tmp); Py_XDECREF(im_unicode_tmp); free_locale_info(&locale); - assert(!result || _PyUnicode_CheckConsistency(result, 1)); return result; } /************************************************************************/ /*********** built in formatters ****************************************/ /************************************************************************/ -PyObject * -_PyUnicode_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, Py_ssize_t end) + +extern int +_PyLong_Format2(PyObject *aa, _PyUnicodeWriter *writer); +extern int +_PyFloat_Format2(PyObject *aa, _PyUnicodeWriter *writer); + +int +format_obj(PyObject *obj, _PyUnicodeWriter *writer) +{ + int result; + PyObject *str; 
+ + str = PyObject_Str(obj); + if (str == NULL) + return -1; + result = _PyUnicodeWriter_write_str(writer, str); + Py_DECREF(str); + return result; +} + +int +_PyUnicode_FormatWriter(PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, Py_ssize_t end, + _PyUnicodeWriter *writer) { InternalFormatSpec format; - PyObject *result; + int result; /* check for the special case of zero length format spec, make it equivalent to str(obj) */ - if (start == end) - return PyObject_Str(obj); + if (start == end) { + if (PyUnicode_CheckExact(obj)) + return _PyUnicodeWriter_write_str(writer, obj); + else + return format_obj(obj, writer); + } /* parse the format_spec */ if (!parse_internal_render_format_spec(format_spec, start, end, &format, 's', '<')) - return NULL; + return -1; /* type conversion? */ switch (format.type) { case 's': /* no type conversion needed, already a string. do the formatting */ - result = format_string_internal(obj, &format); - if (result != NULL) - assert(_PyUnicode_CheckConsistency(result, 1)); + result = format_string_internal(obj, &format, writer); break; default: /* unknown */ unknown_presentation_type(format.type, obj->ob_type->tp_name); - result = NULL; + result = -1; } return result; } -static PyObject* -format_int_or_long(PyObject* obj, PyObject* format_spec, - Py_ssize_t start, Py_ssize_t end, - IntOrLongToString tostring) +int +_PyLong_FormatWriter(PyObject* obj, PyObject* format_spec, + Py_ssize_t start, Py_ssize_t end, + _PyUnicodeWriter *writer) { - PyObject *result = NULL; - PyObject *tmp = NULL; + PyObject *tmp = NULL, *str = NULL; InternalFormatSpec format; + int result = -1; /* check for the special case of zero length format spec, make it equivalent to str(obj) */ if (start == end) { - result = PyObject_Str(obj); - goto done; + if (PyLong_CheckExact(obj)) + return _PyLong_Format2(obj, writer); + else + return format_obj(obj, writer); } /* parse the format_spec */ @@ -1382,7 +1413,7 @@ format_int_or_long(PyObject* obj, PyObje case 
'n': /* no type conversion needed, already an int (or long). do the formatting */ - result = format_int_or_long_internal(obj, &format, tostring); + result = format_long_internal(obj, &format, writer); break; case 'e': @@ -1396,7 +1427,7 @@ format_int_or_long(PyObject* obj, PyObje tmp = PyNumber_Float(obj); if (tmp == NULL) goto done; - result = format_float_internal(tmp, &format); + result = format_float_internal(tmp, &format, writer); break; default: @@ -1407,41 +1438,31 @@ format_int_or_long(PyObject* obj, PyObje done: Py_XDECREF(tmp); + Py_XDECREF(str); return result; } -/* Need to define long_format as a function that will convert a long - to a string. In 3.0, _PyLong_Format has the correct signature. */ -#define long_format _PyLong_Format - -PyObject * -_PyLong_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, Py_ssize_t end) +int +_PyFloat_FormatWriter(PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, Py_ssize_t end, + _PyUnicodeWriter *writer) { - return format_int_or_long(obj, format_spec, start, end, - long_format); -} - -PyObject * -_PyFloat_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, Py_ssize_t end) -{ - PyObject *result = NULL; InternalFormatSpec format; /* check for the special case of zero length format spec, make it equivalent to str(obj) */ if (start == end) { - result = PyObject_Str(obj); - goto done; + if (PyFloat_CheckExact(obj)) + return _PyFloat_Format2(obj, writer); + else + return format_obj(obj, writer); } /* parse the format_spec */ if (!parse_internal_render_format_spec(format_spec, start, end, &format, '\0', '>')) - goto done; + return -1; /* type conversion? */ switch (format.type) { @@ -1455,38 +1476,33 @@ _PyFloat_FormatAdvanced(PyObject *obj, case 'n': case '%': /* no conversion, already a float. 
do the formatting */ - result = format_float_internal(obj, &format); - break; + return format_float_internal(obj, &format, writer); default: /* unknown */ unknown_presentation_type(format.type, obj->ob_type->tp_name); - goto done; + return -1; } - -done: - return result; } -PyObject * -_PyComplex_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, Py_ssize_t end) +int +_PyComplex_FormatWriter(PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, Py_ssize_t end, + _PyUnicodeWriter *writer) { - PyObject *result = NULL; + int result = -1; InternalFormatSpec format; /* check for the special case of zero length format spec, make it equivalent to str(obj) */ - if (start == end) { - result = PyObject_Str(obj); - goto done; - } + if (start == end) + return format_obj(obj, writer); /* parse the format_spec */ if (!parse_internal_render_format_spec(format_spec, start, end, &format, '\0', '>')) - goto done; + return -1; /* type conversion? */ switch (format.type) { @@ -1499,15 +1515,12 @@ _PyComplex_FormatAdvanced(PyObject *obj, case 'G': case 'n': /* no conversion, already a complex. do the formatting */ - result = format_complex_internal(obj, &format); - break; + result = format_complex_internal(obj, &format, writer); + return result; default: /* unknown */ unknown_presentation_type(format.type, obj->ob_type->tp_name); - goto done; + return -1; } - -done: - return result; }