diff -r 01581e8b50f2 Include/complexobject.h --- a/Include/complexobject.h Mon May 07 23:50:05 2012 +0200 +++ b/Include/complexobject.h Tue May 08 00:07:29 2012 +0200 @@ -63,10 +63,11 @@ PyAPI_FUNC(Py_complex) PyComplex_AsCComp /* Format the object based on the format_spec, as defined in PEP 3101 (Advanced String Formatting). */ #ifndef Py_LIMITED_API -PyAPI_FUNC(PyObject *) _PyComplex_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, - Py_ssize_t end); +PyAPI_FUNC(int) _PyComplex_FormatWriter(PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, + Py_ssize_t end, + _PyUnicodeWriter *writer); #endif #ifdef __cplusplus diff -r 01581e8b50f2 Include/floatobject.h --- a/Include/floatobject.h Mon May 07 23:50:05 2012 +0200 +++ b/Include/floatobject.h Tue May 08 00:07:29 2012 +0200 @@ -112,10 +112,11 @@ PyAPI_FUNC(int) PyFloat_ClearFreeList(vo /* Format the object based on the format_spec, as defined in PEP 3101 (Advanced String Formatting). */ -PyAPI_FUNC(PyObject *) _PyFloat_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, - Py_ssize_t end); +PyAPI_FUNC(int) _PyFloat_FormatWriter(PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, + Py_ssize_t end, + _PyUnicodeWriter *writer); #endif /* Py_LIMITED_API */ #ifdef __cplusplus diff -r 01581e8b50f2 Include/longobject.h --- a/Include/longobject.h Mon May 07 23:50:05 2012 +0200 +++ b/Include/longobject.h Tue May 08 00:07:29 2012 +0200 @@ -154,11 +154,13 @@ PyAPI_FUNC(int) _PyLong_AsByteArray(PyLo PyAPI_FUNC(PyObject *) _PyLong_Format(PyObject *aa, int base); /* Format the object based on the format_spec, as defined in PEP 3101 - (Advanced String Formatting). */ -PyAPI_FUNC(PyObject *) _PyLong_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, - Py_ssize_t end); + (Advanced String Formatting). Return 0 on success, raise an exception + and return -1 on error. 
*/ +PyAPI_FUNC(int) _PyLong_FormatWriter(PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, + Py_ssize_t end, + _PyUnicodeWriter *writer); #endif /* Py_LIMITED_API */ /* These aren't really part of the long object, but they're handy. The diff -r 01581e8b50f2 Include/unicodeobject.h --- a/Include/unicodeobject.h Mon May 07 23:50:05 2012 +0200 +++ b/Include/unicodeobject.h Tue May 08 00:07:29 2012 +0200 @@ -865,12 +865,50 @@ PyAPI_FUNC(PyObject *) PyUnicode_FromFor ); #ifndef Py_LIMITED_API +typedef struct { + PyObject *buffer; + void *data; + enum PyUnicode_Kind kind; + Py_UCS4 maxchar; + Py_ssize_t pos; +} _PyUnicodeWriter; + +PyAPI_FUNC(int) +_PyUnicodeWriter_init(_PyUnicodeWriter *writer, + Py_ssize_t length, Py_UCS4 maxchar); + +PyAPI_FUNC(int) +_PyUnicodeWriter_prepare(_PyUnicodeWriter *writer, + Py_ssize_t length, Py_UCS4 maxchar); + +PyAPI_FUNC(int) +_PyUnicodeWriter_write_char(_PyUnicodeWriter *writer, Py_UCS4 ch); + +PyAPI_FUNC(int) +_PyUnicodeWriter_write_str( + _PyUnicodeWriter *writer, + PyObject *str); + +PyAPI_FUNC(int) +_PyUnicodeWriter_write_substr( + _PyUnicodeWriter *writer, + PyObject *str, Py_ssize_t start, Py_ssize_t length); + +PyAPI_FUNC(PyObject *) +_PyUnicodeWriter_finish(_PyUnicodeWriter *writer); + +PyAPI_FUNC(void) +_PyUnicodeWriter_dealloc(_PyUnicodeWriter *writer); +#endif + +#ifndef Py_LIMITED_API /* Format the object based on the format_spec, as defined in PEP 3101 (Advanced String Formatting). 
*/ -PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, - Py_ssize_t end); +PyAPI_FUNC(int) _PyUnicode_FormatWriter(PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, + Py_ssize_t end, + _PyUnicodeWriter *writer); #endif PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **); diff -r 01581e8b50f2 Objects/complexobject.c --- a/Objects/complexobject.c Mon May 07 23:50:05 2012 +0200 +++ b/Objects/complexobject.c Tue May 08 00:07:29 2012 +0200 @@ -699,11 +699,22 @@ static PyObject * complex__format__(PyObject* self, PyObject* args) { PyObject *format_spec; + _PyUnicodeWriter writer; + int ret; if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) - return NULL; - return _PyComplex_FormatAdvanced(self, format_spec, 0, - PyUnicode_GET_LENGTH(format_spec)); + return NULL; + + if (_PyUnicodeWriter_init(&writer, 0, 0) == -1) + return NULL; + ret = _PyComplex_FormatWriter(self, format_spec, 0, + PyUnicode_GET_LENGTH(format_spec), + &writer); + if (ret == -1) { + _PyUnicodeWriter_dealloc(&writer); + return NULL; + } + return _PyUnicodeWriter_finish(&writer); } #if 0 diff -r 01581e8b50f2 Objects/floatobject.c --- a/Objects/floatobject.c Mon May 07 23:50:05 2012 +0200 +++ b/Objects/floatobject.c Tue May 08 00:07:29 2012 +0200 @@ -1703,11 +1703,22 @@ static PyObject * float__format__(PyObject *self, PyObject *args) { PyObject *format_spec; + _PyUnicodeWriter writer; + int ret; if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) return NULL; - return _PyFloat_FormatAdvanced(self, format_spec, 0, - PyUnicode_GET_LENGTH(format_spec)); + + if (_PyUnicodeWriter_init(&writer, 0, 0) == -1) + return NULL; + ret = _PyFloat_FormatWriter(self, format_spec, 0, + PyUnicode_GET_LENGTH(format_spec), + &writer); + if (ret == -1) { + _PyUnicodeWriter_dealloc(&writer); + return NULL; + } + return _PyUnicodeWriter_finish(&writer); } PyDoc_STRVAR(float__format__doc, diff -r 01581e8b50f2 Objects/longobject.c --- 
a/Objects/longobject.c Mon May 07 23:50:05 2012 +0200 +++ b/Objects/longobject.c Tue May 08 00:07:29 2012 +0200 @@ -4232,11 +4232,21 @@ static PyObject * long__format__(PyObject *self, PyObject *args) { PyObject *format_spec; + _PyUnicodeWriter writer; + int ret; if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) return NULL; - return _PyLong_FormatAdvanced(self, format_spec, 0, - PyUnicode_GET_LENGTH(format_spec)); + if (_PyUnicodeWriter_init(&writer, 0, 0) == -1) + return NULL; + ret = _PyLong_FormatWriter(self, format_spec, 0, + PyUnicode_GET_LENGTH(format_spec), + &writer); + if (ret == -1) { + _PyUnicodeWriter_dealloc(&writer); + return NULL; + } + return _PyUnicodeWriter_finish(&writer); } /* Return a pair (q, r) such that a = b * q + r, and diff -r 01581e8b50f2 Objects/stringlib/unicode_format.h --- a/Objects/stringlib/unicode_format.h Mon May 07 23:50:05 2012 +0200 +++ b/Objects/stringlib/unicode_format.h Tue May 08 00:07:29 2012 +0200 @@ -492,23 +492,26 @@ error: render_field calls fieldobj.__format__(format_spec) method, and appends to the output. + + Return 1 on success, raise an exception and return 0 on error. */ static int -render_field(PyObject *fieldobj, SubString *format_spec, unicode_writer_t *writer) +render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer) { int ok = 0; PyObject *result = NULL; PyObject *format_spec_object = NULL; - PyObject *(*formatter)(PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL; + int (*formatter) (PyObject *, PyObject *, Py_ssize_t, Py_ssize_t, _PyUnicodeWriter *writer) = NULL; /* If we know the type exactly, skip the lookup of __format__ and just call the formatter directly. 
*/ if (PyUnicode_CheckExact(fieldobj)) - formatter = _PyUnicode_FormatAdvanced; - else if (PyLong_CheckExact(fieldobj)) - formatter =_PyLong_FormatAdvanced; + formatter = _PyUnicode_FormatWriter; + else if (PyLong_CheckExact(fieldobj)) { + formatter = _PyLong_FormatWriter; + } else if (PyFloat_CheckExact(fieldobj)) - formatter = _PyFloat_FormatAdvanced; + formatter = _PyFloat_FormatWriter; /* XXX: for 2.6, convert format_spec to the appropriate type (unicode, str) */ @@ -516,29 +519,27 @@ render_field(PyObject *fieldobj, SubStri if (formatter) { /* we know exactly which formatter will be called when __format__ is looked up, so call it directly, instead. */ - result = formatter(fieldobj, format_spec->str, - format_spec->start, format_spec->end); + ok = formatter(fieldobj, format_spec->str, + format_spec->start, format_spec->end, + writer); + return (ok == 0); } - else { - /* We need to create an object out of the pointers we have, because - __format__ takes a string/unicode object for format_spec. */ - if (format_spec->str) - format_spec_object = PyUnicode_Substring(format_spec->str, - format_spec->start, - format_spec->end); - else - format_spec_object = PyUnicode_New(0, 0); - if (format_spec_object == NULL) - goto done; - result = PyObject_Format(fieldobj, format_spec_object); - } - if (result == NULL || PyUnicode_READY(result) == -1) + /* We need to create an object out of the pointers we have, because + __format__ takes a string/unicode object for format_spec. 
*/ + if (format_spec->str) + format_spec_object = PyUnicode_Substring(format_spec->str, + format_spec->start, + format_spec->end); + else + format_spec_object = PyUnicode_New(0, 0); + if (format_spec_object == NULL) goto done; - assert(PyUnicode_Check(result)); - - ok = (unicode_writer_write_str(writer, result, 0, PyUnicode_GET_LENGTH(result)) == 0); + result = PyObject_Format(fieldobj, format_spec_object); + if (result == NULL) + goto done; + ok = (_PyUnicodeWriter_write_str(writer, result) == 0); done: Py_XDECREF(format_spec_object); Py_XDECREF(result); @@ -803,7 +804,7 @@ do_conversion(PyObject *obj, Py_UCS4 con static int output_markup(SubString *field_name, SubString *format_spec, int format_spec_needs_expanding, Py_UCS4 conversion, - unicode_writer_t *writer, PyObject *args, PyObject *kwargs, + _PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs, int recursion_depth, AutoNumber *auto_number) { PyObject *tmp = NULL; @@ -864,7 +865,7 @@ done: */ static int do_markup(SubString *input, PyObject *args, PyObject *kwargs, - unicode_writer_t *writer, int recursion_depth, AutoNumber *auto_number) + _PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number) { MarkupIterator iter; int format_spec_needs_expanding; @@ -881,9 +882,9 @@ do_markup(SubString *input, PyObject *ar &field_name, &format_spec, &conversion, &format_spec_needs_expanding)) == 2) { - err = unicode_writer_write_str(writer, - literal.str, literal.start, - literal.end - literal.start); + err = _PyUnicodeWriter_write_substr(writer, + literal.str, literal.start, + literal.end - literal.start); if (err == -1) return 0; if (field_present) @@ -904,7 +905,7 @@ static PyObject * build_string(SubString *input, PyObject *args, PyObject *kwargs, int recursion_depth, AutoNumber *auto_number) { - unicode_writer_t writer; + _PyUnicodeWriter writer; Py_ssize_t initlen; /* check the recursion level */ @@ -915,16 +916,16 @@ build_string(SubString *input, PyObject } initlen = 
PyUnicode_GET_LENGTH(input->str) + 100; - if (unicode_writer_init(&writer, initlen, 127) == -1) + if (_PyUnicodeWriter_init(&writer, initlen, 127) == -1) return NULL; if (!do_markup(input, args, kwargs, &writer, recursion_depth, auto_number)) { - unicode_writer_dealloc(&writer); + _PyUnicodeWriter_dealloc(&writer); return NULL; } - return unicode_writer_finish(&writer); + return _PyUnicodeWriter_finish(&writer); } /************************************************************************/ diff -r 01581e8b50f2 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Mon May 07 23:50:05 2012 +0200 +++ b/Objects/unicodeobject.c Tue May 08 00:07:29 2012 +0200 @@ -13200,37 +13200,38 @@ unicode_endswith(PyObject *self, return PyBool_FromLong(result); } -typedef struct { - PyObject *buffer; - void *data; - enum PyUnicode_Kind kind; - Py_UCS4 maxchar; - Py_ssize_t pos; -} unicode_writer_t; - Py_LOCAL_INLINE(void) -unicode_writer_update(unicode_writer_t *writer) +_PyUnicodeWriter_update(_PyUnicodeWriter *writer) { writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer); writer->data = PyUnicode_DATA(writer->buffer); writer->kind = PyUnicode_KIND(writer->buffer); } -Py_LOCAL(int) -unicode_writer_init(unicode_writer_t *writer, - Py_ssize_t length, Py_UCS4 maxchar) +int +_PyUnicodeWriter_init(_PyUnicodeWriter *writer, + Py_ssize_t length, Py_UCS4 maxchar) { writer->pos = 0; - writer->buffer = PyUnicode_New(length, maxchar); - if (writer->buffer == NULL) - return -1; - unicode_writer_update(writer); + if (length > 0) { + writer->buffer = PyUnicode_New(length, maxchar); + if (writer->buffer == NULL) + return -1; + _PyUnicodeWriter_update(writer); + } + else { + writer->buffer = NULL; + writer->maxchar = maxchar; + writer->data = NULL; + /* invalid kind */ + writer->kind = PyUnicode_4BYTE_KIND + 1; + } return 0; } Py_LOCAL_INLINE(int) -unicode_writer_prepare(unicode_writer_t *writer, - Py_ssize_t length, Py_UCS4 maxchar) +_PyUnicodeWriter_prepare_inline(_PyUnicodeWriter *writer, + 
Py_ssize_t length, Py_UCS4 maxchar) { Py_ssize_t newlen; PyObject *newbuffer; @@ -13241,6 +13242,18 @@ unicode_writer_prepare(unicode_writer_t } newlen = writer->pos + length; + if (writer->buffer == NULL) { + if (newlen == 0) + return 0; + + maxchar = Py_MAX(writer->maxchar, maxchar); + writer->buffer = PyUnicode_New(newlen, maxchar); + if (writer->buffer == NULL) + return -1; + _PyUnicodeWriter_update(writer); + return 0; + } + if (newlen > PyUnicode_GET_LENGTH(writer->buffer)) { /* overallocate 25% to limit the number of resize */ if (newlen <= (PY_SSIZE_T_MAX - newlen / 4)) @@ -13261,19 +13274,26 @@ unicode_writer_prepare(unicode_writer_t return -1; } writer->buffer = newbuffer; - unicode_writer_update(writer); + _PyUnicodeWriter_update(writer); } else if (maxchar > writer->maxchar) { if (unicode_widen(&writer->buffer, writer->pos, maxchar) < 0) return -1; - unicode_writer_update(writer); + _PyUnicodeWriter_update(writer); } return 0; } +int +_PyUnicodeWriter_prepare(_PyUnicodeWriter *writer, + Py_ssize_t length, Py_UCS4 maxchar) +{ + return _PyUnicodeWriter_prepare_inline(writer, length, maxchar); +} + Py_LOCAL_INLINE(int) -unicode_writer_write_str( - unicode_writer_t *writer, +_PyUnicodeWriter_write_substr_inline( + _PyUnicodeWriter *writer, PyObject *str, Py_ssize_t start, Py_ssize_t length) { Py_UCS4 maxchar; @@ -13290,9 +13310,10 @@ unicode_writer_write_str( return 0; maxchar = _PyUnicode_FindMaxChar(str, start, start + length); - if (unicode_writer_prepare(writer, length, maxchar) == -1) - return -1; - + if (_PyUnicodeWriter_prepare_inline(writer, length, maxchar) == -1) + return -1; + + assert(writer->buffer != NULL); assert((writer->pos + length) <= PyUnicode_GET_LENGTH(writer->buffer)); copy_characters(writer->buffer, writer->pos, str, start, length); @@ -13300,31 +13321,96 @@ unicode_writer_write_str( return 0; } +int +_PyUnicodeWriter_write_substr( + _PyUnicodeWriter *writer, + PyObject *str, Py_ssize_t start, Py_ssize_t length) +{ + return 
_PyUnicodeWriter_write_substr_inline(writer, str, start, length); +} + Py_LOCAL_INLINE(int) -unicode_writer_write_char( - unicode_writer_t *writer, +_PyUnicodeWriter_write_str_inline( + _PyUnicodeWriter *writer, + PyObject *str) +{ + Py_UCS4 maxchar; + Py_ssize_t length; + + assert(str != NULL); + assert(PyUnicode_Check(str)); + if (PyUnicode_READY(str) == -1) + return -1; + length = PyUnicode_GET_LENGTH(str); + if (length == 0) + return 0; + + maxchar = PyUnicode_MAX_CHAR_VALUE(str); + if (_PyUnicodeWriter_prepare_inline(writer, length, maxchar) == -1) + return -1; + + assert(writer->buffer != NULL); + assert((writer->pos + length) <= PyUnicode_GET_LENGTH(writer->buffer)); + copy_characters(writer->buffer, writer->pos, + str, 0, length); + writer->pos += length; + return 0; +} + +int +_PyUnicodeWriter_write_str( + _PyUnicodeWriter *writer, + PyObject *str) +{ + return _PyUnicodeWriter_write_str_inline(writer, str); +} + +Py_LOCAL_INLINE(int) +_PyUnicodeWriter_write_char_inline( + _PyUnicodeWriter *writer, Py_UCS4 ch) { - if (unicode_writer_prepare(writer, 1, ch) == -1) - return -1; + if (_PyUnicodeWriter_prepare_inline(writer, 1, ch) == -1) + return -1; + assert(writer->buffer != NULL); assert((writer->pos + 1) <= PyUnicode_GET_LENGTH(writer->buffer)); PyUnicode_WRITE(writer->kind, writer->data, writer->pos, ch); writer->pos += 1; return 0; } -Py_LOCAL(PyObject *) -unicode_writer_finish(unicode_writer_t *writer) -{ +int +_PyUnicodeWriter_write_char( + _PyUnicodeWriter *writer, + Py_UCS4 ch) +{ + return _PyUnicodeWriter_write_char_inline(writer, ch); +} + +/* Use the inlined version in unicodeobject.c */ +#define _PyUnicodeWriter_prepare _PyUnicodeWriter_prepare_inline +#define _PyUnicodeWriter_write_substr _PyUnicodeWriter_write_substr_inline +#define _PyUnicodeWriter_write_str _PyUnicodeWriter_write_str_inline +#define _PyUnicodeWriter_write_char _PyUnicodeWriter_write_char_inline + +PyObject * +_PyUnicodeWriter_finish(_PyUnicodeWriter *writer) +{ + if 
(writer->buffer == NULL) { + assert(writer->pos == 0); + Py_INCREF(unicode_empty); + return unicode_empty; + } if (PyUnicode_Resize(&writer->buffer, writer->pos) < 0) { Py_DECREF(writer->buffer); return NULL; } + assert(_PyUnicode_CheckConsistency(writer->buffer, 1)); return writer->buffer; } -Py_LOCAL(void) -unicode_writer_dealloc(unicode_writer_t *writer) +void +_PyUnicodeWriter_dealloc(_PyUnicodeWriter *writer) { Py_CLEAR(writer->buffer); } @@ -13346,14 +13432,27 @@ The substitutions are identified by brac static PyObject * unicode__format__(PyObject* self, PyObject* args) { - PyObject *format_spec, *out; + PyObject *format_spec; + _PyUnicodeWriter writer; + int ret; + Py_ssize_t len; if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) return NULL; - out = _PyUnicode_FormatAdvanced(self, format_spec, 0, - PyUnicode_GET_LENGTH(format_spec)); - return out; + if (PyUnicode_READY(self) == -1) + return NULL; + len = PyUnicode_GET_LENGTH(self); + if (_PyUnicodeWriter_init(&writer, len + 100, 127) == -1) + return NULL; + ret = _PyUnicode_FormatWriter(self, format_spec, 0, + PyUnicode_GET_LENGTH(format_spec), + &writer); + if (ret == -1) { + _PyUnicodeWriter_dealloc(&writer); + return NULL; + } + return _PyUnicodeWriter_finish(&writer); } PyDoc_STRVAR(p_format__doc__, @@ -13789,7 +13888,7 @@ PyUnicode_Format(PyObject *format, PyObj PyObject *uformat; void *fmt; enum PyUnicode_Kind kind, fmtkind; - unicode_writer_t writer; + _PyUnicodeWriter writer; if (format == NULL || args == NULL) { PyErr_BadInternalCall(); @@ -13806,7 +13905,7 @@ PyUnicode_Format(PyObject *format, PyObj fmtcnt = PyUnicode_GET_LENGTH(uformat); fmtpos = 0; - if (unicode_writer_init(&writer, fmtcnt + 100, 127) < 0) + if (_PyUnicodeWriter_init(&writer, fmtcnt + 100, 127) < 0) goto onError; if (PyTuple_Check(args)) { @@ -13832,7 +13931,7 @@ PyUnicode_Format(PyObject *format, PyObj } if (fmtcnt < 0) fmtpos--; - if (unicode_writer_write_str(&writer, uformat, nonfmtpos, fmtpos - nonfmtpos) < 0) + if 
(_PyUnicodeWriter_write_substr(&writer, uformat, nonfmtpos, fmtpos - nonfmtpos) < 0) goto onError; } else { @@ -13990,7 +14089,7 @@ PyUnicode_Format(PyObject *format, PyObj } if (c == '%') { - if (unicode_writer_write_char(&writer, '%') < 0) + if (_PyUnicodeWriter_write_char(&writer, '%') < 0) goto onError; continue; } @@ -14127,7 +14226,7 @@ PyUnicode_Format(PyObject *format, PyObj width = len; if (sign) { if (fill != ' ') { - if (unicode_writer_write_char(&writer, signchar) < 0) + if (_PyUnicodeWriter_write_char(&writer, signchar) < 0) goto onError; } if (width > len) @@ -14137,7 +14236,7 @@ PyUnicode_Format(PyObject *format, PyObj assert(PyUnicode_READ(kind, pbuf, pindex) == '0'); assert(PyUnicode_READ(kind, pbuf, pindex + 1) == c); if (fill != ' ') { - if (unicode_writer_prepare(&writer, 2, 127) < 0) + if (_PyUnicodeWriter_prepare(&writer, 2, 127) < 0) goto onError; PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '0'); PyUnicode_WRITE(writer.kind, writer.data, writer.pos+1, c); @@ -14152,7 +14251,7 @@ PyUnicode_Format(PyObject *format, PyObj if (width > len && !(flags & F_LJUST)) { Py_ssize_t sublen; sublen = width - len; - if (unicode_writer_prepare(&writer, sublen, fill) < 0) + if (_PyUnicodeWriter_prepare(&writer, sublen, fill) < 0) goto onError; FILL(writer.kind, writer.data, fill, writer.pos, sublen); writer.pos += sublen; @@ -14160,14 +14259,14 @@ PyUnicode_Format(PyObject *format, PyObj } if (fill == ' ') { if (sign) { - if (unicode_writer_write_char(&writer, signchar) < 0) + if (_PyUnicodeWriter_write_char(&writer, signchar) < 0) goto onError; } if ((flags & F_ALT) && (c == 'x' || c == 'X' || c == 'o')) { assert(PyUnicode_READ(kind, pbuf, pindex) == '0'); assert(PyUnicode_READ(kind, pbuf, pindex+1) == c); - if (unicode_writer_prepare(&writer, 2, 127) < 0) + if (_PyUnicodeWriter_prepare(&writer, 2, 127) < 0) goto onError; PyUnicode_WRITE(writer.kind, writer.data, writer.pos, '0'); PyUnicode_WRITE(writer.kind, writer.data, writer.pos+1, c); @@ 
-14178,11 +14277,11 @@ PyUnicode_Format(PyObject *format, PyObj } /* Copy all characters, preserving len */ - if (unicode_writer_write_str(&writer, temp, pindex, len) < 0) + if (_PyUnicodeWriter_write_substr(&writer, temp, pindex, len) < 0) goto onError; if (width > len) { Py_ssize_t sublen = width - len; - if (unicode_writer_prepare(&writer, sublen, ' ') < 0) + if (_PyUnicodeWriter_prepare(&writer, sublen, ' ') < 0) goto onError; FILL(writer.kind, writer.data, ' ', writer.pos, sublen); writer.pos += sublen; @@ -14207,13 +14306,13 @@ PyUnicode_Format(PyObject *format, PyObj Py_DECREF(uformat); Py_XDECREF(temp); Py_XDECREF(second); - return unicode_writer_finish(&writer); + return _PyUnicodeWriter_finish(&writer); onError: Py_DECREF(uformat); Py_XDECREF(temp); Py_XDECREF(second); - unicode_writer_dealloc(&writer); + _PyUnicodeWriter_dealloc(&writer); if (args_owned) { Py_DECREF(args); } diff -r 01581e8b50f2 Python/formatter_unicode.c --- a/Python/formatter_unicode.c Mon May 07 23:50:05 2012 +0200 +++ b/Python/formatter_unicode.c Tue May 08 00:07:29 2012 +0200 @@ -316,21 +316,31 @@ calc_padding(Py_ssize_t nchars, Py_ssize /* Do the padding, and return a pointer to where the caller-supplied content goes. */ static Py_ssize_t -fill_padding(PyObject *s, Py_ssize_t start, Py_ssize_t nchars, +fill_padding(_PyUnicodeWriter *writer, Py_ssize_t start, Py_ssize_t nchars, Py_UCS4 fill_char, Py_ssize_t n_lpadding, Py_ssize_t n_rpadding) { + Py_ssize_t pos, r; + /* Pad on left. */ - if (n_lpadding) - PyUnicode_Fill(s, start, start + n_lpadding, fill_char); + if (n_lpadding) { + pos = writer->pos + start; + r = PyUnicode_Fill(writer->buffer, pos, n_lpadding, fill_char); + if (r == -1) + return -1; + } /* Pad on right. 
*/ - if (n_rpadding) - PyUnicode_Fill(s, start + nchars + n_lpadding, - start + nchars + n_lpadding + n_rpadding, fill_char); + if (n_rpadding) { + pos = writer->pos + start + nchars + n_lpadding; + r = PyUnicode_Fill(writer->buffer, pos, n_rpadding, fill_char); + if (r == -1) + return -1; + } /* Pointer to the user content. */ - return start + n_lpadding; + writer->pos += (start + n_lpadding); + return 0; } /************************************************************************/ @@ -541,7 +551,7 @@ calc_number_widths(NumberFieldWidths *sp as determined in calc_number_widths(). Return -1 on error, or 0 on success. */ static int -fill_number(PyObject *out, Py_ssize_t pos, const NumberFieldWidths *spec, +fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec, PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end, PyObject *prefix, Py_ssize_t p_start, Py_UCS4 fill_char, @@ -549,36 +559,41 @@ fill_number(PyObject *out, Py_ssize_t po { /* Used to keep track of digits, decimal, and remainder. 
*/ Py_ssize_t d_pos = d_start; - unsigned int kind = PyUnicode_KIND(out); - void *data = PyUnicode_DATA(out); + const unsigned int kind = writer->kind; + const void *data = writer->data; Py_ssize_t r; if (spec->n_lpadding) { - PyUnicode_Fill(out, pos, pos + spec->n_lpadding, fill_char); - pos += spec->n_lpadding; + r = PyUnicode_Fill(writer->buffer, writer->pos, spec->n_lpadding, fill_char); + if (r == -1) + return -1; + writer->pos += r; } if (spec->n_sign == 1) { - PyUnicode_WRITE(kind, data, pos++, spec->sign); + if (_PyUnicodeWriter_write_char(writer, spec->sign) == -1) + return -1; } if (spec->n_prefix) { - if (PyUnicode_CopyCharacters(out, pos, + if (PyUnicode_CopyCharacters(writer->buffer, writer->pos, prefix, p_start, spec->n_prefix) < 0) return -1; if (toupper) { Py_ssize_t t; for (t = 0; t < spec->n_prefix; t++) { - Py_UCS4 c = PyUnicode_READ(kind, data, pos + t); + Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t); c = Py_TOUPPER(c); assert (c <= 127); - PyUnicode_WRITE(kind, data, pos + t, c); + PyUnicode_WRITE(kind, data, writer->pos + t, c); } } - pos += spec->n_prefix; + writer->pos += spec->n_prefix; } if (spec->n_spadding) { - PyUnicode_Fill(out, pos, pos + spec->n_spadding, fill_char); - pos += spec->n_spadding; + r = PyUnicode_Fill(writer->buffer, writer->pos, spec->n_spadding, fill_char); + if (r == -1) + return -1; + writer->pos += r; } /* Only for type 'c' special case, it has no digits. 
*/ @@ -594,7 +609,7 @@ fill_number(PyObject *out, Py_ssize_t po return -1; } r = _PyUnicode_InsertThousandsGrouping( - out, pos, + writer->buffer, writer->pos, spec->n_grouped_digits, pdigits + kind * d_pos, spec->n_digits, spec->n_min_width, @@ -609,34 +624,34 @@ fill_number(PyObject *out, Py_ssize_t po if (toupper) { Py_ssize_t t; for (t = 0; t < spec->n_grouped_digits; t++) { - Py_UCS4 c = PyUnicode_READ(kind, data, pos + t); + Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t); c = Py_TOUPPER(c); if (c > 127) { PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit"); return -1; } - PyUnicode_WRITE(kind, data, pos + t, c); + PyUnicode_WRITE(kind, data, writer->pos + t, c); } } - pos += spec->n_grouped_digits; + writer->pos += spec->n_grouped_digits; if (spec->n_decimal) { - if (PyUnicode_CopyCharacters(out, pos, locale->decimal_point, 0, spec->n_decimal) < 0) + if (PyUnicode_CopyCharacters(writer->buffer, writer->pos, locale->decimal_point, 0, spec->n_decimal) < 0) return -1; - pos += spec->n_decimal; + writer->pos += spec->n_decimal; d_pos += 1; } if (spec->n_remainder) { - if (PyUnicode_CopyCharacters(out, pos, digits, d_pos, spec->n_remainder) < 0) + if (PyUnicode_CopyCharacters(writer->buffer, writer->pos, digits, d_pos, spec->n_remainder) < 0) return -1; - pos += spec->n_remainder; + writer->pos += spec->n_remainder; d_pos += spec->n_remainder; } if (spec->n_rpadding) { - PyUnicode_Fill(out, pos, pos + spec->n_rpadding, fill_char); - pos += spec->n_rpadding; + PyUnicode_Fill(writer->buffer, writer->pos, spec->n_rpadding, fill_char); + writer->pos += spec->n_rpadding; } return 0; } @@ -707,17 +722,20 @@ free_locale_info(LocaleInfo *locale_info /*********** string formatting ******************************************/ /************************************************************************/ -static PyObject * -format_string_internal(PyObject *value, const InternalFormatSpec *format) +static int +format_string_internal(PyObject 
*value, const InternalFormatSpec *format, + _PyUnicodeWriter *writer) { Py_ssize_t lpad; Py_ssize_t rpad; Py_ssize_t total; - Py_ssize_t pos; - Py_ssize_t len = PyUnicode_GET_LENGTH(value); - PyObject *result = NULL; + Py_ssize_t len; + int result = -1; Py_UCS4 maxchar; + assert(PyUnicode_IS_READY(value)); + len = PyUnicode_GET_LENGTH(value); + /* sign is not allowed on strings */ if (format->sign != '\0') { PyErr_SetString(PyExc_ValueError, @@ -754,21 +772,23 @@ format_string_internal(PyObject *value, maxchar = Py_MAX(maxchar, format->fill_char); /* allocate the resulting string */ - result = PyUnicode_New(total, maxchar); - if (result == NULL) + if (_PyUnicodeWriter_prepare(writer, total, maxchar) == -1) goto done; /* Write into that space. First the padding. */ - pos = fill_padding(result, 0, len, - format->fill_char=='\0'?' ':format->fill_char, - lpad, rpad); + result = fill_padding(writer, 0, len, + format->fill_char=='\0'?' ':format->fill_char, + lpad, rpad); + if (result == -1) + goto done; /* Then the source string. 
*/ - if (PyUnicode_CopyCharacters(result, pos, value, 0, len) < 0) - Py_CLEAR(result); + if (_PyUnicodeWriter_write_substr(writer, value, 0, len) == -1) + goto done; + writer->pos += rpad; + result = 0; done: - assert(!result || _PyUnicode_CheckConsistency(result, 1)); return result; } @@ -778,13 +798,12 @@ done: /************************************************************************/ typedef PyObject* -(*IntOrLongToString)(PyObject *value, int base); +(*IntOrLongToString)(PyObject *value, int base, _PyUnicodeWriter *writer); -static PyObject * -format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, - IntOrLongToString tostring) +static int +format_long_internal(PyObject *value, const InternalFormatSpec *format, _PyUnicodeWriter *writer) { - PyObject *result = NULL; + int result = -1; Py_UCS4 maxchar = 127; PyObject *tmp = NULL; Py_ssize_t inumeric_chars; @@ -798,7 +817,6 @@ format_int_or_long_internal(PyObject *va Py_ssize_t prefix = 0; NumberFieldWidths spec; long x; - int err; /* Locale settings, either from the actual locale or from a hard-code pseudo-locale */ @@ -878,7 +896,7 @@ format_int_or_long_internal(PyObject *va n_prefix = leading_chars_to_skip; /* Do the hard part, converting to a string in a given base */ - tmp = tostring(value, base); + tmp = _PyLong_Format(value, base); if (tmp == NULL || PyUnicode_READY(tmp) == -1) goto done; @@ -914,23 +932,19 @@ format_int_or_long_internal(PyObject *va &locale, format, &maxchar); /* Allocate the memory. */ - result = PyUnicode_New(n_total, maxchar); - if (!result) + if (_PyUnicodeWriter_prepare(writer, n_total, maxchar) == -1) goto done; /* Populate the memory. */ - err = fill_number(result, 0, &spec, - tmp, inumeric_chars, inumeric_chars + n_digits, - tmp, prefix, - format->fill_char == '\0' ? 
' ' : format->fill_char, - &locale, format->type == 'X'); - if (err) - Py_CLEAR(result); + result = fill_number(writer, &spec, + tmp, inumeric_chars, inumeric_chars + n_digits, + tmp, prefix, + format->fill_char == '\0' ? ' ' : format->fill_char, + &locale, format->type == 'X'); done: Py_XDECREF(tmp); free_locale_info(&locale); - assert(!result || _PyUnicode_CheckConsistency(result, 1)); return result; } @@ -945,9 +959,10 @@ strtounicode(char *charbuffer, Py_ssize_ } /* much of this is taken from unicodeobject.c */ -static PyObject * +static int format_float_internal(PyObject *value, - const InternalFormatSpec *format) + const InternalFormatSpec *format, + _PyUnicodeWriter *writer) { char *buf = NULL; /* buffer returned from PyOS_double_to_string */ Py_ssize_t n_digits; @@ -962,12 +977,11 @@ format_float_internal(PyObject *value, Py_ssize_t index; NumberFieldWidths spec; int flags = 0; - PyObject *result = NULL; + int result = -1; Py_UCS4 maxchar = 127; Py_UCS4 sign_char = '\0'; int float_type; /* Used to see if we have a nan, inf, or regular float. */ PyObject *unicode_tmp = NULL; - int err; /* Locale settings, either from the actual locale or from a hard-code pseudo-locale */ @@ -1055,24 +1069,20 @@ format_float_internal(PyObject *value, &locale, format, &maxchar); /* Allocate the memory. */ - result = PyUnicode_New(n_total, maxchar); - if (result == NULL) + if (_PyUnicodeWriter_prepare(writer, n_total, maxchar) == -1) goto done; /* Populate the memory. */ - err = fill_number(result, 0, &spec, - unicode_tmp, index, index + n_digits, - NULL, 0, - format->fill_char == '\0' ? ' ' : format->fill_char, - &locale, 0); - if (err) - Py_CLEAR(result); + result = fill_number(writer, &spec, + unicode_tmp, index, index + n_digits, + NULL, 0, + format->fill_char == '\0' ? 
' ' : format->fill_char, + &locale, 0); done: PyMem_Free(buf); Py_DECREF(unicode_tmp); free_locale_info(&locale); - assert(!result || _PyUnicode_CheckConsistency(result, 1)); return result; } @@ -1080,9 +1090,10 @@ done: /*********** complex formatting *****************************************/ /************************************************************************/ -static PyObject * +static int format_complex_internal(PyObject *value, - const InternalFormatSpec *format) + const InternalFormatSpec *format, + _PyUnicodeWriter *writer) { double re; double im; @@ -1106,11 +1117,8 @@ format_complex_internal(PyObject *value, NumberFieldWidths re_spec; NumberFieldWidths im_spec; int flags = 0; - PyObject *result = NULL; + int result = -1; Py_UCS4 maxchar = 127; - int rkind; - void *rdata; - Py_ssize_t index; Py_UCS4 re_sign_char = '\0'; Py_UCS4 im_sign_char = '\0'; int re_float_type; /* Used to see if we have a nan, inf, or regular float. */ @@ -1122,7 +1130,6 @@ format_complex_internal(PyObject *value, Py_ssize_t total; PyObject *re_unicode_tmp = NULL; PyObject *im_unicode_tmp = NULL; - int err; /* Locale settings, either from the actual locale or from a hard-code pseudo-locale */ @@ -1261,47 +1268,47 @@ format_complex_internal(PyObject *value, if (lpad || rpad) maxchar = Py_MAX(maxchar, format->fill_char); - result = PyUnicode_New(total, maxchar); - if (result == NULL) + if (_PyUnicodeWriter_prepare(writer, total, maxchar) == -1) goto done; - rkind = PyUnicode_KIND(result); - rdata = PyUnicode_DATA(result); /* Populate the memory. First, the padding. */ - index = fill_padding(result, 0, - n_re_total + n_im_total + 1 + add_parens * 2, - format->fill_char=='\0' ? ' ' : format->fill_char, - lpad, rpad); + result = fill_padding(writer, 0, + n_re_total + n_im_total + 1 + add_parens * 2, + format->fill_char=='\0' ? 
' ' : format->fill_char, + lpad, rpad); + if (result == -1) + goto done; - if (add_parens) - PyUnicode_WRITE(rkind, rdata, index++, '('); + if (add_parens) { + if (_PyUnicodeWriter_write_char(writer, '(') == -1) + goto done; + } if (!skip_re) { - err = fill_number(result, index, &re_spec, - re_unicode_tmp, i_re, i_re + n_re_digits, - NULL, 0, - 0, - &locale, 0); - if (err) { - Py_CLEAR(result); + result = fill_number(writer, &re_spec, + re_unicode_tmp, i_re, i_re + n_re_digits, + NULL, 0, + 0, + &locale, 0); + if (result == -1) goto done; - } - index += n_re_total; } - err = fill_number(result, index, &im_spec, - im_unicode_tmp, i_im, i_im + n_im_digits, - NULL, 0, - 0, - &locale, 0); - if (err) { - Py_CLEAR(result); + result = fill_number(writer, &im_spec, + im_unicode_tmp, i_im, i_im + n_im_digits, + NULL, 0, + 0, + &locale, 0); + if (result == -1) goto done; + if (_PyUnicodeWriter_write_char(writer, 'j') == -1) + goto done; + + if (add_parens) { + if (_PyUnicodeWriter_write_char(writer, ')') == -1) + goto done; } - index += n_im_total; - PyUnicode_WRITE(rkind, rdata, index++, 'j'); - - if (add_parens) - PyUnicode_WRITE(rkind, rdata, index++, ')'); + writer->pos += rpad; + result = 0; done: PyMem_Free(re_buf); @@ -1309,60 +1316,67 @@ done: Py_XDECREF(re_unicode_tmp); Py_XDECREF(im_unicode_tmp); free_locale_info(&locale); - assert(!result || _PyUnicode_CheckConsistency(result, 1)); return result; } /************************************************************************/ /*********** built in formatters ****************************************/ /************************************************************************/ -PyObject * -_PyUnicode_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, Py_ssize_t end) +int +_PyUnicode_FormatWriter(PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, Py_ssize_t end, + _PyUnicodeWriter *writer) { InternalFormatSpec format; - PyObject *result; + int result; /* check for the special case of zero 
length format spec, make it equivalent to str(obj) */ - if (start == end) - return PyObject_Str(obj); + if (start == end) { + PyObject *str = PyObject_Str(obj); + if (str == NULL) + return -1; + result = _PyUnicodeWriter_write_str(writer, str); + Py_DECREF(str); + return result; + } /* parse the format_spec */ if (!parse_internal_render_format_spec(format_spec, start, end, &format, 's', '<')) - return NULL; + return -1; /* type conversion? */ switch (format.type) { case 's': /* no type conversion needed, already a string. do the formatting */ - result = format_string_internal(obj, &format); - if (result != NULL) - assert(_PyUnicode_CheckConsistency(result, 1)); + result = format_string_internal(obj, &format, writer); break; default: /* unknown */ unknown_presentation_type(format.type, obj->ob_type->tp_name); - result = NULL; + result = -1; } return result; } -static PyObject* -format_int_or_long(PyObject* obj, PyObject* format_spec, - Py_ssize_t start, Py_ssize_t end, - IntOrLongToString tostring) +int +_PyLong_FormatWriter(PyObject* obj, PyObject* format_spec, + Py_ssize_t start, Py_ssize_t end, + _PyUnicodeWriter *writer) { - PyObject *result = NULL; - PyObject *tmp = NULL; + PyObject *tmp = NULL, *str = NULL; InternalFormatSpec format; + int result = -1; /* check for the special case of zero length format spec, make it equivalent to str(obj) */ if (start == end) { - result = PyObject_Str(obj); + str = PyObject_Str(obj); /* function-scope str, released at done */ + if (str == NULL) + goto done; + result = _PyUnicodeWriter_write_str(writer, str); goto done; } @@ -1382,7 +1396,7 @@ format_int_or_long(PyObject* obj, PyObje case 'n': /* no type conversion needed, already an int (or long). 
do the formatting */ - result = format_int_or_long_internal(obj, &format, tostring); + result = format_long_internal(obj, &format, writer); break; case 'e': @@ -1396,7 +1410,7 @@ format_int_or_long(PyObject* obj, PyObje tmp = PyNumber_Float(obj); if (tmp == NULL) goto done; - result = format_float_internal(tmp, &format); + result = format_float_internal(tmp, &format, writer); break; default: @@ -1407,41 +1421,34 @@ format_int_or_long(PyObject* obj, PyObje done: Py_XDECREF(tmp); + Py_XDECREF(str); return result; } -/* Need to define long_format as a function that will convert a long - to a string. In 3.0, _PyLong_Format has the correct signature. */ -#define long_format _PyLong_Format - -PyObject * -_PyLong_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, Py_ssize_t end) +int +_PyFloat_FormatWriter(PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, Py_ssize_t end, + _PyUnicodeWriter *writer) { - return format_int_or_long(obj, format_spec, start, end, - long_format); -} - -PyObject * -_PyFloat_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, Py_ssize_t end) -{ - PyObject *result = NULL; + int result; InternalFormatSpec format; /* check for the special case of zero length format spec, make it equivalent to str(obj) */ if (start == end) { - result = PyObject_Str(obj); - goto done; + PyObject *str = PyObject_Str(obj); + if (str == NULL) + return -1; + result = _PyUnicodeWriter_write_str(writer, str); + Py_DECREF(str); + return result; } /* parse the format_spec */ if (!parse_internal_render_format_spec(format_spec, start, end, &format, '\0', '>')) - goto done; + return -1; /* type conversion? */ switch (format.type) { @@ -1455,38 +1462,39 @@ _PyFloat_FormatAdvanced(PyObject *obj, case 'n': case '%': /* no conversion, already a float. 
do the formatting */ - result = format_float_internal(obj, &format); - break; + return format_float_internal(obj, &format, writer); default: /* unknown */ unknown_presentation_type(format.type, obj->ob_type->tp_name); - goto done; + return -1; } - -done: - return result; } -PyObject * -_PyComplex_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, Py_ssize_t end) +int +_PyComplex_FormatWriter(PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, Py_ssize_t end, + _PyUnicodeWriter *writer) { - PyObject *result = NULL; + int result = -1; InternalFormatSpec format; /* check for the special case of zero length format spec, make it equivalent to str(obj) */ if (start == end) { - result = PyObject_Str(obj); - goto done; + PyObject *str = PyObject_Str(obj); + if (str == NULL) + return -1; + result = _PyUnicodeWriter_write_str(writer, str); + Py_DECREF(str); + return result; } /* parse the format_spec */ if (!parse_internal_render_format_spec(format_spec, start, end, &format, '\0', '>')) - goto done; + return -1; /* type conversion? */ switch (format.type) { @@ -1499,15 +1507,12 @@ _PyComplex_FormatAdvanced(PyObject *obj, case 'G': case 'n': /* no conversion, already a complex. do the formatting */ - result = format_complex_internal(obj, &format); - break; + result = format_complex_internal(obj, &format, writer); + return result; default: /* unknown */ unknown_presentation_type(format.type, obj->ob_type->tp_name); - goto done; + return -1; } - -done: - return result; }