diff -r f4f2139202c5 Include/complexobject.h --- a/Include/complexobject.h Wed May 23 23:17:22 2012 +0200 +++ b/Include/complexobject.h Wed May 23 23:45:27 2012 +0200 @@ -63,10 +63,12 @@ PyAPI_FUNC(Py_complex) PyComplex_AsCComp /* Format the object based on the format_spec, as defined in PEP 3101 (Advanced String Formatting). */ #ifndef Py_LIMITED_API -PyAPI_FUNC(PyObject *) _PyComplex_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, - Py_ssize_t end); +PyAPI_FUNC(int) _PyComplex_FormatAdvancedWriter( + PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, + Py_ssize_t end, + _PyUnicodeWriter *writer); #endif #ifdef __cplusplus diff -r f4f2139202c5 Include/floatobject.h --- a/Include/floatobject.h Wed May 23 23:17:22 2012 +0200 +++ b/Include/floatobject.h Wed May 23 23:45:27 2012 +0200 @@ -112,10 +112,12 @@ PyAPI_FUNC(int) PyFloat_ClearFreeList(vo /* Format the object based on the format_spec, as defined in PEP 3101 (Advanced String Formatting). */ -PyAPI_FUNC(PyObject *) _PyFloat_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, - Py_ssize_t end); +PyAPI_FUNC(int) _PyFloat_FormatAdvancedWriter( + PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, + Py_ssize_t end, + _PyUnicodeWriter *writer); #endif /* Py_LIMITED_API */ #ifdef __cplusplus diff -r f4f2139202c5 Include/longobject.h --- a/Include/longobject.h Wed May 23 23:17:22 2012 +0200 +++ b/Include/longobject.h Wed May 23 23:45:27 2012 +0200 @@ -153,12 +153,20 @@ PyAPI_FUNC(int) _PyLong_AsByteArray(PyLo appending a base prefix of 0[box] if base is 2, 8 or 16. */ PyAPI_FUNC(PyObject *) _PyLong_Format(PyObject *aa, int base); +PyAPI_FUNC(int) _PyLong_FormatWriter( + PyObject *aa, + int base, + int alternate, + _PyUnicodeWriter *writer); + /* Format the object based on the format_spec, as defined in PEP 3101 (Advanced String Formatting). 
*/ -PyAPI_FUNC(PyObject *) _PyLong_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, - Py_ssize_t end); +PyAPI_FUNC(int) _PyLong_FormatAdvancedWriter( + PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, + Py_ssize_t end, + _PyUnicodeWriter *writer); #endif /* Py_LIMITED_API */ /* These aren't really part of the long object, but they're handy. The diff -r f4f2139202c5 Include/unicodeobject.h --- a/Include/unicodeobject.h Wed May 23 23:17:22 2012 +0200 +++ b/Include/unicodeobject.h Wed May 23 23:45:27 2012 +0200 @@ -648,6 +648,17 @@ PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCha Py_ssize_t from_start, Py_ssize_t how_many ); + +/* Unsafe version of PyUnicode_CopyCharacters(): don't check + arguments and so may crash if parameters are invalid (e.g. if the output + string is too short). */ +PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters( + PyObject *to, + Py_ssize_t to_start, + PyObject *from, + Py_ssize_t from_start, + Py_ssize_t how_many + ); #endif /* Fill a string with a character: write fill_char into @@ -865,12 +876,68 @@ PyAPI_FUNC(PyObject *) PyUnicode_FromFor ); #ifndef Py_LIMITED_API +typedef struct { + PyObject *buffer; + void *data; + enum PyUnicode_Kind kind; + Py_UCS4 maxchar; + Py_ssize_t size; + Py_ssize_t pos; + /* minimum length of the buffer when overallocation is enabled, + see _PyUnicodeWriter_Init() */ + Py_ssize_t min_length; + struct { + unsigned char overallocate:1; + /* If readonly is 1, buffer is a shared string (cannot be modified) + and size is set to 0. */ + unsigned char readonly:1; + } flags; +} _PyUnicodeWriter ; + +/* Initialize a Unicode writer. + + min_length is used by _PyUnicodeWriter_Prepare() as the minimum length of + the buffer when overallocation is enabled (overallocate=1) */ +PyAPI_FUNC(void) +_PyUnicodeWriter_Init(_PyUnicodeWriter *writer, Py_ssize_t min_length); + +/* Prepare the buffer to write 'length' characters + with the specified maximum character. 
+ + Return 0 on success, raise an exception and return -1 on error. */ +#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \ + (((MAXCHAR) <= (WRITER)->maxchar \ + && (LENGTH) <= (WRITER)->size - (WRITER)->pos) \ + ? 0 \ + : (((LENGTH) == 0) \ + ? 0 \ + : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR)))) + +/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro + instead. */ +PyAPI_FUNC(int) +_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, + Py_ssize_t length, Py_UCS4 maxchar); + +PyAPI_FUNC(int) +_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str); + +PyAPI_FUNC(PyObject *) +_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer); + +PyAPI_FUNC(void) +_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer); +#endif + +#ifndef Py_LIMITED_API /* Format the object based on the format_spec, as defined in PEP 3101 (Advanced String Formatting). */ -PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, - Py_ssize_t end); +PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter( + PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, + Py_ssize_t end, + _PyUnicodeWriter *writer); #endif PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **); diff -r f4f2139202c5 Objects/complexobject.c --- a/Objects/complexobject.c Wed May 23 23:17:22 2012 +0200 +++ b/Objects/complexobject.c Wed May 23 23:45:27 2012 +0200 @@ -699,11 +699,22 @@ static PyObject * complex__format__(PyObject* self, PyObject* args) { PyObject *format_spec; + _PyUnicodeWriter writer; + int ret; if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) - return NULL; - return _PyComplex_FormatAdvanced(self, format_spec, 0, - PyUnicode_GET_LENGTH(format_spec)); + return NULL; + + _PyUnicodeWriter_Init(&writer, 0); + ret = _PyComplex_FormatAdvancedWriter( + self, + format_spec, 0, PyUnicode_GET_LENGTH(format_spec), + &writer); + if (ret == -1) { + _PyUnicodeWriter_Dealloc(&writer); + return NULL; + } 
+ return _PyUnicodeWriter_Finish(&writer); } #if 0 diff -r f4f2139202c5 Objects/floatobject.c --- a/Objects/floatobject.c Wed May 23 23:17:22 2012 +0200 +++ b/Objects/floatobject.c Wed May 23 23:45:27 2012 +0200 @@ -273,7 +273,8 @@ float_repr(PyFloatObject *v) NULL); if (!buf) return PyErr_NoMemory(); - result = PyUnicode_FromString(buf); + result = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, + buf, strlen(buf)); PyMem_Free(buf); return result; } @@ -1703,11 +1704,22 @@ static PyObject * float__format__(PyObject *self, PyObject *args) { PyObject *format_spec; + _PyUnicodeWriter writer; + int ret; if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) return NULL; - return _PyFloat_FormatAdvanced(self, format_spec, 0, - PyUnicode_GET_LENGTH(format_spec)); + + _PyUnicodeWriter_Init(&writer, 0); + ret = _PyFloat_FormatAdvancedWriter( + self, + format_spec, 0, PyUnicode_GET_LENGTH(format_spec), + &writer); + if (ret == -1) { + _PyUnicodeWriter_Dealloc(&writer); + return NULL; + } + return _PyUnicodeWriter_Finish(&writer); } PyDoc_STRVAR(float__format__doc, diff -r f4f2139202c5 Objects/longobject.c --- a/Objects/longobject.c Wed May 23 23:17:22 2012 +0200 +++ b/Objects/longobject.c Wed May 23 23:45:27 2012 +0200 @@ -1550,20 +1550,22 @@ divrem1(PyLongObject *a, digit n, digit string. (Return value is non-shared so that callers can modify the returned value if necessary.) 
*/ -static PyObject * -long_to_decimal_string(PyObject *aa) +static int +long_to_decimal_string_internal(PyObject *aa, + PyObject **p_output, + _PyUnicodeWriter *writer) { PyLongObject *scratch, *a; PyObject *str; Py_ssize_t size, strlen, size_a, i, j; digit *pout, *pin, rem, tenpow; - unsigned char *p; int negative; + enum PyUnicode_Kind kind; a = (PyLongObject *)aa; if (a == NULL || !PyLong_Check(a)) { PyErr_BadInternalCall(); - return NULL; + return -1; } size_a = ABS(Py_SIZE(a)); negative = Py_SIZE(a) < 0; @@ -1580,13 +1582,13 @@ long_to_decimal_string(PyObject *aa) if (size_a > PY_SSIZE_T_MAX / PyLong_SHIFT) { PyErr_SetString(PyExc_OverflowError, "long is too large to format"); - return NULL; + return -1; } /* the expression size_a * PyLong_SHIFT is now safe from overflow */ size = 1 + size_a * PyLong_SHIFT / (3 * _PyLong_DECIMAL_SHIFT); scratch = _PyLong_New(size); if (scratch == NULL) - return NULL; + return -1; /* convert array of base _PyLong_BASE digits in pin to an array of base _PyLong_DECIMAL_BASE digits in pout, following Knuth (TAOCP, @@ -1609,7 +1611,7 @@ long_to_decimal_string(PyObject *aa) /* check for keyboard interrupt */ SIGCHECK({ Py_DECREF(scratch); - return NULL; + return -1; }); } /* pout should have at least one digit, so that the case when a = 0 @@ -1625,65 +1627,115 @@ long_to_decimal_string(PyObject *aa) tenpow *= 10; strlen++; } - str = PyUnicode_New(strlen, '9'); - if (str == NULL) { - Py_DECREF(scratch); + if (writer) { + if (_PyUnicodeWriter_Prepare(writer, strlen, '9') == -1) { + Py_DECREF(scratch); /* don't leak the scratch digits on error */ + return -1; + } + kind = writer->kind; + str = NULL; + } + else { + str = PyUnicode_New(strlen, '9'); + if (str == NULL) { + Py_DECREF(scratch); + return -1; + } + kind = PyUnicode_KIND(str); + } + +#define WRITE_DIGITS(TYPE) \ + do { \ + if (writer) \ + p = (TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos + strlen; \ + else \ + p = (TYPE*)PyUnicode_DATA(str) + strlen; \ + \ + *p = '\0'; \ + /* pout[0] through pout[size-2] contribute exactly \
_PyLong_DECIMAL_SHIFT digits each */ \ + for (i=0; i < size - 1; i++) { \ + rem = pout[i]; \ + for (j = 0; j < _PyLong_DECIMAL_SHIFT; j++) { \ + *--p = '0' + rem % 10; \ + rem /= 10; \ + } \ + } \ + /* pout[size-1]: always produce at least one decimal digit */ \ + rem = pout[i]; \ + do { \ + *--p = '0' + rem % 10; \ + rem /= 10; \ + } while (rem != 0); \ + \ + /* and sign */ \ + if (negative) \ + *--p = '-'; \ + \ + /* check we've counted correctly */ \ + if (writer) \ + assert(p == ((TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos)); \ + else \ + assert(p == (TYPE*)PyUnicode_DATA(str)); \ + } while (0) + + /* fill the string right-to-left */ + if (kind == PyUnicode_1BYTE_KIND) { + Py_UCS1 *p; + WRITE_DIGITS(Py_UCS1); + } + else if (kind == PyUnicode_2BYTE_KIND) { + Py_UCS2 *p; + WRITE_DIGITS(Py_UCS2); + } + else { + Py_UCS4 *p; /* declare before any statement (C89) */ + assert (kind == PyUnicode_4BYTE_KIND); + WRITE_DIGITS(Py_UCS4); + } +#undef WRITE_DIGITS + + Py_DECREF(scratch); + if (writer) { + writer->pos += strlen; + } + else { + assert(_PyUnicode_CheckConsistency(str, 1)); + *p_output = (PyObject *)str; + } + return 0; +} + +static PyObject * +long_to_decimal_string(PyObject *aa) +{ + PyObject *v; + if (long_to_decimal_string_internal(aa, &v, NULL) == -1) return NULL; - } - - /* fill the string right-to-left */ - assert(PyUnicode_KIND(str) == PyUnicode_1BYTE_KIND); - p = PyUnicode_1BYTE_DATA(str) + strlen; - *p = '\0'; - /* pout[0] through pout[size-2] contribute exactly - _PyLong_DECIMAL_SHIFT digits each */ - for (i=0; i < size - 1; i++) { - rem = pout[i]; - for (j = 0; j < _PyLong_DECIMAL_SHIFT; j++) { - *--p = '0' + rem % 10; - rem /= 10; - } - } - /* pout[size-1]: always produce at least one decimal digit */ - rem = pout[i]; - do { - *--p = '0' + rem % 10; - rem /= 10; - } while (rem != 0); - - /* and sign */ - if (negative) - *--p = '-'; - - /* check we've counted correctly */ - assert(p == PyUnicode_1BYTE_DATA(str)); - assert(_PyUnicode_CheckConsistency(str, 1)); - Py_DECREF(scratch); -
return (PyObject *)str; + return v; } /* Convert a long int object to a string, using a given conversion base, - which should be one of 2, 8, 10 or 16. Return a string object. - If base is 2, 8 or 16, add the proper prefix '0b', '0o' or '0x'. */ - -PyObject * -_PyLong_Format(PyObject *aa, int base) + which should be one of 2, 8 or 16. Return a string object. + If base is 2, 8 or 16, add the proper prefix '0b', '0o' or '0x' + if alternate is nonzero. */ + +static int +long_format_binary(PyObject *aa, int base, int alternate, + PyObject **p_output, _PyUnicodeWriter *writer) { register PyLongObject *a = (PyLongObject *)aa; PyObject *v; Py_ssize_t sz; Py_ssize_t size_a; - Py_UCS1 *p; + enum PyUnicode_Kind kind; int negative; int bits; - assert(base == 2 || base == 8 || base == 10 || base == 16); - if (base == 10) - return long_to_decimal_string((PyObject *)a); - + assert(base == 2 || base == 8 || base == 16); if (a == NULL || !PyLong_Check(a)) { PyErr_BadInternalCall(); - return NULL; + return -1; } size_a = ABS(Py_SIZE(a)); negative = Py_SIZE(a) < 0; @@ -1706,7 +1756,7 @@ _PyLong_Format(PyObject *aa, int base) /* Compute exact length 'sz' of output string. */ if (size_a == 0) { - sz = 3; + sz = 1; } else { Py_ssize_t size_a_in_bits; @@ -1714,56 +1764,126 @@ _PyLong_Format(PyObject *aa, int base) if (size_a > (PY_SSIZE_T_MAX - 3) / PyLong_SHIFT) { PyErr_SetString(PyExc_OverflowError, "int is too large to format"); - return NULL; + return -1; } size_a_in_bits = (size_a - 1) * PyLong_SHIFT + bits_in_digit(a->ob_digit[size_a - 1]); - /* Allow 2 characters for prefix and 1 for a '-' sign. */ - sz = 2 + negative + (size_a_in_bits + (bits - 1)) / bits; - } - - v = PyUnicode_New(sz, 'x'); - if (v == NULL) { + /* Allow 1 character for a '-' sign. 
*/ + sz = negative + (size_a_in_bits + (bits - 1)) / bits; + } + if (alternate) { + /* 2 characters for prefix */ + sz += 2; + } + + if (writer) { + if (_PyUnicodeWriter_Prepare(writer, sz, 'x') == -1) + return -1; + kind = writer->kind; + v = NULL; + } + else { + v = PyUnicode_New(sz, 'x'); + if (v == NULL) + return -1; + kind = PyUnicode_KIND(v); + } + +#define WRITE_DIGITS(TYPE) \ + do { \ + if (writer) \ + p = (TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos + sz; \ + else \ + p = (TYPE*)PyUnicode_DATA(v) + sz; \ + \ + if (size_a == 0) { \ + *--p = '0'; \ + } \ + else { \ + /* JRH: special case for power-of-2 bases */ \ + twodigits accum = 0; \ + int accumbits = 0; /* # of bits in accum */ \ + Py_ssize_t i; \ + for (i = 0; i < size_a; ++i) { \ + accum |= (twodigits)a->ob_digit[i] << accumbits; \ + accumbits += PyLong_SHIFT; \ + assert(accumbits >= bits); \ + do { \ + char cdigit; \ + cdigit = (char)(accum & (base - 1)); \ + cdigit += (cdigit < 10) ? '0' : 'a'-10; \ + *--p = cdigit; \ + accumbits -= bits; \ + accum >>= bits; \ + } while (i < size_a-1 ? 
accumbits >= bits : accum > 0); \ + } \ + } \ + \ + if (alternate) { \ + if (base == 16) \ + *--p = 'x'; \ + else if (base == 8) \ + *--p = 'o'; \ + else /* (base == 2) */ \ + *--p = 'b'; \ + *--p = '0'; \ + } \ + if (negative) \ + *--p = '-'; \ + if (writer) \ + assert(p == ((TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos)); \ + else \ + assert(p == (TYPE*)PyUnicode_DATA(v)); \ + } while (0) + + if (kind == PyUnicode_1BYTE_KIND) { + Py_UCS1 *p; + WRITE_DIGITS(Py_UCS1); + } + else if (kind == PyUnicode_2BYTE_KIND) { + Py_UCS2 *p; + WRITE_DIGITS(Py_UCS2); + } + else { + Py_UCS4 *p; /* declare before any statement (C89) */ + assert (kind == PyUnicode_4BYTE_KIND); + WRITE_DIGITS(Py_UCS4); + } +#undef WRITE_DIGITS + + if (writer) { + writer->pos += sz; + } + else { + assert(_PyUnicode_CheckConsistency(v, 1)); + *p_output = v; + } + return 0; +} + +PyObject * +_PyLong_Format(PyObject *aa, int base) +{ + PyObject *str; + int err; + if (base == 10) + err = long_to_decimal_string_internal(aa, &str, NULL); + else + err = long_format_binary(aa, base, 1, &str, NULL); + if (err == -1) return NULL; - } - assert(PyUnicode_KIND(v) == PyUnicode_1BYTE_KIND); - - p = PyUnicode_1BYTE_DATA(v) + sz; - if (size_a == 0) { - *--p = '0'; - } - else { - /* JRH: special case for power-of-2 bases */ - twodigits accum = 0; - int accumbits = 0; /* # of bits in accum */ - Py_ssize_t i; - for (i = 0; i < size_a; ++i) { - accum |= (twodigits)a->ob_digit[i] << accumbits; - accumbits += PyLong_SHIFT; - assert(accumbits >= bits); - do { - char cdigit; - cdigit = (char)(accum & (base - 1)); - cdigit += (cdigit < 10) ? '0' : 'a'-10; - *--p = cdigit; - accumbits -= bits; - accum >>= bits; - } while (i < size_a-1 ?
accumbits >= bits : accum > 0); - } - } - - if (base == 16) - *--p = 'x'; - else if (base == 8) - *--p = 'o'; - else /* (base == 2) */ - *--p = 'b'; - *--p = '0'; - if (negative) - *--p = '-'; - assert(p == PyUnicode_1BYTE_DATA(v)); - assert(_PyUnicode_CheckConsistency(v, 1)); - return v; + return str; +} + +int +_PyLong_FormatWriter(PyObject *aa, + int base, int alternate, + _PyUnicodeWriter *writer) +{ + if (base == 10) + return long_to_decimal_string_internal(aa, NULL, writer); + else + return long_format_binary(aa, base, alternate, NULL, writer); } /* Table of digit values for 8-bit string -> integer conversion. @@ -4232,11 +4352,22 @@ static PyObject * long__format__(PyObject *self, PyObject *args) { PyObject *format_spec; + _PyUnicodeWriter writer; + int ret; if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) return NULL; - return _PyLong_FormatAdvanced(self, format_spec, 0, - PyUnicode_GET_LENGTH(format_spec)); + + _PyUnicodeWriter_Init(&writer, 0); + ret = _PyLong_FormatAdvancedWriter( + self, + format_spec, 0, PyUnicode_GET_LENGTH(format_spec), + &writer); + if (ret == -1) { + _PyUnicodeWriter_Dealloc(&writer); + return NULL; + } + return _PyUnicodeWriter_Finish(&writer); } /* Return a pair (q, r) such that a = b * q + r, and diff -r f4f2139202c5 Objects/stringlib/unicode_format.h --- a/Objects/stringlib/unicode_format.h Wed May 23 23:17:22 2012 +0200 +++ b/Objects/stringlib/unicode_format.h Wed May 23 23:45:27 2012 +0200 @@ -499,26 +499,27 @@ render_field(PyObject *fieldobj, SubStri int ok = 0; PyObject *result = NULL; PyObject *format_spec_object = NULL; - PyObject *(*formatter)(PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL; - Py_ssize_t len; + int (*formatter) (PyObject *, PyObject *, Py_ssize_t, Py_ssize_t, _PyUnicodeWriter*) = NULL; + int err; /* If we know the type exactly, skip the lookup of __format__ and just call the formatter directly. 
*/ if (PyUnicode_CheckExact(fieldobj)) - formatter = _PyUnicode_FormatAdvanced; + formatter = _PyUnicode_FormatAdvancedWriter; else if (PyLong_CheckExact(fieldobj)) - formatter =_PyLong_FormatAdvanced; + formatter = _PyLong_FormatAdvancedWriter; else if (PyFloat_CheckExact(fieldobj)) - formatter = _PyFloat_FormatAdvanced; - - /* XXX: for 2.6, convert format_spec to the appropriate type - (unicode, str) */ + formatter = _PyFloat_FormatAdvancedWriter; + else if (PyComplex_CheckExact(fieldobj)) + formatter = _PyComplex_FormatAdvancedWriter; if (formatter) { /* we know exactly which formatter will be called when __format__ is looked up, so call it directly, instead. */ - result = formatter(fieldobj, format_spec->str, - format_spec->start, format_spec->end); + err = formatter(fieldobj, format_spec->str, + format_spec->start, format_spec->end, + writer); + return (err == 0); } else { /* We need to create an object out of the pointers we have, because @@ -536,17 +537,11 @@ render_field(PyObject *fieldobj, SubStri } if (result == NULL) goto done; - if (PyUnicode_READY(result) == -1) + + if (_PyUnicodeWriter_WriteStr(writer, result) == -1) goto done; + ok = 1; - len = PyUnicode_GET_LENGTH(result); - if (_PyUnicodeWriter_Prepare(writer, - len, PyUnicode_MAX_CHAR_VALUE(result)) == -1) - goto done; - copy_characters(writer->buffer, writer->pos, - result, 0, len); - writer->pos += len; - ok = 1; done: Py_XDECREF(format_spec_object); Py_XDECREF(result); @@ -897,16 +892,19 @@ do_markup(SubString *input, PyObject *ar err = _PyUnicodeWriter_Prepare(writer, sublen, maxchar); if (err == -1) return 0; - copy_characters(writer->buffer, writer->pos, - literal.str, literal.start, sublen); + _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, + literal.str, literal.start, sublen); writer->pos += sublen; } - if (field_present) + if (field_present) { + if (iter.str.start == iter.str.end) + writer->flags.overallocate = 0; if (!output_markup(&field_name, &format_spec, 
format_spec_needs_expanding, conversion, writer, args, kwargs, recursion_depth, auto_number)) return 0; + } } return result; } @@ -921,7 +919,7 @@ build_string(SubString *input, PyObject int recursion_depth, AutoNumber *auto_number) { _PyUnicodeWriter writer; - Py_ssize_t initlen; + Py_ssize_t minlen; /* check the recursion level */ if (recursion_depth <= 0) { @@ -930,9 +928,8 @@ build_string(SubString *input, PyObject return NULL; } - initlen = PyUnicode_GET_LENGTH(input->str) + 100; - if (_PyUnicodeWriter_Init(&writer, initlen, 127) == -1) - return NULL; + minlen = PyUnicode_GET_LENGTH(input->str) + 100; + _PyUnicodeWriter_Init(&writer, minlen); if (!do_markup(input, args, kwargs, &writer, recursion_depth, auto_number)) { diff -r f4f2139202c5 Objects/unicodeobject.c --- a/Objects/unicodeobject.c Wed May 23 23:17:22 2012 +0200 +++ b/Objects/unicodeobject.c Wed May 23 23:45:27 2012 +0200 @@ -225,10 +225,6 @@ const unsigned char _Py_ascii_whitespace /* forward */ static PyUnicodeObject *_PyUnicode_New(Py_ssize_t length); static PyObject* get_latin1_char(unsigned char ch); -static void copy_characters( - PyObject *to, Py_ssize_t to_start, - PyObject *from, Py_ssize_t from_start, - Py_ssize_t how_many); static int unicode_modifiable(PyObject *unicode); @@ -783,7 +779,7 @@ resize_copy(PyObject *unicode, Py_ssize_ return NULL; copy_length = Py_MIN(length, PyUnicode_GET_LENGTH(unicode)); - copy_characters(copy, 0, unicode, 0, copy_length); + _PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, copy_length); return copy; } else { @@ -1154,15 +1150,16 @@ _copy_characters(PyObject *to, Py_ssize_ assert(0 <= from_start); assert(0 <= to_start); assert(PyUnicode_Check(from)); - assert(PyUnicode_Check(to)); assert(PyUnicode_IS_READY(from)); - assert(PyUnicode_IS_READY(to)); assert(from_start + how_many <= PyUnicode_GET_LENGTH(from)); - assert(to_start + how_many <= PyUnicode_GET_LENGTH(to)); if (how_many == 0) return 0; + assert(PyUnicode_Check(to)); + 
assert(PyUnicode_IS_READY(to)); + assert(to_start + how_many <= PyUnicode_GET_LENGTH(to)); + from_kind = PyUnicode_KIND(from); from_data = PyUnicode_DATA(from); to_kind = PyUnicode_KIND(to); @@ -1267,10 +1264,10 @@ _copy_characters(PyObject *to, Py_ssize_ return 0; } -static void -copy_characters(PyObject *to, Py_ssize_t to_start, - PyObject *from, Py_ssize_t from_start, - Py_ssize_t how_many) +void +_PyUnicode_FastCopyCharacters( + PyObject *to, Py_ssize_t to_start, + PyObject *from, Py_ssize_t from_start, Py_ssize_t how_many) { (void)_copy_characters(to, to_start, from, from_start, how_many, 0); } @@ -2085,7 +2082,7 @@ unicode_adjust_maxchar(PyObject **p_unic return; } copy = PyUnicode_New(len, max_char); - copy_characters(copy, 0, unicode, 0, len); + _PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, len); Py_DECREF(unicode); *p_unicode = copy; } @@ -2753,7 +2750,7 @@ PyUnicode_FromFormatV(const char *format (void) va_arg(vargs, char *); size = PyUnicode_GET_LENGTH(*callresult); assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string)); - copy_characters(string, i, *callresult, 0, size); + _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size); i += size; /* We're done with the unicode()/repr() => forget it */ Py_DECREF(*callresult); @@ -2767,7 +2764,7 @@ PyUnicode_FromFormatV(const char *format Py_ssize_t size; assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string)); size = PyUnicode_GET_LENGTH(obj); - copy_characters(string, i, obj, 0, size); + _PyUnicode_FastCopyCharacters(string, i, obj, 0, size); i += size; break; } @@ -2779,13 +2776,13 @@ PyUnicode_FromFormatV(const char *format if (obj) { size = PyUnicode_GET_LENGTH(obj); assert(PyUnicode_KIND(obj) <= PyUnicode_KIND(string)); - copy_characters(string, i, obj, 0, size); + _PyUnicode_FastCopyCharacters(string, i, obj, 0, size); i += size; } else { size = PyUnicode_GET_LENGTH(*callresult); assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string)); - copy_characters(string, i, *callresult, 0, 
size); + _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size); i += size; Py_DECREF(*callresult); } @@ -2800,7 +2797,7 @@ PyUnicode_FromFormatV(const char *format /* unused, since we already have the result */ (void) va_arg(vargs, PyObject *); assert(PyUnicode_KIND(*callresult) <= PyUnicode_KIND(string)); - copy_characters(string, i, *callresult, 0, size); + _PyUnicode_FastCopyCharacters(string, i, *callresult, 0, size); i += size; /* We're done with the unicode()/repr() => forget it */ Py_DECREF(*callresult); @@ -4171,7 +4168,7 @@ unicode_decode_call_errorhandler(const c if (unicode_widen(output, *outpos, PyUnicode_MAX_CHAR_VALUE(repunicode)) < 0) goto onError; - copy_characters(*output, *outpos, repunicode, 0, replen); + _PyUnicode_FastCopyCharacters(*output, *outpos, repunicode, 0, replen); *outpos += replen; } else { @@ -9216,12 +9213,14 @@ fixup(PyObject *self, /* If the maxchar increased so that the kind changed, not all characters are representable anymore and we need to fix the string again. This only happens in very few cases. 
*/ - copy_characters(v, 0, self, 0, PyUnicode_GET_LENGTH(self)); + _PyUnicode_FastCopyCharacters(v, 0, + self, 0, PyUnicode_GET_LENGTH(self)); maxchar_old = fixfct(v); assert(maxchar_old > 0 && maxchar_old <= maxchar_new); } else { - copy_characters(v, 0, u, 0, PyUnicode_GET_LENGTH(self)); + _PyUnicode_FastCopyCharacters(v, 0, + u, 0, PyUnicode_GET_LENGTH(self)); } Py_DECREF(u); assert(_PyUnicode_CheckConsistency(v, 1)); @@ -9603,7 +9602,7 @@ PyUnicode_Join(PyObject *separator, PyOb res_data += kind * seplen; } else { - copy_characters(res, res_offset, sep, 0, seplen); + _PyUnicode_FastCopyCharacters(res, res_offset, sep, 0, seplen); res_offset += seplen; } } @@ -9616,7 +9615,7 @@ PyUnicode_Join(PyObject *separator, PyOb res_data += kind * itemlen; } else { - copy_characters(res, res_offset, item, 0, itemlen); + _PyUnicode_FastCopyCharacters(res, res_offset, item, 0, itemlen); res_offset += itemlen; } } @@ -9734,7 +9733,7 @@ pad(PyObject *self, FILL(kind, data, fill, 0, left); if (right) FILL(kind, data, fill, left + _PyUnicode_LENGTH(self), right); - copy_characters(u, left, self, 0, _PyUnicode_LENGTH(self)); + _PyUnicode_FastCopyCharacters(u, left, self, 0, _PyUnicode_LENGTH(self)); assert(_PyUnicode_CheckConsistency(u, 1)); return u; } @@ -10058,7 +10057,7 @@ replace(PyObject *self, PyObject *str1, u = PyUnicode_New(slen, maxchar); if (!u) goto error; - copy_characters(u, 0, self, 0, slen); + _PyUnicode_FastCopyCharacters(u, 0, self, 0, slen); rkind = PyUnicode_KIND(u); PyUnicode_WRITE(rkind, PyUnicode_DATA(u), pos, u2); @@ -10626,8 +10625,8 @@ PyUnicode_Concat(PyObject *left, PyObjec w = PyUnicode_New(new_len, maxchar); if (w == NULL) goto onError; - copy_characters(w, 0, u, 0, u_len); - copy_characters(w, u_len, v, 0, v_len); + _PyUnicode_FastCopyCharacters(w, 0, u, 0, u_len); + _PyUnicode_FastCopyCharacters(w, u_len, v, 0, v_len); Py_DECREF(u); Py_DECREF(v); assert(_PyUnicode_CheckConsistency(w, 1)); @@ -10702,7 +10701,7 @@ PyUnicode_Append(PyObject **p_left, 
PyOb goto error; } /* copy 'right' into the newly allocated area of 'left' */ - copy_characters(*p_left, left_len, right, 0, right_len); + _PyUnicode_FastCopyCharacters(*p_left, left_len, right, 0, right_len); } else { maxchar = PyUnicode_MAX_CHAR_VALUE(left); @@ -10713,8 +10712,8 @@ PyUnicode_Append(PyObject **p_left, PyOb res = PyUnicode_New(new_len, maxchar); if (res == NULL) goto error; - copy_characters(res, 0, left, 0, left_len); - copy_characters(res, left_len, right, 0, right_len); + _PyUnicode_FastCopyCharacters(res, 0, left, 0, left_len); + _PyUnicode_FastCopyCharacters(res, left_len, right, 0, right_len); Py_DECREF(left); *p_left = res; } @@ -12769,60 +12768,74 @@ unicode_endswith(PyObject *self, return PyBool_FromLong(result); } -typedef struct { - PyObject *buffer; - void *data; - enum PyUnicode_Kind kind; - Py_UCS4 maxchar; - Py_ssize_t pos; -} _PyUnicodeWriter ; - Py_LOCAL_INLINE(void) _PyUnicodeWriter_Update(_PyUnicodeWriter *writer) { + writer->size = PyUnicode_GET_LENGTH(writer->buffer); writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer); writer->data = PyUnicode_DATA(writer->buffer); writer->kind = PyUnicode_KIND(writer->buffer); } -Py_LOCAL(int) -_PyUnicodeWriter_Init(_PyUnicodeWriter *writer, - Py_ssize_t length, Py_UCS4 maxchar) -{ - writer->pos = 0; - writer->buffer = PyUnicode_New(length, maxchar); - if (writer->buffer == NULL) - return -1; - _PyUnicodeWriter_Update(writer); - return 0; -} - -Py_LOCAL_INLINE(int) -_PyUnicodeWriter_Prepare(_PyUnicodeWriter *writer, - Py_ssize_t length, Py_UCS4 maxchar) +void +_PyUnicodeWriter_Init(_PyUnicodeWriter *writer, Py_ssize_t min_length) +{ + memset(writer, 0, sizeof(*writer)); +#ifdef Py_DEBUG + writer->kind = 5; /* invalid kind */ +#endif + writer->min_length = Py_MAX(min_length, 100); + writer->flags.overallocate = 1; +} + +int +_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, + Py_ssize_t length, Py_UCS4 maxchar) { Py_ssize_t newlen; PyObject *newbuffer; + assert(length > 0); + 
if (length > PY_SSIZE_T_MAX - writer->pos) { PyErr_NoMemory(); return -1; } newlen = writer->pos + length; - if (newlen > PyUnicode_GET_LENGTH(writer->buffer)) { - /* overallocate 25% to limit the number of resize */ - if (newlen <= (PY_SSIZE_T_MAX - newlen / 4)) - newlen += newlen / 4; - - if (maxchar > writer->maxchar) { + if (writer->buffer == NULL) { + if (writer->flags.overallocate) { + /* overallocate 25% to limit the number of resize */ + if (newlen <= (PY_SSIZE_T_MAX - newlen / 4)) + newlen += newlen / 4; + if (newlen < writer->min_length) + newlen = writer->min_length; + } + writer->buffer = PyUnicode_New(newlen, maxchar); + if (writer->buffer == NULL) + return -1; + _PyUnicodeWriter_Update(writer); + return 0; + } + + if (newlen > writer->size) { + if (writer->flags.overallocate) { + /* overallocate 25% to limit the number of resize */ + if (newlen <= (PY_SSIZE_T_MAX - newlen / 4)) + newlen += newlen / 4; + if (newlen < 100) + newlen = 100; + } + + if (maxchar > writer->maxchar || writer->flags.readonly) { /* resize + widen */ newbuffer = PyUnicode_New(newlen, maxchar); if (newbuffer == NULL) return -1; - PyUnicode_CopyCharacters(newbuffer, 0, - writer->buffer, 0, writer->pos); + _PyUnicode_FastCopyCharacters(newbuffer, 0, + writer->buffer, 0, writer->pos); Py_DECREF(writer->buffer); + writer->flags.readonly = 0; } else { newbuffer = resize_compact(writer->buffer, newlen); @@ -12840,18 +12853,63 @@ _PyUnicodeWriter_Prepare(_PyUnicodeWrite return 0; } -Py_LOCAL(PyObject *) +int +_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str) +{ + Py_UCS4 maxchar; + Py_ssize_t len; + + if (PyUnicode_READY(str) == -1) + return -1; + len = PyUnicode_GET_LENGTH(str); + if (len == 0) + return 0; + maxchar = PyUnicode_MAX_CHAR_VALUE(str); + if (maxchar > writer->maxchar || len > writer->size - writer->pos) { + if (writer->buffer == NULL && !writer->flags.overallocate) { + Py_INCREF(str); + writer->buffer = str; + _PyUnicodeWriter_Update(writer); + 
writer->flags.readonly = 1; + writer->size = 0; + writer->pos += len; + return 0; + } + if (_PyUnicodeWriter_PrepareInternal(writer, len, maxchar) == -1) + return -1; + } + _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, + str, 0, len); + writer->pos += len; + return 0; +} + +PyObject * _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer) { - if (PyUnicode_Resize(&writer->buffer, writer->pos) < 0) { - Py_DECREF(writer->buffer); - return NULL; + if (writer->pos == 0) { + Py_XDECREF(writer->buffer); + Py_INCREF(unicode_empty); + return unicode_empty; + } + if (writer->flags.readonly) { + assert(PyUnicode_GET_LENGTH(writer->buffer) == writer->pos); + return writer->buffer; + } + if (PyUnicode_GET_LENGTH(writer->buffer) != writer->pos) { + PyObject *newbuffer; + newbuffer = resize_compact(writer->buffer, writer->pos); + if (newbuffer == NULL) { + Py_DECREF(writer->buffer); + return NULL; + } + writer->buffer = newbuffer; } assert(_PyUnicode_CheckConsistency(writer->buffer, 1)); return writer->buffer; } -Py_LOCAL(void) +void _PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer) { Py_CLEAR(writer->buffer); @@ -12874,14 +12932,24 @@ The substitutions are identified by brac static PyObject * unicode__format__(PyObject* self, PyObject* args) { - PyObject *format_spec, *out; + PyObject *format_spec; + _PyUnicodeWriter writer; + int ret; if (!PyArg_ParseTuple(args, "U:__format__", &format_spec)) return NULL; - out = _PyUnicode_FormatAdvanced(self, format_spec, 0, - PyUnicode_GET_LENGTH(format_spec)); - return out; + if (PyUnicode_READY(self) == -1) + return NULL; + _PyUnicodeWriter_Init(&writer, 0); + ret = _PyUnicode_FormatAdvancedWriter(self, format_spec, 0, + PyUnicode_GET_LENGTH(format_spec), + &writer); + if (ret == -1) { + _PyUnicodeWriter_Dealloc(&writer); + return NULL; + } + return _PyUnicodeWriter_Finish(&writer); } PyDoc_STRVAR(p_format__doc__, @@ -13111,16 +13179,17 @@ getnextarg(PyObject *args, Py_ssize_t ar /* Returns a new reference to a PyUnicode object, 
or NULL on failure. */ -static PyObject * -formatfloat(PyObject *v, int flags, int prec, int type) +static int +formatfloat(PyObject *v, int flags, int prec, int type, + PyObject **p_output, _PyUnicodeWriter *writer) { char *p; - PyObject *result; double x; + Py_ssize_t len; x = PyFloat_AsDouble(v); if (x == -1.0 && PyErr_Occurred()) - return NULL; + return -1; if (prec < 0) prec = 6; @@ -13128,10 +13197,20 @@ formatfloat(PyObject *v, int flags, int p = PyOS_double_to_string(x, type, prec, (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL); if (p == NULL) - return NULL; - result = unicode_fromascii((unsigned char*)p, strlen(p)); + return -1; + len = strlen(p); + if (writer) { + if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1) + return -1; + memcpy(writer->data + writer->pos * writer->kind, + p, + len); + writer->pos += len; + } + else + *p_output = unicode_fromascii((unsigned char*)p, strlen(p)); PyMem_Free(p); - return result; + return 0; } /* formatlong() emulates the format codes d, u, o, x and X, and @@ -13336,8 +13415,7 @@ PyUnicode_Format(PyObject *format, PyObj fmtcnt = PyUnicode_GET_LENGTH(uformat); fmtpos = 0; - if (_PyUnicodeWriter_Init(&writer, fmtcnt + 100, 127) < 0) - goto onError; + _PyUnicodeWriter_Init(&writer, fmtcnt + 100); if (PyTuple_Check(args)) { arglen = PyTuple_Size(args); @@ -13368,8 +13446,8 @@ PyUnicode_Format(PyObject *format, PyObj if (_PyUnicodeWriter_Prepare(&writer, sublen, maxchar) == -1) goto onError; - copy_characters(writer.buffer, writer.pos, - uformat, nonfmtpos, sublen); + _PyUnicode_FastCopyCharacters(writer.buffer, writer.pos, + uformat, nonfmtpos, sublen); writer.pos += sublen; } else { @@ -13530,6 +13608,8 @@ PyUnicode_Format(PyObject *format, PyObj "incomplete format"); goto onError; } + if (fmtcnt == 0) + writer.flags.overallocate = 0; if (c == '%') { if (_PyUnicodeWriter_Prepare(&writer, 1, '%') == -1) @@ -13539,7 +13619,6 @@ PyUnicode_Format(PyObject *format, PyObj continue; } - v = getnextarg(args, arglen, &argidx); if (v 
== NULL) goto onError; @@ -13552,6 +13631,13 @@ PyUnicode_Format(PyObject *format, PyObj case 's': case 'r': case 'a': + if (PyLong_CheckExact(v) && width == -1 && prec == -1) { + /* Fast path */ + if (_PyLong_FormatWriter(v, 10, flags & F_ALT, &writer) == -1) + goto onError; + goto nextarg; + } + if (PyUnicode_CheckExact(v) && c == 's') { temp = v; Py_INCREF(temp); @@ -13572,6 +13658,32 @@ PyUnicode_Format(PyObject *format, PyObj case 'o': case 'x': case 'X': + if (PyLong_CheckExact(v) + && width == -1 && prec == -1 + && !(flags & (F_SIGN | F_BLANK))) + { + /* Fast path */ + switch(c) + { + case 'd': + case 'i': + case 'u': + if (_PyLong_FormatWriter(v, 10, flags & F_ALT, &writer) == -1) + goto onError; + goto nextarg; + case 'x': + if (_PyLong_FormatWriter(v, 16, flags & F_ALT, &writer) == -1) + goto onError; + goto nextarg; + case 'o': + if (_PyLong_FormatWriter(v, 8, flags & F_ALT, &writer) == -1) + goto onError; + goto nextarg; + default: + break; + } + } + isnumok = 0; if (PyNumber_Check(v)) { PyObject *iobj=NULL; @@ -13611,10 +13723,20 @@ PyUnicode_Format(PyObject *format, PyObj case 'F': case 'g': case 'G': + if (width == -1 && prec == -1 + && !(flags & (F_SIGN | F_BLANK))) + { + /* Fast path */ + if (formatfloat(v, flags, prec, c, NULL, &writer) == -1) + goto onError; + goto nextarg; + } + sign = 1; if (flags & F_ZERO) fill = '0'; - temp = formatfloat(v, flags, prec, c); + if (formatfloat(v, flags, prec, c, &temp, NULL) == -1) + temp = NULL; break; case 'c': @@ -13622,6 +13744,14 @@ PyUnicode_Format(PyObject *format, PyObj Py_UCS4 ch = formatchar(v); if (ch == (Py_UCS4) -1) goto onError; + if (width == -1 && prec == -1) { + /* Fast path */ + if (_PyUnicodeWriter_Prepare(&writer, 1, ch) == -1) + goto onError; + PyUnicode_WRITE(writer.kind, writer.data, writer.pos, ch); + writer.pos += 1; + goto nextarg; + } temp = PyUnicode_FromOrdinal(ch); break; } @@ -13638,6 +13768,16 @@ PyUnicode_Format(PyObject *format, PyObj if (temp == NULL) goto onError; assert 
(PyUnicode_Check(temp)); + + if (width == -1 && prec == -1 + && !(flags & (F_SIGN | F_BLANK))) + { + /* Fast path */ + if (_PyUnicodeWriter_WriteStr(&writer, temp) == -1) + goto onError; + goto nextarg; + } + if (PyUnicode_READY(temp) == -1) { Py_CLEAR(temp); goto onError; @@ -13676,15 +13816,15 @@ PyUnicode_Format(PyObject *format, PyObj if (!(flags & F_LJUST)) { if (sign) { if ((width-1) > len) - bufmaxchar = Py_MAX(bufmaxchar, fill); + bufmaxchar = MAX_MAXCHAR(bufmaxchar, fill); } else { if (width > len) - bufmaxchar = Py_MAX(bufmaxchar, fill); + bufmaxchar = MAX_MAXCHAR(bufmaxchar, fill); } } maxchar = _PyUnicode_FindMaxChar(temp, 0, pindex+len); - bufmaxchar = Py_MAX(bufmaxchar, maxchar); + bufmaxchar = MAX_MAXCHAR(bufmaxchar, maxchar); buflen = width; if (sign && len == width) @@ -13737,8 +13877,8 @@ PyUnicode_Format(PyObject *format, PyObj } } - copy_characters(writer.buffer, writer.pos, - temp, pindex, len); + _PyUnicode_FastCopyCharacters(writer.buffer, writer.pos, + temp, pindex, len); writer.pos += len; if (width > len) { sublen = width - len; @@ -13746,6 +13886,7 @@ PyUnicode_Format(PyObject *format, PyObj writer.pos += sublen; } +nextarg: if (dict && (argidx < arglen) && c != '%') { PyErr_SetString(PyExc_TypeError, "not all arguments converted during string formatting"); diff -r f4f2139202c5 Python/formatter_unicode.c --- a/Python/formatter_unicode.c Wed May 23 23:17:22 2012 +0200 +++ b/Python/formatter_unicode.c Wed May 23 23:45:27 2012 +0200 @@ -316,21 +316,32 @@ calc_padding(Py_ssize_t nchars, Py_ssize /* Do the padding, and return a pointer to where the caller-supplied content goes. */ static Py_ssize_t -fill_padding(PyObject *s, Py_ssize_t start, Py_ssize_t nchars, +fill_padding(_PyUnicodeWriter *writer, + Py_ssize_t nchars, Py_UCS4 fill_char, Py_ssize_t n_lpadding, Py_ssize_t n_rpadding) { + Py_ssize_t pos, r; + /* Pad on left. 
*/ - if (n_lpadding) - PyUnicode_Fill(s, start, start + n_lpadding, fill_char); + if (n_lpadding) { + pos = writer->pos; + r = PyUnicode_Fill(writer->buffer, pos, pos + n_lpadding, fill_char); + if (r == -1) + return -1; + } /* Pad on right. */ - if (n_rpadding) - PyUnicode_Fill(s, start + nchars + n_lpadding, - start + nchars + n_lpadding + n_rpadding, fill_char); + if (n_rpadding) { + pos = writer->pos + nchars + n_lpadding; + r = PyUnicode_Fill(writer->buffer, pos, pos + n_rpadding, fill_char); + if (r == -1) + return -1; + } /* Pointer to the user content. */ - return start + n_lpadding; + writer->pos += n_lpadding; + return 0; } /************************************************************************/ @@ -541,7 +552,7 @@ calc_number_widths(NumberFieldWidths *sp as determined in calc_number_widths(). Return -1 on error, or 0 on success. */ static int -fill_number(PyObject *out, Py_ssize_t pos, const NumberFieldWidths *spec, +fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec, PyObject *digits, Py_ssize_t d_start, Py_ssize_t d_end, PyObject *prefix, Py_ssize_t p_start, Py_UCS4 fill_char, @@ -549,36 +560,36 @@ fill_number(PyObject *out, Py_ssize_t po { /* Used to keep track of digits, decimal, and remainder. 
*/ Py_ssize_t d_pos = d_start; - unsigned int kind = PyUnicode_KIND(out); - void *data = PyUnicode_DATA(out); + const enum PyUnicode_Kind kind = writer->kind; + const void *data = writer->data; Py_ssize_t r; if (spec->n_lpadding) { - PyUnicode_Fill(out, pos, pos + spec->n_lpadding, fill_char); - pos += spec->n_lpadding; + PyUnicode_Fill(writer->buffer, writer->pos, spec->n_lpadding, fill_char); + writer->pos += spec->n_lpadding; } if (spec->n_sign == 1) { - PyUnicode_WRITE(kind, data, pos++, spec->sign); + PyUnicode_WRITE(kind, data, writer->pos, spec->sign); + writer->pos++; } if (spec->n_prefix) { - if (PyUnicode_CopyCharacters(out, pos, - prefix, p_start, - spec->n_prefix) < 0) - return -1; + _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, + prefix, p_start, + spec->n_prefix); if (toupper) { Py_ssize_t t; for (t = 0; t < spec->n_prefix; t++) { - Py_UCS4 c = PyUnicode_READ(kind, data, pos + t); + Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t); c = Py_TOUPPER(c); assert (c <= 127); - PyUnicode_WRITE(kind, data, pos + t, c); + PyUnicode_WRITE(kind, data, writer->pos + t, c); } } - pos += spec->n_prefix; + writer->pos += spec->n_prefix; } if (spec->n_spadding) { - PyUnicode_Fill(out, pos, pos + spec->n_spadding, fill_char); - pos += spec->n_spadding; + PyUnicode_Fill(writer->buffer, writer->pos, spec->n_spadding, fill_char); + writer->pos += spec->n_spadding; } /* Only for type 'c' special case, it has no digits. 
*/ @@ -594,7 +605,7 @@ fill_number(PyObject *out, Py_ssize_t po return -1; } r = _PyUnicode_InsertThousandsGrouping( - out, pos, + writer->buffer, writer->pos, spec->n_grouped_digits, pdigits + kind * d_pos, spec->n_digits, spec->n_min_width, @@ -609,34 +620,32 @@ fill_number(PyObject *out, Py_ssize_t po if (toupper) { Py_ssize_t t; for (t = 0; t < spec->n_grouped_digits; t++) { - Py_UCS4 c = PyUnicode_READ(kind, data, pos + t); + Py_UCS4 c = PyUnicode_READ(kind, data, writer->pos + t); c = Py_TOUPPER(c); if (c > 127) { PyErr_SetString(PyExc_SystemError, "non-ascii grouped digit"); return -1; } - PyUnicode_WRITE(kind, data, pos + t, c); + PyUnicode_WRITE(kind, data, writer->pos + t, c); } } - pos += spec->n_grouped_digits; + writer->pos += spec->n_grouped_digits; if (spec->n_decimal) { - if (PyUnicode_CopyCharacters(out, pos, locale->decimal_point, 0, spec->n_decimal) < 0) - return -1; - pos += spec->n_decimal; + _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, locale->decimal_point, 0, spec->n_decimal); + writer->pos += spec->n_decimal; d_pos += 1; } if (spec->n_remainder) { - if (PyUnicode_CopyCharacters(out, pos, digits, d_pos, spec->n_remainder) < 0) - return -1; - pos += spec->n_remainder; + _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, digits, d_pos, spec->n_remainder); + writer->pos += spec->n_remainder; d_pos += spec->n_remainder; } if (spec->n_rpadding) { - PyUnicode_Fill(out, pos, pos + spec->n_rpadding, fill_char); - pos += spec->n_rpadding; + PyUnicode_Fill(writer->buffer, writer->pos, writer->pos + spec->n_rpadding, fill_char); + writer->pos += spec->n_rpadding; } return 0; } @@ -707,17 +716,20 @@ free_locale_info(LocaleInfo *locale_info /*********** string formatting ******************************************/ /************************************************************************/ -static PyObject * -format_string_internal(PyObject *value, const InternalFormatSpec *format) +static int +format_string_internal(PyObject *value, 
const InternalFormatSpec *format, + _PyUnicodeWriter *writer) { Py_ssize_t lpad; Py_ssize_t rpad; Py_ssize_t total; - Py_ssize_t pos; - Py_ssize_t len = PyUnicode_GET_LENGTH(value); - PyObject *result = NULL; + Py_ssize_t len; + int result = -1; Py_UCS4 maxchar; + assert(PyUnicode_IS_READY(value)); + len = PyUnicode_GET_LENGTH(value); + /* sign is not allowed on strings */ if (format->sign != '\0') { PyErr_SetString(PyExc_ValueError, @@ -741,6 +753,11 @@ format_string_internal(PyObject *value, goto done; } + if (format->width == -1 && format->precision == -1) { + /* Fast path */ + return _PyUnicodeWriter_WriteStr(writer, value); + } + /* if precision is specified, output no more that format.precision characters */ if (format->precision >= 0 && len >= format->precision) { @@ -754,21 +771,23 @@ format_string_internal(PyObject *value, maxchar = Py_MAX(maxchar, format->fill_char); /* allocate the resulting string */ - result = PyUnicode_New(total, maxchar); - if (result == NULL) + if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1) goto done; /* Write into that space. First the padding. */ - pos = fill_padding(result, 0, len, - format->fill_char=='\0'?' ':format->fill_char, - lpad, rpad); + result = fill_padding(writer, len, + format->fill_char=='\0'?' ':format->fill_char, + lpad, rpad); + if (result == -1) + goto done; /* Then the source string. 
*/ - if (PyUnicode_CopyCharacters(result, pos, value, 0, len) < 0) - Py_CLEAR(result); + _PyUnicode_FastCopyCharacters(writer->buffer, writer->pos, + value, 0, len); + writer->pos += (len + rpad); + result = 0; done: - assert(!result || _PyUnicode_CheckConsistency(result, 1)); return result; } @@ -780,11 +799,11 @@ done: typedef PyObject* (*IntOrLongToString)(PyObject *value, int base); -static PyObject * -format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format, - IntOrLongToString tostring) +static int +format_long_internal(PyObject *value, const InternalFormatSpec *format, + _PyUnicodeWriter *writer) { - PyObject *result = NULL; + int result = -1; Py_UCS4 maxchar = 127; PyObject *tmp = NULL; Py_ssize_t inumeric_chars; @@ -798,7 +817,6 @@ format_int_or_long_internal(PyObject *va Py_ssize_t prefix = 0; NumberFieldWidths spec; long x; - int err; /* Locale settings, either from the actual locale or from a hard-code pseudo-locale */ @@ -872,13 +890,23 @@ format_int_or_long_internal(PyObject *va break; } + if (format->sign != '+' && format->sign != ' ' + && format->width == -1 + && format->type != 'X' && format->type != 'n' + && !format->thousands_separators + && PyLong_CheckExact(value)) + { + /* Fast path */ + return _PyLong_FormatWriter(value, base, format->alternate, writer); + } + /* The number of prefix chars is the same as the leading chars to skip */ if (format->alternate) n_prefix = leading_chars_to_skip; /* Do the hard part, converting to a string in a given base */ - tmp = tostring(value, base); + tmp = _PyLong_Format(value, base); if (tmp == NULL || PyUnicode_READY(tmp) == -1) goto done; @@ -914,23 +942,19 @@ format_int_or_long_internal(PyObject *va &locale, format, &maxchar); /* Allocate the memory. */ - result = PyUnicode_New(n_total, maxchar); - if (!result) + if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1) goto done; /* Populate the memory. 
*/ - err = fill_number(result, 0, &spec, - tmp, inumeric_chars, inumeric_chars + n_digits, - tmp, prefix, - format->fill_char == '\0' ? ' ' : format->fill_char, - &locale, format->type == 'X'); - if (err) - Py_CLEAR(result); + result = fill_number(writer, &spec, + tmp, inumeric_chars, inumeric_chars + n_digits, + tmp, prefix, + format->fill_char == '\0' ? ' ' : format->fill_char, + &locale, format->type == 'X'); done: Py_XDECREF(tmp); free_locale_info(&locale); - assert(!result || _PyUnicode_CheckConsistency(result, 1)); return result; } @@ -945,9 +969,10 @@ strtounicode(char *charbuffer, Py_ssize_ } /* much of this is taken from unicodeobject.c */ -static PyObject * +static int format_float_internal(PyObject *value, - const InternalFormatSpec *format) + const InternalFormatSpec *format, + _PyUnicodeWriter *writer) { char *buf = NULL; /* buffer returned from PyOS_double_to_string */ Py_ssize_t n_digits; @@ -962,12 +987,11 @@ format_float_internal(PyObject *value, Py_ssize_t index; NumberFieldWidths spec; int flags = 0; - PyObject *result = NULL; + int result = -1; Py_UCS4 maxchar = 127; Py_UCS4 sign_char = '\0'; int float_type; /* Used to see if we have a nan, inf, or regular float. */ PyObject *unicode_tmp = NULL; - int err; /* Locale settings, either from the actual locale or from a hard-code pseudo-locale */ @@ -1025,12 +1049,24 @@ format_float_internal(PyObject *value, /* Since there is no unicode version of PyOS_double_to_string, just use the 8 bit version and then convert to unicode. */ unicode_tmp = strtounicode(buf, n_digits); + PyMem_Free(buf); if (unicode_tmp == NULL) goto done; - index = 0; + + if (format->sign != '+' && format->sign != ' ' + && format->width == -1 + && format->type != 'n' + && !format->thousands_separators) + { + /* Fast path */ + result = _PyUnicodeWriter_WriteStr(writer, unicode_tmp); + Py_DECREF(unicode_tmp); + return result; + } /* Is a sign character present in the output? 
If so, remember it and skip it */ + index = 0; if (PyUnicode_READ_CHAR(unicode_tmp, index) == '-') { sign_char = '-'; ++index; @@ -1055,24 +1091,19 @@ format_float_internal(PyObject *value, &locale, format, &maxchar); /* Allocate the memory. */ - result = PyUnicode_New(n_total, maxchar); - if (result == NULL) + if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1) goto done; /* Populate the memory. */ - err = fill_number(result, 0, &spec, - unicode_tmp, index, index + n_digits, - NULL, 0, - format->fill_char == '\0' ? ' ' : format->fill_char, - &locale, 0); - if (err) - Py_CLEAR(result); + result = fill_number(writer, &spec, + unicode_tmp, index, index + n_digits, + NULL, 0, + format->fill_char == '\0' ? ' ' : format->fill_char, + &locale, 0); done: - PyMem_Free(buf); Py_DECREF(unicode_tmp); free_locale_info(&locale); - assert(!result || _PyUnicode_CheckConsistency(result, 1)); return result; } @@ -1080,9 +1111,10 @@ done: /*********** complex formatting *****************************************/ /************************************************************************/ -static PyObject * +static int format_complex_internal(PyObject *value, - const InternalFormatSpec *format) + const InternalFormatSpec *format, + _PyUnicodeWriter *writer) { double re; double im; @@ -1106,11 +1138,10 @@ format_complex_internal(PyObject *value, NumberFieldWidths re_spec; NumberFieldWidths im_spec; int flags = 0; - PyObject *result = NULL; + int result = -1; Py_UCS4 maxchar = 127; - int rkind; + enum PyUnicode_Kind rkind; void *rdata; - Py_ssize_t index; Py_UCS4 re_sign_char = '\0'; Py_UCS4 im_sign_char = '\0'; int re_float_type; /* Used to see if we have a nan, inf, or regular float. 
*/ @@ -1122,7 +1153,6 @@ format_complex_internal(PyObject *value, Py_ssize_t total; PyObject *re_unicode_tmp = NULL; PyObject *im_unicode_tmp = NULL; - int err; /* Locale settings, either from the actual locale or from a hard-code pseudo-locale */ @@ -1261,47 +1291,49 @@ format_complex_internal(PyObject *value, if (lpad || rpad) maxchar = Py_MAX(maxchar, format->fill_char); - result = PyUnicode_New(total, maxchar); - if (result == NULL) + if (_PyUnicodeWriter_Prepare(writer, total, maxchar) == -1) goto done; - rkind = PyUnicode_KIND(result); - rdata = PyUnicode_DATA(result); + rkind = writer->kind; + rdata = writer->data; /* Populate the memory. First, the padding. */ - index = fill_padding(result, 0, - n_re_total + n_im_total + 1 + add_parens * 2, - format->fill_char=='\0' ? ' ' : format->fill_char, - lpad, rpad); + result = fill_padding(writer, + n_re_total + n_im_total + 1 + add_parens * 2, + format->fill_char=='\0' ? ' ' : format->fill_char, + lpad, rpad); + if (result == -1) + goto done; - if (add_parens) - PyUnicode_WRITE(rkind, rdata, index++, '('); + if (add_parens) { + PyUnicode_WRITE(rkind, rdata, writer->pos, '('); + writer->pos++; + } if (!skip_re) { - err = fill_number(result, index, &re_spec, - re_unicode_tmp, i_re, i_re + n_re_digits, - NULL, 0, - 0, - &locale, 0); - if (err) { - Py_CLEAR(result); + result = fill_number(writer, &re_spec, + re_unicode_tmp, i_re, i_re + n_re_digits, + NULL, 0, + 0, + &locale, 0); + if (result == -1) goto done; - } - index += n_re_total; } - err = fill_number(result, index, &im_spec, - im_unicode_tmp, i_im, i_im + n_im_digits, - NULL, 0, - 0, - &locale, 0); - if (err) { - Py_CLEAR(result); + result = fill_number(writer, &im_spec, + im_unicode_tmp, i_im, i_im + n_im_digits, + NULL, 0, + 0, + &locale, 0); + if (result == -1) goto done; + PyUnicode_WRITE(rkind, rdata, writer->pos, 'j'); + writer->pos++; + + if (add_parens) { + PyUnicode_WRITE(rkind, rdata, writer->pos, ')'); + writer->pos++; } - index += n_im_total; - 
PyUnicode_WRITE(rkind, rdata, index++, 'j'); - if (add_parens) - PyUnicode_WRITE(rkind, rdata, index++, ')'); + writer->pos += rpad; done: PyMem_Free(re_buf); @@ -1309,61 +1341,79 @@ done: Py_XDECREF(re_unicode_tmp); Py_XDECREF(im_unicode_tmp); free_locale_info(&locale); - assert(!result || _PyUnicode_CheckConsistency(result, 1)); return result; } /************************************************************************/ /*********** built in formatters ****************************************/ /************************************************************************/ -PyObject * -_PyUnicode_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, Py_ssize_t end) +int +format_obj(PyObject *obj, _PyUnicodeWriter *writer) +{ + PyObject *str; + int err; + + str = PyObject_Str(obj); + if (str == NULL) + return -1; + err = _PyUnicodeWriter_WriteStr(writer, str); + Py_DECREF(str); + return err; +} + +int +_PyUnicode_FormatAdvancedWriter(PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, Py_ssize_t end, + _PyUnicodeWriter *writer) { InternalFormatSpec format; - PyObject *result; + + assert(PyUnicode_Check(obj)); /* check for the special case of zero length format spec, make it equivalent to str(obj) */ - if (start == end) - return PyObject_Str(obj); + if (start == end) { + if (PyUnicode_CheckExact(obj)) + return _PyUnicodeWriter_WriteStr(writer, obj); + else + return format_obj(obj, writer); + } /* parse the format_spec */ if (!parse_internal_render_format_spec(format_spec, start, end, &format, 's', '<')) - return NULL; + return -1; /* type conversion? */ switch (format.type) { case 's': /* no type conversion needed, already a string. 
do the formatting */ - result = format_string_internal(obj, &format); - if (result != NULL) - assert(_PyUnicode_CheckConsistency(result, 1)); - break; + return format_string_internal(obj, &format, writer); default: /* unknown */ unknown_presentation_type(format.type, obj->ob_type->tp_name); - result = NULL; + return -1; } - return result; } -static PyObject* -format_int_or_long(PyObject* obj, PyObject* format_spec, - Py_ssize_t start, Py_ssize_t end, - IntOrLongToString tostring) +int +_PyLong_FormatAdvancedWriter(PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, Py_ssize_t end, + _PyUnicodeWriter *writer) { - PyObject *result = NULL; - PyObject *tmp = NULL; + PyObject *tmp = NULL, *str = NULL; InternalFormatSpec format; + int result = -1; /* check for the special case of zero length format spec, make it equivalent to str(obj) */ if (start == end) { - result = PyObject_Str(obj); - goto done; + if (PyLong_CheckExact(obj)) + return _PyLong_FormatWriter(obj, 10, 0, writer); + else + return format_obj(obj, writer); } /* parse the format_spec */ @@ -1382,7 +1432,7 @@ format_int_or_long(PyObject* obj, PyObje case 'n': /* no type conversion needed, already an int (or long). do the formatting */ - result = format_int_or_long_internal(obj, &format, tostring); + result = format_long_internal(obj, &format, writer); break; case 'e': @@ -1396,7 +1446,7 @@ format_int_or_long(PyObject* obj, PyObje tmp = PyNumber_Float(obj); if (tmp == NULL) goto done; - result = format_float_internal(tmp, &format); + result = format_float_internal(tmp, &format, writer); break; default: @@ -1407,41 +1457,27 @@ format_int_or_long(PyObject* obj, PyObje done: Py_XDECREF(tmp); + Py_XDECREF(str); return result; } -/* Need to define long_format as a function that will convert a long - to a string. In 3.0, _PyLong_Format has the correct signature. 
*/ -#define long_format _PyLong_Format - -PyObject * -_PyLong_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, Py_ssize_t end) +int +_PyFloat_FormatAdvancedWriter(PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, Py_ssize_t end, + _PyUnicodeWriter *writer) { - return format_int_or_long(obj, format_spec, start, end, - long_format); -} - -PyObject * -_PyFloat_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, Py_ssize_t end) -{ - PyObject *result = NULL; InternalFormatSpec format; /* check for the special case of zero length format spec, make it equivalent to str(obj) */ - if (start == end) { - result = PyObject_Str(obj); - goto done; - } + if (start == end) + return format_obj(obj, writer); /* parse the format_spec */ if (!parse_internal_render_format_spec(format_spec, start, end, &format, '\0', '>')) - goto done; + return -1; /* type conversion? */ switch (format.type) { @@ -1455,38 +1491,32 @@ _PyFloat_FormatAdvanced(PyObject *obj, case 'n': case '%': /* no conversion, already a float. 
do the formatting */ - result = format_float_internal(obj, &format); - break; + return format_float_internal(obj, &format, writer); default: /* unknown */ unknown_presentation_type(format.type, obj->ob_type->tp_name); - goto done; + return -1; } - -done: - return result; } -PyObject * -_PyComplex_FormatAdvanced(PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, Py_ssize_t end) +int +_PyComplex_FormatAdvancedWriter(PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, Py_ssize_t end, + _PyUnicodeWriter *writer) { - PyObject *result = NULL; InternalFormatSpec format; /* check for the special case of zero length format spec, make it equivalent to str(obj) */ - if (start == end) { - result = PyObject_Str(obj); - goto done; - } + if (start == end) + return format_obj(obj, writer); /* parse the format_spec */ if (!parse_internal_render_format_spec(format_spec, start, end, &format, '\0', '>')) - goto done; + return -1; /* type conversion? */ switch (format.type) { @@ -1499,15 +1529,11 @@ _PyComplex_FormatAdvanced(PyObject *obj, case 'G': case 'n': /* no conversion, already a complex. do the formatting */ - result = format_complex_internal(obj, &format); - break; + return format_complex_internal(obj, &format, writer); default: /* unknown */ unknown_presentation_type(format.type, obj->ob_type->tp_name); - goto done; + return -1; } - -done: - return result; } diff -r f4f2139202c5 Python/getargs.c --- a/Python/getargs.c Wed May 23 23:17:22 2012 +0200 +++ b/Python/getargs.c Wed May 23 23:45:27 2012 +0200 @@ -1167,8 +1167,11 @@ convertsimple(PyObject *arg, const char case 'U': { /* PyUnicode object */ PyObject **p = va_arg(*p_va, PyObject **); - if (PyUnicode_Check(arg)) + if (PyUnicode_Check(arg)) { + if (PyUnicode_READY(arg) == -1) + RETURN_ERR_OCCURRED; *p = arg; + } else return converterr("str", arg, msgbuf, bufsize); break;