diff -r 1c6c2ec8916a Objects/unicodeobject.c --- a/Objects/unicodeobject.c Tue Apr 01 02:08:41 2014 +0300 +++ b/Objects/unicodeobject.c Tue Apr 01 10:14:35 2014 +0200 @@ -8495,27 +8495,7 @@ charmaptranslate_lookup(Py_UCS4 c, PyObj return -1; } } -/* ensure that *outobj is at least requiredsize characters long, - if not reallocate and adjust various state variables. - Return 0 on success, -1 on error */ -static int -charmaptranslate_makespace(Py_UCS4 **outobj, Py_ssize_t *psize, - Py_ssize_t requiredsize) -{ - Py_ssize_t oldsize = *psize; - Py_UCS4 *new_outobj; - if (requiredsize > oldsize) { - /* exponentially overallocate to minimize reallocations */ - if (requiredsize < 2 * oldsize) - requiredsize = 2 * oldsize; - new_outobj = PyMem_Realloc(*outobj, requiredsize * sizeof(Py_UCS4)); - if (new_outobj == 0) - return -1; - *outobj = new_outobj; - *psize = requiredsize; - } - return 0; -} + /* lookup the character, put the result in the output string and adjust various state variables. Return a new reference to the object that was put in the output buffer in *result, or Py_None, if the mapping was @@ -8524,47 +8504,49 @@ charmaptranslate_makespace(Py_UCS4 **out Return 0 on success, -1 on error. */ static int charmaptranslate_output(PyObject *input, Py_ssize_t ipos, - PyObject *mapping, Py_UCS4 **output, - Py_ssize_t *osize, Py_ssize_t *opos, - PyObject **res) -{ + PyObject *mapping, _PyUnicodeWriter *writer) +{ + PyObject *item; Py_UCS4 curinp = PyUnicode_READ_CHAR(input, ipos); - if (charmaptranslate_lookup(curinp, mapping, res)) - return -1; - if (*res==NULL) { + + if (charmaptranslate_lookup(curinp, mapping, &item)) + return -1; + + if (item == NULL) { /* not found => default to 1:1 mapping */ - (*output)[(*opos)++] = curinp; - } - else if (*res==Py_None) - ; - else if (PyLong_Check(*res)) { - /* no overflow check, because we know that the space is enough */ - (*output)[(*opos)++] = (Py_UCS4)PyLong_AS_LONG(*res); - } - else if (PyUnicode_Check(*res)) { - Py_ssize_t repsize; - if (PyUnicode_READY(*res) == -1) + if (_PyUnicodeWriter_WriteCharInline(writer, curinp) < 0) { return -1; - repsize = PyUnicode_GET_LENGTH(*res); - if (repsize==1) { - /* no overflow check, because we know that the space is enough */ - (*output)[(*opos)++] = PyUnicode_READ_CHAR(*res, 0); - } - else if (repsize!=0) { - /* more than one character */ - Py_ssize_t requiredsize = *opos + - (PyUnicode_GET_LENGTH(input) - ipos) + - repsize - 1; - Py_ssize_t i; - if (charmaptranslate_makespace(output, osize, requiredsize)) - return -1; - for(i = 0; i < repsize; i++) - (*output)[(*opos)++] = PyUnicode_READ_CHAR(*res, i); - } - } - else - return -1; - return 0; + } + return 1; + } + + if (item == Py_None) { + Py_DECREF(item); + return 0; + } + + if (PyLong_Check(item)) { + Py_UCS4 ch = (Py_UCS4)PyLong_AS_LONG(item); + if (_PyUnicodeWriter_WriteCharInline(writer, ch) < 0) { + Py_DECREF(item); + return -1; + } + Py_DECREF(item); + return 1; + } + + if (!PyUnicode_Check(item)) { + Py_DECREF(item); + return -1; + } + + if (_PyUnicodeWriter_WriteStr(writer, item) < 0) { + Py_DECREF(item); + return -1; + } + + Py_DECREF(item); + return 1; } PyObject * @@ -8577,18 +8559,15 @@ PyObject * Py_ssize_t size, i; int kind; /* output buffer */ - Py_UCS4 *output = NULL; - Py_ssize_t osize; - PyObject *res; - /* current output position */ - Py_ssize_t opos; + _PyUnicodeWriter writer; + /* error handler */ char *reason = "character maps to "; PyObject *errorHandler = NULL; PyObject *exc = NULL; /* the following variable is used for caching string comparisons * -1=not initialized, 0=unknown, 1=strict, 2=replace, * 3=ignore, 4=xmlcharrefreplace */ - int known_errorHandler = -1; + int ignore = -1; if (mapping == NULL) { PyErr_BadArgument(); @@ -8609,121 +8588,75 @@ PyObject * /* allocate enough for a simple 1:1 translation without replacements, if we need more, we'll resize */ - osize = size; - output = PyMem_Malloc(osize * sizeof(Py_UCS4)); - opos = 0; - if (output == NULL) { - PyErr_NoMemory(); + _PyUnicodeWriter_Init(&writer); + if (_PyUnicodeWriter_Prepare(&writer, size, 127) == -1) goto onError; - } while (i adjust input pointer */ + ++i; + continue; + } + + /* untranslatable character */ + collstart = i; + collend = i+1; + + /* find all untranslatable characters */ + while (collend < size) { + PyObject *x; + Py_UCS4 ch = PyUnicode_READ(kind,idata, collend); + if (charmaptranslate_lookup(ch, mapping, &x)) + goto onError; Py_XDECREF(x); - goto onError; - } - Py_XDECREF(x); - if (x!=Py_None) /* it worked => adjust input pointer */ - ++i; - else { /* untranslatable character */ - PyObject *repunicode = NULL; /* initialize to prevent gcc warning */ - Py_ssize_t repsize; - Py_ssize_t newpos; - Py_ssize_t uni2; - /* startpos for collecting untranslatable chars */ - Py_ssize_t collstart = i; - Py_ssize_t collend = i+1; - Py_ssize_t coll; - - /* find all untranslatable characters */ - while (collend < size) { - if (charmaptranslate_lookup(PyUnicode_READ(kind,idata, collend), mapping, &x)) - goto onError; - Py_XDECREF(x); - if (x!=Py_None) - break; - ++collend; - } - /* cache callback name lookup - * (if not done yet, i.e. it's the first error) */ - if (known_errorHandler==-1) { - if ((errors==NULL) || (!strcmp(errors, "strict"))) - known_errorHandler = 1; - else if (!strcmp(errors, "replace")) - known_errorHandler = 2; - else if (!strcmp(errors, "ignore")) - known_errorHandler = 3; - else if (!strcmp(errors, "xmlcharrefreplace")) - known_errorHandler = 4; - else - known_errorHandler = 0; - } - switch (known_errorHandler) { - case 1: /* strict */ - make_translate_exception(&exc, - input, collstart, collend, reason); - if (exc != NULL) - PyCodec_StrictErrors(exc); + if (x!=Py_None) + break; + ++collend; + } + + /* cache callback name lookup + * (if not done yet, i.e. it's the first error) */ + if (ignore == -1) { + ignore = (errors != NULL && strcmp(errors, "ignore") == 0); + } + if (ignore) { + /* ignore */ + i = collend; + } + else { + repunicode = unicode_translate_call_errorhandler(errors, &errorHandler, + reason, input, &exc, + collstart, collend, &newpos); + if (repunicode == NULL) goto onError; - case 2: /* replace */ - /* No need to check for space, this is a 1:1 replacement */ - for (coll = collstart; coll0; ++uni2) - output[opos++] = PyUnicode_READ_CHAR(repunicode, uni2); - i = newpos; + if (_PyUnicodeWriter_WriteStr(&writer, repunicode) < 0) { Py_DECREF(repunicode); - } - } - } - res = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, output, opos); - if (!res) - goto onError; - PyMem_Free(output); + goto onError; + } + Py_DECREF(repunicode); + } + } Py_XDECREF(exc); Py_XDECREF(errorHandler); - return res; + return _PyUnicodeWriter_Finish(&writer); onError: - PyMem_Free(output); + _PyUnicodeWriter_Dealloc(&writer); Py_XDECREF(exc); Py_XDECREF(errorHandler); return NULL;