Index: Include/unicodeobject.h
===================================================================
--- Include/unicodeobject.h (revision 86843)
+++ Include/unicodeobject.h (working copy)
@@ -1173,6 +1173,18 @@
     const char *errors          /* error handling */
     );
 
+/* Strip leading and trailing whitespace from a Py_UNICODE buffer, encode
+   the remainder as UTF-8 and replace every character that has a decimal
+   digit value by the corresponding ASCII digit.
+
+   Returns a bytes object, or NULL if the UTF-8 encoding failed. */
+
+PyAPI_FUNC(PyObject*) _PyUnicode_EncodeDecimalUTF8(
+    Py_UNICODE *s,              /* Unicode buffer */
+    Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
+    const char *errors          /* error handling */
+    );
+
 /* --- File system encoding ---------------------------------------------- */
 
 /* ParseTuple converter: encode str objects to bytes using
Index: Objects/unicodeobject.c
===================================================================
--- Objects/unicodeobject.c (revision 86843)
+++ Objects/unicodeobject.c (working copy)
@@ -6207,6 +6207,79 @@
     return NULL;
 }
 
+/* Encode a Py_UNICODE buffer as UTF-8 with decimal digits mapped to ASCII.
+
+   Leading and trailing whitespace is stripped, the remainder is encoded
+   with PyUnicode_EncodeUTF8() and every character for which
+   Py_UNICODE_TODECIMAL() yields a digit is replaced by the corresponding
+   ASCII digit.  The replacement is done in place in the encoded bytes
+   object, whose size is reduced accordingly.
+
+   Returns the bytes object, or NULL if the UTF-8 encoding failed.
+
+   NOTE(review): the in-place pass walks the Py_UNICODE input and the
+   encoded bytes in lock-step; this presumably only holds for the strict
+   error handler (errors == NULL) -- confirm before passing anything else. */
+PyObject *
+_PyUnicode_EncodeDecimalUTF8(Py_UNICODE *s,
+                             Py_ssize_t length,
+                             const char *errors)
+{
+    PyObject *result;
+    char *wp;           /* write pointer into result */
+    const char *rp;     /* read pointer into result */
+    const Py_UNICODE *end = s + length;
+
+    /* Strip whitespace from both ends */
+    while (s < end) {
+        if (Py_UNICODE_ISSPACE(*s))
+            s++;
+        else if (Py_UNICODE_ISSPACE(end[-1]))
+            end--;
+        else
+            break;
+    }
+
+    /* Convert to UTF-8 */
+    result = PyUnicode_EncodeUTF8(s, end - s, errors);
+    if (result == NULL)
+        return NULL;
+
+    /* Walk the code units and their encoded byte sequences together */
+    for (rp = wp = PyBytes_AS_STRING(result); s < end; s++) {
+        int cl = utf8_code_length[(unsigned char)*rp];
+        Py_UNICODE ch = *s;
+        int decimal = Py_UNICODE_TODECIMAL(ch);
+
+        if (decimal >= 0) {
+            /* The whole cl-byte sequence collapses to one ASCII digit */
+            *wp++ = '0' + decimal;
+            if (cl > 1)
+                Py_SIZE(result) -= cl - 1;
+        }
+        else {
+            /* Once a digit has been shortened, wp trails rp by fewer
+               bytes than cl may be, so source and destination can
+               overlap: this must be memmove, not memcpy. */
+            if (wp != rp)
+                memmove(wp, rp, cl);
+            wp += cl;
+        }
+        rp += cl;
+#ifndef Py_UNICODE_WIDE
+        /* A 16-bit code unit never needs more than 3 bytes, so a 4-byte
+           sequence stands for a surrogate pair, i.e. two Py_UNICODE
+           units: skip the trail unit to stay in step with the bytes. */
+        if (cl == 4 && s + 1 < end)
+            s++;
+#endif
+    }
+    /* Terminate the shortened contents with a NUL */
+    if (wp != rp)
+        *wp = '\0';
+    return result;
+}
+
 /* --- Decimal Encoder ---------------------------------------------------- */
 
 int PyUnicode_EncodeDecimal(Py_UNICODE *s,
Index: Objects/floatobject.c
=================================================================== --- Objects/floatobject.c (revision 86843) +++ Objects/floatobject.c (working copy) @@ -175,52 +175,52 @@ { const char *s, *last, *end; double x; - char buffer[256]; /* for errors */ - char *s_buffer = NULL; + PyObject *s_buffer = NULL; Py_ssize_t len; PyObject *result = NULL; if (PyUnicode_Check(v)) { - s_buffer = (char *)PyMem_MALLOC(PyUnicode_GET_SIZE(v)+1); + s_buffer = _PyUnicode_EncodeDecimalUTF8(PyUnicode_AS_UNICODE(v), + PyUnicode_GET_SIZE(v), + NULL); if (s_buffer == NULL) - return PyErr_NoMemory(); - if (PyUnicode_EncodeDecimal(PyUnicode_AS_UNICODE(v), - PyUnicode_GET_SIZE(v), - s_buffer, - NULL)) - goto error; - s = s_buffer; - len = strlen(s); + return NULL; + s = PyBytes_AS_STRING(s_buffer); + last = s + PyBytes_GET_SIZE(s_buffer); } else if (PyObject_AsCharBuffer(v, &s, &len)) { PyErr_SetString(PyExc_TypeError, "float() argument must be a string or a number"); return NULL; } - last = s + len; - - while (Py_ISSPACE(*s)) - s++; + else { + last = s + len; + /* strip space */ + while (last - s > 0) { + if (Py_ISSPACE(*s)) + s++; + else if (Py_ISSPACE(last[- 1])) + last--; + else + break; + } + } /* We don't care about overflow or underflow. If the platform * supports them, infinities and signed zeroes (on underflow) are * fine. 
*/ - x = PyOS_string_to_double(s, (char **)&end, NULL); - if (x == -1.0 && PyErr_Occurred()) - goto error; - while (Py_ISSPACE(*end)) - end++; - if (end == last) - result = PyFloat_FromDouble(x); - else { - PyOS_snprintf(buffer, sizeof(buffer), - "invalid literal for float(): %.200s", s); - PyErr_SetString(PyExc_ValueError, buffer); + x = PyOS_string_to_double(s, &end, NULL); + if (end != last) { + PyErr_Format(PyExc_ValueError, + "could not convert string to float: " + "%.200s", s); result = NULL; } - - error: - if (s_buffer) - PyMem_FREE(s_buffer); + else if (x == -1.0 && PyErr_Occurred()) + result = NULL; + else + result = PyFloat_FromDouble(x); + + Py_XDECREF(s_buffer); return result; }