Index: Include/unicodeobject.h
===================================================================
--- Include/unicodeobject.h	(revision 86843)
+++ Include/unicodeobject.h	(working copy)
@@ -1173,6 +1173,17 @@
     const char *errors          /* error handling */
     );
 
+/* Strip leading and trailing space and convert code points that have decimal
+   digit property to the corresponding ASCII digit code point.
+
+   Returns a new Unicode string on success, NULL on failure.
+*/
+
+PyAPI_FUNC(PyObject*) _PyUnicode_NormalizeDecimal(
+    Py_UNICODE *s,              /* Unicode buffer */
+    Py_ssize_t length           /* Number of Py_UNICODE chars to normalize */
+    );
+
 /* --- File system encoding ---------------------------------------------- */
 
 /* ParseTuple converter: encode str objects to bytes using
Index: Objects/unicodeobject.c
===================================================================
--- Objects/unicodeobject.c	(revision 86843)
+++ Objects/unicodeobject.c	(working copy)
@@ -6207,6 +6207,51 @@
     return NULL;
 }
 
+/* Return a new string holding s[0:length] with leading and trailing
+   whitespace removed and every non-ASCII code point that has the decimal
+   digit property replaced by the corresponding ASCII digit.  All other
+   code points are copied unchanged.  Returns NULL if the new string
+   cannot be created. */
+PyObject *
+_PyUnicode_NormalizeDecimal(Py_UNICODE *s,
+                            Py_ssize_t length)
+{
+    PyObject *result;
+    Py_UNICODE *p; /* write pointer into result */
+    const Py_UNICODE *end = s + length;
+    Py_ssize_t i;
+    /* Strip whitespace from both ends */
+    while (s < end) {
+        if (Py_UNICODE_ISSPACE(*s))
+            s++;
+        else if (Py_UNICODE_ISSPACE(end[-1]))
+            end--;
+        else
+            break;
+    }
+    length = end - s;
+    /* PyUnicode_FromUnicode() may hand back a shared object when given a
+       source buffer, and such an object must not be modified.  Ask for an
+       uninitialized string instead and fill it in below. */
+    result = PyUnicode_FromUnicode(NULL, length);
+    if (result == NULL)
+        return NULL;
+    p = PyUnicode_AS_UNICODE(result);
+    /* Copy the code points, mapping decimal digits on the way */
+    for (i = 0; i < length; i++) {
+        Py_UNICODE ch = s[i];
+        /* Py_ISDIGIT() masks its argument to 8 bits, so it cannot be used
+           to recognize ASCII digits in a Py_UNICODE; test the range. */
+        if (ch > 127) {
+            int decimal = Py_UNICODE_TODECIMAL(ch);
+            if (decimal >= 0)
+                ch = (Py_UNICODE)('0' + decimal);
+        }
+        p[i] = ch;
+    }
+    return result;
+}
+
 /* --- Decimal Encoder ---------------------------------------------------- */
 
 int PyUnicode_EncodeDecimal(Py_UNICODE *s,
Index: Objects/floatobject.c
===================================================================
--- Objects/floatobject.c	(revision 86843)
+++ Objects/floatobject.c	(working copy)
@@ -175,52 +175,58 @@
 {
     const char *s, *last, *end;
     double x;
-    char buffer[256]; /* for errors */
-    char *s_buffer = NULL;
+    PyObject *s_buffer = NULL; /* normalized copy of a str argument */
     Py_ssize_t len;
     PyObject *result = NULL;
 
     if (PyUnicode_Check(v)) {
-        s_buffer = (char *)PyMem_MALLOC(PyUnicode_GET_SIZE(v)+1);
+        s_buffer = _PyUnicode_NormalizeDecimal(PyUnicode_AS_UNICODE(v),
+                                               PyUnicode_GET_SIZE(v));
         if (s_buffer == NULL)
-            return PyErr_NoMemory();
-        if (PyUnicode_EncodeDecimal(PyUnicode_AS_UNICODE(v),
-                                    PyUnicode_GET_SIZE(v),
-                                    s_buffer,
-                                    NULL))
-            goto error;
-        s = s_buffer;
-        len = strlen(s);
+            return NULL;
+        s = _PyUnicode_AsStringAndSize(s_buffer, &len);
+        if (s == NULL) {
+            /* s_buffer is owned here; release it before bailing out */
+            Py_DECREF(s_buffer);
+            return NULL;
+        }
+        /* _PyUnicode_NormalizeDecimal() has already stripped whitespace */
+        last = s + len;
     }
     else if (PyObject_AsCharBuffer(v, &s, &len)) {
         PyErr_SetString(PyExc_TypeError,
             "float() argument must be a string or a number");
         return NULL;
     }
-    last = s + len;
-
-    while (Py_ISSPACE(*s))
-        s++;
+    else {
+        last = s + len;
+        /* strip space */
+        while (last - s > 0) {
+            if (Py_ISSPACE(*s))
+                s++;
+            else if (Py_ISSPACE(last[-1]))
+                last--;
+            else
+                break;
+        }
+    }
     /* We don't care about overflow or underflow.  If the platform
      * supports them, infinities and signed zeroes (on underflow) are
      * fine. */
     x = PyOS_string_to_double(s, (char **)&end, NULL);
-    if (x == -1.0 && PyErr_Occurred())
-        goto error;
-    while (Py_ISSPACE(*end))
-        end++;
-    if (end == last)
-        result = PyFloat_FromDouble(x);
-    else {
-        PyOS_snprintf(buffer, sizeof(buffer),
-                      "invalid literal for float(): %.200s", s);
-        PyErr_SetString(PyExc_ValueError, buffer);
+    /* The parse must consume the whole stripped input */
+    if (end != last) {
+        PyErr_Format(PyExc_ValueError,
+                     "could not convert string to float: "
+                     "%.200s", s);
         result = NULL;
     }
-
-  error:
-    if (s_buffer)
-        PyMem_FREE(s_buffer);
+    else if (x == -1.0 && PyErr_Occurred())
+        result = NULL;
+    else
+        result = PyFloat_FromDouble(x);
+
+    Py_XDECREF(s_buffer); /* NULL unless v was a str */
     return result;
 }
 