Add _PyUnicode_TransformDecimalToASCII() and use it in the float() and
complex() string parsers in place of PyUnicode_EncodeDecimal().
Indentation of context lines is reconstructed; apply with whitespace
tolerance (e.g. "patch -l") if a hunk does not match exactly.

Index: Include/unicodeobject.h
===================================================================
--- Include/unicodeobject.h	(revision 86944)
+++ Include/unicodeobject.h	(working copy)
@@ -1173,6 +1173,18 @@
     const char *errors          /* error handling */
     );
 
+/* Transforms code points that have decimal digit property to the
+   corresponding ASCII digit code points and transforms Unicode space to
+   '\N{SPACE}' (ASCII space).
+
+   Returns a new Unicode string on success, NULL on failure.
+*/
+
+PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalToASCII(
+    Py_UNICODE *s,              /* Unicode buffer */
+    Py_ssize_t length           /* Number of Py_UNICODE chars to transform */
+    );
+
 /* --- File system encoding ---------------------------------------------- */
 
 /* ParseTuple converter: encode str objects to bytes using
Index: Objects/complexobject.c
===================================================================
--- Objects/complexobject.c	(revision 86944)
+++ Objects/complexobject.c	(working copy)
@@ -766,20 +766,17 @@
     char *end;
     double x=0.0, y=0.0, z;
     int got_bracket=0;
-    char *s_buffer = NULL;
+    PyObject *s_buffer = NULL;
     Py_ssize_t len;
 
     if (PyUnicode_Check(v)) {
-        s_buffer = (char *)PyMem_MALLOC(PyUnicode_GET_SIZE(v) + 1);
+        s_buffer = _PyUnicode_TransformDecimalToASCII(PyUnicode_AS_UNICODE(v),
+                                                      PyUnicode_GET_SIZE(v));
         if (s_buffer == NULL)
-            return PyErr_NoMemory();
-        if (PyUnicode_EncodeDecimal(PyUnicode_AS_UNICODE(v),
-                                    PyUnicode_GET_SIZE(v),
-                                    s_buffer,
-                                    NULL))
+            return NULL;
+        s = _PyUnicode_AsStringAndSize(s_buffer, &len);
+        if (s == NULL)
             goto error;
-        s = s_buffer;
-        len = strlen(s);
     }
     else if (PyObject_AsCharBuffer(v, &s, &len)) {
         PyErr_SetString(PyExc_TypeError,
@@ -894,16 +891,14 @@
     if (s-start != len)
         goto parse_error;
 
-    if (s_buffer)
-        PyMem_FREE(s_buffer);
+    Py_XDECREF(s_buffer);
     return complex_subtype_from_doubles(type, x, y);
 
   parse_error:
     PyErr_SetString(PyExc_ValueError,
                     "complex() arg is a malformed string");
   error:
-    if (s_buffer)
-        PyMem_FREE(s_buffer);
+    Py_XDECREF(s_buffer);
     return NULL;
 }
 
Index: Objects/unicodeobject.c
===================================================================
--- Objects/unicodeobject.c	(revision 86944)
+++ Objects/unicodeobject.c	(working copy)
@@ -6206,6 +6206,35 @@
     return NULL;
 }
 
+PyObject *
+_PyUnicode_TransformDecimalToASCII(Py_UNICODE *s,
+                                   Py_ssize_t length)
+{
+    PyObject *result;
+    Py_UNICODE *p; /* write pointer into result */
+    Py_ssize_t i;
+    /* Allocate a fresh string (u == NULL): PyUnicode_FromUnicode(s, ...)
+       may return a shared object, which must not be modified in place. */
+    result = PyUnicode_FromUnicode(NULL, length);
+    if (result == NULL)
+        return NULL;
+    p = PyUnicode_AS_UNICODE(result);
+    Py_UNICODE_COPY(p, s, length);
+    /* Py_ISDIGIT() only looks at the low 8 bits of its argument, so
+       non-ASCII code points are selected explicitly with ch > 127. */
+    for (i = 0; i < length; i++) {
+        Py_UNICODE ch = s[i];
+        if (ch != ' ' && Py_UNICODE_ISSPACE(ch))
+            p[i] = ' ';
+        else if (ch > 127) {
+            int decimal = Py_UNICODE_TODECIMAL(ch);
+            if (decimal >= 0)
+                p[i] = '0' + decimal;
+        }
+    }
+    return result;
+}
+
 /* --- Decimal Encoder ---------------------------------------------------- */
 
 int PyUnicode_EncodeDecimal(Py_UNICODE *s,
@@ -8985,6 +9014,13 @@
 }
 #endif
 
+static PyObject *
+unicode__decascii(PyObject *self)
+{
+    return _PyUnicode_TransformDecimalToASCII(PyUnicode_AS_UNICODE(self),
+                                              PyUnicode_GET_SIZE(self));
+}
+
 PyDoc_STRVAR(startswith__doc__,
              "S.startswith(prefix[, start[, end]]) -> bool\n\
 \n\
@@ -9124,7 +9160,6 @@
     return Py_BuildValue("(u#)", v->str, v->length);
 }
 
-
 static PyMethodDef unicode_methods[] = {
 
     /* Order is according to common usage: often used methods should
@@ -9186,9 +9221,10 @@
 #endif
 
 #if 0
-    /* This one is just used for debugging the implementation. */
+    /* These methods are just used for debugging the implementation. */
     {"freelistsize", (PyCFunction) unicode_freelistsize, METH_NOARGS},
 #endif
+    {"_decascii", (PyCFunction) unicode__decascii, METH_NOARGS},
 
     {"__getnewargs__", (PyCFunction)unicode_getnewargs, METH_NOARGS},
     {NULL, NULL}
Index: Objects/floatobject.c
===================================================================
--- Objects/floatobject.c	(revision 86944)
+++ Objects/floatobject.c	(working copy)
@@ -174,22 +174,21 @@
 {
     const char *s, *last, *end;
     double x;
-    char buffer[256]; /* for errors */
-    char *s_buffer = NULL;
+    PyObject *s_buffer = NULL;
     Py_ssize_t len;
     PyObject *result = NULL;
 
     if (PyUnicode_Check(v)) {
-        s_buffer = (char *)PyMem_MALLOC(PyUnicode_GET_SIZE(v)+1);
+        s_buffer = _PyUnicode_TransformDecimalToASCII(PyUnicode_AS_UNICODE(v),
+                                                      PyUnicode_GET_SIZE(v));
         if (s_buffer == NULL)
-            return PyErr_NoMemory();
-        if (PyUnicode_EncodeDecimal(PyUnicode_AS_UNICODE(v),
-                                    PyUnicode_GET_SIZE(v),
-                                    s_buffer,
-                                    NULL))
-            goto error;
-        s = s_buffer;
-        len = strlen(s);
+            return NULL;
+        s = _PyUnicode_AsStringAndSize(s_buffer, &len);
+        if (s == NULL) {
+            Py_DECREF(s_buffer);
+            return NULL;
+        }
+        last = s + len;
     }
     else if (PyObject_AsCharBuffer(v, &s, &len)) {
         PyErr_SetString(PyExc_TypeError,
@@ -197,29 +196,27 @@
         return NULL;
     }
     last = s + len;
-
-    while (Py_ISSPACE(*s))
+    /* strip space */
+    while (s < last && Py_ISSPACE(*s))
         s++;
+    while (s < last - 1 && Py_ISSPACE(last[-1]))
+        last--;
     /* We don't care about overflow or underflow.  If the platform
      * supports them, infinities and signed zeroes (on underflow) are
      * fine. */
     x = PyOS_string_to_double(s, (char **)&end, NULL);
-    if (x == -1.0 && PyErr_Occurred())
-        goto error;
-    while (Py_ISSPACE(*end))
-        end++;
-    if (end == last)
-        result = PyFloat_FromDouble(x);
-    else {
-        PyOS_snprintf(buffer, sizeof(buffer),
-                      "invalid literal for float(): %.200s", s);
-        PyErr_SetString(PyExc_ValueError, buffer);
+    if (end != last) {
+        PyErr_Format(PyExc_ValueError,
+                     "could not convert string to float: "
+                     "%.200s", s);
         result = NULL;
     }
+    else if (x == -1.0 && PyErr_Occurred())
+        result = NULL;
+    else
+        result = PyFloat_FromDouble(x);
 
-  error:
-    if (s_buffer)
-        PyMem_FREE(s_buffer);
+    Py_XDECREF(s_buffer);
     return result;
 }
 
Index: Lib/test/test_float.py
===================================================================
--- Lib/test/test_float.py	(revision 86944)
+++ Lib/test/test_float.py	(working copy)
@@ -51,6 +51,17 @@
         float(b'.' + b'1'*1000)
         float('.' + '1'*1000)
 
+    def test_error_message(self):
+        testlist = ('\xbd', '123\xbd', ' 123 456 ')
+        for s in testlist:
+            try:
+                float(s)
+            except ValueError as e:
+                self.assertIn(s.strip(), e.args[0])
+            else:
+                self.fail("Expected float(%r) to raise a ValueError" % s)
+
+
     @support.run_with_locale('LC_NUMERIC', 'fr_FR', 'de_DE')
     def test_float_with_comma(self):
         # set locale to something that doesn't use '.' for the decimal point