Replace PyUnicode_EncodeDecimal() with a new private helper,
_PyUnicode_NormalizeDecimal(), in the str -> number conversions.

_PyUnicode_NormalizeDecimal() strips leading and trailing whitespace and
maps every code point that has the decimal-digit property to the matching
ASCII digit, returning a new str.  PyLong_FromUnicode(), PyFloat_FromString()
and the complex() string parser in Objects/complexobject.c now parse the
UTF-8 form of that string instead of a PyMem_MALLOC'ed char buffer.  As the
updated tests show, a character such as "\u0200" now yields ValueError from
int() rather than UnicodeError; lone surrogates still raise UnicodeError.

Review fixes folded into this copy (all same-line-count edits):
  * _PyUnicode_NormalizeDecimal(): test "ch > 127" instead of
    "!Py_ISDIGIT(ch)"; the pyctype macros mask their argument to 8 bits and
    are not safe on a Py_UNICODE value.
  * test_int.py / test_float.py: self.fail() takes a single message
    argument, so format the message with "%"; the float test now names
    float() instead of int().

NOTE(review): this copy was restored from a whitespace-collapsed text.  Line
breaks agree with every hunk header, but indentation and runs of spaces
inside context lines and string literals are reconstructed; apply with
"patch -l" or regenerate against the tree.

Index: Include/unicodeobject.h
===================================================================
--- Include/unicodeobject.h (revision 86927)
+++ Include/unicodeobject.h (working copy)
@@ -1144,7 +1144,10 @@
 
 /* --- Decimal Encoder ---------------------------------------------------- */
 
-/* Takes a Unicode string holding a decimal value and writes it into
+/* This function is no longer used. Use _PyUnicode_NormalizeDecimal
+   instead.
+
+   Takes a Unicode string holding a decimal value and writes it into
    an output buffer using standard ASCII digit codes.
 
    The output buffer has to provide at least length+1 bytes of storage
@@ -1173,6 +1176,17 @@
     const char *errors          /* error handling */
     );
 
+/* Strips leading and trailing space and converts code points that have decimal
+   digit property to the corresponding ASCII digit code point.
+
+   Returns a new Unicode string on success, NULL on failure.
+*/
+
+PyAPI_FUNC(PyObject*) _PyUnicode_NormalizeDecimal(
+    Py_UNICODE *s,              /* Unicode buffer */
+    Py_ssize_t length           /* Number of Py_UNICODE chars to encode */
+    );
+
 /* --- File system encoding ---------------------------------------------- */
 
 /* ParseTuple converter: encode str objects to bytes using
Index: Objects/complexobject.c
===================================================================
--- Objects/complexobject.c (revision 86927)
+++ Objects/complexobject.c (working copy)
@@ -766,20 +766,17 @@
     char *end;
     double x=0.0, y=0.0, z;
     int got_bracket=0;
-    char *s_buffer = NULL;
+    PyObject *s_buffer = NULL;
     Py_ssize_t len;
 
     if (PyUnicode_Check(v)) {
-        s_buffer = (char *)PyMem_MALLOC(PyUnicode_GET_SIZE(v) + 1);
+        s_buffer = _PyUnicode_NormalizeDecimal(PyUnicode_AS_UNICODE(v),
+                                               PyUnicode_GET_SIZE(v));
         if (s_buffer == NULL)
-            return PyErr_NoMemory();
-        if (PyUnicode_EncodeDecimal(PyUnicode_AS_UNICODE(v),
-                                    PyUnicode_GET_SIZE(v),
-                                    s_buffer,
-                                    NULL))
+            return NULL;
+        s = _PyUnicode_AsStringAndSize(s_buffer, &len);
+        if (s == NULL)
             goto error;
-        s = s_buffer;
-        len = strlen(s);
     }
     else if (PyObject_AsCharBuffer(v, &s, &len)) {
         PyErr_SetString(PyExc_TypeError,
@@ -894,16 +891,14 @@
     if (s-start != len)
         goto parse_error;
 
-    if (s_buffer)
-        PyMem_FREE(s_buffer);
+    Py_XDECREF(s_buffer);
     return complex_subtype_from_doubles(type, x, y);
 
   parse_error:
     PyErr_SetString(PyExc_ValueError,
                     "complex() arg is a malformed string");
   error:
-    if (s_buffer)
-        PyMem_FREE(s_buffer);
+    Py_XDECREF(s_buffer);
     return NULL;
 }
 
Index: Objects/unicodeobject.c
===================================================================
--- Objects/unicodeobject.c (revision 86927)
+++ Objects/unicodeobject.c (working copy)
@@ -6206,6 +6206,36 @@
     return NULL;
 }
 
+PyObject *
+_PyUnicode_NormalizeDecimal(Py_UNICODE *s,
+                            Py_ssize_t length)
+{
+    PyObject *result;
+    Py_UNICODE *p; /* write pointer into result */
+    const Py_UNICODE *end = s + length;
+    Py_ssize_t i;
+    /* Strip whitespace */
+    while (s < end && Py_UNICODE_ISSPACE(*s))
+        s++;
+    while (s < end - 1 && Py_UNICODE_ISSPACE(end[-1]))
+        end--;
+    length = end - s;
+    /* Copy to a new string */
+    result = PyUnicode_FromUnicode(s, length);
+    if (result == NULL)
+        return result;
+    p = PyUnicode_AS_UNICODE(result);
+    /* Only non-ASCII can need mapping; Py_ISDIGIT() would mask to 8 bits */
+    for (i = 0; i < length; i++) {
+        Py_UNICODE ch = p[i];
+        if (ch > 127) {
+            int decimal = Py_UNICODE_TODECIMAL(ch);
+            if (decimal >= 0)
+                p[i] = '0' + decimal;
+        }
+    }
+    return result;
+}
 /* --- Decimal Encoder ---------------------------------------------------- */
 
 int PyUnicode_EncodeDecimal(Py_UNICODE *s,
@@ -9085,6 +9115,12 @@
     return Py_BuildValue("(u#)", v->str, v->length);
 }
 
+static PyObject *
+unicode__normalize_decimal(PyObject *self)
+{
+    return _PyUnicode_NormalizeDecimal(PyUnicode_AS_UNICODE(self),
+                                       PyUnicode_GET_SIZE(self));
+}
 
 static PyMethodDef unicode_methods[] = {
 
@@ -9142,8 +9178,9 @@
 #endif
 
 #if 0
-    /* This one is just used for debugging the implementation. */
+    /* These methods are just used for debugging the implementation. */
     {"freelistsize", (PyCFunction) unicode_freelistsize, METH_NOARGS},
+    {"_normalize_decimal", (PyCFunction) unicode__normalize_decimal, METH_NOARGS},
 #endif
 
     {"__getnewargs__", (PyCFunction)unicode_getnewargs, METH_NOARGS},
Index: Objects/longobject.c
===================================================================
--- Objects/longobject.c (revision 86927)
+++ Objects/longobject.c (working copy)
@@ -2133,17 +2133,28 @@
 PyLong_FromUnicode(Py_UNICODE *u, Py_ssize_t length, int base)
 {
     PyObject *result;
-    char *buffer = (char *)PyMem_MALLOC(length+1);
+    PyObject *norm;
+    char *buffer, *end;
+    Py_ssize_t buflen;
 
-    if (buffer == NULL)
+    norm = _PyUnicode_NormalizeDecimal(u, length);
+    if (norm == NULL)
         return NULL;
 
-    if (PyUnicode_EncodeDecimal(u, length, buffer, NULL)) {
-        PyMem_FREE(buffer);
+    buffer = _PyUnicode_AsStringAndSize(norm, &buflen);
+    if (buffer == NULL) {
+        Py_DECREF(norm);
         return NULL;
     }
-    result = PyLong_FromString(buffer, NULL, base);
-    PyMem_FREE(buffer);
+
+    result = PyLong_FromString(buffer, &end, base);
+    if (result != NULL && end != buffer + buflen) {
+        PyErr_SetString(PyExc_ValueError,
+                        "null byte in argument for int()");
+        Py_DECREF(result);
+        result = NULL;
+    }
+    Py_DECREF(norm);
     return result;
 }
 
Index: Objects/floatobject.c
===================================================================
--- Objects/floatobject.c (revision 86927)
+++ Objects/floatobject.c (working copy)
@@ -174,52 +174,51 @@
 {
     const char *s, *last, *end;
     double x;
-    char buffer[256]; /* for errors */
-    char *s_buffer = NULL;
+    PyObject *s_buffer = NULL;
     Py_ssize_t len;
     PyObject *result = NULL;
 
     if (PyUnicode_Check(v)) {
-        s_buffer = (char *)PyMem_MALLOC(PyUnicode_GET_SIZE(v)+1);
+        s_buffer = _PyUnicode_NormalizeDecimal(PyUnicode_AS_UNICODE(v),
+                                               PyUnicode_GET_SIZE(v));
         if (s_buffer == NULL)
-            return PyErr_NoMemory();
-        if (PyUnicode_EncodeDecimal(PyUnicode_AS_UNICODE(v),
-                                    PyUnicode_GET_SIZE(v),
-                                    s_buffer,
-                                    NULL))
-            goto error;
-        s = s_buffer;
-        len = strlen(s);
+            return NULL;
+        s = _PyUnicode_AsStringAndSize(s_buffer, &len);
+        if (s == NULL) {
+            Py_DECREF(s_buffer);
+            return NULL;
+        }
+        last = s + len;
     }
     else if (PyObject_AsCharBuffer(v, &s, &len)) {
         PyErr_SetString(PyExc_TypeError,
             "float() argument must be a string or a number");
         return NULL;
     }
-    last = s + len;
-
-    while (Py_ISSPACE(*s))
-        s++;
+    else {
+        last = s + len;
+        /* strip space */
+        while (s < last && Py_ISSPACE(*s))
+            s++;
+        while (s < last - 1 && Py_ISSPACE(last[-1]))
+            last--;
+    }
     /* We don't care about overflow or underflow. If the platform
      * supports them, infinities and signed zeroes (on underflow) are
      * fine. */
     x = PyOS_string_to_double(s, (char **)&end, NULL);
-    if (x == -1.0 && PyErr_Occurred())
-        goto error;
-    while (Py_ISSPACE(*end))
-        end++;
-    if (end == last)
-        result = PyFloat_FromDouble(x);
-    else {
-        PyOS_snprintf(buffer, sizeof(buffer),
-                      "invalid literal for float(): %.200s", s);
-        PyErr_SetString(PyExc_ValueError, buffer);
+    if (end != last) {
+        PyErr_Format(PyExc_ValueError,
+                     "could not convert string to float: "
+                     "%.200s", s);
         result = NULL;
     }
+    else if (x == -1.0 && PyErr_Occurred())
+        result = NULL;
+    else
+        result = PyFloat_FromDouble(x);
 
-  error:
-    if (s_buffer)
-        PyMem_FREE(s_buffer);
+    Py_XDECREF(s_buffer);
     return result;
 }
 
Index: Lib/test/test_unicode.py
===================================================================
--- Lib/test/test_unicode.py (revision 86927)
+++ Lib/test/test_unicode.py (working copy)
@@ -1168,8 +1168,9 @@
         # Error handling (wrong arguments)
         self.assertRaises(TypeError, "hello".encode, 42, 42, 42)
 
-        # Error handling (PyUnicode_EncodeDecimal())
-        self.assertRaises(UnicodeError, int, "\u0200")
+        # Error handling (lone surrogate in _PyUnicode_NormalizeDecimal())
+        self.assertRaises(UnicodeError, int, "\ud800")
+        self.assertRaises(UnicodeError, int, "\udf00")
 
     def test_codecs(self):
         # Encoding
Index: Lib/test/test_int.py
===================================================================
--- Lib/test/test_int.py (revision 86927)
+++ Lib/test/test_int.py (working copy)
@@ -20,7 +20,8 @@
         (' 1\02 ', ValueError),
         ('', ValueError),
         (' ', ValueError),
-        (' \t\t ', ValueError)
+        (' \t\t ', ValueError),
+        ("\u0200", ValueError)
 ]
 
 class IntTestCases(unittest.TestCase):
@@ -302,6 +303,16 @@
                 self.fail("Failed to raise TypeError with %s" %
                           ((base, trunc_result_base),))
 
+    def test_error_message(self):
+        testlist = ('\xbd', '123\xbd', ' 123 456 ')
+        for s in testlist:
+            try:
+                int(s)
+            except ValueError as e:
+                self.assertIn(s.strip(), e.args[0])
+            else:
+                self.fail("Expected int(%r) to raise a ValueError" % (s,))
+
 def test_main():
     run_unittest(IntTestCases)
 
Index: Lib/test/test_float.py
===================================================================
--- Lib/test/test_float.py (revision 86927)
+++ Lib/test/test_float.py (working copy)
@@ -51,6 +51,17 @@
         float(b'.' + b'1'*1000)
         float('.' + '1'*1000)
 
+    def test_error_message(self):
+        testlist = ('\xbd', '123\xbd', ' 123 456 ')
+        for s in testlist:
+            try:
+                float(s)
+            except ValueError as e:
+                self.assertIn(s.strip(), e.args[0])
+            else:
+                self.fail("Expected float(%r) to raise a ValueError" % (s,))
+
+
     @support.run_with_locale('LC_NUMERIC', 'fr_FR', 'de_DE')
     def test_float_with_comma(self):
         # set locale to something that doesn't use '.' for the decimal point