diff -r ba4c826848d5 Include/intobject.h --- a/Include/intobject.h Sat Jul 13 04:05:42 2013 -0400 +++ b/Include/intobject.h Sun Jul 14 15:34:25 2013 +0300 @@ -35,6 +35,7 @@ #ifdef Py_USING_UNICODE PyAPI_FUNC(PyObject *) PyInt_FromUnicode(Py_UNICODE*, Py_ssize_t, int); #endif +PyAPI_FUNC(PyObject *) _PyInt_FromBytes(const char *, Py_ssize_t, int); PyAPI_FUNC(PyObject *) PyInt_FromLong(long); PyAPI_FUNC(PyObject *) PyInt_FromSize_t(size_t); PyAPI_FUNC(PyObject *) PyInt_FromSsize_t(Py_ssize_t); diff -r ba4c826848d5 Include/longobject.h --- a/Include/longobject.h Sat Jul 13 04:05:42 2013 -0400 +++ b/Include/longobject.h Sun Jul 14 15:34:25 2013 +0300 @@ -59,6 +59,7 @@ #ifdef Py_USING_UNICODE PyAPI_FUNC(PyObject *) PyLong_FromUnicode(Py_UNICODE*, Py_ssize_t, int); #endif +PyAPI_FUNC(PyObject *) _PyLong_FromBytes(const char *, Py_ssize_t, int); /* _PyLong_Sign. Return 0 if v is 0, -1 if v < 0, +1 if v > 0. v must not be NULL, and must be a normalized long. diff -r ba4c826848d5 Lib/test/test_int.py --- a/Lib/test/test_int.py Sat Jul 13 04:05:42 2013 -0400 +++ b/Lib/test/test_int.py Sun Jul 14 15:34:25 2013 +0300 @@ -63,6 +63,43 @@ self.assertRaises(TypeError, self.ntype, base=10) self.assertRaises(TypeError, self.ntype, base=0) + def test_error_message(self): + def check(s, base=None): + with self.assertRaises(ValueError, + msg="%s(%r, %r)" % + (self.ntype.__name__, s, base)) as cm: + if base is None: + self.ntype(s) + else: + self.ntype(s, base) + self.assertEqual(cm.exception.args[0], + "invalid literal for %s() with base %d: %r" % + (self.ntype.__name__, 10 if base is None else base, s)) + + check('\xbd') + check('123\xbd') + check(' 123 456 ') + + check('123\x00') + # SF bug 1545497: embedded NULs were not detected with explicit base + check('123\x00', 10) + check('123\x00 245', 20) + check('123\x00 245', 16) + check('123\x00245', 20) + check('123\x00245', 16) + if have_unicode: + # non-ascii digits + check(unicode('\u0663\u0661\u0664!', 'raw-unicode-escape'), 10) + # 
Unicode string with embedded NUL + check(unicode('123\x00')) + check(unicode('123\x00'), 10) + # Unicode string with non-decimal + check(unicode(r'123\xbd', 'raw-unicode-escape')) + check(unicode(r'123\xbd', 'raw-unicode-escape'), 10) + # lone surrogate in Unicode string + check(unicode(r'123\ud800', 'raw-unicode-escape')) + check(unicode(r'123\ud800', 'raw-unicode-escape'), 10) + class IntTestCases(IntLongCommonTests, unittest.TestCase): ntype = int @@ -114,14 +151,6 @@ x = -1-sys.maxint self.assertEqual(x >> 1, x//2) - self.assertRaises(ValueError, int, '123\0') - self.assertRaises(ValueError, int, '53', 40) - - # SF bug 1545497: embedded NULs were not detected with - # explicit base - self.assertRaises(ValueError, int, '123\0', 10) - self.assertRaises(ValueError, int, '123\x00 245', 20) - x = int('1' * 600) self.assertIsInstance(x, long) diff -r ba4c826848d5 Lib/test/test_long.py --- a/Lib/test/test_long.py Sat Jul 13 04:05:42 2013 -0400 +++ b/Lib/test/test_long.py Sun Jul 14 15:34:25 2013 +0300 @@ -375,11 +375,6 @@ self.assertRaises(ValueError, long, '08', 0) self.assertRaises(ValueError, long, '-012395', 0) - # SF patch #1638879: embedded NULs were not detected with - # explicit base - self.assertRaises(ValueError, long, '123\0', 10) - self.assertRaises(ValueError, long, '123\x00 245', 20) - self.assertEqual(long('100000000000000000000000000000000', 2), 4294967296) self.assertEqual(long('102002022201221111211', 3), 4294967296) diff -r ba4c826848d5 Misc/NEWS --- a/Misc/NEWS Sat Jul 13 04:05:42 2013 -0400 +++ b/Misc/NEWS Sun Jul 14 15:34:25 2013 +0300 @@ -9,6 +9,8 @@ Core and Builtins ----------------- +- Issue #16741: Fix error reporting in int() and long(). + - Issue #18184: PyUnicode_FromFormat() and PyUnicode_FromFormatV() now raise OverflowError when an argument of %c format is out of range. 
diff -r ba4c826848d5 Objects/abstract.c --- a/Objects/abstract.c Sat Jul 13 04:05:42 2013 -0400 +++ b/Objects/abstract.c Sun Jul 14 15:34:25 2013 +0300 @@ -1463,25 +1463,6 @@ return type_error("bad operand type for abs(): '%.200s'", o); } -/* Add a check for embedded NULL-bytes in the argument. */ -static PyObject * -int_from_string(const char *s, Py_ssize_t len) -{ - char *end; - PyObject *x; - - x = PyInt_FromString((char*)s, &end, 10); - if (x == NULL) - return NULL; - if (end != s + len) { - PyErr_SetString(PyExc_ValueError, - "null byte in argument for int()"); - Py_DECREF(x); - return NULL; - } - return x; -} - /* Return a Python Int or Long from the object item Raise TypeError if the result is not an int-or-long or if the object cannot be interpreted as an index. @@ -1658,8 +1639,8 @@ PyErr_Clear(); /* It's not an error if o.__trunc__ doesn't exist. */ if (PyString_Check(o)) - return int_from_string(PyString_AS_STRING(o), - PyString_GET_SIZE(o)); + return _PyInt_FromBytes(PyString_AS_STRING(o), + PyString_GET_SIZE(o), 10); #ifdef Py_USING_UNICODE if (PyUnicode_Check(o)) return PyInt_FromUnicode(PyUnicode_AS_UNICODE(o), @@ -1667,31 +1648,12 @@ 10); #endif if (!PyObject_AsCharBuffer(o, &buffer, &buffer_len)) - return int_from_string((char*)buffer, buffer_len); + return _PyInt_FromBytes(buffer, buffer_len, 10); return type_error("int() argument must be a string or a " "number, not '%.200s'", o); } -/* Add a check for embedded NULL-bytes in the argument. */ -static PyObject * -long_from_string(const char *s, Py_ssize_t len) -{ - char *end; - PyObject *x; - - x = PyLong_FromString((char*)s, &end, 10); - if (x == NULL) - return NULL; - if (end != s + len) { - PyErr_SetString(PyExc_ValueError, - "null byte in argument for long()"); - Py_DECREF(x); - return NULL; - } - return x; -} - PyObject * PyNumber_Long(PyObject *o) { @@ -1753,11 +1715,11 @@ if (PyString_Check(o)) /* need to do extra error checking that PyLong_FromString() - * doesn't do. 
In particular long('9.5') must raise an - * exception, not truncate the float. + * doesn't do. In particular long('9\x005') must raise an + * exception, not truncate at the null. */ - return long_from_string(PyString_AS_STRING(o), - PyString_GET_SIZE(o)); + return _PyLong_FromBytes(PyString_AS_STRING(o), + PyString_GET_SIZE(o), 10); #ifdef Py_USING_UNICODE if (PyUnicode_Check(o)) /* The above check is done in PyLong_FromUnicode(). */ @@ -1766,7 +1728,7 @@ 10); #endif if (!PyObject_AsCharBuffer(o, &buffer, &buffer_len)) - return long_from_string(buffer, buffer_len); + return _PyLong_FromBytes(buffer, buffer_len, 10); return type_error("long() argument must be a string or a " "number, not '%.200s'", o); diff -r ba4c826848d5 Objects/intobject.c --- a/Objects/intobject.c Sat Jul 13 04:05:42 2013 -0400 +++ b/Objects/intobject.c Sun Jul 14 15:34:25 2013 +0300 @@ -5,6 +5,9 @@ #include <ctype.h> #include <float.h> +#undef MIN +#define MIN(x, y) ((x) > (y) ? (y) : (x)) + static PyObject *int_int(PyIntObject *v); long @@ -352,13 +355,21 @@ } #endif +/* Parses an int from a string. Leading and trailing whitespace will be + * ignored. + * + * If successful, a PyInt object will be returned and 'pend' will be pointing + * to the first unused byte unless it's NULL. + * + * If unsuccessful, NULL will be returned. + */ PyObject * PyInt_FromString(char *s, char **pend, int base) { - char *end; + char *start = s, *end; long x; Py_ssize_t slen; - PyObject *sobj, *srepr; + PyObject *result, *sobj, *srepr; if ((base != 0 && base < 2) || base > 36) { PyErr_SetString(PyExc_ValueError, @@ -382,8 +393,10 @@ end++; if (*end != '\0') { bad: - slen = strlen(s) < 200 ? strlen(s) : 200; - sobj = PyString_FromStringAndSize(s, slen); + if (pend != NULL) + *pend = end; + slen = strlen(start) < 200 ? 
strlen(start) : 200; + sobj = PyString_FromStringAndSize(start, slen); if (sobj == NULL) return NULL; srepr = PyObject_Repr(sobj); @@ -397,29 +410,78 @@ return NULL; } else if (errno != 0) - return PyLong_FromString(s, pend, base); - if (pend) + return PyLong_FromString(start, pend, base); + result = PyInt_FromLong(x); + if (result != NULL && pend != NULL) *pend = end; - return PyInt_FromLong(x); + return result; +} + +/* Since PyInt_FromString doesn't have a length parameter, + * check here for possible NULs in the string. + * + * Reports an invalid literal as a bytes object. + */ +PyObject * +_PyInt_FromBytes(const char *s, Py_ssize_t len, int base) +{ + PyObject *result, *strobj, *repr; + char *end = NULL; + + result = PyInt_FromString((char*)s, &end, base); + if (end == NULL || (result != NULL && end == s + len)) + return result; + Py_XDECREF(result); + strobj = PyBytes_FromStringAndSize(s, MIN(len, 200)); + if (strobj != NULL) { + repr = PyObject_Repr(strobj); + if (repr != NULL) { + PyErr_Format(PyExc_ValueError, + "invalid literal for int() with base %d: %s", + base, PyString_AS_STRING(repr)); + Py_DECREF(repr); + } + Py_DECREF(strobj); + } + return NULL; } #ifdef Py_USING_UNICODE PyObject * -PyInt_FromUnicode(Py_UNICODE *s, Py_ssize_t length, int base) +PyInt_FromUnicode(Py_UNICODE *u, Py_ssize_t length, int base) { - PyObject *result; - char *buffer = (char *)PyMem_MALLOC(length+1); + PyObject *result, *strobj, *repr; + char *buffer = (char *)PyMem_MALLOC(length+1), *end = NULL; if (buffer == NULL) return PyErr_NoMemory(); - if (PyUnicode_EncodeDecimal(s, length, buffer, NULL)) { + if (PyUnicode_EncodeDecimal(u, length, buffer, NULL)) { PyMem_FREE(buffer); - return NULL; + if (!PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) + return NULL; } - result = PyInt_FromString(buffer, NULL, base); - PyMem_FREE(buffer); - return result; + else { + result = PyInt_FromString(buffer, &end, base); + if (end == NULL || (result != NULL && end == buffer + length)) { + 
PyMem_FREE(buffer); + return result; + } + PyMem_FREE(buffer); + Py_XDECREF(result); + } + strobj = PyUnicode_FromUnicode(u, MIN(length, 200)); + if (strobj != NULL) { + repr = PyObject_Repr(strobj); + if (repr != NULL) { + PyErr_Format(PyExc_ValueError, + "invalid literal for int() with base %d: %s", + base, PyString_AS_STRING(repr)); + Py_DECREF(repr); + } + Py_DECREF(strobj); + } + return NULL; } #endif @@ -1084,23 +1146,9 @@ if (base == -909) return PyNumber_Int(x); if (PyString_Check(x)) { - /* Since PyInt_FromString doesn't have a length parameter, - * check here for possible NULs in the string. */ - char *string = PyString_AS_STRING(x); - if (strlen(string) != PyString_Size(x)) { - /* create a repr() of the input string, - * just like PyInt_FromString does */ - PyObject *srepr; - srepr = PyObject_Repr(x); - if (srepr == NULL) - return NULL; - PyErr_Format(PyExc_ValueError, - "invalid literal for int() with base %d: %s", - base, PyString_AS_STRING(srepr)); - Py_DECREF(srepr); - return NULL; - } - return PyInt_FromString(string, NULL, base); + return _PyInt_FromBytes(PyString_AS_STRING(x), + PyString_GET_SIZE(x), + base); } #ifdef Py_USING_UNICODE if (PyUnicode_Check(x)) diff -r ba4c826848d5 Objects/longobject.c --- a/Objects/longobject.c Sat Jul 13 04:05:42 2013 -0400 +++ b/Objects/longobject.c Sun Jul 14 15:34:25 2013 +0300 @@ -1711,6 +1711,14 @@ return long_normalize(z); } +/* Parses a long from a string. Leading and trailing whitespace will be + * ignored. + * + * If successful, a PyLong object will be returned and 'pend' will be pointing + * to the first unused byte unless it's NULL. + * + * If unsuccessful, NULL will be returned. + */ PyObject * PyLong_FromString(char *str, char **pend, int base) { @@ -1972,11 +1980,13 @@ str++; if (*str != '\0') goto onError; - if (pend) + if (pend != NULL) *pend = str; return (PyObject *) z; onError: + if (pend != NULL) + *pend = str; Py_XDECREF(z); slen = strlen(orig_str) < 200 ? 
strlen(orig_str) : 200; strobj = PyString_FromStringAndSize(orig_str, slen); @@ -1993,23 +2003,71 @@ return NULL; } +/* Since PyLong_FromString doesn't have a length parameter, + * check here for possible NULs in the string. + * + * Reports an invalid literal as a bytes object. + */ +PyObject * +_PyLong_FromBytes(const char *s, Py_ssize_t len, int base) +{ + PyObject *result, *strobj, *repr; + char *end = NULL; + + result = PyLong_FromString((char*)s, &end, base); + if (end == NULL || (result != NULL && end == s + len)) + return result; + Py_XDECREF(result); + strobj = PyBytes_FromStringAndSize(s, MIN(len, 200)); + if (strobj != NULL) { + repr = PyObject_Repr(strobj); + if (repr != NULL) { + PyErr_Format(PyExc_ValueError, + "invalid literal for long() with base %d: %s", + base, PyString_AS_STRING(repr)); + Py_DECREF(repr); + } + Py_DECREF(strobj); + } + return NULL; +} + #ifdef Py_USING_UNICODE PyObject * PyLong_FromUnicode(Py_UNICODE *u, Py_ssize_t length, int base) { - PyObject *result; - char *buffer = (char *)PyMem_MALLOC(length+1); + PyObject *result, *strobj, *repr; + char *buffer = (char *)PyMem_MALLOC(length+1), *end = NULL; if (buffer == NULL) - return NULL; + return PyErr_NoMemory(); if (PyUnicode_EncodeDecimal(u, length, buffer, NULL)) { PyMem_FREE(buffer); - return NULL; + if (!PyErr_ExceptionMatches(PyExc_UnicodeEncodeError)) + return NULL; } - result = PyLong_FromString(buffer, NULL, base); - PyMem_FREE(buffer); - return result; + else { + result = PyLong_FromString(buffer, &end, base); + if (end == NULL || (result != NULL && end == buffer + length)) { + PyMem_FREE(buffer); + return result; + } + PyMem_FREE(buffer); + Py_XDECREF(result); + } + strobj = PyUnicode_FromUnicode(u, MIN(length, 200)); + if (strobj != NULL) { + repr = PyObject_Repr(strobj); + if (repr != NULL) { + PyErr_Format(PyExc_ValueError, + "invalid literal for long() with base %d: %s", + base, PyString_AS_STRING(repr)); + Py_DECREF(repr); + } + Py_DECREF(strobj); + } + return NULL; } 
#endif @@ -4016,23 +4074,9 @@ if (base == -909) return PyNumber_Long(x); else if (PyString_Check(x)) { - /* Since PyLong_FromString doesn't have a length parameter, - * check here for possible NULs in the string. */ - char *string = PyString_AS_STRING(x); - if (strlen(string) != (size_t)PyString_Size(x)) { - /* create a repr() of the input string, - * just like PyLong_FromString does. */ - PyObject *srepr; - srepr = PyObject_Repr(x); - if (srepr == NULL) - return NULL; - PyErr_Format(PyExc_ValueError, - "invalid literal for long() with base %d: %s", - base, PyString_AS_STRING(srepr)); - Py_DECREF(srepr); - return NULL; - } - return PyLong_FromString(PyString_AS_STRING(x), NULL, base); + return _PyLong_FromBytes(PyString_AS_STRING(x), + PyString_GET_SIZE(x), + base); } #ifdef Py_USING_UNICODE else if (PyUnicode_Check(x))