# HG changeset patch
# User MRAB
# Date 1356834937 0
# Node ID ffce7af849a4e6a5c3af4ccac7c7460e44ad080e
# Parent  c59ee1ff6f277d9985aaf45e3b9ed3a887c0caef
Issue #16741: `int()`, `float()`, etc think python strings are null-terminated

diff -r c59ee1ff6f27 -r ffce7af849a4 Lib/test/test_int.py
--- a/Lib/test/test_int.py	Sat Dec 29 23:41:08 2012 +0200
+++ b/Lib/test/test_int.py	Sun Dec 30 02:35:37 2012 +0000
@@ -217,6 +217,18 @@
         self.assertEqual(int('2br45qc', 35), 4294967297)
         self.assertEqual(int('1z141z5', 36), 4294967297)
 
+        # Bug #16741: `int()`, `float()`, etc think python strings are
+        # null-terminated.  The ValueError must quote the whole argument,
+        # whether it holds a control character, an embedded NUL or
+        # non-ASCII text.
+        for s in ("1\x01", "1\x00", "1" + " " * 198 + "\u03C0"):
+            # assertRaises fails the test when int() does not raise at all.
+            with self.assertRaises(ValueError) as cm:
+                int(s)
+            expected = "invalid literal for int() with base 10: {!r}".format(s)
+            # Comparing reprs also pins the exact exception class.
+            self.assertEqual(repr(cm.exception), repr(ValueError(expected)))
+
     @support.cpython_only
     def test_small_ints(self):
         # Bug #3236: Return small longs from PyLong_FromString
diff -r c59ee1ff6f27 -r ffce7af849a4 Objects/longobject.c
--- a/Objects/longobject.c	Sat Dec 29 23:41:08 2012 +0200
+++ b/Objects/longobject.c	Sun Dec 30 02:35:37 2012 +0000
@@ -1976,20 +1976,23 @@
     return long_normalize(z);
 }
 
-PyObject *
-PyLong_FromString(char *str, char **pend, int base)
+/* Parses a long from a bytestring. Leading and trailing whitespace will be
+ * ignored.
+ *
+ * If successful, a PyLong object will be returned and 'pend' will be pointing
+ * to the first unused byte unless it's NULL.
+ *
+ * If unsuccessful, NULL will be returned.  The onError path sets no
+ * exception; callers are expected to raise the ValueError themselves.
+ * 'base' is not validated here either: callers must check it first.
+ */
+static PyObject *
+long_from_string(char *str, char **pend, int base)
 {
     int sign = 1, error_if_nonzero = 0;
     char *start, *orig_str = str;
     PyLongObject *z = NULL;
-    PyObject *strobj;
-    Py_ssize_t slen;
-
-    if ((base != 0 && base < 2) || base > 36) {
-        PyErr_SetString(PyExc_ValueError,
-                        "int() arg 2 must be >= 2 and <= 36");
-        return NULL;
-    }
+
     while (*str != '\0' && isspace(Py_CHARMASK(*str)))
         str++;
     if (*str == '+')
@@ -2238,21 +2241,56 @@
         str++;
     if (*str != '\0')
         goto onError;
-    if (pend)
+    if (pend != NULL)
         *pend = str;
     long_normalize(z);
     return (PyObject *) maybe_small_long(z);
 
   onError:
     Py_XDECREF(z);
-    slen = strlen(orig_str) < 200 ? strlen(orig_str) : 200;
-    strobj = PyUnicode_FromStringAndSize(orig_str, slen);
-    if (strobj == NULL)
+    return NULL;
+}
+
+/* Parses a long from the NUL-terminated string 'str' in the given 'base'
+ * (0, or 2 to 36; anything else raises ValueError).
+ *
+ * Returns the parsed int object, or NULL on failure.  For a malformed
+ * literal the ValueError quotes at most the first 200 bytes of 'str'.
+ */
+PyObject *
+PyLong_FromString(char *str, char **pend, int base)
+{
+    PyObject *result;
+    Py_ssize_t slen;
+    PyObject *strobj;
+
+    if ((base != 0 && base < 2) || base > 36) {
+        PyErr_SetString(PyExc_ValueError,
+                        "int() base must be >= 2 and <= 36");
         return NULL;
-    PyErr_Format(PyExc_ValueError,
-                 "invalid literal for int() with base %d: %R",
-                 base, strobj);
-    Py_DECREF(strobj);
+    }
+    result = long_from_string(str, pend, base);
+    if (result != NULL && (pend == NULL || **pend == '\0'))
+        return result;
+    /* The visible success path of long_from_string always ends at the
+     * terminating NUL, but never leak a result that fails the check. */
+    Py_XDECREF(result);
+    slen = strlen(str);
+    if (slen > 200) {
+        slen = 200;
+        /* Don't truncate in the middle of a UTF-8 multibyte sequence.
+         * Stop at the start of the buffer so that a string made only of
+         * continuation bytes cannot drive slen negative. */
+        while (slen > 0 && (str[slen] & 0xC0) == 0x80)
+            slen--;
+    }
+    strobj = PyUnicode_FromStringAndSize(str, slen);
+    if (strobj != NULL) {
+        PyErr_Format(PyExc_ValueError,
+                     "invalid literal for int() with base %d: %R",
+                     base, strobj);
+        Py_DECREF(strobj);
+    }
     return NULL;
 }
 
@@ -2274,7 +2312,13 @@
     PyObject *asciidig;
     char *buffer, *end;
     Py_ssize_t buflen;
-
+    PyObject *strobj;
+
+    if ((base != 0 && base < 2) || base > 36) {
+        PyErr_SetString(PyExc_ValueError,
+                        "int() base must be >= 2 and <= 36");
+        return NULL;
+    }
     asciidig = _PyUnicode_TransformDecimalAndSpaceToASCII(u);
     if (asciidig == NULL)
         return NULL;
@@ -2283,11 +2327,19 @@
         Py_DECREF(asciidig);
         return NULL;
     }
-    result = PyLong_FromString(buffer, &end, base);
-    if (result != NULL && end != buffer + buflen) {
-        PyErr_SetString(PyExc_ValueError,
-                        "null byte in argument for int()");
-        Py_DECREF(result);
+    result = long_from_string(buffer, &end, base);
+    /* Parsing stops at the first NUL, so ending anywhere short of the real
+     * end of the buffer means the argument had an embedded null character. */
+    if (result == NULL || end != buffer + buflen) {
+        /* Report the original string (at most 200 characters of it). */
+        strobj = PySequence_GetSlice(u, 0, 200);
+        if (strobj != NULL) {
+            PyErr_Format(PyExc_ValueError,
+                         "invalid literal for int() with base %d: %R",
+                         base, strobj);
+            Py_DECREF(strobj);
+        }
+        Py_XDECREF(result);
         result = NULL;
     }
     Py_DECREF(asciidig);