# HG changeset patch # Parent 53f2a1d311cd10adf35f2b13194df6e344a556e3 Issue #24802: Copy bytes-like objects to null-terminated buffers if necessary This avoids possible buffer overreads when int(), float(), compile(), exec() and eval() are passed bytes-like objects. Similar code removed from the complex() constructor, where it was not reachable. Patch by John Leitch, Serhiy Storchaka and Martin Panter. diff -r 53f2a1d311cd Lib/test/test_builtin.py --- a/Lib/test/test_builtin.py Wed Nov 04 09:03:53 2015 +0100 +++ b/Lib/test/test_builtin.py Thu Nov 05 03:45:03 2015 +0000 @@ -347,6 +347,26 @@ rv = ns['f']() self.assertEqual(rv, (debugval, docstring)) + def test_compile_null_terminated(self): + # The source code is null-terminated internally, but bytes-like + # objects are accepted, which may not be null-terminated. + with self.assertRaisesRegex(ValueError, "cannot contain null"): + compile("123\x00", "", "eval") + with self.assertRaisesRegex(ValueError, "cannot contain null"): + compile(memoryview(b"123\x00"), "", "eval") + code = compile(memoryview(b"123\x00")[1:-1], "", "eval") + self.assertEqual(eval(code), 23) + code = compile(memoryview(b"1234")[1:-1], "", "eval") + self.assertEqual(eval(code), 23) + code = compile(memoryview(b"$23$")[1:-1], "", "eval") + self.assertEqual(eval(code), 23) + + # Also test when eval() and exec() do the compilation step + self.assertEqual(eval(memoryview(b"1234")[1:-1]), 23) + namespace = dict() + exec(memoryview(b"ax = 123")[1:-1], namespace) + self.assertEqual(namespace['x'], 12) + def test_delattr(self): sys.spam = 1 delattr(sys, 'spam') diff -r 53f2a1d311cd Lib/test/test_float.py --- a/Lib/test/test_float.py Wed Nov 04 09:03:53 2015 +0100 +++ b/Lib/test/test_float.py Thu Nov 05 03:45:03 2015 +0000 @@ -31,7 +31,6 @@ self.assertEqual(float(3.14), 3.14) self.assertEqual(float(314), 314.0) self.assertEqual(float(" 3.14 "), 3.14) - self.assertEqual(float(b" 3.14 "), 3.14) self.assertRaises(ValueError, float, " 0x3.1 ") self.assertRaises(ValueError, float, " -0x3.p-1 ") self.assertRaises(ValueError, float, " +0x3.p-1 ") @@ -43,7 +42,6 @@ self.assertRaises(ValueError, float, "+.inf") self.assertRaises(ValueError, float, ".") self.assertRaises(ValueError, float, "-.") - self.assertRaises(ValueError, float, b"-") self.assertRaises(TypeError, float, {}) self.assertRaisesRegex(TypeError, "not 'dict'", float, {}) # Lone surrogate @@ -57,6 +55,42 @@ float(b'.' + b'1'*1000) float('.' + '1'*1000) + def test_non_numeric_input_types(self): + # Test possible non-numeric types for the argument x, including + # subclasses of the explicitly documented accepted types. + class CustomStr(str): pass + class CustomBytes(bytes): pass + class CustomByteArray(bytearray): pass + + factories = [ + bytes, + bytearray, + lambda b: CustomStr(b.decode()), + CustomBytes, + CustomByteArray, + memoryview, + ] + try: + from array import array + except ImportError: + pass + else: + factories.append(lambda b: array('B', b)) + + for f in factories: + x = f(b" 3.14 ") + with self.subTest(type(x)): + self.assertEqual(float(x), 3.14) + with self.assertRaisesRegex(ValueError, "could not convert"): + float(f(b'A' * 0x10)) + + def test_float_memoryview(self): + self.assertEqual(float(memoryview(b'12.3')[1:4]), 2.3) + self.assertEqual(float(memoryview(b'12.3\x00')[1:4]), 2.3) + self.assertEqual(float(memoryview(b'12.3 ')[1:4]), 2.3) + self.assertEqual(float(memoryview(b'12.3A')[1:4]), 2.3) + self.assertEqual(float(memoryview(b'12.34')[1:4]), 2.3) + def test_error_message(self): testlist = ('\xbd', '123\xbd', ' 123 456 ') for s in testlist: diff -r 53f2a1d311cd Lib/test/test_int.py --- a/Lib/test/test_int.py Wed Nov 04 09:03:53 2015 +0100 +++ b/Lib/test/test_int.py Thu Nov 05 03:45:03 2015 +0000 @@ -276,16 +276,40 @@ class CustomBytes(bytes): pass class CustomByteArray(bytearray): pass - values = [b'100', - bytearray(b'100'), - CustomStr('100'), - CustomBytes(b'100'), - CustomByteArray(b'100')] + factories = [ + bytes, + bytearray, + lambda b: CustomStr(b.decode()), + CustomBytes, + CustomByteArray, + memoryview, + ] + try: + from array import array + except ImportError: + pass + else: + factories.append(lambda b: array('B', b)) - for x in values: - msg = 'x has type %s' % type(x).__name__ - self.assertEqual(int(x), 100, msg=msg) - self.assertEqual(int(x, 2), 4, msg=msg) + for f in factories: + x = f(b'100') + with self.subTest(type(x)): + self.assertEqual(int(x), 100) + if isinstance(x, (str, bytes, bytearray)): + self.assertEqual(int(x, 2), 4) + else: + msg = "can't convert non-string" + with self.assertRaisesRegex(TypeError, msg): + int(x, 2) + with self.assertRaisesRegex(ValueError, 'invalid literal'): + int(f(b'A' * 0x10)) + + def test_int_memoryview(self): + self.assertEqual(int(memoryview(b'123')[1:3]), 23) + self.assertEqual(int(memoryview(b'123\x00')[1:3]), 23) + self.assertEqual(int(memoryview(b'123 ')[1:3]), 23) + self.assertEqual(int(memoryview(b'123A')[1:3]), 23) + self.assertEqual(int(memoryview(b'1234')[1:3]), 23) def test_string_float(self): self.assertRaises(ValueError, int, '1.2') diff -r 53f2a1d311cd Misc/NEWS --- a/Misc/NEWS Wed Nov 04 09:03:53 2015 +0100 +++ b/Misc/NEWS Thu Nov 05 03:45:03 2015 +0000 @@ -10,6 +10,10 @@ Core and Builtins ----------------- +- Issue #24802: Avoid buffer overreads when int(), float(), compile(), exec() + and eval() are passed bytes-like objects. These objects are not necessarily + terminated by a null byte, but the functions assumed they were. + - Issue #25395: Fixed crash when highly nested OrderedDict structures were garbage collected. diff -r 53f2a1d311cd Objects/abstract.c --- a/Objects/abstract.c Wed Nov 04 09:03:53 2015 +0100 +++ b/Objects/abstract.c Thu Nov 05 03:45:03 2015 +0000 @@ -1309,12 +1309,30 @@ /* The below check is done in PyLong_FromUnicode(). */ return PyLong_FromUnicodeObject(o, 10); - if (PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) == 0) { + if (PyBytes_Check(o)) /* need to do extra error checking that PyLong_FromString() * doesn't do. In particular int('9\x005') must raise an * exception, not truncate at the null. */ - PyObject *result = _PyLong_FromBytes(view.buf, view.len, 10); + return _PyLong_FromBytes(PyBytes_AS_STRING(o), + PyBytes_GET_SIZE(o), 10); + + if (PyByteArray_Check(o)) + return _PyLong_FromBytes(PyByteArray_AS_STRING(o), + PyByteArray_GET_SIZE(o), 10); + + if (PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) == 0) { + PyObject *result, *bytes; + + /* Copy to NUL-terminated buffer. */ + bytes = PyBytes_FromStringAndSize((const char *)view.buf, view.len); + if (bytes == NULL) { + PyBuffer_Release(&view); + return NULL; + } + result = _PyLong_FromBytes(PyBytes_AS_STRING(bytes), + PyBytes_GET_SIZE(bytes), 10); + Py_DECREF(bytes); PyBuffer_Release(&view); return result; } diff -r 53f2a1d311cd Objects/complexobject.c --- a/Objects/complexobject.c Wed Nov 04 09:03:53 2015 +0100 +++ b/Objects/complexobject.c Thu Nov 05 03:45:03 2015 +0000 @@ -767,7 +767,6 @@ int got_bracket=0; PyObject *s_buffer = NULL; Py_ssize_t len; - Py_buffer view = {NULL, NULL}; if (PyUnicode_Check(v)) { s_buffer = _PyUnicode_TransformDecimalAndSpaceToASCII(v); @@ -777,10 +776,6 @@ if (s == NULL) goto error; } - else if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) == 0) { - s = (const char *)view.buf; - len = view.len; - } else { PyErr_Format(PyExc_TypeError, "complex() argument must be a string or a number, not '%.200s'", @@ -895,7 +890,6 @@ if (s-start != len) goto parse_error; - PyBuffer_Release(&view); Py_XDECREF(s_buffer); return complex_subtype_from_doubles(type, x, y); @@ -903,7 +897,6 @@ PyErr_SetString(PyExc_ValueError, "complex() arg is a malformed string"); error: - PyBuffer_Release(&view); Py_XDECREF(s_buffer); return NULL; } diff -r 53f2a1d311cd Objects/floatobject.c --- a/Objects/floatobject.c Wed Nov 04 09:03:53 2015 +0100 +++ b/Objects/floatobject.c Thu Nov 05 03:45:03 2015 +0000 @@ -144,9 +144,24 @@ return NULL; } } + else if (PyBytes_Check(v)) { + s = PyBytes_AS_STRING(v); + len = PyBytes_GET_SIZE(v); + } + else if (PyByteArray_Check(v)) { + s = PyByteArray_AS_STRING(v); + len = PyByteArray_GET_SIZE(v); + } else if (PyObject_GetBuffer(v, &view, PyBUF_SIMPLE) == 0) { s = (const char *)view.buf; len = view.len; + /* Copy to NUL-terminated buffer. */ + s_buffer = PyBytes_FromStringAndSize(s, len); + if (s_buffer == NULL) { + PyBuffer_Release(&view); + return NULL; + } + s = PyBytes_AS_STRING(s_buffer); } else { PyErr_Format(PyExc_TypeError, diff -r 53f2a1d311cd Python/bltinmodule.c --- a/Python/bltinmodule.c Wed Nov 04 09:03:53 2015 +0100 +++ b/Python/bltinmodule.c Thu Nov 05 03:45:03 2015 +0000 @@ -599,20 +599,37 @@ static const char * -source_as_string(PyObject *cmd, const char *funcname, const char *what, PyCompilerFlags *cf, Py_buffer *view) +source_as_string(PyObject *cmd, const char *funcname, const char *what, PyCompilerFlags *cf, PyObject **cmd_copy) { const char *str; Py_ssize_t size; + Py_buffer view; + *cmd_copy = NULL; if (PyUnicode_Check(cmd)) { cf->cf_flags |= PyCF_IGNORE_COOKIE; str = PyUnicode_AsUTF8AndSize(cmd, &size); if (str == NULL) return NULL; } - else if (PyObject_GetBuffer(cmd, view, PyBUF_SIMPLE) == 0) { - str = (const char *)view->buf; - size = view->len; + else if (PyBytes_Check(cmd)) { + str = PyBytes_AS_STRING(cmd); + size = PyBytes_GET_SIZE(cmd); + } + else if (PyByteArray_Check(cmd)) { + str = PyByteArray_AS_STRING(cmd); + size = PyByteArray_GET_SIZE(cmd); + } + else if (PyObject_GetBuffer(cmd, &view, PyBUF_SIMPLE) == 0) { + /* Copy to NUL-terminated buffer. */ + *cmd_copy = PyBytes_FromStringAndSize( + (const char *)view.buf, view.len); + PyBuffer_Release(&view); + if (*cmd_copy == NULL) { + return NULL; + } + str = PyBytes_AS_STRING(*cmd_copy); + size = PyBytes_GET_SIZE(*cmd_copy); } else { PyErr_Format(PyExc_TypeError, @@ -624,7 +641,7 @@ if (strlen(str) != (size_t)size) { PyErr_SetString(PyExc_ValueError, "source code string cannot contain null bytes"); - PyBuffer_Release(view); + Py_CLEAR(*cmd_copy); return NULL; } return str; @@ -660,7 +677,7 @@ int dont_inherit, int optimize) /*[clinic end generated code: output=31881762c1bb90c4 input=9d53e8cfb3c86414]*/ { - Py_buffer view = {NULL, NULL}; + PyObject *source_copy; const char *str; int compile_mode = -1; int is_ast; @@ -732,12 +749,12 @@ goto finally; } - str = source_as_string(source, "compile", "string, bytes or AST", &cf, &view); + str = source_as_string(source, "compile", "string, bytes or AST", &cf, &source_copy); if (str == NULL) goto error; result = Py_CompileStringObject(str, filename, start[compile_mode], &cf, optimize); - PyBuffer_Release(&view); + Py_XDECREF(source_copy); goto finally; error: @@ -812,8 +829,7 @@ PyObject *locals) /*[clinic end generated code: output=7284501fb7b4d666 input=11ee718a8640e527]*/ { - PyObject *result, *tmp = NULL; - Py_buffer view = {NULL, NULL}; + PyObject *result, *source_copy; const char *str; PyCompilerFlags cf; @@ -861,7 +877,7 @@ } cf.cf_flags = PyCF_SOURCE_IS_UTF8; - str = source_as_string(source, "eval", "string, bytes or code", &cf, &view); + str = source_as_string(source, "eval", "string, bytes or code", &cf, &source_copy); if (str == NULL) return NULL; @@ -870,8 +886,7 @@ (void)PyEval_MergeCompilerFlags(&cf); result = PyRun_StringFlags(str, Py_eval_input, globals, locals, &cf); - PyBuffer_Release(&view); - Py_XDECREF(tmp); + Py_XDECREF(source_copy); return result; } @@ -942,12 +957,13 @@ v = PyEval_EvalCode(source, globals, locals); } else { - Py_buffer view = {NULL, NULL}; + PyObject *source_copy; const char *str; PyCompilerFlags cf; cf.cf_flags = PyCF_SOURCE_IS_UTF8; str = source_as_string(source, "exec", - "string, bytes or code", &cf, &view); + "string, bytes or code", &cf, + &source_copy); if (str == NULL) return NULL; if (PyEval_MergeCompilerFlags(&cf)) @@ -955,7 +971,7 @@ locals, &cf); else v = PyRun_String(str, Py_file_input, globals, locals); - PyBuffer_Release(&view); + Py_XDECREF(source_copy); } if (v == NULL) return NULL;