Index: Objects/longobject.c
===================================================================
--- Objects/longobject.c (revision 74315)
+++ Objects/longobject.c (working copy)
@@ -4070,6 +4070,254 @@
 }
 #endif
 
+
+PyDoc_STRVAR(long_tobytes_doc,
+"int.tobytes(length, byteorder, *, signed=False) -> bytes\n\
+\n\
+Return an array of bytes representing an integer.\n\
+\n\
+The integer is represented using length bytes. An OverflowError is\n\
+raised if the integer is not representable using the given fixed number\n\
+of bytes.\n\
+\n\
+The byteorder argument determines the byte order used to represent the \n\
+integer. If byteorder is 'big', the most significant byte is at the \n\
+beginning of the byte array. If byteorder is 'little', the most \n\
+significant byte is at the end of the byte array. To request the native \n\
+byte order of the host system, use `sys.byteorder' as the byte order value.\n\
+\n\
+The signed keyword-only argument determines whether two's complement is \n\
+used to represent the integer. If signed is False and a negative integer \n\
+is given, an OverflowError is raised.");
+
+static PyObject *
+long_tobytes(PyLongObject *v, PyObject *args, PyObject *kwds)
+{
+    PyObject *byteorder_str;
+    PyObject *is_signed_obj = NULL;
+    PyObject *length_obj;
+    Py_ssize_t length;
+    int little_endian;
+    int is_signed;
+    int cmp;
+    PyObject *bytes;
+    static PyObject *little_str = NULL, *big_str = NULL;
+    static char *kwlist[] = {"length", "byteorder", "signed", NULL};
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|O:tobytes", kwlist,
+                                     &length_obj, &byteorder_str,
+                                     &is_signed_obj))
+        return NULL;
+
+    /* The "OO|O" format would also accept signed positionally, so a
+       third positional argument is rejected by hand. */
+    if (args != NULL && Py_SIZE(args) > 2) {
+        PyErr_SetString(PyExc_TypeError,
+                        "'signed' is a keyword-only argument");
+        return NULL;
+    }
+    /* Cache the interned byte order names.  Each one is checked on its
+       own so that a failure creating the second cannot leave it NULL
+       for later calls. */
+    if (little_str == NULL) {
+        little_str = PyUnicode_InternFromString("little");
+        if (little_str == NULL)
+            return NULL;
+    }
+    if (big_str == NULL) {
+        big_str = PyUnicode_InternFromString("big");
+        if (big_str == NULL)
+            return NULL;
+    }
+
+    /* PyObject_RichCompareBool() returns -1 on error; that must not be
+       mistaken for a successful match. */
+    cmp = PyObject_RichCompareBool(byteorder_str, little_str, Py_EQ);
+    if (cmp < 0)
+        return NULL;
+    if (cmp)
+        little_endian = 1;
+    else {
+        cmp = PyObject_RichCompareBool(byteorder_str, big_str, Py_EQ);
+        if (cmp < 0)
+            return NULL;
+        if (!cmp) {
+            PyErr_SetString(PyExc_ValueError,
+                "byteorder must be either 'little' or 'big'");
+            return NULL;
+        }
+        little_endian = 0;
+    }
+
+    if (is_signed_obj) {
+        cmp = PyObject_IsTrue(is_signed_obj);
+        if (cmp < 0)
+            return NULL;
+        is_signed = cmp ? 1 : 0;
+    }
+    else {
+        /* If the signed argument was omitted, use False as the
+           default. */
+        is_signed = 0;
+    }
+
+    if (length_obj && PyLong_Check(length_obj)) {
+        /* The size of the byte array is set by the user. */
+        length = PyLong_AsSsize_t(length_obj);
+        if (length == -1 && PyErr_Occurred())
+            return NULL;
+        if (length < 0) {
+            PyErr_SetString(PyExc_ValueError,
+                            "length argument must be non-negative");
+            return NULL;
+        }
+    }
+    else {
+        PyErr_Format(PyExc_TypeError,
+                     "length must be an integer, not %.200s",
+                     Py_TYPE(length_obj)->tp_name);
+        return NULL;
+    }
+
+    bytes = PyBytes_FromStringAndSize(NULL, length);
+    if (!bytes)
+        return NULL;
+
+    if (_PyLong_AsByteArray(v, (unsigned char *)PyBytes_AS_STRING(bytes),
+                            length, little_endian, is_signed) < 0) {
+        Py_DECREF(bytes);
+        return NULL;
+    }
+
+    return bytes;
+}
+
+PyDoc_STRVAR(long_frombytes_doc,
+"int.frombytes(bytes, byteorder, *, signed=False) -> int\n\
+\n\
+Return the integer represented by the given array of bytes.\n\
+\n\
+The bytes argument must either support the buffer protocol or be an\n\
+iterable object producing bytes. Builtin objects that support the buffer\n\
+protocol include bytes and bytearray.\n\
+\n\
+The byteorder argument determines the byte order used to represent the \n\
+integer. If byteorder is 'big', the most significant byte is at the \n\
+beginning of the byte array. If byteorder is 'little', the most \n\
+significant byte is at the end of the byte array. To request the native \n\
+byte order of the host system, use `sys.byteorder' as the byte order value.\n\
+\n\
+The signed keyword-only argument indicates whether two's complement is \n\
+used to represent the integer.");
+
+static PyObject *
+long_frombytes(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+    PyObject *byteorder_str;
+    PyObject *is_signed_obj = NULL;
+    int little_endian;
+    int is_signed;
+    int cmp;
+    PyObject *obj;
+    PyObject *bytes;
+    PyObject *long_obj;
+    static PyObject *little_str = NULL, *big_str = NULL;
+    static char *kwlist[] = {"bytes", "byteorder", "signed", NULL};
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|O:frombytes", kwlist,
+                                     &obj, &byteorder_str,
+                                     &is_signed_obj))
+        return NULL;
+
+    /* The "OO|O" format would also accept signed positionally, so a
+       third positional argument is rejected by hand. */
+    if (args != NULL && Py_SIZE(args) > 2) {
+        PyErr_SetString(PyExc_TypeError,
+                        "'signed' is a keyword-only argument");
+        return NULL;
+    }
+    /* Cache the interned byte order names.  Each one is checked on its
+       own so that a failure creating the second cannot leave it NULL
+       for later calls. */
+    if (little_str == NULL) {
+        little_str = PyUnicode_InternFromString("little");
+        if (little_str == NULL)
+            return NULL;
+    }
+    if (big_str == NULL) {
+        big_str = PyUnicode_InternFromString("big");
+        if (big_str == NULL)
+            return NULL;
+    }
+
+    /* PyObject_RichCompareBool() returns -1 on error; that must not be
+       mistaken for a successful match. */
+    cmp = PyObject_RichCompareBool(byteorder_str, little_str, Py_EQ);
+    if (cmp < 0)
+        return NULL;
+    if (cmp)
+        little_endian = 1;
+    else {
+        cmp = PyObject_RichCompareBool(byteorder_str, big_str, Py_EQ);
+        if (cmp < 0)
+            return NULL;
+        if (!cmp) {
+            PyErr_SetString(PyExc_ValueError,
+                "byteorder must be either 'little' or 'big'");
+            return NULL;
+        }
+        little_endian = 0;
+    }
+
+    if (is_signed_obj) {
+        cmp = PyObject_IsTrue(is_signed_obj);
+        if (cmp < 0)
+            return NULL;
+        is_signed = cmp ? 1 : 0;
+    }
+    else {
+        /* If the signed argument was omitted, use False as the
+           default. */
+        is_signed = 0;
+    }
+
+    bytes = PyObject_Bytes(obj);
+    if (!bytes)
+        return NULL;
+
+    long_obj = _PyLong_FromByteArray(
+        (unsigned char *)PyBytes_AS_STRING(bytes), Py_SIZE(bytes),
+        little_endian, is_signed);
+    Py_DECREF(bytes);
+    if (long_obj == NULL)
+        return NULL;
+
+    /* If frombytes() was used on subclass, allocate new subclass
+     * instance, initialize it with decoded long value and return it.
+     */
+    if (type != &PyLong_Type && PyType_IsSubtype(type, &PyLong_Type)) {
+        PyLongObject *newobj;
+        Py_ssize_t i;
+        Py_ssize_t n = ABS(Py_SIZE(long_obj));
+
+        newobj = (PyLongObject *)type->tp_alloc(type, n);
+        if (newobj == NULL) {
+            Py_DECREF(long_obj);
+            return NULL;
+        }
+        assert(PyLong_Check(newobj));
+        Py_SIZE(newobj) = Py_SIZE(long_obj);
+        for (i = 0; i < n; i++) {
+            newobj->ob_digit[i] =
+                ((PyLongObject *)long_obj)->ob_digit[i];
+        }
+        Py_DECREF(long_obj);
+        return (PyObject *)newobj;
+    }
+
+    return long_obj;
+}
+
 static PyMethodDef long_methods[] = {
     {"conjugate", (PyCFunction)long_long, METH_NOARGS,
      "Returns self, the complex conjugate of any int."},
@@ -4079,6 +4327,10 @@
     {"is_finite", (PyCFunction)long_is_finite, METH_NOARGS,
      "Returns always True."},
 #endif
+    {"tobytes", (PyCFunction)long_tobytes,
+     METH_VARARGS|METH_KEYWORDS, long_tobytes_doc},
+    {"frombytes", (PyCFunction)long_frombytes,
+     METH_VARARGS|METH_KEYWORDS|METH_CLASS, long_frombytes_doc},
     {"__trunc__", (PyCFunction)long_long, METH_NOARGS,
      "Truncating an Integral returns itself."},
     {"__floor__", (PyCFunction)long_long, METH_NOARGS,
Index: Doc/c-api/object.rst
===================================================================
--- Doc/c-api/object.rst (revision 74315)
+++ Doc/c-api/object.rst (working copy)
@@ -142,11 +142,12 @@
 
    .. index:: builtin: bytes
 
-   Compute a bytes representation of object *o*. *NULL* is returned on failure
-   and a bytes object on success. This is equivalent to the Python expression
-   ``bytes(o)``.
+   Compute a bytes representation of object *o*. *NULL* is returned on
+   failure and a bytes object on success. This is equivalent to the Python
+   expression ``bytes(o)``, when *o* is not an integer. Unlike ``bytes(o)``,
+   a TypeError is raised when *o* is an integer instead of a zero-initialized
+   bytes object.
 
-
 .. 
cfunction:: int PyObject_IsInstance(PyObject *inst, PyObject *cls) Returns ``1`` if *inst* is an instance of the class *cls* or a subclass of Index: Doc/library/stdtypes.rst =================================================================== --- Doc/library/stdtypes.rst (revision 74315) +++ Doc/library/stdtypes.rst (working copy) @@ -457,7 +457,70 @@ .. versionadded:: 3.1 + .. method:: int.tobytes(length, byteorder, [\*, signed=False]) + Return an array of bytes representing an integer. + + >>> (1024).tobytes(2, byteorder='big') + b'\x04\x00' + >>> (1024).tobytes(10, byteorder='big') + b'\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00' + >>> (-1024).tobytes(10, byteorder='big', signed=True) + b'\xff\xff\xff\xff\xff\xff\xff\xff\xfc\x00' + >>> x = 1000 + >>> x.tobytes((x.bit_length() // 8) + 1, byteorder='little') + b'\xe8\x03' + + The integer is represented using *length* bytes. An + :exc:`OverflowError` is raised if the integer is not representable using + the given fixed number of bytes. + + The *byteorder* argument determines the byte order used to represent the + integer. If *byteorder* is ``"big"``, the most significant byte is at the + beginning of the byte array. If *byteorder* is ``"little"``, the most + significant byte is at the end of the byte array. To request the native + byte order of the host system, use :data:`sys.byteorder` as the byte order + value. + + The *signed* argument determines whether two's complement is used to + represent the integer. If *signed* is ``False`` and a negative integer is + given, an :exc:`OverflowError` is raised. The default value for *signed* + is ``False``. + + .. versionadded:: 3.2 + + .. classmethod:: int.frombytes(bytes, byteorder, [\*, signed=False]) + + Return the integer represented by the given array of bytes.
+
+        >>> int.frombytes(b'\x00\x10', byteorder='big')
+        16
+        >>> int.frombytes(b'\x00\x10', byteorder='little')
+        4096
+        >>> int.frombytes(b'\xfc\x00', byteorder='big', signed=True)
+        -1024
+        >>> int.frombytes(b'\xfc\x00', byteorder='big', signed=False)
+        64512
+        >>> int.frombytes([255, 0, 0], byteorder='big')
+        16711680
+
+    The argument *bytes* must either support the buffer protocol or be an
+    iterable producing bytes. Builtin objects that support the buffer
+    protocol include :class:`bytes` and :class:`bytearray`.
+
+    The *byteorder* argument determines the byte order used to represent the
+    integer. If *byteorder* is ``"big"``, the most significant byte is at the
+    beginning of the byte array. If *byteorder* is ``"little"``, the most
+    significant byte is at the end of the byte array. To request the native
+    byte order of the host system, use :data:`sys.byteorder` as the byte order
+    value.
+
+    The *signed* argument indicates whether two's complement is used to
+    represent the integer.
+
+    .. versionadded:: 3.2
+
+
 Additional Methods on Float
 ---------------------------
 
Index: Lib/test/test_long.py
===================================================================
--- Lib/test/test_long.py (revision 74315)
+++ Lib/test/test_long.py (working copy)
@@ -4,6 +4,7 @@
 
 import random
 import math
+import array
 
 # Used for lazy formatting of failure messages
 class Frm(object):
@@ -1058,8 +1059,240 @@
         for e in bad_exponents:
             self.assertRaises(TypeError, round, 3, e)
 
+    def test_tobytes(self):
+        def check(tests, byteorder, signed=False):
+            for test, expected in tests.items():
+                try:
+                    self.assertEqual(
+                        test.tobytes(len(expected), byteorder, signed=signed),
+                        expected)
+                except Exception as err:
+                    raise AssertionError(
+                        "failed to convert {0} with byteorder={1!r} and signed={2}"
+                        .format(test, byteorder, signed)) from err
+
+        # Convert integers to signed big-endian byte arrays.
+        tests1 = {
+            0: b'\x00',
+            1: b'\x01',
+            -1: b'\xff',
+            -127: b'\x81',
+            -128: b'\x80',
+            -129: b'\xff\x7f',
+            127: b'\x7f',
+            129: b'\x00\x81',
+            -255: b'\xff\x01',
+            -256: b'\xff\x00',
+            255: b'\x00\xff',
+            256: b'\x01\x00',
+            32767: b'\x7f\xff',
+            -32768: b'\xff\x80\x00',
+            65535: b'\x00\xff\xff',
+            -65536: b'\xff\x00\x00',
+            -8388608: b'\x80\x00\x00'
+        }
+        check(tests1, 'big', signed=True)
 
+        # Convert integers to signed little-endian byte arrays.
+        tests2 = {
+            0: b'\x00',
+            1: b'\x01',
+            -1: b'\xff',
+            -127: b'\x81',
+            -128: b'\x80',
+            -129: b'\x7f\xff',
+            127: b'\x7f',
+            129: b'\x81\x00',
+            -255: b'\x01\xff',
+            -256: b'\x00\xff',
+            255: b'\xff\x00',
+            256: b'\x00\x01',
+            32767: b'\xff\x7f',
+            -32768: b'\x00\x80',
+            65535: b'\xff\xff\x00',
+            -65536: b'\x00\x00\xff',
+            -8388608: b'\x00\x00\x80'
+        }
+        check(tests2, 'little', signed=True)
 
+        # Convert integers to unsigned big-endian byte arrays.
+        tests3 = {
+            0: b'\x00',
+            1: b'\x01',
+            127: b'\x7f',
+            128: b'\x80',
+            255: b'\xff',
+            256: b'\x01\x00',
+            32767: b'\x7f\xff',
+            32768: b'\x80\x00',
+            65535: b'\xff\xff',
+            65536: b'\x01\x00\x00'
+        }
+        check(tests3, 'big', signed=False)
+
+        # Convert integers to unsigned little-endian byte arrays.
+        tests4 = {
+            0: b'\x00',
+            1: b'\x01',
+            127: b'\x7f',
+            128: b'\x80',
+            255: b'\xff',
+            256: b'\x00\x01',
+            32767: b'\xff\x7f',
+            32768: b'\x00\x80',
+            65535: b'\xff\xff',
+            65536: b'\x00\x00\x01'
+        }
+        check(tests4, 'little', signed=False)
+
+        self.assertRaises(OverflowError, (256).tobytes, 1, 'big', signed=False)
+        self.assertRaises(OverflowError, (256).tobytes, 1, 'big', signed=True)
+        self.assertRaises(OverflowError, (256).tobytes, 1, 'little', signed=False)
+        self.assertRaises(OverflowError, (256).tobytes, 1, 'little', signed=True)
+        self.assertRaises(OverflowError, (-1).tobytes, 2, 'big', signed=False)
+        self.assertRaises(OverflowError, (-1).tobytes, 2, 'little', signed=False)
+        self.assertEqual((0).tobytes(0, 'big'), b'')
+        self.assertEqual((1).tobytes(5, 'big'), b'\x00\x00\x00\x00\x01')
+        self.assertEqual((0).tobytes(5, 'big'), b'\x00\x00\x00\x00\x00')
+        self.assertEqual((-1).tobytes(5, 'big', signed=True),
+                         b'\xff\xff\xff\xff\xff')
+        self.assertRaises(OverflowError, (1).tobytes, 0, 'big')
+
+    def test_frombytes(self):
+        def check(tests, byteorder, signed=False):
+            for test, expected in tests.items():
+                try:
+                    self.assertEqual(
+                        int.frombytes(test, byteorder, signed=signed),
+                        expected)
+                except Exception as err:
+                    raise AssertionError(
+                        "failed to convert {0} with byteorder={1!r} and signed={2}"
+                        .format(test, byteorder, signed)) from err
+
+        # Convert signed big-endian byte arrays to integers.
+        tests1 = {
+            b'': 0,
+            b'\x00': 0,
+            b'\x00\x00': 0,
+            b'\x01': 1,
+            b'\x00\x01': 1,
+            b'\xff': -1,
+            b'\xff\xff': -1,
+            b'\x81': -127,
+            b'\x80': -128,
+            b'\xff\x7f': -129,
+            b'\x7f': 127,
+            b'\x00\x81': 129,
+            b'\xff\x01': -255,
+            b'\xff\x00': -256,
+            b'\x00\xff': 255,
+            b'\x01\x00': 256,
+            b'\x7f\xff': 32767,
+            b'\x80\x00': -32768,
+            b'\x00\xff\xff': 65535,
+            b'\xff\x00\x00': -65536,
+            b'\x80\x00\x00': -8388608
+        }
+        check(tests1, 'big', signed=True)
+
+        # Convert signed little-endian byte arrays to integers.
+        tests2 = {
+            b'': 0,
+            b'\x00': 0,
+            b'\x00\x00': 0,
+            b'\x01': 1,
+            b'\x01\x00': 1,
+            b'\xff': -1,
+            b'\xff\xff': -1,
+            b'\x81': -127,
+            b'\x80': -128,
+            b'\x7f\xff': -129,
+            b'\x7f': 127,
+            b'\x81\x00': 129,
+            b'\x01\xff': -255,
+            b'\x00\xff': -256,
+            b'\xff\x00': 255,
+            b'\x00\x01': 256,
+            b'\xff\x7f': 32767,
+            b'\x00\x80': -32768,
+            b'\xff\xff\x00': 65535,
+            b'\x00\x00\xff': -65536,
+            b'\x00\x00\x80': -8388608
+        }
+        check(tests2, 'little', signed=True)
+
+        # Convert unsigned big-endian byte arrays to integers.
+        tests3 = {
+            b'': 0,
+            b'\x00': 0,
+            b'\x01': 1,
+            b'\x7f': 127,
+            b'\x80': 128,
+            b'\xff': 255,
+            b'\x01\x00': 256,
+            b'\x7f\xff': 32767,
+            b'\x80\x00': 32768,
+            b'\xff\xff': 65535,
+            b'\x01\x00\x00': 65536,
+        }
+        check(tests3, 'big', signed=False)
+
+        # Convert unsigned little-endian byte arrays to integers.
+        tests4 = {
+            b'': 0,
+            b'\x00': 0,
+            b'\x01': 1,
+            b'\x7f': 127,
+            b'\x80': 128,
+            b'\xff': 255,
+            b'\x00\x01': 256,
+            b'\xff\x7f': 32767,
+            b'\x00\x80': 32768,
+            b'\xff\xff': 65535,
+            b'\x00\x00\x01': 65536,
+        }
+        check(tests4, 'little', signed=False)
+
+        class myint(int):
+            pass
+
+        self.assertTrue(type(myint.frombytes(b'\x00', 'big')) is myint)
+        self.assertEqual(myint.frombytes(b'\x01', 'big'), 1)
+        self.assertTrue(
+            type(myint.frombytes(b'\x00', 'big', signed=False)) is myint)
+        self.assertEqual(myint.frombytes(b'\x01', 'big', signed=False), 1)
+        self.assertTrue(type(myint.frombytes(b'\x00', 'little')) is myint)
+        self.assertEqual(myint.frombytes(b'\x01', 'little'), 1)
+        self.assertTrue(type(myint.frombytes(
+            b'\x00', 'little', signed=False)) is myint)
+        self.assertEqual(myint.frombytes(b'\x01', 'little', signed=False), 1)
+        self.assertEqual(
+            int.frombytes([255, 0, 0], 'big', signed=True), -65536)
+        self.assertEqual(
+            int.frombytes((255, 0, 0), 'big', signed=True), -65536)
+        self.assertEqual(int.frombytes(
+                bytearray(b'\xff\x00\x00'), 'big', signed=True), -65536)
+        self.assertEqual(int.frombytes(
+                bytes(b'\xff\x00\x00'), 'big', signed=True), -65536)
+        self.assertEqual(int.frombytes(
+                array.array('B', b'\xff\x00\x00'), 'big', signed=True), -65536)
+        self.assertEqual(int.frombytes(
+                memoryview(b'\xff\x00\x00'), 'big', signed=True), -65536)
+        self.assertRaises(ValueError, int.frombytes, [256], 'big')
+        self.assertRaises(ValueError, int.frombytes, [0], 'big\x00')
+        self.assertRaises(ValueError, int.frombytes, [0], 'little\x00')
+        self.assertRaises(TypeError, int.frombytes, "", 'big')
+        self.assertRaises(TypeError, int.frombytes, "\x00", 'big')
+        self.assertRaises(TypeError, int.frombytes, 0, 'big')
+        self.assertRaises(TypeError, int.frombytes, 0, 'big', True)
+        self.assertRaises(TypeError, myint.frombytes, "", 'big')
+        self.assertRaises(TypeError, myint.frombytes, "\x00", 'big')
+        self.assertRaises(TypeError, myint.frombytes, 0, 'big')
+        self.assertRaises(TypeError, myint.frombytes, 0, 'big', True)
+
+
+
 def test_main():
     support.run_unittest(LongTest)
 
Index: Lib/pickle.py
===================================================================
--- Lib/pickle.py (revision 74315)
+++ Lib/pickle.py (working copy)
@@ -1269,51 +1269,15 @@
     b'\x7f'
     >>>
     """
-
     if x == 0:
         return b''
-    if x > 0:
-        ashex = hex(x)
-        assert ashex.startswith("0x")
-        njunkchars = 2 + ashex.endswith('L')
-        nibbles = len(ashex) - njunkchars
-        if nibbles & 1:
-            # need an even # of nibbles for unhexlify
-            ashex = "0x0" + ashex[2:]
-        elif int(ashex[2], 16) >= 8:
-            # "looks negative", so need a byte of sign bits
-            ashex = "0x00" + ashex[2:]
-    else:
-        # Build the 256's-complement: (1L << nbytes) + x. The trick is
-        # to find the number of bytes in linear time (although that should
-        # really be a constant-time task).
-        ashex = hex(-x)
-        assert ashex.startswith("0x")
-        njunkchars = 2 + ashex.endswith('L')
-        nibbles = len(ashex) - njunkchars
-        if nibbles & 1:
-            # Extend to a full byte.
- nibbles += 1 - nbits = nibbles * 4 - x += 1 << nbits - assert x > 0 - ashex = hex(x) - njunkchars = 2 + ashex.endswith('L') - newnibbles = len(ashex) - njunkchars - if newnibbles < nibbles: - ashex = "0x" + "0" * (nibbles - newnibbles) + ashex[2:] - if int(ashex[2], 16) < 8: - # "looks positive", so need a byte of sign bits - ashex = "0xff" + ashex[2:] + nbytes = (x.bit_length() >> 3) + 1 + result = x.tobytes(nbytes, byteorder='little', signed=True) + if x < 0 and nbytes > 1: + if result[-1] == 0xff and (result[-2] & 0x80) != 0: + result = result[:-1] + return result - if ashex.endswith('L'): - ashex = ashex[2:-1] - else: - ashex = ashex[2:] - assert len(ashex) & 1 == 0, (x, ashex) - binary = _binascii.unhexlify(ashex) - return bytes(binary[::-1]) - def decode_long(data): r"""Decode a long from a two's complement little-endian binary string. @@ -1332,16 +1296,8 @@ >>> decode_long(b"\x7f") 127 """ + return int.frombytes(data, byteorder='little', signed=True) - nbytes = len(data) - if nbytes == 0: - return 0 - ashex = _binascii.hexlify(data[::-1]) - n = int(ashex, 16) # quadratic time before Python 2.3; linear now - if data[-1] >= 0x80: - n -= 1 << (nbytes * 8) - return n - # Use the faster _pickle if possible try: from _pickle import *