diff -r 90e41d965228 Include/bytesobject.h
--- a/Include/bytesobject.h Wed Oct 14 10:10:00 2015 +0200
+++ b/Include/bytesobject.h Wed Oct 14 11:07:30 2015 +0200
@@ -67,6 +67,9 @@ PyAPI_FUNC(PyObject*) _PyBytes_FormatEx(
     Py_ssize_t format_len,
     PyObject *args,
     int use_bytearray);
+PyAPI_FUNC(PyObject*) _PyBytes_FromHex(
+    PyObject *string,
+    int use_bytearray);
 #endif
 PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t,
                                             const char *, Py_ssize_t,
diff -r 90e41d965228 Lib/test/test_bytes.py
--- a/Lib/test/test_bytes.py Wed Oct 14 10:10:00 2015 +0200
+++ b/Lib/test/test_bytes.py Wed Oct 14 11:07:30 2015 +0200
@@ -301,6 +301,20 @@ class BaseBytesTest:
         self.assertRaises(ValueError, self.type2test.fromhex, '\x00')
         self.assertRaises(ValueError, self.type2test.fromhex, '12 \x00 34')
 
+        for data, pos in (
+            # invalid first hexadecimal character
+            ('12 x4 56', 3),
+            # invalid second hexadecimal character
+            ('12 3x 56', 4),
+            # two invalid hexadecimal characters
+            ('12 xy 56', 3),
+            # test non-ASCII string
+            ('12 3\xff 56', 4),
+        ):
+            with self.assertRaises(ValueError) as cm:
+                self.type2test.fromhex(data)
+            self.assertIn('at position %s' % pos, str(cm.exception))
+
     def test_hex(self):
         self.assertRaises(TypeError, self.type2test.hex)
         self.assertRaises(TypeError, self.type2test.hex, 1)
diff -r 90e41d965228 Objects/bytearrayobject.c
--- a/Objects/bytearrayobject.c Wed Oct 14 10:10:00 2015 +0200
+++ b/Objects/bytearrayobject.c Wed Oct 14 11:07:30 2015 +0200
@@ -2823,48 +2823,7 @@ static PyObject *
 bytearray_fromhex_impl(PyObject*cls, PyObject *string)
 /*[clinic end generated code: output=df3da60129b3700c input=907bbd2d34d9367a]*/
 {
-    PyObject *newbytes;
-    char *buf;
-    Py_ssize_t hexlen, byteslen, i, j;
-    int top, bot;
-    void *data;
-    unsigned int kind;
-
-    assert(PyUnicode_Check(string));
-    if (PyUnicode_READY(string))
-        return NULL;
-    kind = PyUnicode_KIND(string);
-    data = PyUnicode_DATA(string);
-    hexlen = PyUnicode_GET_LENGTH(string);
-
-    byteslen = hexlen/2; /* This overestimates if there are spaces */
-    newbytes = PyByteArray_FromStringAndSize(NULL, byteslen);
-    if (!newbytes)
-        return NULL;
-    buf = PyByteArray_AS_STRING(newbytes);
-    for (i = j = 0; i < hexlen; i += 2) {
-        /* skip over spaces in the input */
-        while (PyUnicode_READ(kind, data, i) == ' ')
-            i++;
-        if (i >= hexlen)
-            break;
-        top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
-        bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
-        if (top == -1 || bot == -1) {
-            PyErr_Format(PyExc_ValueError,
-                         "non-hexadecimal number found in "
-                         "fromhex() arg at position %zd", i);
-            goto error;
-        }
-        buf[j++] = (top << 4) + bot;
-    }
-    if (PyByteArray_Resize(newbytes, j) < 0)
-        goto error;
-    return newbytes;
-
-  error:
-    Py_DECREF(newbytes);
-    return NULL;
+    return _PyBytes_FromHex(string, 1);  /* second argument is use_bytearray */
 }
 
 PyDoc_STRVAR(hex__doc__,
diff -r 90e41d965228 Objects/bytesobject.c
--- a/Objects/bytesobject.c Wed Oct 14 10:10:00 2015 +0200
+++ b/Objects/bytesobject.c Wed Oct 14 11:07:30 2015 +0200
@@ -30,6 +30,10 @@ static PyBytesObject *nullstring;
 */
 #define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
 
+/* Forward declaration; the definition appears further down in this file */
+Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
+                                                   char *str);
+
 /*
    For PyBytes_FromString(), the parameter `str' points to a null-terminated
    string containing exactly `size' bytes.
@@ -3078,20 +3082,78 @@ bytes_splitlines_impl(PyBytesObject*self
     );
 }
 
-static int
-hex_digit_to_int(Py_UCS4 c)
+PyObject*
+_PyBytes_FromHex(PyObject *string, int use_bytearray)
 {
-    if (c >= 128)
-        return -1;
-    if (Py_ISDIGIT(c))
-        return c - '0';
-    else {
-        if (Py_ISUPPER(c))
-            c = Py_TOLOWER(c);
-        if (c >= 'a' && c <= 'f')
-            return c - 'a' + 10;
+    char *buf;
+    Py_ssize_t hexlen, invalid_char;
+    unsigned int top, bot;
+    Py_UCS1 *str, *end;
+    _PyBytesWriter writer;
+    /* Parse pairs of hex digits from 'string'; spaces before a pair are skipped. */
+    _PyBytesWriter_Init(&writer);
+    writer.use_bytearray = use_bytearray;
+
+    assert(PyUnicode_Check(string));
+    if (PyUnicode_READY(string))
+        return NULL;
+    hexlen = PyUnicode_GET_LENGTH(string);
+
+    if (!PyUnicode_IS_ASCII(string)) {
+        void *data = PyUnicode_DATA(string);
+        unsigned int kind = PyUnicode_KIND(string);
+        Py_ssize_t i;
+
+        /* find the first character >= 128; its index is the reported position */
+        for (i = 0; i < hexlen; i++) {
+            if (PyUnicode_READ(kind, data, i) >= 128)
+                break;
+        }
+        invalid_char = i;
+        goto error;
     }
-    return -1;
+
+    assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
+    str = PyUnicode_1BYTE_DATA(string);
+
+    /* hexlen / 2 bytes is an upper bound; it overestimates if there are spaces */
+    buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
+    if (buf == NULL)
+        return NULL;
+
+    end = str + hexlen;
+    while (str < end) {
+        /* skip over spaces in the input */
+        while (*str == ' ')  /* not bounded by 'end'; 'end' is checked just below */
+            str++;
+        if (str >= end)
+            break;
+
+        top = _PyLong_DigitValue[*str];
+        if (top >= 16) {  /* table value >= 16: rejected as non-hexadecimal */
+            invalid_char = str - PyUnicode_1BYTE_DATA(string);
+            goto error;
+        }
+        str++;
+
+        bot = _PyLong_DigitValue[*str];  /* NOTE(review): may read *end; presumably a NUL terminator - confirm */
+        if (bot >= 16) {
+            invalid_char = str - PyUnicode_1BYTE_DATA(string);
+            goto error;
+        }
+        str++;
+
+        *buf++ = (unsigned char)((top << 4) + bot);  /* first digit is the high nibble */
+    }
+
+    return _PyBytesWriter_Finish(&writer, buf);
+
+  error:  /* invalid_char holds the index of the offending character */
+    PyErr_Format(PyExc_ValueError,
+                 "non-hexadecimal number found in "
+                 "fromhex() arg at position %zd", invalid_char);
+    _PyBytesWriter_Dealloc(&writer);
+    return NULL;
 }
 
 /*[clinic input]
@@ -3111,48 +3173,7 @@ static PyObject *
 bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
 /*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
 {
-    PyObject *newstring;
-    char *buf;
-    Py_ssize_t hexlen, byteslen, i, j;
-    int top, bot;
-    void *data;
-    unsigned int kind;
-
-    assert(PyUnicode_Check(string));
-    if (PyUnicode_READY(string))
-        return NULL;
-    kind = PyUnicode_KIND(string);
-    data = PyUnicode_DATA(string);
-    hexlen = PyUnicode_GET_LENGTH(string);
-
-    byteslen = hexlen/2; /* This overestimates if there are spaces */
-    newstring = PyBytes_FromStringAndSize(NULL, byteslen);
-    if (!newstring)
-        return NULL;
-    buf = PyBytes_AS_STRING(newstring);
-    for (i = j = 0; i < hexlen; i += 2) {
-        /* skip over spaces in the input */
-        while (PyUnicode_READ(kind, data, i) == ' ')
-            i++;
-        if (i >= hexlen)
-            break;
-        top = hex_digit_to_int(PyUnicode_READ(kind, data, i));
-        bot = hex_digit_to_int(PyUnicode_READ(kind, data, i+1));
-        if (top == -1 || bot == -1) {
-            PyErr_Format(PyExc_ValueError,
-                         "non-hexadecimal number found in "
-                         "fromhex() arg at position %zd", i);
-            goto error;
-        }
-        buf[j++] = (top << 4) + bot;
-    }
-    if (j != byteslen && _PyBytes_Resize(&newstring, j) < 0)
-        goto error;
-    return newstring;
-
-  error:
-    Py_XDECREF(newstring);
-    return NULL;
+    return _PyBytes_FromHex(string, 0);  /* second argument is use_bytearray */
 }
 
 PyDoc_STRVAR(hex__doc__,
@@ -3888,7 +3909,7 @@ Py_LOCAL_INLINE(char*)
 }
 
 Py_LOCAL_INLINE(Py_ssize_t)
-_PyBytesWriter_GetPos(_PyBytesWriter *writer, char *str)
+_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
 {
     char *start = _PyBytesWriter_AsString(writer);
     assert(str != NULL);
@@ -3963,7 +3984,7 @@ void*
         allocated += allocated / OVERALLOCATE_FACTOR;
     }
 
-    pos = _PyBytesWriter_GetPos(writer, str);
+    pos = _PyBytesWriter_GetSize(writer, str);
     if (!writer->use_small_buffer) {
         if (writer->use_bytearray) {
             if (PyByteArray_Resize(writer->buffer, allocated))
@@ -4041,33 +4062,33 @@ void*
 PyObject *
 _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
 {
-    Py_ssize_t pos;
+    Py_ssize_t size;  /* length given to the result object below */
     PyObject *result;
 
     _PyBytesWriter_CheckConsistency(writer, str);
 
-    pos = _PyBytesWriter_GetPos(writer, str);
-    if (pos == 0 && !writer->use_bytearray) {
+    size = _PyBytesWriter_GetSize(writer, str);
+    if (size == 0 && !writer->use_bytearray) {
         Py_CLEAR(writer->buffer);
         /* Get the empty byte string singleton */
         result = PyBytes_FromStringAndSize(NULL, 0);
     }
     else if (writer->use_small_buffer) {
-        result = PyBytes_FromStringAndSize(writer->small_buffer, pos);
+        result = PyBytes_FromStringAndSize(writer->small_buffer, size);
     }
     else {
         result = writer->buffer;
         writer->buffer = NULL;
 
-        if (pos != writer->allocated) {
+        if (size != writer->allocated) {  /* shrink only when the sizes differ */
             if (writer->use_bytearray) {
-                if (PyByteArray_Resize(result, pos)) {
+                if (PyByteArray_Resize(result, size)) {
                     Py_DECREF(result);
                     return NULL;
                 }
             }
             else {
-                if (_PyBytes_Resize(&result, pos)) {
+                if (_PyBytes_Resize(&result, size)) {
                     assert(result == NULL);
                     return NULL;
                 }