diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst --- a/Doc/library/binascii.rst +++ b/Doc/library/binascii.rst @@ -20,8 +20,13 @@ higher-level modules. .. note:: - Encoding and decoding functions do not accept Unicode strings. Only bytestring - and bytearray objects can be processed. + ``a2b_*`` functions accept Unicode strings containing only ASCII characters. + Other functions only accept bytes and bytes-compatible objects (such as + bytearray objects and other objects implementing the buffer API). + + .. versionchanged:: 3.3 + ASCII-only Unicode strings are now accepted by the ``a2b_*`` functions. + The :mod:`binascii` module defines the following functions: diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -208,9 +208,9 @@ class BinASCIITest(unittest.TestCase): except Exception as err: self.fail("{}({!r}) raises {!r}".format(func, empty, err)) - def test_unicode_strings(self): - # Unicode strings are not accepted. - for func in all_functions: + def test_unicode_b2a(self): + # Unicode strings are not accepted by b2a_* functions. + for func in set(all_functions) - set(a2b_functions) | {'rledecode_hqx'}: try: self.assertRaises(TypeError, getattr(binascii, func), "test") except Exception as err: @@ -218,6 +218,34 @@ class BinASCIITest(unittest.TestCase): # crc_hqx needs 2 arguments self.assertRaises(TypeError, binascii.crc_hqx, "test", 0) + def test_unicode_a2b(self): + # Unicode strings are accepted by a2b_* functions. 
+ MAX_ALL = 45 + raw = self.rawdata[:MAX_ALL] + for fa, fb in zip(a2b_functions, b2a_functions): + if fa == 'rledecode_hqx': + # Takes non-ASCII data + continue + a2b = getattr(binascii, fa) + b2a = getattr(binascii, fb) + try: + a = b2a(self.type2test(raw)) + binary_res = a2b(a) + a = a.decode('ascii') + res = a2b(a) + except Exception as err: + self.fail("{}/{} conversion raises {!r}".format(fb, fa, err)) + if fb == 'b2a_hqx': + # a2b_hqx returns a tuple + res, _ = res + binary_res, _ = binary_res + self.assertEqual(res, raw, "{}/{} conversion: " + "{!r} != {!r}".format(fb, fa, res, raw)) + self.assertEqual(res, binary_res) + self.assertIsInstance(res, bytes) + # non-ASCII string + self.assertRaises(ValueError, a2b, "\x80") + class ArrayBinASCIITest(BinASCIITest): def type2test(self, s): diff --git a/Modules/binascii.c b/Modules/binascii.c --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -183,6 +183,44 @@ static unsigned short crctab_hqx[256] = 0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0, }; +static int +ascii_buffer_converter(PyObject *arg, Py_buffer *buf) +{ + if (arg == NULL) { + PyBuffer_Release(buf); + return 1; + } + if (PyUnicode_Check(arg)) { + if (PyUnicode_READY(arg) < 0) + return 0; + if (!PyUnicode_IS_ASCII(arg)) { + PyErr_SetString(PyExc_ValueError, + "string argument should contain only ASCII characters"); + return 0; + } + assert(PyUnicode_KIND(arg) == PyUnicode_1BYTE_KIND); + buf->buf = (void *) PyUnicode_1BYTE_DATA(arg); + buf->len = PyUnicode_GET_LENGTH(arg); + buf->obj = NULL; + return 1; + } + if (PyObject_GetBuffer(arg, buf, PyBUF_SIMPLE) != 0) { + PyErr_Format(PyExc_TypeError, + "argument should be bytes, buffer or ASCII string, " + "not %R", Py_TYPE(arg)); + return 0; + } + if (!PyBuffer_IsContiguous(buf, 'C')) { + PyErr_Format(PyExc_TypeError, + "argument should be a contiguous buffer, " + "not %R", Py_TYPE(arg)); + PyBuffer_Release(buf); + return 0; + } + return Py_CLEANUP_SUPPORTED; +} + + PyDoc_STRVAR(doc_a2b_uu, 
"(ascii) -> bin. Decode a line of uuencoded data"); static PyObject * @@ -196,7 +234,7 @@ binascii_a2b_uu(PyObject *self, PyObject PyObject *rv; Py_ssize_t ascii_len, bin_len; - if ( !PyArg_ParseTuple(args, "y*:a2b_uu", &pascii) ) + if ( !PyArg_ParseTuple(args, "O&:a2b_uu", ascii_buffer_converter, &pascii) ) return NULL; ascii_data = pascii.buf; ascii_len = pascii.len; @@ -370,7 +408,7 @@ binascii_a2b_base64(PyObject *self, PyOb Py_ssize_t ascii_len, bin_len; int quad_pos = 0; - if ( !PyArg_ParseTuple(args, "y*:a2b_base64", &pascii) ) + if ( !PyArg_ParseTuple(args, "O&:a2b_base64", ascii_buffer_converter, &pascii) ) return NULL; ascii_data = pascii.buf; ascii_len = pascii.len; @@ -546,7 +584,7 @@ binascii_a2b_hqx(PyObject *self, PyObjec Py_ssize_t len; int done = 0; - if ( !PyArg_ParseTuple(args, "y*:a2b_hqx", &pascii) ) + if ( !PyArg_ParseTuple(args, "O&:a2b_hqx", ascii_buffer_converter, &pascii) ) return NULL; ascii_data = pascii.buf; len = pascii.len; @@ -1119,7 +1157,7 @@ binascii_unhexlify(PyObject *self, PyObj char* retbuf; Py_ssize_t i, j; - if (!PyArg_ParseTuple(args, "y*:a2b_hex", &parg)) + if (!PyArg_ParseTuple(args, "O&:a2b_hex", ascii_buffer_converter, &parg)) return NULL; argbuf = parg.buf; arglen = parg.len; @@ -1197,8 +1235,8 @@ binascii_a2b_qp(PyObject *self, PyObject static char *kwlist[] = {"data", "header", NULL}; int header = 0; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|i", kwlist, &pdata, - &header)) + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O&|i:a2b_qp", kwlist, + ascii_buffer_converter, &pdata, &header)) return NULL; data = pdata.buf; datalen = pdata.len;