Index: Objects/bytesobject.c =================================================================== --- Objects/bytesobject.c (revision 58412) +++ Objects/bytesobject.c (working copy) @@ -37,15 +37,20 @@ static int _getbytevalue(PyObject* arg, int *value) { - PyObject *intarg = PyNumber_Int(arg); - if (! intarg) + long face_value; + + if (PyInt_Check(arg)) { + face_value = PyInt_AsLong(arg); + if (face_value < 0 || face_value >= 256) { + PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)"); + return 0; + } + } else { + PyErr_Format(PyExc_TypeError, "an integer is required"); return 0; - *value = PyInt_AsLong(intarg); - Py_DECREF(intarg); - if (*value < 0 || *value >= 256) { - PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)"); - return 0; } + + *value = face_value; return 1; } @@ -80,9 +85,7 @@ { PyBufferProcs *buffer = Py_Type(obj)->tp_as_buffer; - if (buffer == NULL || - PyUnicode_Check(obj) || - buffer->bf_getbuffer == NULL) + if (buffer == NULL || buffer->bf_getbuffer == NULL) { PyErr_Format(PyExc_TypeError, "Type %.100s doesn't support the buffer API", @@ -1088,6 +1102,23 @@ return res; } +/* TODO(gps): + * These methods need implementing (porting over from stringobject.c): + * + * .capitalize(), .center(), + * .expandtabs(), .isalnum(), .isalpha(), .isdigit(), + * .islower(), .isspace(), .istitle(), .isupper(), + * .rjust(), + * .splitlines(), .swapcase(), .title(), + * .upper(), .zfill() + * + * XXX(gps) the code is -shared- for so many of these, that's gross. I wish + * we had templates or generics or OO inheritance here. A .h file with the + * methods as big CPP macros as templates would work but is ugly (especially + * when debugging). Or can we do an (evil?) common substructure hack to + * allow us to write generic methods that work on both buffer (PyBytes_*) + * and bytes (PyString_*) objects? 
+ */ PyDoc_STRVAR(find__doc__, "B.find(sub [,start [,end]]) -> int\n\ @@ -1118,27 +1149,25 @@ bytes_count(PyBytesObject *self, PyObject *args) { PyObject *sub_obj; - const char *str = PyBytes_AS_STRING(self), *sub; - Py_ssize_t sub_len; + const char *str = PyBytes_AS_STRING(self); Py_ssize_t start = 0, end = PY_SSIZE_T_MAX; + Py_buffer vsub; + PyObject *count_obj; if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj, _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) return NULL; - if (PyBytes_Check(sub_obj)) { - sub = PyBytes_AS_STRING(sub_obj); - sub_len = PyBytes_GET_SIZE(sub_obj); - } - /* XXX --> use the modern buffer interface */ - else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len)) + if (_getbuffer(sub_obj, &vsub) < 0) return NULL; _adjust_indices(&start, &end, PyBytes_GET_SIZE(self)); - return PyInt_FromSsize_t( - stringlib_count(str + start, end - start, sub, sub_len) + count_obj = PyInt_FromSsize_t( + stringlib_count(str + start, end - start, vsub.buf, vsub.len) ); + PyObject_ReleaseBuffer(sub_obj, &vsub); + return count_obj; } @@ -1210,36 +1239,39 @@ Py_ssize_t end, int direction) { Py_ssize_t len = PyBytes_GET_SIZE(self); - Py_ssize_t slen; - const char* sub; const char* str; + Py_buffer vsubstr; + int rv; - if (PyBytes_Check(substr)) { - sub = PyBytes_AS_STRING(substr); - slen = PyBytes_GET_SIZE(substr); - } - /* XXX --> Use the modern buffer interface */ - else if (PyObject_AsCharBuffer(substr, &sub, &slen)) - return -1; str = PyBytes_AS_STRING(self); + if (_getbuffer(substr, &vsubstr) < 0) + return -1; + _adjust_indices(&start, &end, len); if (direction < 0) { /* startswith */ - if (start+slen > len) - return 0; + if (start+vsubstr.len > len) { + rv = 0; + goto done; + } } else { /* endswith */ - if (end-start < slen || start > len) - return 0; + if (end-start < vsubstr.len || start > len) { + rv = 0; + goto done; + } - if (end-slen > start) - start = end - slen; + if (end-vsubstr.len > start) + start = end - vsubstr.len; } - if (end-start 
>= slen) - return ! memcmp(str+start, sub, slen); - return 0; + if (end-start >= vsubstr.len) + rv = ! memcmp(str+start, vsubstr.buf, vsubstr.len); + +done: + PyObject_ReleaseBuffer(substr, &vsubstr); + return rv; } @@ -1340,53 +1372,47 @@ register const char *table; register Py_ssize_t i, c, changed = 0; PyObject *input_obj = (PyObject*)self; - const char *table1, *output_start, *del_table=NULL; - Py_ssize_t inlen, tablen, dellen = 0; + const char *output_start; + Py_ssize_t inlen; PyObject *result; int trans_table[256]; PyObject *tableobj, *delobj = NULL; + Py_buffer vtable, vdel; if (!PyArg_UnpackTuple(args, "translate", 1, 2, &tableobj, &delobj)) return NULL; - if (PyBytes_Check(tableobj)) { - table1 = PyBytes_AS_STRING(tableobj); - tablen = PyBytes_GET_SIZE(tableobj); - } - /* XXX -> Use the modern buffer interface */ - else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen)) + if (_getbuffer(tableobj, &vtable) < 0) return NULL; - if (tablen != 256) { + if (vtable.len != 256) { PyErr_SetString(PyExc_ValueError, "translation table must be 256 characters long"); - return NULL; + result = NULL; + goto done; } if (delobj != NULL) { - if (PyBytes_Check(delobj)) { - del_table = PyBytes_AS_STRING(delobj); - dellen = PyBytes_GET_SIZE(delobj); + if (_getbuffer(delobj, &vdel) < 0) { + result = NULL; + goto done; } - /* XXX -> use the modern buffer interface */ - else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen)) - return NULL; } else { - del_table = NULL; - dellen = 0; + vdel.buf = NULL; + vdel.len = 0; } - table = table1; + table = (const char *)vtable.buf; inlen = PyBytes_GET_SIZE(input_obj); result = PyBytes_FromStringAndSize((char *)NULL, inlen); if (result == NULL) - return NULL; + goto done; output_start = output = PyBytes_AsString(result); input = PyBytes_AS_STRING(input_obj); - if (dellen == 0) { + if (vdel.len == 0) { /* If no deletions are required, use faster code */ for (i = inlen; --i >= 0; ) { c = Py_CHARMASK(*input++); @@ -1394,17 +1420,18 @@ 
changed = 1; } if (changed || !PyBytes_CheckExact(input_obj)) - return result; + goto done; Py_DECREF(result); Py_INCREF(input_obj); - return input_obj; + result = input_obj; + goto done; } for (i = 0; i < 256; i++) trans_table[i] = Py_CHARMASK(table[i]); - for (i = 0; i < dellen; i++) - trans_table[(int) Py_CHARMASK(del_table[i])] = -1; + for (i = 0; i < vdel.len; i++) + trans_table[(int) Py_CHARMASK( ((unsigned char*)vdel.buf)[i] )] = -1; for (i = inlen; --i >= 0; ) { c = Py_CHARMASK(*input++); @@ -1416,11 +1443,17 @@ if (!changed && PyBytes_CheckExact(input_obj)) { Py_DECREF(result); Py_INCREF(input_obj); - return input_obj; + result = input_obj; + goto done; } /* Fix the size of the resulting string */ if (inlen > 0) PyBytes_Resize(result, output - output_start); + +done: + PyObject_ReleaseBuffer(tableobj, &vtable); + if (delobj != NULL) + PyObject_ReleaseBuffer(delobj, &vdel); return result; } @@ -2264,6 +2297,8 @@ { PyObject *bytesep, *result; + /* XXX(gps) could this use _getbuffer instead of creating an entire new + * copy in the bytesep object? */ bytesep = PyBytes_FromObject(sep_obj); if (! bytesep) return NULL; @@ -2291,6 +2326,8 @@ { PyObject *bytesep, *result; + /* XXX(gps) could this use _getbuffer instead of creating an entire new + * copy in the bytesep object? */ bytesep = PyBytes_FromObject(sep_obj); if (! bytesep) return NULL; @@ -2459,6 +2496,9 @@ static PyObject * bytes_extend(PyBytesObject *self, PyObject *arg) { + /* XXX(gps): the docstring above says any iterable int will do but the + * bytes_setslice code really wants something supporting PEP 3118. + * Is a list or tuple of 0 <= ints <= 255 also supposed to work? 
*/ if (bytes_setslice(self, Py_Size(self), Py_Size(self), arg) == -1) return NULL; Py_RETURN_NONE; @@ -2866,26 +2906,34 @@ static PyObject * bytes_fromhex(PyObject *cls, PyObject *args) { - PyObject *newbytes; - char *hex, *buf; - Py_ssize_t len, byteslen, i, j; + PyObject *newbytes, *hexobj; + char *buf; + unsigned char *hex; + Py_ssize_t byteslen, i, j; int top, bot; + Py_buffer vhex; - if (!PyArg_ParseTuple(args, "s#:fromhex", &hex, &len)) + if (!PyArg_ParseTuple(args, "O:fromhex", &hexobj)) return NULL; - byteslen = len / 2; /* max length if there are no spaces */ + if (_getbuffer(hexobj, &vhex) < 0) + return NULL; + byteslen = vhex.len / 2; /* max length if there are no spaces */ + hex = vhex.buf; + newbytes = PyBytes_FromStringAndSize(NULL, byteslen); - if (!newbytes) + if (!newbytes) { + PyObject_ReleaseBuffer(hexobj, &vhex); return NULL; + } buf = PyBytes_AS_STRING(newbytes); - for (i = j = 0; i < len; i += 2) { + for (i = j = 0; i < vhex.len; i += 2) { /* skip over spaces in the input */ - while (Py_CHARMASK(hex[i]) == ' ') + while (Py_CHARMASK( hex[i] ) == ' ') i++; - if (i >= len) + if (i >= vhex.len) break; top = hex_digit_to_int(Py_CHARMASK(hex[i])); bot = hex_digit_to_int(Py_CHARMASK(hex[i+1])); @@ -2900,10 +2948,12 @@ } if (PyBytes_Resize(newbytes, j) < 0) goto error; + PyObject_ReleaseBuffer(hexobj, &vhex); return newbytes; error: Py_DECREF(newbytes); + PyObject_ReleaseBuffer(hexobj, &vhex); return NULL; } Index: Lib/test/test_bytes.py =================================================================== --- Lib/test/test_bytes.py (revision 58412) +++ Lib/test/test_bytes.py (working copy) @@ -454,17 +454,18 @@ def test_fromhex(self): self.assertRaises(TypeError, bytes.fromhex) self.assertRaises(TypeError, bytes.fromhex, 1) - self.assertEquals(bytes.fromhex(''), bytes()) + self.assertEquals(bytes.fromhex(b''), bytes()) b = bytes([0x1a, 0x2b, 0x30]) - self.assertEquals(bytes.fromhex('1a2B30'), b) - self.assertEquals(bytes.fromhex(' 1A 2B 30 '), b) + 
self.assertEquals(bytes.fromhex(b'1a2B30'), b) + self.assertEquals(bytes.fromhex(b' 1A 2B 30 '), b) self.assertEquals(bytes.fromhex(memoryview(b'')), bytes()) self.assertEquals(bytes.fromhex(memoryview(b'0000')), bytes([0, 0])) - self.assertRaises(ValueError, bytes.fromhex, 'a') - self.assertRaises(ValueError, bytes.fromhex, 'rt') - self.assertRaises(ValueError, bytes.fromhex, '1a b cd') - self.assertRaises(ValueError, bytes.fromhex, '\x00') - self.assertRaises(ValueError, bytes.fromhex, '12 \x00 34') + self.assertRaises(TypeError, bytes.fromhex, '1B') + self.assertRaises(ValueError, bytes.fromhex, b'a') + self.assertRaises(ValueError, bytes.fromhex, b'rt') + self.assertRaises(ValueError, bytes.fromhex, b'1a b cd') + self.assertRaises(ValueError, bytes.fromhex, b'\x00') + self.assertRaises(ValueError, bytes.fromhex, b'12 \x00 34') def test_join(self): self.assertEqual(b"".join([]), bytes()) @@ -504,11 +505,12 @@ self.assertEqual(b, b'heo') self.assertRaises(ValueError, lambda: b.remove(ord('l'))) self.assertRaises(ValueError, lambda: b.remove(400)) - self.assertRaises(ValueError, lambda: b.remove('e')) + self.assertRaises(TypeError, lambda: b.remove('e')) # remove first and last b.remove(ord('o')) b.remove(ord('h')) self.assertEqual(b, b'e') + self.assertRaises(TypeError, lambda: b.remove(b'e')) def test_pop(self): b = b'world' @@ -542,6 +544,7 @@ b = bytes() b.append(ord('A')) self.assertEqual(len(b), 1) + self.assertRaises(TypeError, lambda: b.append(b'o')) def test_insert(self): b = b'msssspp' @@ -550,6 +553,7 @@ b.insert(-2, ord('i')) b.insert(1000, ord('i')) self.assertEqual(b, b'mississippi') + self.assertRaises(TypeError, lambda: b.insert(0, b'1')) def test_startswith(self): b = b'hello'