diff -r 82b58807f481 Objects/abstract.c
--- a/Objects/abstract.c	Sat Nov 16 19:10:57 2013 +0200
+++ b/Objects/abstract.c	Thu Jan 16 15:22:37 2014 -0600
@@ -686,9 +686,9 @@
     result = PyObject_CallFunctionObjArgs(meth, format_spec, NULL);
     Py_DECREF(meth);
 
-    if (result && !PyUnicode_Check(result)) {
+    if (result && !PyUnicode_Check(result) && !PyBytes_Check(result)) {
         PyErr_SetString(PyExc_TypeError,
-            "__format__ method did not return string");
+            "__format__ method did not return string or bytes");
         Py_DECREF(result);
         result = NULL;
         goto done;
diff -r 82b58807f481 Objects/bytesobject.c
--- a/Objects/bytesobject.c	Sat Nov 16 19:10:57 2013 +0200
+++ b/Objects/bytesobject.c	Thu Jan 16 15:22:37 2014 -0600
@@ -359,8 +359,481 @@
     ret = PyBytes_FromFormatV(format, vargs);
     va_end(vargs);
     return ret;
+}
+
+/* Helpers for formatstring */
+
+/* Translate parsed printf-style conversion fields into a format()-style
+   spec, returned as a new bytes object (NULL with an exception set on
+   failure).  A zero fill/align/sign/alt, a width <= 0 or a prec < 0 means
+   that the field was not given. */
+static PyObject *
+formatspec(char fill, char align, char sign, char alt, Py_ssize_t width,
+           int prec, char type)
+{
+    /* We are careful not to overflow this buffer.  A 64-bit integer formatted
+       as a decimal is up to 19 bytes long. */
+    char buf[50];
+    char *p = buf;
+
+    if (fill && !align) {
+        /* printf's "0" flag pads between the sign and the digits, which is
+           what the '=' alignment does in a format() spec. */
+        *p++ = fill;
+        *p++ = '=';
+    }
+    else if (align) {
+        /* As in printf, an explicit "-" flag overrides the "0" flag. */
+        *p++ = align;
+    }
+    if (sign)
+        *p++ = sign;
+    if (alt)
+        *p++ = alt;
+    if (width > 0) {
+        PyOS_snprintf(p, sizeof(buf) - (size_t)(p - buf),
+                      "%" PY_FORMAT_SIZE_T "d", width);
+        p += strlen(p);
+    }
+    if (prec >= 0) {
+        /* An explicit precision of 0 (as in "%.0f") must be kept. */
+        PyOS_snprintf(p, sizeof(buf) - (size_t)(p - buf), ".%d", prec);
+        p += strlen(p);
+    }
+    *p++ = type;
+    *p++ = '\0';
+    assert((size_t)(p - buf) < sizeof(buf));
+    return PyBytes_FromString(buf);
 }
 
+/* Return a borrowed reference to the next argument and advance *p_argidx.
+   A negative arglen means args is a single non-tuple value, which is
+   returned as is.  Sets TypeError and returns NULL when no argument is
+   left. */
+Py_LOCAL_INLINE(PyObject *)
+getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
+{
+    Py_ssize_t argidx = *p_argidx;
+    if (argidx < arglen) {
+        (*p_argidx)++;
+        if (arglen < 0)
+            return args;
+        else
+            return PyTuple_GetItem(args, argidx);
+    }
+    PyErr_SetString(PyExc_TypeError,
+                    "not enough arguments for format string");
+    return NULL;
+}
+
+/* Convert v to bytes for the %s conversion: v itself if it is a bytes
+   object, else the result of its __bytes__ method, else the result of its
+   __format__ method called with an empty bytes spec.  Returns a new
+   reference, or NULL with an exception set. */
+static PyObject *
+formatbytes(PyObject *v)
+{
+    PyObject *result = NULL;
+    PyObject *func;
+    PyObject *spec;
+    _Py_IDENTIFIER(__bytes__);
+    _Py_IDENTIFIER(__format__);
+
+    /* is it a bytes object? */
+    if (PyBytes_Check(v)) {
+        result = v;
+        Py_INCREF(v);
+        goto pad;
+    }
+
+    /* does it support __bytes__? */
+    func = _PyObject_LookupSpecial(v, &PyId___bytes__);
+    if (func == NULL && PyErr_Occurred())
+        return NULL;
+    if (func != NULL) {
+        result = PyObject_CallFunctionObjArgs(func, NULL);
+        Py_DECREF(func);
+        if (result == NULL)
+            return NULL;
+        if (!PyBytes_Check(result)) {
+            PyErr_Format(PyExc_TypeError,
+                         "__bytes__ returned non-bytes (type %.200s)",
+                         Py_TYPE(result)->tp_name);
+            Py_DECREF(result);
+            return NULL;
+        }
+        goto pad;
+    }
+
+    /* does it support __format__ with bytes argument */
+    func = _PyObject_LookupSpecial(v, &PyId___format__);
+    if (func == NULL) {
+        if (!PyErr_Occurred())
+            PyErr_Format(PyExc_TypeError,
+                         "Type %.100s doesn't define __format__",
+                         Py_TYPE(v)->tp_name);
+        return NULL;
+    }
+    spec = PyBytes_FromString("");
+    if (spec == NULL) {
+        Py_DECREF(func);
+        return NULL;
+    }
+    result = PyObject_CallFunctionObjArgs(func, spec, NULL);
+    Py_DECREF(func);
+    Py_DECREF(spec);
+    if (result == NULL)
+        return NULL;
+    if (!PyBytes_Check(result)) {
+        PyErr_Format(PyExc_TypeError,
+                     "__format__ returned non-bytes (type %.200s)",
+                     Py_TYPE(result)->tp_name);
+        Py_DECREF(result);
+        return NULL;
+    }
+
+pad:
+    /* TODO: handle width/align */
+
+    return result;
+}
+
+
+/* Implement bytes % args: interpolate args into the printf-style template
+   held by the bytes object format.  args is a tuple of values, a mapping
+   (for "%(key)s" conversions) or a single value.  Returns a new bytes
+   object, or NULL with an exception set. */
+PyObject *
+PyBytes_Format(PyObject *format, PyObject *args)
+{
+    char *fmt, *res;
+    Py_ssize_t arglen, argidx;
+    Py_ssize_t reslen, rescnt, fmtcnt;
+    int args_owned = 0;
+    PyObject *result;
+    PyObject *temp = NULL;      /* owned; released at the error label */
+    PyObject *dict = NULL;
+    if (format == NULL || !PyBytes_Check(format) || args == NULL) {
+        PyErr_BadInternalCall();
+        return NULL;
+    }
+    fmt = PyBytes_AS_STRING(format);
+    fmtcnt = PyBytes_GET_SIZE(format);
+    reslen = rescnt = fmtcnt + 100;
+    result = PyBytes_FromStringAndSize((char *)NULL, reslen);
+    if (result == NULL)
+        return NULL;
+    res = PyBytes_AsString(result);
+    if (PyTuple_Check(args)) {
+        arglen = PyTuple_GET_SIZE(args);
+        argidx = 0;
+    }
+    else {
+        arglen = -1;
+        argidx = -2;
+    }
+    /* bytes objects set tp_as_mapping too, but a bytes argument is a single
+       value to format, not a mapping of keys. */
+    if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
+        !PyUnicode_Check(args) && !PyBytes_Check(args))
+        dict = args;
+    while (--fmtcnt >= 0) {
+        if (*fmt != '%') {
+            if (--rescnt < 0) {
+                rescnt = fmtcnt + 100;
+                reslen += rescnt;
+                /* On failure result is set to NULL; the error label
+                   still has to release an owned args. */
+                if (_PyBytes_Resize(&result, reslen) < 0)
+                    goto error;
+                res = PyBytes_AS_STRING(result)
+                    + reslen - rescnt;
+                --rescnt;
+            }
+            *res++ = *fmt++;
+        }
+        else {
+            /* Got a format specifier */
+            PyObject *spec;
+            char align = 0;
+            char fill = 0;
+            char sign = 0;
+            char alt = 0;
+            Py_ssize_t width = -1;
+            int prec = -1;
+
+            int c = '\0';
+            PyObject *v = NULL;
+            char *pbuf;
+            Py_ssize_t len;
+            char formatbuf[2]; /* for %c only */
+            fmt++;
+            if (*fmt == '(') {
+                char *keystart;
+                Py_ssize_t keylen;
+                PyObject *key;
+                int pcount = 1;
+
+                if (dict == NULL) {
+                    PyErr_SetString(PyExc_TypeError,
+                                    "format requires a mapping");
+                    goto error;
+                }
+                ++fmt;
+                --fmtcnt;
+                keystart = fmt;
+                /* Skip over balanced parentheses */
+                while (pcount > 0 && --fmtcnt >= 0) {
+                    if (*fmt == ')')
+                        --pcount;
+                    else if (*fmt == '(')
+                        ++pcount;
+                    fmt++;
+                }
+                keylen = fmt - keystart - 1;
+                if (fmtcnt < 0 || pcount > 0) {
+                    PyErr_SetString(PyExc_ValueError,
+                                    "incomplete format key");
+                    goto error;
+                }
+                key = PyUnicode_FromStringAndSize(keystart,
+                                                  keylen);
+                if (key == NULL)
+                    goto error;
+                if (args_owned) {
+                    Py_DECREF(args);
+                    args_owned = 0;
+                }
+                args = PyObject_GetItem(dict, key);
+                Py_DECREF(key);
+                if (args == NULL) {
+                    goto error;
+                }
+                args_owned = 1;
+                arglen = -1;
+                argidx = -2;
+            }
+            while (--fmtcnt >= 0) {
+                switch (c = *fmt++) {
+                case '-': align = '<'; continue;
+                case '+': sign = '+'; continue;
+                case ' ':
+                    /* "+" overrides the space flag, whatever the order */
+                    if (sign != '+')
+                        sign = ' ';
+                    continue;
+                case '#': alt = '#'; continue;
+                case '0': fill = '0'; continue;
+                }
+                break;
+            }
+            if (c == '*') {
+                v = getnextarg(args, arglen, &argidx);
+                if (v == NULL)
+                    goto error;
+                if (!PyLong_Check(v)) {
+                    PyErr_SetString(PyExc_TypeError,
+                                    "* wants int");
+                    goto error;
+                }
+                width = PyLong_AsSsize_t(v);
+                if (width == -1 && PyErr_Occurred())
+                    goto error;
+                if (width < 0) {
+                    /* -PY_SSIZE_T_MIN is not representable */
+                    if (width < -PY_SSIZE_T_MAX) {
+                        PyErr_SetString(PyExc_ValueError,
+                                        "width too big");
+                        goto error;
+                    }
+                    align = '<';
+                    width = -width;
+                }
+                if (--fmtcnt >= 0)
+                    c = *fmt++;
+            }
+            else if (c >= 0 && isdigit(c)) {
+                width = c - '0';
+                while (--fmtcnt >= 0) {
+                    c = Py_CHARMASK(*fmt++);
+                    if (!isdigit(c))
+                        break;
+                    /* Test before multiplying: signed overflow is
+                       undefined behavior. */
+                    if (width > (PY_SSIZE_T_MAX - (c - '0')) / 10) {
+                        PyErr_SetString(
+                            PyExc_ValueError,
+                            "width too big");
+                        goto error;
+                    }
+                    width = width*10 + (c - '0');
+                }
+            }
+            if (c == '.') {
+                prec = 0;
+                if (--fmtcnt >= 0)
+                    c = *fmt++;
+                if (c == '*') {
+                    long lprec;
+                    v = getnextarg(args, arglen, &argidx);
+                    if (v == NULL)
+                        goto error;
+                    if (!PyLong_Check(v)) {
+                        PyErr_SetString(
+                            PyExc_TypeError,
+                            "* wants int");
+                        goto error;
+                    }
+                    lprec = PyLong_AsLong(v);
+                    if (lprec == -1 && PyErr_Occurred())
+                        goto error;
+                    if (lprec > INT_MAX) {
+                        PyErr_SetString(
+                            PyExc_ValueError,
+                            "prec too big");
+                        goto error;
+                    }
+                    prec = lprec < 0 ? 0 : (int)lprec;
+                    if (--fmtcnt >= 0)
+                        c = *fmt++;
+                }
+                else if (c >= 0 && isdigit(c)) {
+                    prec = c - '0';
+                    while (--fmtcnt >= 0) {
+                        c = Py_CHARMASK(*fmt++);
+                        if (!isdigit(c))
+                            break;
+                        if (prec > (INT_MAX - (c - '0')) / 10) {
+                            PyErr_SetString(
+                                PyExc_ValueError,
+                                "prec too big");
+                            goto error;
+                        }
+                        prec = prec*10 + (c - '0');
+                    }
+                }
+            } /* prec */
+            if (fmtcnt >= 0) {
+                if (c == 'h' || c == 'l' || c == 'L') {
+                    if (--fmtcnt >= 0)
+                        c = *fmt++;
+                }
+            }
+            if (fmtcnt < 0) {
+                PyErr_SetString(PyExc_ValueError,
+                                "incomplete format");
+                goto error;
+            }
+            if (c != '%') {
+                v = getnextarg(args, arglen, &argidx);
+                if (v == NULL)
+                    goto error;
+            }
+            switch (c) {
+            case '%':
+                pbuf = "%";
+                len = 1;
+                break;
+            case 's':
+                temp = formatbytes(v);
+                if (temp == NULL)
+                    goto error;
+                pbuf = PyBytes_AS_STRING(temp);
+                len = PyBytes_GET_SIZE(temp);
+                if (prec >= 0 && len > prec)
+                    len = prec;
+                break;
+            case 'i':
+            case 'd':
+            case 'u':
+            case 'o':
+            case 'x':
+            case 'X':
+            case 'e':
+            case 'E':
+            case 'f':
+            case 'F':
+            case 'g':
+            case 'G':
+                /* format() has no 'i' or 'u' presentation type; in
+                   printf-style formatting both mean decimal. */
+                spec = formatspec(fill, align, sign, alt, width, prec,
+                                  (c == 'i' || c == 'u') ? 'd' : (char)c);
+                if (spec == NULL)
+                    goto error;
+                temp = PyObject_Format(v, spec);
+                Py_DECREF(spec);
+                if (temp == NULL)
+                    goto error;
+                if (!PyBytes_Check(temp)) {
+                    PyErr_SetString(PyExc_TypeError,
+                        "invalid type from __format__(), require bytes");
+                    goto error;
+                }
+                pbuf = PyBytes_AS_STRING(temp);
+                len = PyBytes_GET_SIZE(temp);
+                break;
+            case 'c':
+                pbuf = formatbuf;
+                if (!PyArg_Parse(v, "b;%c requires int", &pbuf[0]))
+                    goto error;
+                pbuf[1] = '\0';
+                len = 1;
+                break;
+            default:
+                PyErr_Format(PyExc_ValueError,
+                  "unsupported format character '%c' (0x%x) "
+                  "at index %zd",
+                  c, c,
+                  (Py_ssize_t)(fmt - 1 -
+                               PyBytes_AsString(format)));
+                goto error;
+            }
+            if (rescnt < len) {
+                reslen -= rescnt;
+                rescnt = len + 100;
+                reslen += rescnt;
+                if (reslen < 0) {
+                    PyErr_NoMemory();
+                    goto error;
+                }
+                if (_PyBytes_Resize(&result, reslen) < 0)
+                    goto error;
+                res = PyBytes_AS_STRING(result) + reslen - rescnt;
+            }
+            Py_MEMCPY(res, pbuf, len);
+            res += len;
+            rescnt -= len;
+            if (dict && (argidx < arglen) && c != '%') {
+                PyErr_SetString(PyExc_TypeError,
+                           "not all arguments converted during string formatting");
+                goto error;
+            }
+            Py_CLEAR(temp);
+        } /* '%' */
+    } /* until end */
+    if (argidx < arglen && !dict) {
+        PyErr_SetString(PyExc_TypeError,
+                        "not all arguments converted during string formatting");
+        goto error;
+    }
+    if (args_owned) {
+        Py_DECREF(args);
+    }
+    _PyBytes_Resize(&result, reslen - rescnt);
+    return result;
+
+ error:
+    /* Single exit for failures: result is NULL here if a resize failed. */
+    Py_XDECREF(temp);
+    Py_XDECREF(result);
+    if (args_owned) {
+        Py_DECREF(args);
+    }
+    return NULL;
+}
+
+
 static void
 bytes_dealloc(PyObject *op)
 {
@@ -2443,6 +2916,21 @@
 };
 
 static PyObject *
+bytes_mod(PyObject *v, PyObject *w)
+{
+    if (!PyBytes_Check(v))
+        Py_RETURN_NOTIMPLEMENTED;
+    return PyBytes_Format(v, w);
+}
+
+static PyNumberMethods bytes_as_number = {
+    0,              /*nb_add*/
+    0,              /*nb_subtract*/
+    0,              /*nb_multiply*/
+    bytes_mod,      /*nb_remainder*/
+};
+
+static PyObject *
 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
 
 static PyObject *
@@ -2733,7 +3221,7 @@
     0,                                          /* tp_setattr */
     0,                                          /* tp_reserved */
     (reprfunc)bytes_repr,                       /* tp_repr */
-    0,                                          /* tp_as_number */
+    &bytes_as_number,                           /* tp_as_number */
     &bytes_as_sequence,                         /* tp_as_sequence */
     &bytes_as_mapping,                          /* tp_as_mapping */
     (hashfunc)bytes_hash,                       /* tp_hash */
diff -r 82b58807f481 Objects/longobject.c
--- a/Objects/longobject.c	Sat Nov 16 19:10:57 2013 +0200
+++ b/Objects/longobject.c	Thu Jan 16 15:22:37 2014 -0600
@@ -4432,13 +4432,29 @@
 static PyObject *
 long__format__(PyObject *self, PyObject *args)
 {
+    int isbytes = 0;
+    PyObject *v, *r;
     PyObject *format_spec;
     _PyUnicodeWriter writer;
     int ret;
 
-    if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
+    if (!PyArg_ParseTuple(args, "O:__format__", &v))
         return NULL;
 
+    if (PyBytes_Check(v)) {
+        isbytes = 1;
+        format_spec = PyUnicode_FromEncodedObject(v, "latin-1", NULL);
+        if (format_spec == NULL)
+            return NULL;
+    }
+    else if (PyUnicode_Check(v)) {
+        format_spec = v;
+    }
+    else {
+        PyErr_Format(PyExc_TypeError, "must be str or bytes, not %.200s",
+                     Py_TYPE(v)->tp_name);
+        return NULL;
+    }
     _PyUnicodeWriter_Init(&writer);
     ret = _PyLong_FormatAdvancedWriter(
         &writer,
@@ -4448,7 +4464,20 @@
         _PyUnicodeWriter_Dealloc(&writer);
+        if (isbytes)
+            Py_DECREF(format_spec);
         return NULL;
     }
-    return _PyUnicodeWriter_Finish(&writer);
+    r = _PyUnicodeWriter_Finish(&writer);
+    if (isbytes) {
+        /* format_spec is a temporary owned by us only in the bytes case;
+           release it whether or not formatting succeeded. */
+        Py_DECREF(format_spec);
+        if (r != NULL) {
+            PyObject *rr = PyUnicode_AsLatin1String(r);
+            Py_DECREF(r);
+            r = rr;
+        }
+    }
+    return r;
 }
 
 /* Return a pair (q, r) such that a = b * q + r, and